Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 1997-2010, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *
      7 * File ULOC.CPP
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   04/01/97    aliu        Creation.
     13 *   08/21/98    stephen     JDK 1.2 sync
     14 *   12/08/98    rtg         New Locale implementation and C API
     15 *   03/15/99    damiba      overhaul.
     16 *   04/06/99    stephen     changed setDefault() to realloc and copy
     17 *   06/14/99    stephen     Changed calls to ures_open for new params
     18 *   07/21/99    stephen     Modified setDefault() to propagate to C++
     19 *   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
     20 *                           brought canonicalization code into line with spec
     21 *****************************************************************************/
     22 
     23 /*
     24    POSIX's locale format, from putil.c: [no spaces]
     25 
     26      ll [ _CC ] [ . MM ] [ @ VV]
     27 
     28      l = lang, C = ctry, M = charmap, V = variant
     29 */
     30 
     31 #include "unicode/utypes.h"
     32 #include "unicode/ustring.h"
     33 #include "unicode/uloc.h"
     34 
     35 #include "putilimp.h"
     36 #include "ustr_imp.h"
     37 #include "ulocimp.h"
     38 #include "umutex.h"
     39 #include "cstring.h"
     40 #include "cmemory.h"
     41 #include "ucln_cmn.h"
     42 #include "locmap.h"
     43 #include "uarrsort.h"
     44 #include "uenumimp.h"
     45 #include "uassert.h"
     46 
     47 #include <stdio.h> /* for sprintf */
     48 
     49 /* ### Declarations **************************************************/
     50 
/*
 * Locale stuff from locid.cpp.
 *
 * Only the prototypes are visible at this point of the file; the notes
 * below are inferred from names and signatures and should be confirmed
 * against the definitions.
 */
/* Presumably installs `id` as the default locale -- TODO confirm. */
U_CFUNC void locale_set_default(const char *id);
/* Presumably returns the ID of the current default locale -- TODO confirm. */
U_CFUNC const char *locale_get_default(void);
/*
 * Keyword extraction for a locale ID.
 * NOTE(review): by the parameter names, `keywords`/`keywordCapacity` and
 * `values`/`valuesCapacity` look like caller-supplied output buffers with
 * their sizes, `valLen` an output length, `valuesToo` a switch for also
 * returning values, and `status` the usual ICU in/out error code -- verify
 * against the definition before relying on this.
 */
U_CFUNC int32_t
locale_getKeywords(const char *localeID,
            char prev,
            char *keywords, int32_t keywordCapacity,
            char *values, int32_t valuesCapacity, int32_t *valLen,
            UBool valuesToo,
            UErrorCode *status);
     61 
     62 /* ### Data tables **************************************************/
     63 
     64 /**
     65  * Table of language codes, both 2- and 3-letter, with preference
     66  * given to 2-letter codes where possible.  Includes 3-letter codes
     67  * that lack a 2-letter equivalent.
     68  *
     69  * This list must be in sorted order.  This list is returned directly
     70  * to the user by some API.
     71  *
     72  * This list must be kept in sync with LANGUAGES_3, with corresponding
     73  * entries matched.
     74  *
     75  * This table should be terminated with a NULL entry, followed by a
     76  * second list, and another NULL entry.  The first list is visible to
     77  * user code when this array is returned by API.  The second list
     78  * contains codes we support, but do not expose through user API.
     79  *
     80  * Notes
     81  *
     82  * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
     83  * include the revisions up to 2001/7/27 *CWB*
     84  *
     85  * The 3 character codes are the terminology codes like RFC 3066.  This
     86  * is compatible with prior ICU codes
     87  *
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn.  They are still in
 * the table, but now sit in the second list at the end of the table
 * because their 3 character codes are duplicates.  This avoids bad
 * searches going from 3 to 2 character codes.
     92  *
     93  * The range qaa-qtz is reserved for local use
     94  */
static const char * const LANGUAGES[] = {
    /* First list: sorted codes; entry i pairs with LANGUAGES_3[i]. */
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",
    "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",
    "ang", "anp", "apa",
    "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",
    "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",
    "bai", "bal", "ban", "bas", "bat", "be",  "bej",
    "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",
    "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",
    "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",
    "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",
    "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",
    "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",
    "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",
    "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",
    "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",
    "fr",  "frm", "fro", "frr", "frs", "fur", "fy",
    "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
    "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "gu",  "gv", "gwi",
    "ha",  "hai", "haw", "he",  "hi",  "hil", "him",
    "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",
    "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",
    "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",
    "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",
    "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",
    "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",
    "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",
    "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",
    "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",
    "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",
    "mdf", "mdr", "men", "mfe", "mg",  "mga", "mh",  "mi",  "mic", "min",
    "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",
    "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",
    "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",
    "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",
    "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",
    "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",
    "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",
    "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",
    "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",
    "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",
    "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",
    "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
    "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
    "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",
    "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",
    "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",
    "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr", "trv",
    "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
    "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",
    "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",
    "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",
    "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",
    "zu",  "zun", "zxx", "zza",
NULL,   /* end of the first list */
    /* Second list: pairs with the second list of LANGUAGES_3. */
    "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
NULL    /* end of the second list */
};
    159 static const char* const DEPRECATED_LANGUAGES[]={
    160     "in", "iw", "ji", "jw", NULL, NULL
    161 };
    162 static const char* const REPLACEMENT_LANGUAGES[]={
    163     "id", "he", "yi", "jv", NULL, NULL
    164 };
    165 
    166 /**
    167  * Table of 3-letter language codes.
    168  *
    169  * This is a lookup table used to convert 3-letter language codes to
    170  * their 2-letter equivalent, where possible.  It must be kept in sync
    171  * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
    172  * same language as LANGUAGES_3[i].  The commented-out lines are
    173  * copied from LANGUAGES to make eyeballing this baby easier.
    174  *
    175  * Where a 3-letter language code has no 2-letter equivalent, the
    176  * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
    177  *
    178  * This table should be terminated with a NULL entry, followed by a
    179  * second list, and another NULL entry.  The two lists correspond to
    180  * the two lists in LANGUAGES.
    181  */
    182 static const char * const LANGUAGES_3[] = {
    183 /*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
    184     "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
    185 /*  "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",    */
    186     "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
    187 /*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",    */
    188     "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
    189 /*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
    190     "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
    191 /*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
    192     "bai", "bal", "ban", "bas", "bat", "bel", "bej",
    193 /*  "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
    194     "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
    195 /*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",     */
    196     "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
    197 /*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
    198     "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
    199 /*  "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",    */
    200     "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
    201 /*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
    202     "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
    203 /*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
    204     "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
    205 /*  "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",    */
    206     "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
    207 /*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",    */
    208     "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
    209 /*  "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
    210     "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
    211 /*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
    212     "enm", "epo", "spa", "est", "eus", "ewo", "fas",
    213 /*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",    */
    214     "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
    215 /*  "fr",  "frm", "fro", "frr", "frs", "fur", "fy",  "ga",  "gaa", "gay",    */
    216     "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
    217 /*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
    218     "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
    219 /*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",     */
    220     "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
    221 /*  "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",    */
    222     "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
    223 /*  "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",     */
    224     "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
    225 /*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
    226     "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
    227 /*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",      */
    228     "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
    229 /*  "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",   */
    230     "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
    231 /*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",*/
    232     "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",
    233 /*  "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",     */
    234     "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
    235 /*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",     */
    236     "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
    237 /*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",    */
    238     "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
    239 /*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
    240     "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
    241 /*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",    */
    242     "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
    243 /*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
    244     "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
    245 /*  "mdf", "mdr", "men", "mfe", "mg",  "mga", "mh",  "mi",  "mic", "min",    */
    246     "mdf", "mdr", "men", "mfe", "mlg", "mga", "mah", "mri", "mic", "min",
    247 /*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
    248     "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
    249 /*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",    */
    250     "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
    251 /*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",    */
    252     "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
    253 /*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
    254     "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
    255 /*  "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",    */
    256     "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub",
    257 /*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
    258     "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
    259 /*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
    260     "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
    261 /*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
    262     "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    263 /*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
    264     "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
    265 /*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",    */
    266     "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
    267 /*  "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",    */
    268     "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
    269 /*  "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",    */
    270     "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
    271 /*  "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",    */
    272     "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
    273 /*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
    274     "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
    275 /*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
    276     "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
    277 /*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
    278     "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
    279 /*  "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",    */
    280     "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter",
    281 /*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
    282     "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
    283 /*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr", "trv",    */
    284     "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
    285 /*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",     */
    286     "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
    287 /*  "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
    288     "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
    289 /*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",    */
    290     "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
    291 /*  "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",    */
    292     "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
    293 /*  "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",    */
    294     "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd",
    295 /*  "zu",  "zun", "zxx", "zza",                                         */
    296     "zul", "zun", "zxx", "zza",
    297 NULL,
    298 /*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    299     "ind", "heb", "yid", "jaw", "srp",
    300 NULL
    301 };
    302 
    303 /**
    304  * Table of 2-letter country codes.
    305  *
    306  * This list must be in sorted order.  This list is returned directly
    307  * to the user by some API.
    308  *
    309  * This list must be kept in sync with COUNTRIES_3, with corresponding
    310  * entries matched.
    311  *
    312  * This table should be terminated with a NULL entry, followed by a
    313  * second list, and another NULL entry.  The first list is visible to
    314  * user code when this array is returned by API.  The second list
    315  * contains codes we support, but do not expose through user API.
    316  *
    317  * Notes:
    318  *
    319  * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
    320  * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
    321  * new codes keeping the old ones for compatibility updated to include
    322  * 1999/12/03 revisions *CWB*
    323  *
    324  * RO(ROM) is now RO(ROU) according to
    325  * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
    326  */
static const char * const COUNTRIES[] = {
    /* First list: sorted codes; entry i pairs with COUNTRIES_3[i]. */
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",
    "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
NULL,   /* end of the first list */
    /* Second list: pairs with the second list of COUNTRIES_3. */
    "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   /* obsolete country codes */
NULL    /* end of the second list */
};
    362 
    363 static const char* const DEPRECATED_COUNTRIES[] ={
    364     "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */
    365 };
    366 static const char* const REPLACEMENT_COUNTRIES[] = {
    367 /*  "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
    368     "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", NULL, NULL  /* replacement country codes */
    369 };
    370 
    371 /**
    372  * Table of 3-letter country codes.
    373  *
    374  * This is a lookup table used to convert 3-letter country codes to
    375  * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
    376  * For all valid i, COUNTRIES[i] must refer to the same country as
    377  * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
    378  * to make eyeballing this baby easier.
    379  *
    380  * This table should be terminated with a NULL entry, followed by a
    381  * second list, and another NULL entry.  The two lists correspond to
    382  * the two lists in COUNTRIES.
    383  */
static const char * const COUNTRIES_3[] = {
    /* First list: entry i is the 3-letter code for COUNTRIES[i]. */
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",     */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
/*  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL,   /* end of the first list */
    /* Second list: 3-letter codes for the second list of COUNTRIES. */
/*  "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   */
    "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
NULL    /* end of the second list */
};
    450 
    451 typedef struct CanonicalizationMap {
    452     const char *id;          /* input ID */
    453     const char *canonicalID; /* canonicalized output ID */
    454     const char *keyword;     /* keyword, or NULL if none */
    455     const char *value;       /* keyword value, or NULL if kw==NULL */
    456 } CanonicalizationMap;
    457 
    458 /**
    459  * A map to canonicalize locale IDs.  This handles a variety of
    460  * different semantic kinds of transformations.
    461  */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    /*
     * Columns: { id, canonicalID, keyword, value }.
     * Rows with a non-NULL keyword (e.g. the *_PREEURO IDs) map to a
     * plain locale ID plus a keyword/value pair; rows with NULL keyword
     * are pure ID-to-ID rewrites.
     */
    { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
    { "c",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
    { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
    { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
    { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
    { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
    { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
    { "cel_GAULISH",    "cel__GAULISH", NULL, NULL }, /* registered name */
    { "de_1901",        "de__1901", NULL, NULL }, /* registered name */
    { "de_1906",        "de__1906", NULL, NULL }, /* registered name */
    { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
    { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
    { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
    { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
    { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
    { "en_BOONT",       "en__BOONT", NULL, NULL }, /* registered name */
    { "en_SCOUSE",      "en__SCOUSE", NULL, NULL }, /* registered name */
    { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
    { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
    { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
    { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
    { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
    { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
    { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
    { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
    { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
    { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
    { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
    { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
    { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
    { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
    { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
    { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
    { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
    { "sl_ROZAJ",       "sl__ROZAJ", NULL, NULL }, /* registered name */
    { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
    { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
    { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
    { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
    { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
    { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
    { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
    { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
    { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
    { "zh_GAN",         "zh__GAN", NULL, NULL }, /* registered name */
    { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
    { "zh_HAKKA",       "zh__HAKKA", NULL, NULL }, /* registered name */
    { "zh_MIN",         "zh__MIN", NULL, NULL }, /* registered name */
    { "zh_MIN_NAN",     "zh__MINNAN", NULL, NULL }, /* registered name */
    { "zh_WUU",         "zh__WUU", NULL, NULL }, /* registered name */
    { "zh_XIANG",       "zh__XIANG", NULL, NULL }, /* registered name */
    { "zh_YUE",         "zh__YUE", NULL, NULL }, /* registered name */
};
    517 
    518 typedef struct VariantMap {
    519     const char *variant;          /* input ID */
    520     const char *keyword;     /* keyword, or NULL if none */
    521     const char *value;       /* keyword value, or NULL if kw==NULL */
    522 } VariantMap;
    523 
    524 static const VariantMap VARIANT_MAP[] = {
    525     { "EURO",   "currency", "EUR" },
    526     { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    527     { "STROKE", "collation", "stroke" }  /* Solaris variant */
    528 };
    529 
    530 /* ### BCP47 Conversion *******************************************/
    531 /* Test if the locale id has BCP47 u extension and does not have '@' */
    532 #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
    533 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
    534 #define _ConvertBCP47(finalID, id, buffer, length,err) \
    535         if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
    536             finalID=id; \
    537         } else { \
    538             finalID=buffer; \
    539         }
    540 /* Gets the size of the shortest subtag in the given localeID. */
    541 static int32_t getShortestSubtagLength(const char *localeID) {
    542     int32_t localeIDLength = uprv_strlen(localeID);
    543     int32_t length = localeIDLength;
    544     int32_t tmpLength = 0;
    545     int32_t i;
    546     UBool reset = TRUE;
    547 
    548     for (i = 0; i < localeIDLength; i++) {
    549         if (localeID[i] != '_' && localeID[i] != '-') {
    550             if (reset) {
    551                 tmpLength = 0;
    552                 reset = FALSE;
    553             }
    554             tmpLength++;
    555         } else {
    556             if (tmpLength != 0 && tmpLength < length) {
    557                 length = tmpLength;
    558             }
    559             reset = TRUE;
    560         }
    561     }
    562 
    563     return length;
    564 }
    565 
    566 /* ### Keywords **************************************************/
    567 
    568 #define ULOC_KEYWORD_BUFFER_LEN 25
    569 #define ULOC_MAX_NO_KEYWORDS 25
    570 
    571 U_CAPI const char * U_EXPORT2
    572 locale_getKeywordsStart(const char *localeID) {
    573     const char *result = NULL;
    574     if((result = uprv_strchr(localeID, '@')) != NULL) {
    575         return result;
    576     }
    577 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    578     else {
    579         /* We do this because the @ sign is variant, and the @ sign used on one
    580         EBCDIC machine won't be compiled the same way on other EBCDIC based
    581         machines. */
    582         static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
    583         const uint8_t *charToFind = ebcdicSigns;
    584         while(*charToFind) {
    585             if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
    586                 return result;
    587             }
    588             charToFind++;
    589         }
    590     }
    591 #endif
    592     return NULL;
    593 }
    594 
    595 /**
    596  * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
    597  * @param keywordName incoming name to be canonicalized
    598  * @param status return status (keyword too long)
    599  * @return length of the keyword name
    600  */
    601 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
    602 {
    603   int32_t i;
    604   int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
    605 
    606   if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
    607     /* keyword name too long for internal buffer */
    608     *status = U_INTERNAL_PROGRAM_ERROR;
    609           return 0;
    610   }
    611 
    612   /* normalize the keyword name */
    613   for(i = 0; i < keywordNameLen; i++) {
    614     buf[i] = uprv_tolower(keywordName[i]);
    615   }
    616   buf[i] = 0;
    617 
    618   return keywordNameLen;
    619 }
    620 
    621 typedef struct {
    622     char keyword[ULOC_KEYWORD_BUFFER_LEN];
    623     int32_t keywordLen;
    624     const char *valueStart;
    625     int32_t valueLen;
    626 } KeywordStruct;
    627 
    628 static int32_t U_CALLCONV
    629 compareKeywordStructs(const void *context, const void *left, const void *right) {
    630     const char* leftString = ((const KeywordStruct *)left)->keyword;
    631     const char* rightString = ((const KeywordStruct *)right)->keyword;
    632     return uprv_strcmp(leftString, rightString);
    633 }
    634 
    635 /**
    636  * Both addKeyword and addValue must already be in canonical form.
    637  * Either both addKeyword and addValue are NULL, or neither is NULL.
    638  * If they are not NULL they must be zero terminated.
    639  * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
    640  */
    641 static int32_t
    642 _getKeywords(const char *localeID,
    643              char prev,
    644              char *keywords, int32_t keywordCapacity,
    645              char *values, int32_t valuesCapacity, int32_t *valLen,
    646              UBool valuesToo,
    647              const char* addKeyword,
    648              const char* addValue,
    649              UErrorCode *status)
    650 {
    651     KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
    652 
    653     int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
    654     int32_t numKeywords = 0;
    655     const char* pos = localeID;
    656     const char* equalSign = NULL;
    657     const char* semicolon = NULL;
    658     int32_t i = 0, j, n;
    659     int32_t keywordsLen = 0;
    660     int32_t valuesLen = 0;
    661 
    662     if(prev == '@') { /* start of keyword definition */
    663         /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
    664         do {
    665             UBool duplicate = FALSE;
    666             /* skip leading spaces */
    667             while(*pos == ' ') {
    668                 pos++;
    669             }
    670             if (!*pos) { /* handle trailing "; " */
    671                 break;
    672             }
    673             if(numKeywords == maxKeywords) {
    674                 *status = U_INTERNAL_PROGRAM_ERROR;
    675                 return 0;
    676             }
    677             equalSign = uprv_strchr(pos, '=');
    678             semicolon = uprv_strchr(pos, ';');
    679             /* lack of '=' [foo@currency] is illegal */
    680             /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
    681             if(!equalSign || (semicolon && semicolon<equalSign)) {
    682                 *status = U_INVALID_FORMAT_ERROR;
    683                 return 0;
    684             }
    685             /* need to normalize both keyword and keyword name */
    686             if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
    687                 /* keyword name too long for internal buffer */
    688                 *status = U_INTERNAL_PROGRAM_ERROR;
    689                 return 0;
    690             }
    691             for(i = 0, n = 0; i < equalSign - pos; ++i) {
    692                 if (pos[i] != ' ') {
    693                     keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
    694                 }
    695             }
    696             keywordList[numKeywords].keyword[n] = 0;
    697             keywordList[numKeywords].keywordLen = n;
    698             /* now grab the value part. First we skip the '=' */
    699             equalSign++;
    700             /* then we leading spaces */
    701             while(*equalSign == ' ') {
    702                 equalSign++;
    703             }
    704             keywordList[numKeywords].valueStart = equalSign;
    705 
    706             pos = semicolon;
    707             i = 0;
    708             if(pos) {
    709                 while(*(pos - i - 1) == ' ') {
    710                     i++;
    711                 }
    712                 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
    713                 pos++;
    714             } else {
    715                 i = (int32_t)uprv_strlen(equalSign);
    716                 while(equalSign[i-1] == ' ') {
    717                     i--;
    718                 }
    719                 keywordList[numKeywords].valueLen = i;
    720             }
    721             /* If this is a duplicate keyword, then ignore it */
    722             for (j=0; j<numKeywords; ++j) {
    723                 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
    724                     duplicate = TRUE;
    725                     break;
    726                 }
    727             }
    728             if (!duplicate) {
    729                 ++numKeywords;
    730             }
    731         } while(pos);
    732 
    733         /* Handle addKeyword/addValue. */
    734         if (addKeyword != NULL) {
    735             UBool duplicate = FALSE;
    736             U_ASSERT(addValue != NULL);
    737             /* Search for duplicate; if found, do nothing. Explicit keyword
    738                overrides addKeyword. */
    739             for (j=0; j<numKeywords; ++j) {
    740                 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
    741                     duplicate = TRUE;
    742                     break;
    743                 }
    744             }
    745             if (!duplicate) {
    746                 if (numKeywords == maxKeywords) {
    747                     *status = U_INTERNAL_PROGRAM_ERROR;
    748                     return 0;
    749                 }
    750                 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
    751                 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
    752                 keywordList[numKeywords].valueStart = addValue;
    753                 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
    754                 ++numKeywords;
    755             }
    756         } else {
    757             U_ASSERT(addValue == NULL);
    758         }
    759 
    760         /* now we have a list of keywords */
    761         /* we need to sort it */
    762         uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
    763 
    764         /* Now construct the keyword part */
    765         for(i = 0; i < numKeywords; i++) {
    766             if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
    767                 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
    768                 if(valuesToo) {
    769                     keywords[keywordsLen + keywordList[i].keywordLen] = '=';
    770                 } else {
    771                     keywords[keywordsLen + keywordList[i].keywordLen] = 0;
    772                 }
    773             }
    774             keywordsLen += keywordList[i].keywordLen + 1;
    775             if(valuesToo) {
    776                 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
    777                     uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
    778                 }
    779                 keywordsLen += keywordList[i].valueLen;
    780 
    781                 if(i < numKeywords - 1) {
    782                     if(keywordsLen < keywordCapacity) {
    783                         keywords[keywordsLen] = ';';
    784                     }
    785                     keywordsLen++;
    786                 }
    787             }
    788             if(values) {
    789                 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
    790                     uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
    791                     values[valuesLen + keywordList[i].valueLen] = 0;
    792                 }
    793                 valuesLen += keywordList[i].valueLen + 1;
    794             }
    795         }
    796         if(values) {
    797             values[valuesLen] = 0;
    798             if(valLen) {
    799                 *valLen = valuesLen;
    800             }
    801         }
    802         return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
    803     } else {
    804         return 0;
    805     }
    806 }
    807 
    808 U_CFUNC int32_t
    809 locale_getKeywords(const char *localeID,
    810                    char prev,
    811                    char *keywords, int32_t keywordCapacity,
    812                    char *values, int32_t valuesCapacity, int32_t *valLen,
    813                    UBool valuesToo,
    814                    UErrorCode *status) {
    815     return _getKeywords(localeID, prev, keywords, keywordCapacity,
    816                         values, valuesCapacity, valLen, valuesToo,
    817                         NULL, NULL, status);
    818 }
    819 
    820 U_CAPI int32_t U_EXPORT2
    821 uloc_getKeywordValue(const char* localeID,
    822                      const char* keywordName,
    823                      char* buffer, int32_t bufferCapacity,
    824                      UErrorCode* status)
    825 {
    826     const char* startSearchHere = NULL;
    827     const char* nextSeparator = NULL;
    828     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    829     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    830     int32_t i = 0;
    831     int32_t result = 0;
    832 
    833     if(status && U_SUCCESS(*status) && localeID) {
    834       char tempBuffer[ULOC_FULLNAME_CAPACITY];
    835       const char* tmpLocaleID;
    836 
    837       if (_hasBCP47Extension(localeID)) {
    838           _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
    839       } else {
    840           tmpLocaleID=localeID;
    841       }
    842 
    843       startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
    844       if(startSearchHere == NULL) {
    845           /* no keywords, return at once */
    846           return 0;
    847       }
    848 
    849       locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    850       if(U_FAILURE(*status)) {
    851         return 0;
    852       }
    853 
    854       /* find the first keyword */
    855       while(startSearchHere) {
    856           startSearchHere++;
    857           /* skip leading spaces (allowed?) */
    858           while(*startSearchHere == ' ') {
    859               startSearchHere++;
    860           }
    861           nextSeparator = uprv_strchr(startSearchHere, '=');
    862           /* need to normalize both keyword and keyword name */
    863           if(!nextSeparator) {
    864               break;
    865           }
    866           if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
    867               /* keyword name too long for internal buffer */
    868               *status = U_INTERNAL_PROGRAM_ERROR;
    869               return 0;
    870           }
    871           for(i = 0; i < nextSeparator - startSearchHere; i++) {
    872               localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
    873           }
    874           /* trim trailing spaces */
    875           while(startSearchHere[i-1] == ' ') {
    876               i--;
    877           }
    878           localeKeywordNameBuffer[i] = 0;
    879 
    880           startSearchHere = uprv_strchr(nextSeparator, ';');
    881 
    882           if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
    883               nextSeparator++;
    884               while(*nextSeparator == ' ') {
    885                   nextSeparator++;
    886               }
    887               /* we actually found the keyword. Copy the value */
    888               if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
    889                   while(*(startSearchHere-1) == ' ') {
    890                       startSearchHere--;
    891                   }
    892                   uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
    893                   result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
    894               } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
    895                   i = (int32_t)uprv_strlen(nextSeparator);
    896                   while(nextSeparator[i - 1] == ' ') {
    897                       i--;
    898                   }
    899                   uprv_strncpy(buffer, nextSeparator, i);
    900                   result = u_terminateChars(buffer, bufferCapacity, i, status);
    901               } else {
    902                   /* give a bigger buffer, please */
    903                   *status = U_BUFFER_OVERFLOW_ERROR;
    904                   if(startSearchHere) {
    905                       result = (int32_t)(startSearchHere - nextSeparator);
    906                   } else {
    907                       result = (int32_t)uprv_strlen(nextSeparator);
    908                   }
    909               }
    910               return result;
    911           }
    912       }
    913     }
    914     return 0;
    915 }
    916 
    917 U_CAPI int32_t U_EXPORT2
    918 uloc_setKeywordValue(const char* keywordName,
    919                      const char* keywordValue,
    920                      char* buffer, int32_t bufferCapacity,
    921                      UErrorCode* status)
    922 {
    923     /* TODO: sorting. removal. */
    924     int32_t keywordNameLen;
    925     int32_t keywordValueLen;
    926     int32_t bufLen;
    927     int32_t needLen = 0;
    928     int32_t foundValueLen;
    929     int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
    930     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    931     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    932     int32_t i = 0;
    933     int32_t rc;
    934     char* nextSeparator = NULL;
    935     char* nextEqualsign = NULL;
    936     char* startSearchHere = NULL;
    937     char* keywordStart = NULL;
    938     char *insertHere = NULL;
    939     if(U_FAILURE(*status)) {
    940         return -1;
    941     }
    942     if(bufferCapacity>1) {
    943         bufLen = (int32_t)uprv_strlen(buffer);
    944     } else {
    945         *status = U_ILLEGAL_ARGUMENT_ERROR;
    946         return 0;
    947     }
    948     if(bufferCapacity<bufLen) {
    949         /* The capacity is less than the length?! Is this NULL terminated? */
    950         *status = U_ILLEGAL_ARGUMENT_ERROR;
    951         return 0;
    952     }
    953     if(keywordValue && !*keywordValue) {
    954         keywordValue = NULL;
    955     }
    956     if(keywordValue) {
    957         keywordValueLen = (int32_t)uprv_strlen(keywordValue);
    958     } else {
    959         keywordValueLen = 0;
    960     }
    961     keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    962     if(U_FAILURE(*status)) {
    963         return 0;
    964     }
    965     startSearchHere = (char*)locale_getKeywordsStart(buffer);
    966     if(startSearchHere == NULL || (startSearchHere[1]==0)) {
    967         if(!keywordValue) { /* no keywords = nothing to remove */
    968             return bufLen;
    969         }
    970 
    971         needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
    972         if(startSearchHere) { /* had a single @ */
    973             needLen--; /* already had the @ */
    974             /* startSearchHere points at the @ */
    975         } else {
    976             startSearchHere=buffer+bufLen;
    977         }
    978         if(needLen >= bufferCapacity) {
    979             *status = U_BUFFER_OVERFLOW_ERROR;
    980             return needLen; /* no change */
    981         }
    982         *startSearchHere = '@';
    983         startSearchHere++;
    984         uprv_strcpy(startSearchHere, keywordNameBuffer);
    985         startSearchHere += keywordNameLen;
    986         *startSearchHere = '=';
    987         startSearchHere++;
    988         uprv_strcpy(startSearchHere, keywordValue);
    989         startSearchHere+=keywordValueLen;
    990         return needLen;
    991     } /* end shortcut - no @ */
    992 
    993     keywordStart = startSearchHere;
    994     /* search for keyword */
    995     while(keywordStart) {
    996         keywordStart++;
    997         /* skip leading spaces (allowed?) */
    998         while(*keywordStart == ' ') {
    999             keywordStart++;
   1000         }
   1001         nextEqualsign = uprv_strchr(keywordStart, '=');
   1002         /* need to normalize both keyword and keyword name */
   1003         if(!nextEqualsign) {
   1004             break;
   1005         }
   1006         if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
   1007             /* keyword name too long for internal buffer */
   1008             *status = U_INTERNAL_PROGRAM_ERROR;
   1009             return 0;
   1010         }
   1011         for(i = 0; i < nextEqualsign - keywordStart; i++) {
   1012             localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
   1013         }
   1014         /* trim trailing spaces */
   1015         while(keywordStart[i-1] == ' ') {
   1016             i--;
   1017         }
   1018         localeKeywordNameBuffer[i] = 0;
   1019 
   1020         nextSeparator = uprv_strchr(nextEqualsign, ';');
   1021         rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
   1022         if(rc == 0) {
   1023             nextEqualsign++;
   1024             while(*nextEqualsign == ' ') {
   1025                 nextEqualsign++;
   1026             }
   1027             /* we actually found the keyword. Change the value */
   1028             if (nextSeparator) {
   1029                 keywordAtEnd = 0;
   1030                 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
   1031             } else {
   1032                 keywordAtEnd = 1;
   1033                 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
   1034             }
   1035             if(keywordValue) { /* adding a value - not removing */
   1036               if(foundValueLen == keywordValueLen) {
   1037                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1038                 return bufLen; /* no change in size */
   1039               } else if(foundValueLen > keywordValueLen) {
   1040                 int32_t delta = foundValueLen - keywordValueLen;
   1041                 if(nextSeparator) { /* RH side */
   1042                   uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
   1043                 }
   1044                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1045                 bufLen -= delta;
   1046                 buffer[bufLen]=0;
   1047                 return bufLen;
   1048               } else { /* FVL < KVL */
   1049                 int32_t delta = keywordValueLen - foundValueLen;
   1050                 if((bufLen+delta) >= bufferCapacity) {
   1051                   *status = U_BUFFER_OVERFLOW_ERROR;
   1052                   return bufLen+delta;
   1053                 }
   1054                 if(nextSeparator) { /* RH side */
   1055                   uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
   1056                 }
   1057                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1058                 bufLen += delta;
   1059                 buffer[bufLen]=0;
   1060                 return bufLen;
   1061               }
   1062             } else { /* removing a keyword */
   1063               if(keywordAtEnd) {
   1064                 /* zero out the ';' or '@' just before startSearchhere */
   1065                 keywordStart[-1] = 0;
   1066                 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
   1067               } else {
   1068                 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
   1069                 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
   1070                 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
   1071               }
   1072             }
   1073         } else if(rc<0){ /* end match keyword */
   1074           /* could insert at this location. */
   1075           insertHere = keywordStart;
   1076         }
   1077         keywordStart = nextSeparator;
   1078     } /* end loop searching */
   1079 
   1080     if(!keywordValue) {
   1081       return bufLen; /* removal of non-extant keyword - no change */
   1082     }
   1083 
   1084     /* we know there is at least one keyword. */
   1085     needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
   1086     if(needLen >= bufferCapacity) {
   1087         *status = U_BUFFER_OVERFLOW_ERROR;
   1088         return needLen; /* no change */
   1089     }
   1090 
   1091     if(insertHere) {
   1092       uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
   1093       keywordStart = insertHere;
   1094     } else {
   1095       keywordStart = buffer+bufLen;
   1096       *keywordStart = ';';
   1097       keywordStart++;
   1098     }
   1099     uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
   1100     keywordStart += keywordNameLen;
   1101     *keywordStart = '=';
   1102     keywordStart++;
   1103     uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
   1104     keywordStart+=keywordValueLen;
   1105     if(insertHere) {
   1106       *keywordStart = ';';
   1107       keywordStart++;
   1108     }
   1109     buffer[needLen]=0;
   1110     return needLen;
   1111 }
   1112 
   1113 /* ### ID parsing implementation **************************************************/
   1114 
   1115 #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))
   1116 
   1117 /*returns TRUE if one of the special prefixes is here (s=string)
   1118   'x-' or 'i-' */
   1119 #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
   1120 
   1121 /* Dot terminates it because of POSIX form  where dot precedes the codepage
   1122  * except for variant
   1123  */
   1124 #define _isTerminator(a)  ((a==0)||(a=='.')||(a=='@'))
   1125 
   1126 static char* _strnchr(const char* str, int32_t len, char c) {
   1127     U_ASSERT(str != 0 && len >= 0);
   1128     while (len-- != 0) {
   1129         char d = *str;
   1130         if (d == c) {
   1131             return (char*) str;
   1132         } else if (d == 0) {
   1133             break;
   1134         }
   1135         ++str;
   1136     }
   1137     return NULL;
   1138 }
   1139 
   1140 /**
   1141  * Lookup 'key' in the array 'list'.  The array 'list' should contain
   1142  * a NULL entry, followed by more entries, and a second NULL entry.
   1143  *
   1144  * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
   1145  * COUNTRIES_3.
   1146  */
   1147 static int16_t _findIndex(const char* const* list, const char* key)
   1148 {
   1149     const char* const* anchor = list;
   1150     int32_t pass = 0;
   1151 
   1152     /* Make two passes through two NULL-terminated arrays at 'list' */
   1153     while (pass++ < 2) {
   1154         while (*list) {
   1155             if (uprv_strcmp(key, *list) == 0) {
   1156                 return (int16_t)(list - anchor);
   1157             }
   1158             list++;
   1159         }
   1160         ++list;     /* skip final NULL *CWB*/
   1161     }
   1162     return -1;
   1163 }
   1164 
   1165 /* count the length of src while copying it to dest; return strlen(src) */
   1166 static U_INLINE int32_t
   1167 _copyCount(char *dest, int32_t destCapacity, const char *src) {
   1168     const char *anchor;
   1169     char c;
   1170 
   1171     anchor=src;
   1172     for(;;) {
   1173         if((c=*src)==0) {
   1174             return (int32_t)(src-anchor);
   1175         }
   1176         if(destCapacity<=0) {
   1177             return (int32_t)((src-anchor)+uprv_strlen(src));
   1178         }
   1179         ++src;
   1180         *dest++=c;
   1181         --destCapacity;
   1182     }
   1183 }
   1184 
   1185 U_CFUNC const char*
   1186 uloc_getCurrentCountryID(const char* oldID){
   1187     int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
   1188     if (offset >= 0) {
   1189         return REPLACEMENT_COUNTRIES[offset];
   1190     }
   1191     return oldID;
   1192 }
   1193 U_CFUNC const char*
   1194 uloc_getCurrentLanguageID(const char* oldID){
   1195     int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
   1196     if (offset >= 0) {
   1197         return REPLACEMENT_LANGUAGES[offset];
   1198     }
   1199     return oldID;
   1200 }
   1201 /*
   1202  * the internal functions _getLanguage(), _getCountry(), _getVariant()
   1203  * avoid duplicating code to handle the earlier locale ID pieces
   1204  * in the functions for the later ones by
   1205  * setting the *pEnd pointer to where they stopped parsing
   1206  *
   1207  * TODO try to use this in Locale
   1208  */
   1209 U_CFUNC int32_t
   1210 ulocimp_getLanguage(const char *localeID,
   1211                     char *language, int32_t languageCapacity,
   1212                     const char **pEnd) {
   1213     int32_t i=0;
   1214     int32_t offset;
   1215     char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
   1216 
   1217     /* if it starts with i- or x- then copy that prefix */
   1218     if(_isIDPrefix(localeID)) {
   1219         if(i<languageCapacity) {
   1220             language[i]=(char)uprv_tolower(*localeID);
   1221         }
   1222         if(i<languageCapacity) {
   1223             language[i+1]='-';
   1224         }
   1225         i+=2;
   1226         localeID+=2;
   1227     }
   1228 
   1229     /* copy the language as far as possible and count its length */
   1230     while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
   1231         if(i<languageCapacity) {
   1232             language[i]=(char)uprv_tolower(*localeID);
   1233         }
   1234         if(i<3) {
   1235             lang[i]=(char)uprv_tolower(*localeID);
   1236         }
   1237         i++;
   1238         localeID++;
   1239     }
   1240 
   1241     if(i==3) {
   1242         /* convert 3 character code to 2 character code if possible *CWB*/
   1243         offset=_findIndex(LANGUAGES_3, lang);
   1244         if(offset>=0) {
   1245             i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
   1246         }
   1247     }
   1248 
   1249     if(pEnd!=NULL) {
   1250         *pEnd=localeID;
   1251     }
   1252     return i;
   1253 }
   1254 
   1255 U_CFUNC int32_t
   1256 ulocimp_getScript(const char *localeID,
   1257                   char *script, int32_t scriptCapacity,
   1258                   const char **pEnd)
   1259 {
   1260     int32_t idLen = 0;
   1261 
   1262     if (pEnd != NULL) {
   1263         *pEnd = localeID;
   1264     }
   1265 
   1266     /* copy the second item as far as possible and count its length */
   1267     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
   1268         idLen++;
   1269     }
   1270 
   1271     /* If it's exactly 4 characters long, then it's a script and not a country. */
   1272     if (idLen == 4) {
   1273         int32_t i;
   1274         if (pEnd != NULL) {
   1275             *pEnd = localeID+idLen;
   1276         }
   1277         if(idLen > scriptCapacity) {
   1278             idLen = scriptCapacity;
   1279         }
   1280         if (idLen >= 1) {
   1281             script[0]=(char)uprv_toupper(*(localeID++));
   1282         }
   1283         for (i = 1; i < idLen; i++) {
   1284             script[i]=(char)uprv_tolower(*(localeID++));
   1285         }
   1286     }
   1287     else {
   1288         idLen = 0;
   1289     }
   1290     return idLen;
   1291 }
   1292 
   1293 U_CFUNC int32_t
   1294 ulocimp_getCountry(const char *localeID,
   1295                    char *country, int32_t countryCapacity,
   1296                    const char **pEnd)
   1297 {
   1298     int32_t idLen=0;
   1299     char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
   1300     int32_t offset;
   1301 
   1302     /* copy the country as far as possible and count its length */
   1303     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
   1304         if(idLen<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
   1305             cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
   1306         }
   1307         idLen++;
   1308     }
   1309 
   1310     /* the country should be either length 2 or 3 */
   1311     if (idLen == 2 || idLen == 3) {
   1312         UBool gotCountry = FALSE;
   1313         /* convert 3 character code to 2 character code if possible *CWB*/
   1314         if(idLen==3) {
   1315             offset=_findIndex(COUNTRIES_3, cnty);
   1316             if(offset>=0) {
   1317                 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
   1318                 gotCountry = TRUE;
   1319             }
   1320         }
   1321         if (!gotCountry) {
   1322             int32_t i = 0;
   1323             for (i = 0; i < idLen; i++) {
   1324                 if (i < countryCapacity) {
   1325                     country[i]=(char)uprv_toupper(localeID[i]);
   1326                 }
   1327             }
   1328         }
   1329         localeID+=idLen;
   1330     } else {
   1331         idLen = 0;
   1332     }
   1333 
   1334     if(pEnd!=NULL) {
   1335         *pEnd=localeID;
   1336     }
   1337 
   1338     return idLen;
   1339 }
   1340 
   1341 /**
   1342  * @param needSeparator if true, then add leading '_' if any variants
   1343  * are added to 'variant'
   1344  */
   1345 static int32_t
   1346 _getVariantEx(const char *localeID,
   1347               char prev,
   1348               char *variant, int32_t variantCapacity,
   1349               UBool needSeparator) {
   1350     int32_t i=0;
   1351 
   1352     /* get one or more variant tags and separate them with '_' */
   1353     if(_isIDSeparator(prev)) {
   1354         /* get a variant string after a '-' or '_' */
   1355         while(!_isTerminator(*localeID)) {
   1356             if (needSeparator) {
   1357                 if (i<variantCapacity) {
   1358                     variant[i] = '_';
   1359                 }
   1360                 ++i;
   1361                 needSeparator = FALSE;
   1362             }
   1363             if(i<variantCapacity) {
   1364                 variant[i]=(char)uprv_toupper(*localeID);
   1365                 if(variant[i]=='-') {
   1366                     variant[i]='_';
   1367                 }
   1368             }
   1369             i++;
   1370             localeID++;
   1371         }
   1372     }
   1373 
   1374     /* if there is no variant tag after a '-' or '_' then look for '@' */
   1375     if(i==0) {
   1376         if(prev=='@') {
   1377             /* keep localeID */
   1378         } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
   1379             ++localeID; /* point after the '@' */
   1380         } else {
   1381             return 0;
   1382         }
   1383         while(!_isTerminator(*localeID)) {
   1384             if (needSeparator) {
   1385                 if (i<variantCapacity) {
   1386                     variant[i] = '_';
   1387                 }
   1388                 ++i;
   1389                 needSeparator = FALSE;
   1390             }
   1391             if(i<variantCapacity) {
   1392                 variant[i]=(char)uprv_toupper(*localeID);
   1393                 if(variant[i]=='-' || variant[i]==',') {
   1394                     variant[i]='_';
   1395                 }
   1396             }
   1397             i++;
   1398             localeID++;
   1399         }
   1400     }
   1401 
   1402     return i;
   1403 }
   1404 
   1405 static int32_t
   1406 _getVariant(const char *localeID,
   1407             char prev,
   1408             char *variant, int32_t variantCapacity) {
   1409     return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
   1410 }
   1411 
   1412 /**
   1413  * Delete ALL instances of a variant from the given list of one or
   1414  * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
   1415  * @param variants the source string of one or more variants,
   1416  * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
   1417  * terminated; if it is, trailing zero will NOT be maintained.
   1418  * @param variantsLen length of variants
   1419  * @param toDelete variant to delete, without separators, e.g.  "EURO"
   1420  * or "PREEURO"; not zero terminated
   1421  * @param toDeleteLen length of toDelete
   1422  * @return number of characters deleted from variants
   1423  */
   1424 static int32_t
   1425 _deleteVariant(char* variants, int32_t variantsLen,
   1426                const char* toDelete, int32_t toDeleteLen)
   1427 {
   1428     int32_t delta = 0; /* number of chars deleted */
   1429     for (;;) {
   1430         UBool flag = FALSE;
   1431         if (variantsLen < toDeleteLen) {
   1432             return delta;
   1433         }
   1434         if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
   1435             (variantsLen == toDeleteLen ||
   1436              (flag=(variants[toDeleteLen] == '_'))))
   1437         {
   1438             int32_t d = toDeleteLen + (flag?1:0);
   1439             variantsLen -= d;
   1440             delta += d;
   1441             if (variantsLen > 0) {
   1442                 uprv_memmove(variants, variants+d, variantsLen);
   1443             }
   1444         } else {
   1445             char* p = _strnchr(variants, variantsLen, '_');
   1446             if (p == NULL) {
   1447                 return delta;
   1448             }
   1449             ++p;
   1450             variantsLen -= (int32_t)(p - variants);
   1451             variants = p;
   1452         }
   1453     }
   1454 }
   1455 
   1456 /* Keyword enumeration */
   1457 
   1458 typedef struct UKeywordsContext {
   1459     char* keywords;
   1460     char* current;
   1461 } UKeywordsContext;
   1462 
   1463 static void U_CALLCONV
   1464 uloc_kw_closeKeywords(UEnumeration *enumerator) {
   1465     uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
   1466     uprv_free(enumerator->context);
   1467     uprv_free(enumerator);
   1468 }
   1469 
   1470 static int32_t U_CALLCONV
   1471 uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) {
   1472     char *kw = ((UKeywordsContext *)en->context)->keywords;
   1473     int32_t result = 0;
   1474     while(*kw) {
   1475         result++;
   1476         kw += uprv_strlen(kw)+1;
   1477     }
   1478     return result;
   1479 }
   1480 
   1481 static const char* U_CALLCONV
   1482 uloc_kw_nextKeyword(UEnumeration* en,
   1483                     int32_t* resultLength,
   1484                     UErrorCode* status) {
   1485     const char* result = ((UKeywordsContext *)en->context)->current;
   1486     int32_t len = 0;
   1487     if(*result) {
   1488         len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
   1489         ((UKeywordsContext *)en->context)->current += len+1;
   1490     } else {
   1491         result = NULL;
   1492     }
   1493     if (resultLength) {
   1494         *resultLength = len;
   1495     }
   1496     return result;
   1497 }
   1498 
   1499 static void U_CALLCONV
   1500 uloc_kw_resetKeywords(UEnumeration* en,
   1501                       UErrorCode* status) {
   1502     ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
   1503 }
   1504 
/*
 * Template UEnumeration for keyword lists.  uloc_openKeywordList() copies it
 * into each new enumeration and then sets the copy's 'context'.
 * NOTE(review): the initializer is positional; the slot order must match the
 * UEnumeration declaration (presumably in uenumimp.h) -- confirm before
 * reordering.
 */
static const UEnumeration gKeywordsEnum = {
    NULL,                   /* pointer slot left unset in the template */
    NULL,                   /* pointer slot left unset in the template */
    uloc_kw_closeKeywords,  /* frees buffer, context and enumeration */
    uloc_kw_countKeywords,  /* counts all keywords */
    uenum_unextDefault,     /* library-provided default, not defined in this file */
    uloc_kw_nextKeyword,    /* returns the next keyword as char* */
    uloc_kw_resetKeywords   /* rewinds the cursor */
};
   1514 
   1515 U_CAPI UEnumeration* U_EXPORT2
   1516 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
   1517 {
   1518     UKeywordsContext *myContext = NULL;
   1519     UEnumeration *result = NULL;
   1520 
   1521     if(U_FAILURE(*status)) {
   1522         return NULL;
   1523     }
   1524     result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
   1525     /* Null pointer test */
   1526     if (result == NULL) {
   1527         *status = U_MEMORY_ALLOCATION_ERROR;
   1528         return NULL;
   1529     }
   1530     uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
   1531     myContext = uprv_malloc(sizeof(UKeywordsContext));
   1532     if (myContext == NULL) {
   1533         *status = U_MEMORY_ALLOCATION_ERROR;
   1534         uprv_free(result);
   1535         return NULL;
   1536     }
   1537     myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
   1538     uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
   1539     myContext->keywords[keywordListSize] = 0;
   1540     myContext->current = myContext->keywords;
   1541     result->context = myContext;
   1542     return result;
   1543 }
   1544 
   1545 U_CAPI UEnumeration* U_EXPORT2
   1546 uloc_openKeywords(const char* localeID,
   1547                         UErrorCode* status)
   1548 {
   1549     int32_t i=0;
   1550     char keywords[256];
   1551     int32_t keywordsCapacity = 256;
   1552     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1553     const char* tmpLocaleID;
   1554 
   1555     if(status==NULL || U_FAILURE(*status)) {
   1556         return 0;
   1557     }
   1558 
   1559     if (_hasBCP47Extension(localeID)) {
   1560         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
   1561     } else {
   1562         if (localeID==NULL) {
   1563            localeID=uloc_getDefault();
   1564         }
   1565         tmpLocaleID=localeID;
   1566     }
   1567 
   1568     /* Skip the language */
   1569     ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
   1570     if(_isIDSeparator(*tmpLocaleID)) {
   1571         const char *scriptID;
   1572         /* Skip the script if available */
   1573         ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
   1574         if(scriptID != tmpLocaleID+1) {
   1575             /* Found optional script */
   1576             tmpLocaleID = scriptID;
   1577         }
   1578         /* Skip the Country */
   1579         if (_isIDSeparator(*tmpLocaleID)) {
   1580             ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
   1581             if(_isIDSeparator(*tmpLocaleID)) {
   1582                 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
   1583             }
   1584         }
   1585     }
   1586 
   1587     /* keywords are located after '@' */
   1588     if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
   1589         i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
   1590     }
   1591 
   1592     if(i) {
   1593         return uloc_openKeywordList(keywords, i, status);
   1594     } else {
   1595         return NULL;
   1596     }
   1597 }
   1598 
   1599 
   1600 /* bit-flags for 'options' parameter of _canonicalize */
   1601 #define _ULOC_STRIP_KEYWORDS 0x2
   1602 #define _ULOC_CANONICALIZE   0x1
   1603 
   1604 #define OPTION_SET(options, mask) ((options & mask) != 0)
   1605 
   1606 static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
   1607 #define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
   1608 
   1609 /**
   1610  * Canonicalize the given localeID, to level 1 or to level 2,
   1611  * depending on the options.  To specify level 1, pass in options=0.
   1612  * To specify level 2, pass in options=_ULOC_CANONICALIZE.
   1613  *
   1614  * This is the code underlying uloc_getName and uloc_canonicalize.
   1615  */
   1616 static int32_t
   1617 _canonicalize(const char* localeID,
   1618               char* result,
   1619               int32_t resultCapacity,
   1620               uint32_t options,
   1621               UErrorCode* err) {
   1622     int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
   1623     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1624     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1625     const char* origLocaleID;
   1626     const char* tmpLocaleID;
   1627     const char* keywordAssign = NULL;
   1628     const char* separatorIndicator = NULL;
   1629     const char* addKeyword = NULL;
   1630     const char* addValue = NULL;
   1631     char* name;
   1632     char* variant = NULL; /* pointer into name, or NULL */
   1633 
   1634     if (U_FAILURE(*err)) {
   1635         return 0;
   1636     }
   1637 
   1638     if (_hasBCP47Extension(localeID)) {
   1639         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
   1640     } else {
   1641         if (localeID==NULL) {
   1642            localeID=uloc_getDefault();
   1643         }
   1644         tmpLocaleID=localeID;
   1645     }
   1646 
   1647     origLocaleID=tmpLocaleID;
   1648 
   1649     /* if we are doing a full canonicalization, then put results in
   1650        localeBuffer, if necessary; otherwise send them to result. */
   1651     if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
   1652         (result == NULL || resultCapacity <  sizeof(localeBuffer))) {
   1653         name = localeBuffer;
   1654         nameCapacity = sizeof(localeBuffer);
   1655     } else {
   1656         name = result;
   1657         nameCapacity = resultCapacity;
   1658     }
   1659 
   1660     /* get all pieces, one after another, and separate with '_' */
   1661     len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
   1662 
   1663     if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
   1664         const char *d = uloc_getDefault();
   1665 
   1666         len = (int32_t)uprv_strlen(d);
   1667 
   1668         if (name != NULL) {
   1669             uprv_strncpy(name, d, len);
   1670         }
   1671     } else if(_isIDSeparator(*tmpLocaleID)) {
   1672         const char *scriptID;
   1673 
   1674         ++fieldCount;
   1675         if(len<nameCapacity) {
   1676             name[len]='_';
   1677         }
   1678         ++len;
   1679 
   1680         scriptSize=ulocimp_getScript(tmpLocaleID+1, name+len, nameCapacity-len, &scriptID);
   1681         if(scriptSize > 0) {
   1682             /* Found optional script */
   1683             tmpLocaleID = scriptID;
   1684             ++fieldCount;
   1685             len+=scriptSize;
   1686             if (_isIDSeparator(*tmpLocaleID)) {
   1687                 /* If there is something else, then we add the _ */
   1688                 if(len<nameCapacity) {
   1689                     name[len]='_';
   1690                 }
   1691                 ++len;
   1692             }
   1693         }
   1694 
   1695         if (_isIDSeparator(*tmpLocaleID)) {
   1696             const char *cntryID;
   1697             int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1, name+len, nameCapacity-len, &cntryID);
   1698             if (cntrySize > 0) {
   1699                 /* Found optional country */
   1700                 tmpLocaleID = cntryID;
   1701                 len+=cntrySize;
   1702             }
   1703             if(_isIDSeparator(*tmpLocaleID)) {
   1704                 /* If there is something else, then we add the _  if we found country before.*/
   1705                 if (cntrySize > 0) {
   1706                     ++fieldCount;
   1707                     if(len<nameCapacity) {
   1708                         name[len]='_';
   1709                     }
   1710                     ++len;
   1711                 }
   1712 
   1713                 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID, name+len, nameCapacity-len);
   1714                 if (variantSize > 0) {
   1715                     variant = name+len;
   1716                     len += variantSize;
   1717                     tmpLocaleID += variantSize + 1; /* skip '_' and variant */
   1718                 }
   1719             }
   1720         }
   1721     }
   1722 
   1723     /* Copy POSIX-style charset specifier, if any [mr.utf8] */
   1724     if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
   1725         UBool done = FALSE;
   1726         do {
   1727             char c = *tmpLocaleID;
   1728             switch (c) {
   1729             case 0:
   1730             case '@':
   1731                 done = TRUE;
   1732                 break;
   1733             default:
   1734                 if (len<nameCapacity) {
   1735                     name[len] = c;
   1736                 }
   1737                 ++len;
   1738                 ++tmpLocaleID;
   1739                 break;
   1740             }
   1741         } while (!done);
   1742     }
   1743 
   1744     /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
   1745        After this, tmpLocaleID either points to '@' or is NULL */
   1746     if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
   1747         keywordAssign = uprv_strchr(tmpLocaleID, '=');
   1748         separatorIndicator = uprv_strchr(tmpLocaleID, ';');
   1749     }
   1750 
   1751     /* Copy POSIX-style variant, if any [mr@FOO] */
   1752     if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
   1753         tmpLocaleID != NULL && keywordAssign == NULL) {
   1754         for (;;) {
   1755             char c = *tmpLocaleID;
   1756             if (c == 0) {
   1757                 break;
   1758             }
   1759             if (len<nameCapacity) {
   1760                 name[len] = c;
   1761             }
   1762             ++len;
   1763             ++tmpLocaleID;
   1764         }
   1765     }
   1766 
   1767     if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
   1768         /* Handle @FOO variant if @ is present and not followed by = */
   1769         if (tmpLocaleID!=NULL && keywordAssign==NULL) {
   1770             int32_t posixVariantSize;
   1771             /* Add missing '_' if needed */
   1772             if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
   1773                 do {
   1774                     if(len<nameCapacity) {
   1775                         name[len]='_';
   1776                     }
   1777                     ++len;
   1778                     ++fieldCount;
   1779                 } while(fieldCount<2);
   1780             }
   1781             posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
   1782                                              (UBool)(variantSize > 0));
   1783             if (posixVariantSize > 0) {
   1784                 if (variant == NULL) {
   1785                     variant = name+len;
   1786                 }
   1787                 len += posixVariantSize;
   1788                 variantSize += posixVariantSize;
   1789             }
   1790         }
   1791 
   1792         /* Handle generic variants first */
   1793         if (variant) {
   1794             for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
   1795                 const char* variantToCompare = VARIANT_MAP[j].variant;
   1796                 int32_t n = (int32_t)uprv_strlen(variantToCompare);
   1797                 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
   1798                 len -= variantLen;
   1799                 if (variantLen > 0) {
   1800                     if (name[len-1] == '_') { /* delete trailing '_' */
   1801                         --len;
   1802                     }
   1803                     addKeyword = VARIANT_MAP[j].keyword;
   1804                     addValue = VARIANT_MAP[j].value;
   1805                     break;
   1806                 }
   1807             }
   1808             if (name[len-1] == '_') { /* delete trailing '_' */
   1809                 --len;
   1810             }
   1811         }
   1812 
   1813         /* Look up the ID in the canonicalization map */
   1814         for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
   1815             const char* id = CANONICALIZE_MAP[j].id;
   1816             int32_t n = (int32_t)uprv_strlen(id);
   1817             if (len == n && uprv_strncmp(name, id, n) == 0) {
   1818                 if (n == 0 && tmpLocaleID != NULL) {
   1819                     break; /* Don't remap "" if keywords present */
   1820                 }
   1821                 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
   1822                 if (CANONICALIZE_MAP[j].keyword) {
   1823                     addKeyword = CANONICALIZE_MAP[j].keyword;
   1824                     addValue = CANONICALIZE_MAP[j].value;
   1825                 }
   1826                 break;
   1827             }
   1828         }
   1829     }
   1830 
   1831     if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
   1832         if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
   1833             (!separatorIndicator || separatorIndicator > keywordAssign)) {
   1834             if(len<nameCapacity) {
   1835                 name[len]='@';
   1836             }
   1837             ++len;
   1838             ++fieldCount;
   1839             len += _getKeywords(tmpLocaleID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,
   1840                                 addKeyword, addValue, err);
   1841         } else if (addKeyword != NULL) {
   1842             U_ASSERT(addValue != NULL);
   1843             /* inelegant but works -- later make _getKeywords do this? */
   1844             len += _copyCount(name+len, nameCapacity-len, "@");
   1845             len += _copyCount(name+len, nameCapacity-len, addKeyword);
   1846             len += _copyCount(name+len, nameCapacity-len, "=");
   1847             len += _copyCount(name+len, nameCapacity-len, addValue);
   1848         }
   1849     }
   1850 
   1851     if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
   1852         uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
   1853     }
   1854 
   1855     return u_terminateChars(result, resultCapacity, len, err);
   1856 }
   1857 
   1858 /* ### ID parsing API **************************************************/
   1859 
   1860 U_CAPI int32_t  U_EXPORT2
   1861 uloc_getParent(const char*    localeID,
   1862                char* parent,
   1863                int32_t parentCapacity,
   1864                UErrorCode* err)
   1865 {
   1866     const char *lastUnderscore;
   1867     int32_t i;
   1868 
   1869     if (U_FAILURE(*err))
   1870         return 0;
   1871 
   1872     if (localeID == NULL)
   1873         localeID = uloc_getDefault();
   1874 
   1875     lastUnderscore=uprv_strrchr(localeID, '_');
   1876     if(lastUnderscore!=NULL) {
   1877         i=(int32_t)(lastUnderscore-localeID);
   1878     } else {
   1879         i=0;
   1880     }
   1881 
   1882     if(i>0 && parent != localeID) {
   1883         uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
   1884     }
   1885     return u_terminateChars(parent, parentCapacity, i, err);
   1886 }
   1887 
   1888 U_CAPI int32_t U_EXPORT2
   1889 uloc_getLanguage(const char*    localeID,
   1890          char* language,
   1891          int32_t languageCapacity,
   1892          UErrorCode* err)
   1893 {
   1894     /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
   1895     int32_t i=0;
   1896 
   1897     if (err==NULL || U_FAILURE(*err)) {
   1898         return 0;
   1899     }
   1900 
   1901     if(localeID==NULL) {
   1902         localeID=uloc_getDefault();
   1903     }
   1904 
   1905     i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
   1906     return u_terminateChars(language, languageCapacity, i, err);
   1907 }
   1908 
   1909 U_CAPI int32_t U_EXPORT2
   1910 uloc_getScript(const char*    localeID,
   1911          char* script,
   1912          int32_t scriptCapacity,
   1913          UErrorCode* err)
   1914 {
   1915     int32_t i=0;
   1916 
   1917     if(err==NULL || U_FAILURE(*err)) {
   1918         return 0;
   1919     }
   1920 
   1921     if(localeID==NULL) {
   1922         localeID=uloc_getDefault();
   1923     }
   1924 
   1925     /* skip the language */
   1926     ulocimp_getLanguage(localeID, NULL, 0, &localeID);
   1927     if(_isIDSeparator(*localeID)) {
   1928         i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
   1929     }
   1930     return u_terminateChars(script, scriptCapacity, i, err);
   1931 }
   1932 
   1933 U_CAPI int32_t  U_EXPORT2
   1934 uloc_getCountry(const char* localeID,
   1935             char* country,
   1936             int32_t countryCapacity,
   1937             UErrorCode* err)
   1938 {
   1939     int32_t i=0;
   1940 
   1941     if(err==NULL || U_FAILURE(*err)) {
   1942         return 0;
   1943     }
   1944 
   1945     if(localeID==NULL) {
   1946         localeID=uloc_getDefault();
   1947     }
   1948 
   1949     /* Skip the language */
   1950     ulocimp_getLanguage(localeID, NULL, 0, &localeID);
   1951     if(_isIDSeparator(*localeID)) {
   1952         const char *scriptID;
   1953         /* Skip the script if available */
   1954         ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
   1955         if(scriptID != localeID+1) {
   1956             /* Found optional script */
   1957             localeID = scriptID;
   1958         }
   1959         if(_isIDSeparator(*localeID)) {
   1960             i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
   1961         }
   1962     }
   1963     return u_terminateChars(country, countryCapacity, i, err);
   1964 }
   1965 
   1966 U_CAPI int32_t  U_EXPORT2
   1967 uloc_getVariant(const char* localeID,
   1968                 char* variant,
   1969                 int32_t variantCapacity,
   1970                 UErrorCode* err)
   1971 {
   1972     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1973     const char* tmpLocaleID;
   1974     int32_t i=0;
   1975 
   1976     if(err==NULL || U_FAILURE(*err)) {
   1977         return 0;
   1978     }
   1979 
   1980     if (_hasBCP47Extension(localeID)) {
   1981         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
   1982     } else {
   1983         if (localeID==NULL) {
   1984            localeID=uloc_getDefault();
   1985         }
   1986         tmpLocaleID=localeID;
   1987     }
   1988 
   1989     /* Skip the language */
   1990     ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
   1991     if(_isIDSeparator(*tmpLocaleID)) {
   1992         const char *scriptID;
   1993         /* Skip the script if available */
   1994         ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
   1995         if(scriptID != tmpLocaleID+1) {
   1996             /* Found optional script */
   1997             tmpLocaleID = scriptID;
   1998         }
   1999         /* Skip the Country */
   2000         if (_isIDSeparator(*tmpLocaleID)) {
   2001             const char *cntryID;
   2002             ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
   2003             if (cntryID != tmpLocaleID+1) {
   2004                 /* Found optional country */
   2005                 tmpLocaleID = cntryID;
   2006             }
   2007             if(_isIDSeparator(*tmpLocaleID)) {
   2008                 /* If there was no country ID, skip a possible extra IDSeparator */
   2009                 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
   2010                     tmpLocaleID++;
   2011                 }
   2012                 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
   2013             }
   2014         }
   2015     }
   2016 
   2017     /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
   2018     /* if we do not have a variant tag yet then try a POSIX variant after '@' */
   2019 /*
   2020     if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
   2021         i=_getVariant(localeID+1, '@', variant, variantCapacity);
   2022     }
   2023 */
   2024     return u_terminateChars(variant, variantCapacity, i, err);
   2025 }
   2026 
   2027 U_CAPI int32_t  U_EXPORT2
   2028 uloc_getName(const char* localeID,
   2029              char* name,
   2030              int32_t nameCapacity,
   2031              UErrorCode* err)
   2032 {
   2033     return _canonicalize(localeID, name, nameCapacity, 0, err);
   2034 }
   2035 
   2036 U_CAPI int32_t  U_EXPORT2
   2037 uloc_getBaseName(const char* localeID,
   2038                  char* name,
   2039                  int32_t nameCapacity,
   2040                  UErrorCode* err)
   2041 {
   2042     return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
   2043 }
   2044 
   2045 U_CAPI int32_t  U_EXPORT2
   2046 uloc_canonicalize(const char* localeID,
   2047                   char* name,
   2048                   int32_t nameCapacity,
   2049                   UErrorCode* err)
   2050 {
   2051     return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
   2052 }
   2053 
   2054 U_CAPI const char*  U_EXPORT2
   2055 uloc_getISO3Language(const char* localeID)
   2056 {
   2057     int16_t offset;
   2058     char lang[ULOC_LANG_CAPACITY];
   2059     UErrorCode err = U_ZERO_ERROR;
   2060 
   2061     if (localeID == NULL)
   2062     {
   2063         localeID = uloc_getDefault();
   2064     }
   2065     uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
   2066     if (U_FAILURE(err))
   2067         return "";
   2068     offset = _findIndex(LANGUAGES, lang);
   2069     if (offset < 0)
   2070         return "";
   2071     return LANGUAGES_3[offset];
   2072 }
   2073 
   2074 U_CAPI const char*  U_EXPORT2
   2075 uloc_getISO3Country(const char* localeID)
   2076 {
   2077     int16_t offset;
   2078     char cntry[ULOC_LANG_CAPACITY];
   2079     UErrorCode err = U_ZERO_ERROR;
   2080 
   2081     if (localeID == NULL)
   2082     {
   2083         localeID = uloc_getDefault();
   2084     }
   2085     uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
   2086     if (U_FAILURE(err))
   2087         return "";
   2088     offset = _findIndex(COUNTRIES, cntry);
   2089     if (offset < 0)
   2090         return "";
   2091 
   2092     return COUNTRIES_3[offset];
   2093 }
   2094 
   2095 U_CAPI uint32_t  U_EXPORT2
   2096 uloc_getLCID(const char* localeID)
   2097 {
   2098     UErrorCode status = U_ZERO_ERROR;
   2099     char       langID[ULOC_FULLNAME_CAPACITY];
   2100 
   2101     uloc_getLanguage(localeID, langID, sizeof(langID), &status);
   2102     if (U_FAILURE(status)) {
   2103         return 0;
   2104     }
   2105 
   2106     return uprv_convertToLCID(langID, localeID, &status);
   2107 }
   2108 
   2109 U_CAPI int32_t U_EXPORT2
   2110 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
   2111                 UErrorCode *status)
   2112 {
   2113     int32_t length;
   2114     const char *posix = uprv_convertToPosix(hostid, status);
   2115     if (U_FAILURE(*status) || posix == NULL) {
   2116         return 0;
   2117     }
   2118     length = (int32_t)uprv_strlen(posix);
   2119     if (length+1 > localeCapacity) {
   2120         *status = U_BUFFER_OVERFLOW_ERROR;
   2121     }
   2122     else {
   2123         uprv_strcpy(locale, posix);
   2124     }
   2125     return length;
   2126 }
   2127 
   2128 /* ### Default locale **************************************************/
   2129 
   2130 U_CAPI const char*  U_EXPORT2
   2131 uloc_getDefault()
   2132 {
   2133     return locale_get_default();
   2134 }
   2135 
   2136 U_CAPI void  U_EXPORT2
   2137 uloc_setDefault(const char*   newDefaultLocale,
   2138              UErrorCode* err)
   2139 {
   2140     if (U_FAILURE(*err))
   2141         return;
   2142     /* the error code isn't currently used for anything by this function*/
   2143 
   2144     /* propagate change to C++ */
   2145     locale_set_default(newDefaultLocale);
   2146 }
   2147 
   2148 /**
   2149  * Returns a list of all language codes defined in ISO 639.  This is a pointer
   2150  * to an array of pointers to arrays of char.  All of these pointers are owned
   2151  * by ICU-- do not delete them, and do not write through them.  The array is
   2152  * terminated with a null pointer.
   2153  */
   2154 U_CAPI const char* const*  U_EXPORT2
   2155 uloc_getISOLanguages()
   2156 {
   2157     return LANGUAGES;
   2158 }
   2159 
   2160 /**
   2161  * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
   2162  * pointer to an array of pointers to arrays of char.  All of these pointers are
   2163  * owned by ICU-- do not delete them, and do not write through them.  The array is
   2164  * terminated with a null pointer.
   2165  */
   2166 U_CAPI const char* const*  U_EXPORT2
   2167 uloc_getISOCountries()
   2168 {
   2169     return COUNTRIES;
   2170 }
   2171 
   2172 
   2173 /* this function to be moved into cstring.c later */
   2174 static char gDecimal = 0;
   2175 
   2176 static /* U_CAPI */
   2177 double
   2178 /* U_EXPORT2 */
   2179 _uloc_strtod(const char *start, char **end) {
   2180     char *decimal;
   2181     char *myEnd;
   2182     char buf[30];
   2183     double rv;
   2184     if (!gDecimal) {
   2185         char rep[5];
   2186         /* For machines that decide to change the decimal on you,
   2187         and try to be too smart with localization.
   2188         This normally should be just a '.'. */
   2189         sprintf(rep, "%+1.1f", 1.0);
   2190         gDecimal = rep[2];
   2191     }
   2192 
   2193     if(gDecimal == '.') {
   2194         return uprv_strtod(start, end); /* fall through to OS */
   2195     } else {
   2196         uprv_strncpy(buf, start, 29);
   2197         buf[29]=0;
   2198         decimal = uprv_strchr(buf, '.');
   2199         if(decimal) {
   2200             *decimal = gDecimal;
   2201         } else {
   2202             return uprv_strtod(start, end); /* no decimal point */
   2203         }
   2204         rv = uprv_strtod(buf, &myEnd);
   2205         if(end) {
   2206             *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
   2207         }
   2208         return rv;
   2209     }
   2210 }
   2211 
   2212 typedef struct {
   2213     float q;
   2214     int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
   2215     char *locale;
   2216 } _acceptLangItem;
   2217 
   2218 static int32_t U_CALLCONV
   2219 uloc_acceptLanguageCompare(const void *context, const void *a, const void *b)
   2220 {
   2221     const _acceptLangItem *aa = (const _acceptLangItem*)a;
   2222     const _acceptLangItem *bb = (const _acceptLangItem*)b;
   2223 
   2224     int32_t rc = 0;
   2225     if(bb->q < aa->q) {
   2226         rc = -1;  /* A > B */
   2227     } else if(bb->q > aa->q) {
   2228         rc = 1;   /* A < B */
   2229     } else {
   2230         rc = 0;   /* A = B */
   2231     }
   2232 
   2233     if(rc==0) {
   2234         rc = uprv_stricmp(aa->locale, bb->locale);
   2235     }
   2236 
   2237 #if defined(ULOC_DEBUG)
   2238     /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
   2239     aa->locale, aa->q,
   2240     bb->locale, bb->q,
   2241     rc);*/
   2242 #endif
   2243 
   2244     return rc;
   2245 }
   2246 
   2247 /*
   2248 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
   2249 */
   2250 
   2251 U_CAPI int32_t U_EXPORT2
   2252 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
   2253                             const char *httpAcceptLanguage,
   2254                             UEnumeration* availableLocales,
   2255                             UErrorCode *status)
   2256 {
   2257     _acceptLangItem *j;
   2258     _acceptLangItem smallBuffer[30];
   2259     char **strs;
   2260     char tmp[ULOC_FULLNAME_CAPACITY +1];
   2261     int32_t n = 0;
   2262     const char *itemEnd;
   2263     const char *paramEnd;
   2264     const char *s;
   2265     const char *t;
   2266     int32_t res;
   2267     int32_t i;
   2268     int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
   2269     int32_t jSize;
   2270     char *tempstr; /* Use for null pointer check */
   2271 
   2272     j = smallBuffer;
   2273     jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
   2274     if(U_FAILURE(*status)) {
   2275         return -1;
   2276     }
   2277 
   2278     for(s=httpAcceptLanguage;s&&*s;) {
   2279         while(isspace(*s)) /* eat space at the beginning */
   2280             s++;
   2281         itemEnd=uprv_strchr(s,',');
   2282         paramEnd=uprv_strchr(s,';');
   2283         if(!itemEnd) {
   2284             itemEnd = httpAcceptLanguage+l; /* end of string */
   2285         }
   2286         if(paramEnd && paramEnd<itemEnd) {
   2287             /* semicolon (;) is closer than end (,) */
   2288             t = paramEnd+1;
   2289             if(*t=='q') {
   2290                 t++;
   2291             }
   2292             while(isspace(*t)) {
   2293                 t++;
   2294             }
   2295             if(*t=='=') {
   2296                 t++;
   2297             }
   2298             while(isspace(*t)) {
   2299                 t++;
   2300             }
   2301             j[n].q = (float)_uloc_strtod(t,NULL);
   2302         } else {
   2303             /* no semicolon - it's 1.0 */
   2304             j[n].q = 1.0f;
   2305             paramEnd = itemEnd;
   2306         }
   2307         j[n].dummy=0;
   2308         /* eat spaces prior to semi */
   2309         for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
   2310             ;
   2311         /* Check for null pointer from uprv_strndup */
   2312         tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
   2313         if (tempstr == NULL) {
   2314             *status = U_MEMORY_ALLOCATION_ERROR;
   2315             return -1;
   2316         }
   2317         j[n].locale = tempstr;
   2318         uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
   2319         if(strcmp(j[n].locale,tmp)) {
   2320             uprv_free(j[n].locale);
   2321             j[n].locale=uprv_strdup(tmp);
   2322         }
   2323 #if defined(ULOC_DEBUG)
   2324         /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
   2325 #endif
   2326         n++;
   2327         s = itemEnd;
   2328         while(*s==',') { /* eat duplicate commas */
   2329             s++;
   2330         }
   2331         if(n>=jSize) {
   2332             if(j==smallBuffer) {  /* overflowed the small buffer. */
   2333                 j = uprv_malloc(sizeof(j[0])*(jSize*2));
   2334                 if(j!=NULL) {
   2335                     uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
   2336                 }
   2337 #if defined(ULOC_DEBUG)
   2338                 fprintf(stderr,"malloced at size %d\n", jSize);
   2339 #endif
   2340             } else {
   2341                 j = uprv_realloc(j, sizeof(j[0])*jSize*2);
   2342 #if defined(ULOC_DEBUG)
   2343                 fprintf(stderr,"re-alloced at size %d\n", jSize);
   2344 #endif
   2345             }
   2346             jSize *= 2;
   2347             if(j==NULL) {
   2348                 *status = U_MEMORY_ALLOCATION_ERROR;
   2349                 return -1;
   2350             }
   2351         }
   2352     }
   2353     uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
   2354     if(U_FAILURE(*status)) {
   2355         if(j != smallBuffer) {
   2356 #if defined(ULOC_DEBUG)
   2357             fprintf(stderr,"freeing j %p\n", j);
   2358 #endif
   2359             uprv_free(j);
   2360         }
   2361         return -1;
   2362     }
   2363     strs = uprv_malloc((size_t)(sizeof(strs[0])*n));
   2364     /* Check for null pointer */
   2365     if (strs == NULL) {
   2366         uprv_free(j); /* Free to avoid memory leak */
   2367         *status = U_MEMORY_ALLOCATION_ERROR;
   2368         return -1;
   2369     }
   2370     for(i=0;i<n;i++) {
   2371 #if defined(ULOC_DEBUG)
   2372         /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
   2373 #endif
   2374         strs[i]=j[i].locale;
   2375     }
   2376     res =  uloc_acceptLanguage(result, resultAvailable, outResult,
   2377         (const char**)strs, n, availableLocales, status);
   2378     for(i=0;i<n;i++) {
   2379         uprv_free(strs[i]);
   2380     }
   2381     uprv_free(strs);
   2382     if(j != smallBuffer) {
   2383 #if defined(ULOC_DEBUG)
   2384         fprintf(stderr,"freeing j %p\n", j);
   2385 #endif
   2386         uprv_free(j);
   2387     }
   2388     return res;
   2389 }
   2390 
   2391 
   2392 U_CAPI int32_t U_EXPORT2
   2393 uloc_acceptLanguage(char *result, int32_t resultAvailable,
   2394                     UAcceptResult *outResult, const char **acceptList,
   2395                     int32_t acceptListCount,
   2396                     UEnumeration* availableLocales,
   2397                     UErrorCode *status)
   2398 {
   2399     int32_t i,j;
   2400     int32_t len;
   2401     int32_t maxLen=0;
   2402     char tmp[ULOC_FULLNAME_CAPACITY+1];
   2403     const char *l;
   2404     char **fallbackList;
   2405     if(U_FAILURE(*status)) {
   2406         return -1;
   2407     }
   2408     fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount));
   2409     if(fallbackList==NULL) {
   2410         *status = U_MEMORY_ALLOCATION_ERROR;
   2411         return -1;
   2412     }
   2413     for(i=0;i<acceptListCount;i++) {
   2414 #if defined(ULOC_DEBUG)
   2415         fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
   2416 #endif
   2417         while((l=uenum_next(availableLocales, NULL, status))) {
   2418 #if defined(ULOC_DEBUG)
   2419             fprintf(stderr,"  %s\n", l);
   2420 #endif
   2421             len = (int32_t)uprv_strlen(l);
   2422             if(!uprv_strcmp(acceptList[i], l)) {
   2423                 if(outResult) {
   2424                     *outResult = ULOC_ACCEPT_VALID;
   2425                 }
   2426 #if defined(ULOC_DEBUG)
   2427                 fprintf(stderr, "MATCH! %s\n", l);
   2428 #endif
   2429                 if(len>0) {
   2430                     uprv_strncpy(result, l, uprv_min(len, resultAvailable));
   2431                 }
   2432                 for(j=0;j<i;j++) {
   2433                     uprv_free(fallbackList[j]);
   2434                 }
   2435                 uprv_free(fallbackList);
   2436                 return u_terminateChars(result, resultAvailable, len, status);
   2437             }
   2438             if(len>maxLen) {
   2439                 maxLen = len;
   2440             }
   2441         }
   2442         uenum_reset(availableLocales, status);
   2443         /* save off parent info */
   2444         if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
   2445             fallbackList[i] = uprv_strdup(tmp);
   2446         } else {
   2447             fallbackList[i]=0;
   2448         }
   2449     }
   2450 
   2451     for(maxLen--;maxLen>0;maxLen--) {
   2452         for(i=0;i<acceptListCount;i++) {
   2453             if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
   2454 #if defined(ULOC_DEBUG)
   2455                 fprintf(stderr,"Try: [%s]", fallbackList[i]);
   2456 #endif
   2457                 while((l=uenum_next(availableLocales, NULL, status))) {
   2458 #if defined(ULOC_DEBUG)
   2459                     fprintf(stderr,"  %s\n", l);
   2460 #endif
   2461                     len = (int32_t)uprv_strlen(l);
   2462                     if(!uprv_strcmp(fallbackList[i], l)) {
   2463                         if(outResult) {
   2464                             *outResult = ULOC_ACCEPT_FALLBACK;
   2465                         }
   2466 #if defined(ULOC_DEBUG)
   2467                         fprintf(stderr, "fallback MATCH! %s\n", l);
   2468 #endif
   2469                         if(len>0) {
   2470                             uprv_strncpy(result, l, uprv_min(len, resultAvailable));
   2471                         }
   2472                         for(j=0;j<acceptListCount;j++) {
   2473                             uprv_free(fallbackList[j]);
   2474                         }
   2475                         uprv_free(fallbackList);
   2476                         return u_terminateChars(result, resultAvailable, len, status);
   2477                     }
   2478                 }
   2479                 uenum_reset(availableLocales, status);
   2480 
   2481                 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
   2482                     uprv_free(fallbackList[i]);
   2483                     fallbackList[i] = uprv_strdup(tmp);
   2484                 } else {
   2485                     uprv_free(fallbackList[i]);
   2486                     fallbackList[i]=0;
   2487                 }
   2488             }
   2489         }
   2490         if(outResult) {
   2491             *outResult = ULOC_ACCEPT_FAILED;
   2492         }
   2493     }
   2494     for(i=0;i<acceptListCount;i++) {
   2495         uprv_free(fallbackList[i]);
   2496     }
   2497     uprv_free(fallbackList);
   2498     return -1;
   2499 }
   2500 
   2501 /*eof*/
   2502