Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 1997-2010, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *
      7 * File ULOC.CPP
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   04/01/97    aliu        Creation.
     13 *   08/21/98    stephen     JDK 1.2 sync
     14 *   12/08/98    rtg         New Locale implementation and C API
     15 *   03/15/99    damiba      overhaul.
     16 *   04/06/99    stephen     changed setDefault() to realloc and copy
     17 *   06/14/99    stephen     Changed calls to ures_open for new params
     18 *   07/21/99    stephen     Modified setDefault() to propagate to C++
     19 *   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
     20 *                           brought canonicalization code into line with spec
     21 *****************************************************************************/
     22 
     23 /*
     24    POSIX's locale format, from putil.c: [no spaces]
     25 
     26      ll [ _CC ] [ . MM ] [ @ VV]
     27 
     28      l = lang, C = ctry, M = charmap, V = variant
     29 */
     30 
     31 #include "unicode/utypes.h"
     32 #include "unicode/ustring.h"
     33 #include "unicode/uloc.h"
     34 
     35 #include "putilimp.h"
     36 #include "ustr_imp.h"
     37 #include "ulocimp.h"
     38 #include "umutex.h"
     39 #include "cstring.h"
     40 #include "cmemory.h"
     41 #include "ucln_cmn.h"
     42 #include "locmap.h"
     43 #include "uarrsort.h"
     44 #include "uenumimp.h"
     45 #include "uassert.h"
     46 
     47 #include <stdio.h> /* for sprintf */
     48 
     49 /* ### Declarations **************************************************/
     50 
/* Locale stuff from locid.cpp */
/*
 * Setter/getter pair for the default locale ID.  Only the prototypes are
 * visible here; NOTE(review): confirm in locid.cpp who owns the string
 * returned by locale_get_default() and whether the setter copies its input.
 */
U_CFUNC void locale_set_default(const char *id);
U_CFUNC const char *locale_get_default(void);
/*
 * Keyword extraction helper shared across translation units.
 * NOTE(review): judging by the parameter names, keywords/values are
 * caller-supplied output buffers with their capacities, valLen receives the
 * length written to values, and status carries the ICU error code -- confirm
 * against the definition before relying on this.
 */
U_CFUNC int32_t
locale_getKeywords(const char *localeID,
            char prev,
            char *keywords, int32_t keywordCapacity,
            char *values, int32_t valuesCapacity, int32_t *valLen,
            UBool valuesToo,
            UErrorCode *status);
     61 
     62 /* ### Data tables **************************************************/
     63 
     64 /**
     65  * Table of language codes, both 2- and 3-letter, with preference
     66  * given to 2-letter codes where possible.  Includes 3-letter codes
     67  * that lack a 2-letter equivalent.
     68  *
     69  * This list must be in sorted order.  This list is returned directly
     70  * to the user by some API.
     71  *
     72  * This list must be kept in sync with LANGUAGES_3, with corresponding
     73  * entries matched.
     74  *
     75  * This table should be terminated with a NULL entry, followed by a
     76  * second list, and another NULL entry.  The first list is visible to
     77  * user code when this array is returned by API.  The second list
     78  * contains codes we support, but do not expose through user API.
     79  *
     80  * Notes
     81  *
     82  * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
     83  * include the revisions up to 2001/7/27 *CWB*
     84  *
     85  * The 3 character codes are the terminology codes like RFC 3066.  This
     86  * is compatible with prior ICU codes
     87  *
 * "in", "iw", "ji", "jw" and "sh" have been withdrawn.  They are still
 * in the table, but now in the second list at the end, because their
 * 3-character codes are duplicates.  This avoids bad searches going
 * from 3- to 2-character codes.
     92  *
     93  * The range qaa-qtz is reserved for local use
     94  */
static const char * const LANGUAGES[] = {
    /* First list: sorted codes; entry i corresponds to LANGUAGES_3[i]. */
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",
    "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",
    "ang", "anp", "apa",
    "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",
    "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",
    "bai", "bal", "ban", "bas", "bat", "be",  "bej",
    "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",
    "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",
    "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",
    "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",
    "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",
    "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",
    "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",
    "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",
    "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",
    "fr",  "frm", "fro", "frr", "frs", "fur", "fy",
    "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
    "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "gu",  "gv", "gwi",
    "ha",  "hai", "haw", "he",  "hi",  "hil", "him",
    "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",
    "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",
    "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",
    "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",
    "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",
    "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",
    "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",
    "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",
    "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",
    "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",
    "mdf", "mdr", "men", "mfe", "mg",  "mga", "mh",  "mi",  "mic", "min",
    "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",
    "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",
    "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",
    "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",
    "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",
    "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",
    "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",
    "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",
    "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",
    "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",
    "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",
    "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
    "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
    "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",
    "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",
    "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",
    "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr", "trv",
    "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
    "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",
    "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",
    "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",
    "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",
    "zu",  "zun", "zxx", "zza",
NULL, /* terminates the first list */
    /* Second list: same index correspondence with the second list of LANGUAGES_3. */
    "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
NULL  /* terminates the second list */
};
    159 static const char* const DEPRECATED_LANGUAGES[]={
    160     "in", "iw", "ji", "jw", NULL, NULL
    161 };
    162 static const char* const REPLACEMENT_LANGUAGES[]={
    163     "id", "he", "yi", "jv", NULL, NULL
    164 };
    165 
    166 /**
    167  * Table of 3-letter language codes.
    168  *
    169  * This is a lookup table used to convert 3-letter language codes to
    170  * their 2-letter equivalent, where possible.  It must be kept in sync
    171  * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
    172  * same language as LANGUAGES_3[i].  The commented-out lines are
    173  * copied from LANGUAGES to make eyeballing this baby easier.
    174  *
    175  * Where a 3-letter language code has no 2-letter equivalent, the
    176  * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
    177  *
    178  * This table should be terminated with a NULL entry, followed by a
    179  * second list, and another NULL entry.  The two lists correspond to
    180  * the two lists in LANGUAGES.
    181  */
    182 static const char * const LANGUAGES_3[] = {
    183 /*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
    184     "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
    185 /*  "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",    */
    186     "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
    187 /*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",    */
    188     "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
    189 /*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
    190     "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
    191 /*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
    192     "bai", "bal", "ban", "bas", "bat", "bel", "bej",
    193 /*  "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
    194     "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
    195 /*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",     */
    196     "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
    197 /*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
    198     "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
    199 /*  "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",    */
    200     "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
    201 /*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
    202     "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
    203 /*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
    204     "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
    205 /*  "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",    */
    206     "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
    207 /*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",    */
    208     "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
    209 /*  "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
    210     "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
    211 /*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
    212     "enm", "epo", "spa", "est", "eus", "ewo", "fas",
    213 /*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",    */
    214     "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
    215 /*  "fr",  "frm", "fro", "frr", "frs", "fur", "fy",  "ga",  "gaa", "gay",    */
    216     "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
    217 /*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
    218     "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
    219 /*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",     */
    220     "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
    221 /*  "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",    */
    222     "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
    223 /*  "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",     */
    224     "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
    225 /*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
    226     "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
    227 /*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",      */
    228     "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
    229 /*  "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",   */
    230     "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
    231 /*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",*/
    232     "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",
    233 /*  "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",     */
    234     "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
    235 /*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",     */
    236     "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
    237 /*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",    */
    238     "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
    239 /*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
    240     "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
    241 /*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",    */
    242     "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
    243 /*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
    244     "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
    245 /*  "mdf", "mdr", "men", "mfe", "mg",  "mga", "mh",  "mi",  "mic", "min",    */
    246     "mdf", "mdr", "men", "mfe", "mlg", "mga", "mah", "mri", "mic", "min",
    247 /*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
    248     "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
    249 /*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",    */
    250     "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
    251 /*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",    */
    252     "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
    253 /*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
    254     "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
    255 /*  "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",    */
    256     "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub",
    257 /*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
    258     "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
    259 /*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
    260     "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
    261 /*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
    262     "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    263 /*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
    264     "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
    265 /*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",    */
    266     "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
    267 /*  "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",    */
    268     "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
    269 /*  "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",    */
    270     "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
    271 /*  "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",    */
    272     "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
    273 /*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
    274     "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
    275 /*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
    276     "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
    277 /*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
    278     "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
    279 /*  "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",    */
    280     "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter",
    281 /*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
    282     "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
    283 /*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr", "trv",    */
    284     "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
    285 /*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",     */
    286     "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
    287 /*  "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
    288     "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
    289 /*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",    */
    290     "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
    291 /*  "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",    */
    292     "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
    293 /*  "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",    */
    294     "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd",
    295 /*  "zu",  "zun", "zxx", "zza",                                         */
    296     "zul", "zun", "zxx", "zza",
    297 NULL,
    298 /*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    299     "ind", "heb", "yid", "jaw", "srp",
    300 NULL
    301 };
    302 
    303 /**
    304  * Table of 2-letter country codes.
    305  *
    306  * This list must be in sorted order.  This list is returned directly
    307  * to the user by some API.
    308  *
    309  * This list must be kept in sync with COUNTRIES_3, with corresponding
    310  * entries matched.
    311  *
    312  * This table should be terminated with a NULL entry, followed by a
    313  * second list, and another NULL entry.  The first list is visible to
    314  * user code when this array is returned by API.  The second list
    315  * contains codes we support, but do not expose through user API.
    316  *
    317  * Notes:
    318  *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html.  Added
 * new codes, keeping the old ones for compatibility.  Updated to include
 * the 1999/12/03 revisions. *CWB*
    323  *
    324  * RO(ROM) is now RO(ROU) according to
    325  * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
    326  */
static const char * const COUNTRIES[] = {
    /* First list: sorted codes; entry i corresponds to COUNTRIES_3[i]. */
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",
    "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
NULL, /* terminates the first list */
    /* Second list: "RO" reappears here so that it also pairs with the old
       3-letter code "ROM" in the second list of COUNTRIES_3. */
    "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   /* obsolete country codes */
NULL  /* terminates the second list */
};
    362 
    363 static const char* const DEPRECATED_COUNTRIES[] ={
    364     "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */
    365 };
    366 static const char* const REPLACEMENT_COUNTRIES[] = {
    367 /*  "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
    368     "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", NULL, NULL  /* replacement country codes */
    369 };
    370 
    371 /**
    372  * Table of 3-letter country codes.
    373  *
    374  * This is a lookup table used to convert 3-letter country codes to
    375  * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
    376  * For all valid i, COUNTRIES[i] must refer to the same country as
    377  * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
    378  * to make eyeballing this baby easier.
    379  *
    380  * This table should be terminated with a NULL entry, followed by a
    381  * second list, and another NULL entry.  The two lists correspond to
    382  * the two lists in COUNTRIES.
    383  */
static const char * const COUNTRIES_3[] = {
/*
 * First list.  Each commented row repeats the COUNTRIES codes for the data
 * row beneath it; entries correspond by index.
 */
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",     */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
/*  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL, /* terminates the first list */
/*  "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   */
    "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
NULL  /* terminates the second list */
};
    450 
/* One row of CANONICALIZE_MAP: an input ID, its canonical form, and an
   optional keyword/value pair that accompanies the canonical form. */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if keyword==NULL */
} CanonicalizationMap;
    457 
    458 /**
    459  * A map to canonicalize locale IDs.  This handles a variety of
    460  * different semantic kinds of transformations.
    461  */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    /*
     * Columns: input ID, canonical ID, keyword, keyword value.
     * Rows with a NULL keyword only rename the ID.  The *_PREEURO rows drop
     * the PREEURO variant and pair the result with a "currency" keyword
     * holding the former national currency code.
     */
    { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
    { "c",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
    { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
    { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
    { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
    { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
    { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
    { "cel_GAULISH",    "cel__GAULISH", NULL, NULL }, /* registered name */
    { "de_1901",        "de__1901", NULL, NULL }, /* registered name */
    { "de_1906",        "de__1906", NULL, NULL }, /* registered name */
    { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
    { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
    { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
    { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
    { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
    { "en_BOONT",       "en__BOONT", NULL, NULL }, /* registered name */
    { "en_SCOUSE",      "en__SCOUSE", NULL, NULL }, /* registered name */
    { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
    { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
    { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
    { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
    { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
    { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
    { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
    { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
    { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
    { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
    { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
    { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
    { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
    { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
    { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
    { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
    { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
    { "sl_ROZAJ",       "sl__ROZAJ", NULL, NULL }, /* registered name */
    { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
    { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
    { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
    { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
    { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
    { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
    { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
    { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
    { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
    { "zh_GAN",         "zh__GAN", NULL, NULL }, /* registered name */
    { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
    { "zh_HAKKA",       "zh__HAKKA", NULL, NULL }, /* registered name */
    { "zh_MIN",         "zh__MIN", NULL, NULL }, /* registered name */
    { "zh_MIN_NAN",     "zh__MINNAN", NULL, NULL }, /* registered name */
    { "zh_WUU",         "zh__WUU", NULL, NULL }, /* registered name */
    { "zh_XIANG",       "zh__XIANG", NULL, NULL }, /* registered name */
    { "zh_YUE",         "zh__YUE", NULL, NULL }, /* registered name */
};
    517 
/* One row of VARIANT_MAP: a variant name and the keyword/value pair it stands for. */
typedef struct VariantMap {
    const char *variant;     /* input variant */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if keyword==NULL */
} VariantMap;
    523 
/* Variant names that have a keyword equivalent (columns: variant, keyword, value). */
static const VariantMap VARIANT_MAP[] = {
    { "EURO",   "currency", "EUR" },
    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    { "STROKE", "collation", "stroke" }  /* Solaris variant */
};
    529 
    530 /* ### BCP47 Conversion *******************************************/
    531 /* Test if the locale id has BCP47 u extension and does not have '@' */
    532 #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
    533 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
    534 #define _ConvertBCP47(finalID, id, buffer, length,err) \
    535         if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
    536             finalID=id; \
    537         } else { \
    538             finalID=buffer; \
    539         }
    540 /* Gets the size of the shortest subtag in the given localeID. */
    541 static int32_t getShortestSubtagLength(const char *localeID) {
    542     int32_t localeIDLength = uprv_strlen(localeID);
    543     int32_t length = localeIDLength;
    544     int32_t tmpLength = 0;
    545     int32_t i;
    546     UBool reset = TRUE;
    547 
    548     for (i = 0; i < localeIDLength; i++) {
    549         if (localeID[i] != '_' && localeID[i] != '-') {
    550             if (reset) {
    551                 tmpLength = 0;
    552                 reset = FALSE;
    553             }
    554             tmpLength++;
    555         } else {
    556             if (tmpLength != 0 && tmpLength < length) {
    557                 length = tmpLength;
    558             }
    559             reset = TRUE;
    560         }
    561     }
    562 
    563     return length;
    564 }
    565 
    566 /* ### Keywords **************************************************/
    567 
/* Size of the char buffers that hold one canonicalized keyword name,
   including its terminating NUL; longer names are rejected. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Number of entries in the keywordList array used by _getKeywords(). */
#define ULOC_MAX_NO_KEYWORDS 25
    570 
    571 U_CAPI const char * U_EXPORT2
    572 locale_getKeywordsStart(const char *localeID) {
    573     const char *result = NULL;
    574     if((result = uprv_strchr(localeID, '@')) != NULL) {
    575         return result;
    576     }
    577 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    578     else {
    579         /* We do this because the @ sign is variant, and the @ sign used on one
    580         EBCDIC machine won't be compiled the same way on other EBCDIC based
    581         machines. */
    582         static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
    583         const uint8_t *charToFind = ebcdicSigns;
    584         while(*charToFind) {
    585             if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
    586                 return result;
    587             }
    588             charToFind++;
    589         }
    590     }
    591 #endif
    592     return NULL;
    593 }
    594 
    595 /**
    596  * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
    597  * @param keywordName incoming name to be canonicalized
    598  * @param status return status (keyword too long)
    599  * @return length of the keyword name
    600  */
    601 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
    602 {
    603   int32_t i;
    604   int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
    605 
    606   if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
    607     /* keyword name too long for internal buffer */
    608     *status = U_INTERNAL_PROGRAM_ERROR;
    609           return 0;
    610   }
    611 
    612   /* normalize the keyword name */
    613   for(i = 0; i < keywordNameLen; i++) {
    614     buf[i] = uprv_tolower(keywordName[i]);
    615   }
    616   buf[i] = 0;
    617 
    618   return keywordNameLen;
    619 }
    620 
    621 typedef struct {
    622     char keyword[ULOC_KEYWORD_BUFFER_LEN];
    623     int32_t keywordLen;
    624     const char *valueStart;
    625     int32_t valueLen;
    626 } KeywordStruct;
    627 
    628 static int32_t U_CALLCONV
    629 compareKeywordStructs(const void *context, const void *left, const void *right) {
    630     const char* leftString = ((const KeywordStruct *)left)->keyword;
    631     const char* rightString = ((const KeywordStruct *)right)->keyword;
    632     return uprv_strcmp(leftString, rightString);
    633 }
    634 
    635 /**
    636  * Both addKeyword and addValue must already be in canonical form.
    637  * Either both addKeyword and addValue are NULL, or neither is NULL.
    638  * If they are not NULL they must be zero terminated.
    639  * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
    640  */
    641 static int32_t
    642 _getKeywords(const char *localeID,
    643              char prev,
    644              char *keywords, int32_t keywordCapacity,
    645              char *values, int32_t valuesCapacity, int32_t *valLen,
    646              UBool valuesToo,
    647              const char* addKeyword,
    648              const char* addValue,
    649              UErrorCode *status)
    650 {
    651     KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
    652 
    653     int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
    654     int32_t numKeywords = 0;
    655     const char* pos = localeID;
    656     const char* equalSign = NULL;
    657     const char* semicolon = NULL;
    658     int32_t i = 0, j, n;
    659     int32_t keywordsLen = 0;
    660     int32_t valuesLen = 0;
    661 
    662     if(prev == '@') { /* start of keyword definition */
    663         /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
    664         do {
    665             UBool duplicate = FALSE;
    666             /* skip leading spaces */
    667             while(*pos == ' ') {
    668                 pos++;
    669             }
    670             if (!*pos) { /* handle trailing "; " */
    671                 break;
    672             }
    673             if(numKeywords == maxKeywords) {
    674                 *status = U_INTERNAL_PROGRAM_ERROR;
    675                 return 0;
    676             }
    677             equalSign = uprv_strchr(pos, '=');
    678             semicolon = uprv_strchr(pos, ';');
    679             /* lack of '=' [foo@currency] is illegal */
    680             /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
    681             if(!equalSign || (semicolon && semicolon<equalSign)) {
    682                 *status = U_INVALID_FORMAT_ERROR;
    683                 return 0;
    684             }
    685             /* need to normalize both keyword and keyword name */
    686             if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
    687                 /* keyword name too long for internal buffer */
    688                 *status = U_INTERNAL_PROGRAM_ERROR;
    689                 return 0;
    690             }
    691             for(i = 0, n = 0; i < equalSign - pos; ++i) {
    692                 if (pos[i] != ' ') {
    693                     keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
    694                 }
    695             }
    696 
    697             /* zero-length keyword is an error. */
    698             if (n == 0) {
    699                 *status = U_INVALID_FORMAT_ERROR;
    700                 return 0;
    701             }
    702 
    703             keywordList[numKeywords].keyword[n] = 0;
    704             keywordList[numKeywords].keywordLen = n;
    705             /* now grab the value part. First we skip the '=' */
    706             equalSign++;
    707             /* then we leading spaces */
    708             while(*equalSign == ' ') {
    709                 equalSign++;
    710             }
    711 
    712             /* Premature end or zero-length value */
    713             if (!equalSign || equalSign == semicolon) {
    714                 *status = U_INVALID_FORMAT_ERROR;
    715                 return 0;
    716             }
    717 
    718             keywordList[numKeywords].valueStart = equalSign;
    719 
    720             pos = semicolon;
    721             i = 0;
    722             if(pos) {
    723                 while(*(pos - i - 1) == ' ') {
    724                     i++;
    725                 }
    726                 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
    727                 pos++;
    728             } else {
    729                 i = (int32_t)uprv_strlen(equalSign);
    730                 while(i && equalSign[i-1] == ' ') {
    731                     i--;
    732                 }
    733                 keywordList[numKeywords].valueLen = i;
    734             }
    735             /* If this is a duplicate keyword, then ignore it */
    736             for (j=0; j<numKeywords; ++j) {
    737                 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
    738                     duplicate = TRUE;
    739                     break;
    740                 }
    741             }
    742             if (!duplicate) {
    743                 ++numKeywords;
    744             }
    745         } while(pos);
    746 
    747         /* Handle addKeyword/addValue. */
    748         if (addKeyword != NULL) {
    749             UBool duplicate = FALSE;
    750             U_ASSERT(addValue != NULL);
    751             /* Search for duplicate; if found, do nothing. Explicit keyword
    752                overrides addKeyword. */
    753             for (j=0; j<numKeywords; ++j) {
    754                 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
    755                     duplicate = TRUE;
    756                     break;
    757                 }
    758             }
    759             if (!duplicate) {
    760                 if (numKeywords == maxKeywords) {
    761                     *status = U_INTERNAL_PROGRAM_ERROR;
    762                     return 0;
    763                 }
    764                 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
    765                 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
    766                 keywordList[numKeywords].valueStart = addValue;
    767                 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
    768                 ++numKeywords;
    769             }
    770         } else {
    771             U_ASSERT(addValue == NULL);
    772         }
    773 
    774         /* now we have a list of keywords */
    775         /* we need to sort it */
    776         uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
    777 
    778         /* Now construct the keyword part */
    779         for(i = 0; i < numKeywords; i++) {
    780             if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
    781                 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
    782                 if(valuesToo) {
    783                     keywords[keywordsLen + keywordList[i].keywordLen] = '=';
    784                 } else {
    785                     keywords[keywordsLen + keywordList[i].keywordLen] = 0;
    786                 }
    787             }
    788             keywordsLen += keywordList[i].keywordLen + 1;
    789             if(valuesToo) {
    790                 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
    791                     uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
    792                 }
    793                 keywordsLen += keywordList[i].valueLen;
    794 
    795                 if(i < numKeywords - 1) {
    796                     if(keywordsLen < keywordCapacity) {
    797                         keywords[keywordsLen] = ';';
    798                     }
    799                     keywordsLen++;
    800                 }
    801             }
    802             if(values) {
    803                 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
    804                     uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
    805                     values[valuesLen + keywordList[i].valueLen] = 0;
    806                 }
    807                 valuesLen += keywordList[i].valueLen + 1;
    808             }
    809         }
    810         if(values) {
    811             values[valuesLen] = 0;
    812             if(valLen) {
    813                 *valLen = valuesLen;
    814             }
    815         }
    816         return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
    817     } else {
    818         return 0;
    819     }
    820 }
    821 
    822 U_CFUNC int32_t
    823 locale_getKeywords(const char *localeID,
    824                    char prev,
    825                    char *keywords, int32_t keywordCapacity,
    826                    char *values, int32_t valuesCapacity, int32_t *valLen,
    827                    UBool valuesToo,
    828                    UErrorCode *status) {
    829     return _getKeywords(localeID, prev, keywords, keywordCapacity,
    830                         values, valuesCapacity, valLen, valuesToo,
    831                         NULL, NULL, status);
    832 }
    833 
    834 U_CAPI int32_t U_EXPORT2
    835 uloc_getKeywordValue(const char* localeID,
    836                      const char* keywordName,
    837                      char* buffer, int32_t bufferCapacity,
    838                      UErrorCode* status)
    839 {
    840     const char* startSearchHere = NULL;
    841     const char* nextSeparator = NULL;
    842     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    843     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    844     int32_t i = 0;
    845     int32_t result = 0;
    846 
    847     if(status && U_SUCCESS(*status) && localeID) {
    848       char tempBuffer[ULOC_FULLNAME_CAPACITY];
    849       const char* tmpLocaleID;
    850 
    851       if (_hasBCP47Extension(localeID)) {
    852           _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
    853       } else {
    854           tmpLocaleID=localeID;
    855       }
    856 
    857       startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
    858       if(startSearchHere == NULL) {
    859           /* no keywords, return at once */
    860           return 0;
    861       }
    862 
    863       locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    864       if(U_FAILURE(*status)) {
    865         return 0;
    866       }
    867 
    868       /* find the first keyword */
    869       while(startSearchHere) {
    870           startSearchHere++;
    871           /* skip leading spaces (allowed?) */
    872           while(*startSearchHere == ' ') {
    873               startSearchHere++;
    874           }
    875           nextSeparator = uprv_strchr(startSearchHere, '=');
    876           /* need to normalize both keyword and keyword name */
    877           if(!nextSeparator) {
    878               break;
    879           }
    880           if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
    881               /* keyword name too long for internal buffer */
    882               *status = U_INTERNAL_PROGRAM_ERROR;
    883               return 0;
    884           }
    885           for(i = 0; i < nextSeparator - startSearchHere; i++) {
    886               localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
    887           }
    888           /* trim trailing spaces */
    889           while(startSearchHere[i-1] == ' ') {
    890               i--;
    891           }
    892           localeKeywordNameBuffer[i] = 0;
    893 
    894           startSearchHere = uprv_strchr(nextSeparator, ';');
    895 
    896           if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
    897               nextSeparator++;
    898               while(*nextSeparator == ' ') {
    899                   nextSeparator++;
    900               }
    901               /* we actually found the keyword. Copy the value */
    902               if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
    903                   while(*(startSearchHere-1) == ' ') {
    904                       startSearchHere--;
    905                   }
    906                   uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
    907                   result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
    908               } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
    909                   i = (int32_t)uprv_strlen(nextSeparator);
    910                   while(nextSeparator[i - 1] == ' ') {
    911                       i--;
    912                   }
    913                   uprv_strncpy(buffer, nextSeparator, i);
    914                   result = u_terminateChars(buffer, bufferCapacity, i, status);
    915               } else {
    916                   /* give a bigger buffer, please */
    917                   *status = U_BUFFER_OVERFLOW_ERROR;
    918                   if(startSearchHere) {
    919                       result = (int32_t)(startSearchHere - nextSeparator);
    920                   } else {
    921                       result = (int32_t)uprv_strlen(nextSeparator);
    922                   }
    923               }
    924               return result;
    925           }
    926       }
    927     }
    928     return 0;
    929 }
    930 
    931 U_CAPI int32_t U_EXPORT2
    932 uloc_setKeywordValue(const char* keywordName,
    933                      const char* keywordValue,
    934                      char* buffer, int32_t bufferCapacity,
    935                      UErrorCode* status)
    936 {
    937     /* TODO: sorting. removal. */
    938     int32_t keywordNameLen;
    939     int32_t keywordValueLen;
    940     int32_t bufLen;
    941     int32_t needLen = 0;
    942     int32_t foundValueLen;
    943     int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
    944     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    945     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    946     int32_t i = 0;
    947     int32_t rc;
    948     char* nextSeparator = NULL;
    949     char* nextEqualsign = NULL;
    950     char* startSearchHere = NULL;
    951     char* keywordStart = NULL;
    952     char *insertHere = NULL;
    953     if(U_FAILURE(*status)) {
    954         return -1;
    955     }
    956     if(bufferCapacity>1) {
    957         bufLen = (int32_t)uprv_strlen(buffer);
    958     } else {
    959         *status = U_ILLEGAL_ARGUMENT_ERROR;
    960         return 0;
    961     }
    962     if(bufferCapacity<bufLen) {
    963         /* The capacity is less than the length?! Is this NULL terminated? */
    964         *status = U_ILLEGAL_ARGUMENT_ERROR;
    965         return 0;
    966     }
    967     if(keywordValue && !*keywordValue) {
    968         keywordValue = NULL;
    969     }
    970     if(keywordValue) {
    971         keywordValueLen = (int32_t)uprv_strlen(keywordValue);
    972     } else {
    973         keywordValueLen = 0;
    974     }
    975     keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    976     if(U_FAILURE(*status)) {
    977         return 0;
    978     }
    979     startSearchHere = (char*)locale_getKeywordsStart(buffer);
    980     if(startSearchHere == NULL || (startSearchHere[1]==0)) {
    981         if(!keywordValue) { /* no keywords = nothing to remove */
    982             return bufLen;
    983         }
    984 
    985         needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
    986         if(startSearchHere) { /* had a single @ */
    987             needLen--; /* already had the @ */
    988             /* startSearchHere points at the @ */
    989         } else {
    990             startSearchHere=buffer+bufLen;
    991         }
    992         if(needLen >= bufferCapacity) {
    993             *status = U_BUFFER_OVERFLOW_ERROR;
    994             return needLen; /* no change */
    995         }
    996         *startSearchHere = '@';
    997         startSearchHere++;
    998         uprv_strcpy(startSearchHere, keywordNameBuffer);
    999         startSearchHere += keywordNameLen;
   1000         *startSearchHere = '=';
   1001         startSearchHere++;
   1002         uprv_strcpy(startSearchHere, keywordValue);
   1003         startSearchHere+=keywordValueLen;
   1004         return needLen;
   1005     } /* end shortcut - no @ */
   1006 
   1007     keywordStart = startSearchHere;
   1008     /* search for keyword */
   1009     while(keywordStart) {
   1010         keywordStart++;
   1011         /* skip leading spaces (allowed?) */
   1012         while(*keywordStart == ' ') {
   1013             keywordStart++;
   1014         }
   1015         nextEqualsign = uprv_strchr(keywordStart, '=');
   1016         /* need to normalize both keyword and keyword name */
   1017         if(!nextEqualsign) {
   1018             break;
   1019         }
   1020         if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
   1021             /* keyword name too long for internal buffer */
   1022             *status = U_INTERNAL_PROGRAM_ERROR;
   1023             return 0;
   1024         }
   1025         for(i = 0; i < nextEqualsign - keywordStart; i++) {
   1026             localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
   1027         }
   1028         /* trim trailing spaces */
   1029         while(keywordStart[i-1] == ' ') {
   1030             i--;
   1031         }
   1032         localeKeywordNameBuffer[i] = 0;
   1033 
   1034         nextSeparator = uprv_strchr(nextEqualsign, ';');
   1035         rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
   1036         if(rc == 0) {
   1037             nextEqualsign++;
   1038             while(*nextEqualsign == ' ') {
   1039                 nextEqualsign++;
   1040             }
   1041             /* we actually found the keyword. Change the value */
   1042             if (nextSeparator) {
   1043                 keywordAtEnd = 0;
   1044                 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
   1045             } else {
   1046                 keywordAtEnd = 1;
   1047                 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
   1048             }
   1049             if(keywordValue) { /* adding a value - not removing */
   1050               if(foundValueLen == keywordValueLen) {
   1051                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1052                 return bufLen; /* no change in size */
   1053               } else if(foundValueLen > keywordValueLen) {
   1054                 int32_t delta = foundValueLen - keywordValueLen;
   1055                 if(nextSeparator) { /* RH side */
   1056                   uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
   1057                 }
   1058                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1059                 bufLen -= delta;
   1060                 buffer[bufLen]=0;
   1061                 return bufLen;
   1062               } else { /* FVL < KVL */
   1063                 int32_t delta = keywordValueLen - foundValueLen;
   1064                 if((bufLen+delta) >= bufferCapacity) {
   1065                   *status = U_BUFFER_OVERFLOW_ERROR;
   1066                   return bufLen+delta;
   1067                 }
   1068                 if(nextSeparator) { /* RH side */
   1069                   uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
   1070                 }
   1071                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1072                 bufLen += delta;
   1073                 buffer[bufLen]=0;
   1074                 return bufLen;
   1075               }
   1076             } else { /* removing a keyword */
   1077               if(keywordAtEnd) {
   1078                 /* zero out the ';' or '@' just before startSearchhere */
   1079                 keywordStart[-1] = 0;
   1080                 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
   1081               } else {
   1082                 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
   1083                 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
   1084                 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
   1085               }
   1086             }
   1087         } else if(rc<0){ /* end match keyword */
   1088           /* could insert at this location. */
   1089           insertHere = keywordStart;
   1090         }
   1091         keywordStart = nextSeparator;
   1092     } /* end loop searching */
   1093 
   1094     if(!keywordValue) {
   1095       return bufLen; /* removal of non-extant keyword - no change */
   1096     }
   1097 
   1098     /* we know there is at least one keyword. */
   1099     needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
   1100     if(needLen >= bufferCapacity) {
   1101         *status = U_BUFFER_OVERFLOW_ERROR;
   1102         return needLen; /* no change */
   1103     }
   1104 
   1105     if(insertHere) {
   1106       uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
   1107       keywordStart = insertHere;
   1108     } else {
   1109       keywordStart = buffer+bufLen;
   1110       *keywordStart = ';';
   1111       keywordStart++;
   1112     }
   1113     uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
   1114     keywordStart += keywordNameLen;
   1115     *keywordStart = '=';
   1116     keywordStart++;
   1117     uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
   1118     keywordStart+=keywordValueLen;
   1119     if(insertHere) {
   1120       *keywordStart = ';';
   1121       keywordStart++;
   1122     }
   1123     buffer[needLen]=0;
   1124     return needLen;
   1125 }
   1126 
   1127 /* ### ID parsing implementation **************************************************/
   1128 
   1129 #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))
   1130 
   1131 /*returns TRUE if one of the special prefixes is here (s=string)
   1132   'x-' or 'i-' */
   1133 #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
   1134 
   1135 /* Dot terminates it because of POSIX form  where dot precedes the codepage
   1136  * except for variant
   1137  */
   1138 #define _isTerminator(a)  ((a==0)||(a=='.')||(a=='@'))
   1139 
   1140 static char* _strnchr(const char* str, int32_t len, char c) {
   1141     U_ASSERT(str != 0 && len >= 0);
   1142     while (len-- != 0) {
   1143         char d = *str;
   1144         if (d == c) {
   1145             return (char*) str;
   1146         } else if (d == 0) {
   1147             break;
   1148         }
   1149         ++str;
   1150     }
   1151     return NULL;
   1152 }
   1153 
   1154 /**
   1155  * Lookup 'key' in the array 'list'.  The array 'list' should contain
   1156  * a NULL entry, followed by more entries, and a second NULL entry.
   1157  *
   1158  * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
   1159  * COUNTRIES_3.
   1160  */
   1161 static int16_t _findIndex(const char* const* list, const char* key)
   1162 {
   1163     const char* const* anchor = list;
   1164     int32_t pass = 0;
   1165 
   1166     /* Make two passes through two NULL-terminated arrays at 'list' */
   1167     while (pass++ < 2) {
   1168         while (*list) {
   1169             if (uprv_strcmp(key, *list) == 0) {
   1170                 return (int16_t)(list - anchor);
   1171             }
   1172             list++;
   1173         }
   1174         ++list;     /* skip final NULL *CWB*/
   1175     }
   1176     return -1;
   1177 }
   1178 
   1179 /* count the length of src while copying it to dest; return strlen(src) */
   1180 static U_INLINE int32_t
   1181 _copyCount(char *dest, int32_t destCapacity, const char *src) {
   1182     const char *anchor;
   1183     char c;
   1184 
   1185     anchor=src;
   1186     for(;;) {
   1187         if((c=*src)==0) {
   1188             return (int32_t)(src-anchor);
   1189         }
   1190         if(destCapacity<=0) {
   1191             return (int32_t)((src-anchor)+uprv_strlen(src));
   1192         }
   1193         ++src;
   1194         *dest++=c;
   1195         --destCapacity;
   1196     }
   1197 }
   1198 
   1199 U_CFUNC const char*
   1200 uloc_getCurrentCountryID(const char* oldID){
   1201     int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
   1202     if (offset >= 0) {
   1203         return REPLACEMENT_COUNTRIES[offset];
   1204     }
   1205     return oldID;
   1206 }
   1207 U_CFUNC const char*
   1208 uloc_getCurrentLanguageID(const char* oldID){
   1209     int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
   1210     if (offset >= 0) {
   1211         return REPLACEMENT_LANGUAGES[offset];
   1212     }
   1213     return oldID;
   1214 }
   1215 /*
   1216  * the internal functions _getLanguage(), _getCountry(), _getVariant()
   1217  * avoid duplicating code to handle the earlier locale ID pieces
   1218  * in the functions for the later ones by
   1219  * setting the *pEnd pointer to where they stopped parsing
   1220  *
   1221  * TODO try to use this in Locale
   1222  */
   1223 U_CFUNC int32_t
   1224 ulocimp_getLanguage(const char *localeID,
   1225                     char *language, int32_t languageCapacity,
   1226                     const char **pEnd) {
   1227     int32_t i=0;
   1228     int32_t offset;
   1229     char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
   1230 
   1231     /* if it starts with i- or x- then copy that prefix */
   1232     if(_isIDPrefix(localeID)) {
   1233         if(i<languageCapacity) {
   1234             language[i]=(char)uprv_tolower(*localeID);
   1235         }
   1236         if(i<languageCapacity) {
   1237             language[i+1]='-';
   1238         }
   1239         i+=2;
   1240         localeID+=2;
   1241     }
   1242 
   1243     /* copy the language as far as possible and count its length */
   1244     while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
   1245         if(i<languageCapacity) {
   1246             language[i]=(char)uprv_tolower(*localeID);
   1247         }
   1248         if(i<3) {
   1249             lang[i]=(char)uprv_tolower(*localeID);
   1250         }
   1251         i++;
   1252         localeID++;
   1253     }
   1254 
   1255     if(i==3) {
   1256         /* convert 3 character code to 2 character code if possible *CWB*/
   1257         offset=_findIndex(LANGUAGES_3, lang);
   1258         if(offset>=0) {
   1259             i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
   1260         }
   1261     }
   1262 
   1263     if(pEnd!=NULL) {
   1264         *pEnd=localeID;
   1265     }
   1266     return i;
   1267 }
   1268 
   1269 U_CFUNC int32_t
   1270 ulocimp_getScript(const char *localeID,
   1271                   char *script, int32_t scriptCapacity,
   1272                   const char **pEnd)
   1273 {
   1274     int32_t idLen = 0;
   1275 
   1276     if (pEnd != NULL) {
   1277         *pEnd = localeID;
   1278     }
   1279 
   1280     /* copy the second item as far as possible and count its length */
   1281     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
   1282         idLen++;
   1283     }
   1284 
   1285     /* If it's exactly 4 characters long, then it's a script and not a country. */
   1286     if (idLen == 4) {
   1287         int32_t i;
   1288         if (pEnd != NULL) {
   1289             *pEnd = localeID+idLen;
   1290         }
   1291         if(idLen > scriptCapacity) {
   1292             idLen = scriptCapacity;
   1293         }
   1294         if (idLen >= 1) {
   1295             script[0]=(char)uprv_toupper(*(localeID++));
   1296         }
   1297         for (i = 1; i < idLen; i++) {
   1298             script[i]=(char)uprv_tolower(*(localeID++));
   1299         }
   1300     }
   1301     else {
   1302         idLen = 0;
   1303     }
   1304     return idLen;
   1305 }
   1306 
   1307 U_CFUNC int32_t
   1308 ulocimp_getCountry(const char *localeID,
   1309                    char *country, int32_t countryCapacity,
   1310                    const char **pEnd)
   1311 {
   1312     int32_t idLen=0;
   1313     char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
   1314     int32_t offset;
   1315 
   1316     /* copy the country as far as possible and count its length */
   1317     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
   1318         if(idLen<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
   1319             cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
   1320         }
   1321         idLen++;
   1322     }
   1323 
   1324     /* the country should be either length 2 or 3 */
   1325     if (idLen == 2 || idLen == 3) {
   1326         UBool gotCountry = FALSE;
   1327         /* convert 3 character code to 2 character code if possible *CWB*/
   1328         if(idLen==3) {
   1329             offset=_findIndex(COUNTRIES_3, cnty);
   1330             if(offset>=0) {
   1331                 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
   1332                 gotCountry = TRUE;
   1333             }
   1334         }
   1335         if (!gotCountry) {
   1336             int32_t i = 0;
   1337             for (i = 0; i < idLen; i++) {
   1338                 if (i < countryCapacity) {
   1339                     country[i]=(char)uprv_toupper(localeID[i]);
   1340                 }
   1341             }
   1342         }
   1343         localeID+=idLen;
   1344     } else {
   1345         idLen = 0;
   1346     }
   1347 
   1348     if(pEnd!=NULL) {
   1349         *pEnd=localeID;
   1350     }
   1351 
   1352     return idLen;
   1353 }
   1354 
   1355 /**
   1356  * @param needSeparator if true, then add leading '_' if any variants
   1357  * are added to 'variant'
   1358  */
   1359 static int32_t
   1360 _getVariantEx(const char *localeID,
   1361               char prev,
   1362               char *variant, int32_t variantCapacity,
   1363               UBool needSeparator) {
   1364     int32_t i=0;
   1365 
   1366     /* get one or more variant tags and separate them with '_' */
   1367     if(_isIDSeparator(prev)) {
   1368         /* get a variant string after a '-' or '_' */
   1369         while(!_isTerminator(*localeID)) {
   1370             if (needSeparator) {
   1371                 if (i<variantCapacity) {
   1372                     variant[i] = '_';
   1373                 }
   1374                 ++i;
   1375                 needSeparator = FALSE;
   1376             }
   1377             if(i<variantCapacity) {
   1378                 variant[i]=(char)uprv_toupper(*localeID);
   1379                 if(variant[i]=='-') {
   1380                     variant[i]='_';
   1381                 }
   1382             }
   1383             i++;
   1384             localeID++;
   1385         }
   1386     }
   1387 
   1388     /* if there is no variant tag after a '-' or '_' then look for '@' */
   1389     if(i==0) {
   1390         if(prev=='@') {
   1391             /* keep localeID */
   1392         } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
   1393             ++localeID; /* point after the '@' */
   1394         } else {
   1395             return 0;
   1396         }
   1397         while(!_isTerminator(*localeID)) {
   1398             if (needSeparator) {
   1399                 if (i<variantCapacity) {
   1400                     variant[i] = '_';
   1401                 }
   1402                 ++i;
   1403                 needSeparator = FALSE;
   1404             }
   1405             if(i<variantCapacity) {
   1406                 variant[i]=(char)uprv_toupper(*localeID);
   1407                 if(variant[i]=='-' || variant[i]==',') {
   1408                     variant[i]='_';
   1409                 }
   1410             }
   1411             i++;
   1412             localeID++;
   1413         }
   1414     }
   1415 
   1416     return i;
   1417 }
   1418 
   1419 static int32_t
   1420 _getVariant(const char *localeID,
   1421             char prev,
   1422             char *variant, int32_t variantCapacity) {
   1423     return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
   1424 }
   1425 
   1426 /**
   1427  * Delete ALL instances of a variant from the given list of one or
   1428  * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
   1429  * @param variants the source string of one or more variants,
   1430  * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
   1431  * terminated; if it is, trailing zero will NOT be maintained.
   1432  * @param variantsLen length of variants
   1433  * @param toDelete variant to delete, without separators, e.g.  "EURO"
   1434  * or "PREEURO"; not zero terminated
   1435  * @param toDeleteLen length of toDelete
   1436  * @return number of characters deleted from variants
   1437  */
   1438 static int32_t
   1439 _deleteVariant(char* variants, int32_t variantsLen,
   1440                const char* toDelete, int32_t toDeleteLen)
   1441 {
   1442     int32_t delta = 0; /* number of chars deleted */
   1443     for (;;) {
   1444         UBool flag = FALSE;
   1445         if (variantsLen < toDeleteLen) {
   1446             return delta;
   1447         }
   1448         if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
   1449             (variantsLen == toDeleteLen ||
   1450              (flag=(variants[toDeleteLen] == '_'))))
   1451         {
   1452             int32_t d = toDeleteLen + (flag?1:0);
   1453             variantsLen -= d;
   1454             delta += d;
   1455             if (variantsLen > 0) {
   1456                 uprv_memmove(variants, variants+d, variantsLen);
   1457             }
   1458         } else {
   1459             char* p = _strnchr(variants, variantsLen, '_');
   1460             if (p == NULL) {
   1461                 return delta;
   1462             }
   1463             ++p;
   1464             variantsLen -= (int32_t)(p - variants);
   1465             variants = p;
   1466         }
   1467     }
   1468 }
   1469 
   1470 /* Keyword enumeration */
   1471 
   /**
    * State behind a keyword UEnumeration.
    * The enumeration callbacks in this file walk 'keywords' as a sequence of
    * zero-terminated strings that ends at an empty string.
    */
   typedef struct UKeywordsContext {
       char *keywords;   /* start of the keyword list; freed on close */
       char *current;    /* next item to hand out; points into 'keywords' */
   } UKeywordsContext;
   1476 
   1477 static void U_CALLCONV
   1478 uloc_kw_closeKeywords(UEnumeration *enumerator) {
   1479     uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
   1480     uprv_free(enumerator->context);
   1481     uprv_free(enumerator);
   1482 }
   1483 
   1484 static int32_t U_CALLCONV
   1485 uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) {
   1486     char *kw = ((UKeywordsContext *)en->context)->keywords;
   1487     int32_t result = 0;
   1488     while(*kw) {
   1489         result++;
   1490         kw += uprv_strlen(kw)+1;
   1491     }
   1492     return result;
   1493 }
   1494 
   1495 static const char* U_CALLCONV
   1496 uloc_kw_nextKeyword(UEnumeration* en,
   1497                     int32_t* resultLength,
   1498                     UErrorCode* status) {
   1499     const char* result = ((UKeywordsContext *)en->context)->current;
   1500     int32_t len = 0;
   1501     if(*result) {
   1502         len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
   1503         ((UKeywordsContext *)en->context)->current += len+1;
   1504     } else {
   1505         result = NULL;
   1506     }
   1507     if (resultLength) {
   1508         *resultLength = len;
   1509     }
   1510     return result;
   1511 }
   1512 
   1513 static void U_CALLCONV
   1514 uloc_kw_resetKeywords(UEnumeration* en,
   1515                       UErrorCode* status) {
   1516     ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
   1517 }
   1518 
   1519 static const UEnumeration gKeywordsEnum = {
   1520     NULL,
   1521     NULL,
   1522     uloc_kw_closeKeywords,
   1523     uloc_kw_countKeywords,
   1524     uenum_unextDefault,
   1525     uloc_kw_nextKeyword,
   1526     uloc_kw_resetKeywords
   1527 };
   1528 
   1529 U_CAPI UEnumeration* U_EXPORT2
   1530 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
   1531 {
   1532     UKeywordsContext *myContext = NULL;
   1533     UEnumeration *result = NULL;
   1534 
   1535     if(U_FAILURE(*status)) {
   1536         return NULL;
   1537     }
   1538     result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
   1539     /* Null pointer test */
   1540     if (result == NULL) {
   1541         *status = U_MEMORY_ALLOCATION_ERROR;
   1542         return NULL;
   1543     }
   1544     uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
   1545     myContext = uprv_malloc(sizeof(UKeywordsContext));
   1546     if (myContext == NULL) {
   1547         *status = U_MEMORY_ALLOCATION_ERROR;
   1548         uprv_free(result);
   1549         return NULL;
   1550     }
   1551     myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
   1552     uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
   1553     myContext->keywords[keywordListSize] = 0;
   1554     myContext->current = myContext->keywords;
   1555     result->context = myContext;
   1556     return result;
   1557 }
   1558 
   1559 U_CAPI UEnumeration* U_EXPORT2
   1560 uloc_openKeywords(const char* localeID,
   1561                         UErrorCode* status)
   1562 {
   1563     int32_t i=0;
   1564     char keywords[256];
   1565     int32_t keywordsCapacity = 256;
   1566     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1567     const char* tmpLocaleID;
   1568 
   1569     if(status==NULL || U_FAILURE(*status)) {
   1570         return 0;
   1571     }
   1572 
   1573     if (_hasBCP47Extension(localeID)) {
   1574         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
   1575     } else {
   1576         if (localeID==NULL) {
   1577            localeID=uloc_getDefault();
   1578         }
   1579         tmpLocaleID=localeID;
   1580     }
   1581 
   1582     /* Skip the language */
   1583     ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
   1584     if(_isIDSeparator(*tmpLocaleID)) {
   1585         const char *scriptID;
   1586         /* Skip the script if available */
   1587         ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
   1588         if(scriptID != tmpLocaleID+1) {
   1589             /* Found optional script */
   1590             tmpLocaleID = scriptID;
   1591         }
   1592         /* Skip the Country */
   1593         if (_isIDSeparator(*tmpLocaleID)) {
   1594             ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
   1595             if(_isIDSeparator(*tmpLocaleID)) {
   1596                 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
   1597             }
   1598         }
   1599     }
   1600 
   1601     /* keywords are located after '@' */
   1602     if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
   1603         i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
   1604     }
   1605 
   1606     if(i) {
   1607         return uloc_openKeywordList(keywords, i, status);
   1608     } else {
   1609         return NULL;
   1610     }
   1611 }
   1612 
   1613 
   /* bit-flags for 'options' parameter of _canonicalize */
   #define _ULOC_STRIP_KEYWORDS 0x2
   #define _ULOC_CANONICALIZE   0x1

   /* True when any bit of 'mask' is set in 'options'.  Both arguments are
      parenthesized so that compound expressions expand correctly. */
   #define OPTION_SET(options, mask) (((options) & (mask)) != 0)

   /* The grandfathered tag "i-default"; deliberately NOT zero terminated,
      so always use it together with I_DEFAULT_LENGTH. */
   static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
   /* Length of i_default as int32_t, matching the signed lengths it is
      compared against. */
   #define I_DEFAULT_LENGTH ((int32_t)(sizeof i_default / sizeof i_default[0]))
   1622 
   1623 /**
   1624  * Canonicalize the given localeID, to level 1 or to level 2,
   1625  * depending on the options.  To specify level 1, pass in options=0.
   1626  * To specify level 2, pass in options=_ULOC_CANONICALIZE.
   1627  *
   1628  * This is the code underlying uloc_getName and uloc_canonicalize.
   1629  */
   1630 static int32_t
   1631 _canonicalize(const char* localeID,
   1632               char* result,
   1633               int32_t resultCapacity,
   1634               uint32_t options,
   1635               UErrorCode* err) {
   1636     int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
   1637     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1638     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1639     const char* origLocaleID;
   1640     const char* tmpLocaleID;
   1641     const char* keywordAssign = NULL;
   1642     const char* separatorIndicator = NULL;
   1643     const char* addKeyword = NULL;
   1644     const char* addValue = NULL;
   1645     char* name;
   1646     char* variant = NULL; /* pointer into name, or NULL */
   1647 
   1648     if (U_FAILURE(*err)) {
   1649         return 0;
   1650     }
   1651 
   1652     if (_hasBCP47Extension(localeID)) {
   1653         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
   1654     } else {
   1655         if (localeID==NULL) {
   1656            localeID=uloc_getDefault();
   1657         }
   1658         tmpLocaleID=localeID;
   1659     }
   1660 
   1661     origLocaleID=tmpLocaleID;
   1662 
   1663     /* if we are doing a full canonicalization, then put results in
   1664        localeBuffer, if necessary; otherwise send them to result. */
   1665     if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
   1666         (result == NULL || resultCapacity <  sizeof(localeBuffer))) {
   1667         name = localeBuffer;
   1668         nameCapacity = sizeof(localeBuffer);
   1669     } else {
   1670         name = result;
   1671         nameCapacity = resultCapacity;
   1672     }
   1673 
   1674     /* get all pieces, one after another, and separate with '_' */
   1675     len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
   1676 
   1677     if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
   1678         const char *d = uloc_getDefault();
   1679 
   1680         len = (int32_t)uprv_strlen(d);
   1681 
   1682         if (name != NULL) {
   1683             uprv_strncpy(name, d, len);
   1684         }
   1685     } else if(_isIDSeparator(*tmpLocaleID)) {
   1686         const char *scriptID;
   1687 
   1688         ++fieldCount;
   1689         if(len<nameCapacity) {
   1690             name[len]='_';
   1691         }
   1692         ++len;
   1693 
   1694         scriptSize=ulocimp_getScript(tmpLocaleID+1, name+len, nameCapacity-len, &scriptID);
   1695         if(scriptSize > 0) {
   1696             /* Found optional script */
   1697             tmpLocaleID = scriptID;
   1698             ++fieldCount;
   1699             len+=scriptSize;
   1700             if (_isIDSeparator(*tmpLocaleID)) {
   1701                 /* If there is something else, then we add the _ */
   1702                 if(len<nameCapacity) {
   1703                     name[len]='_';
   1704                 }
   1705                 ++len;
   1706             }
   1707         }
   1708 
   1709         if (_isIDSeparator(*tmpLocaleID)) {
   1710             const char *cntryID;
   1711             int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1, name+len, nameCapacity-len, &cntryID);
   1712             if (cntrySize > 0) {
   1713                 /* Found optional country */
   1714                 tmpLocaleID = cntryID;
   1715                 len+=cntrySize;
   1716             }
   1717             if(_isIDSeparator(*tmpLocaleID)) {
   1718                 /* If there is something else, then we add the _  if we found country before.*/
   1719                 if (cntrySize > 0) {
   1720                     ++fieldCount;
   1721                     if(len<nameCapacity) {
   1722                         name[len]='_';
   1723                     }
   1724                     ++len;
   1725                 }
   1726 
   1727                 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID, name+len, nameCapacity-len);
   1728                 if (variantSize > 0) {
   1729                     variant = name+len;
   1730                     len += variantSize;
   1731                     tmpLocaleID += variantSize + 1; /* skip '_' and variant */
   1732                 }
   1733             }
   1734         }
   1735     }
   1736 
   1737     /* Copy POSIX-style charset specifier, if any [mr.utf8] */
   1738     if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
   1739         UBool done = FALSE;
   1740         do {
   1741             char c = *tmpLocaleID;
   1742             switch (c) {
   1743             case 0:
   1744             case '@':
   1745                 done = TRUE;
   1746                 break;
   1747             default:
   1748                 if (len<nameCapacity) {
   1749                     name[len] = c;
   1750                 }
   1751                 ++len;
   1752                 ++tmpLocaleID;
   1753                 break;
   1754             }
   1755         } while (!done);
   1756     }
   1757 
   1758     /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
   1759        After this, tmpLocaleID either points to '@' or is NULL */
   1760     if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
   1761         keywordAssign = uprv_strchr(tmpLocaleID, '=');
   1762         separatorIndicator = uprv_strchr(tmpLocaleID, ';');
   1763     }
   1764 
   1765     /* Copy POSIX-style variant, if any [mr@FOO] */
   1766     if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
   1767         tmpLocaleID != NULL && keywordAssign == NULL) {
   1768         for (;;) {
   1769             char c = *tmpLocaleID;
   1770             if (c == 0) {
   1771                 break;
   1772             }
   1773             if (len<nameCapacity) {
   1774                 name[len] = c;
   1775             }
   1776             ++len;
   1777             ++tmpLocaleID;
   1778         }
   1779     }
   1780 
   1781     if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
   1782         /* Handle @FOO variant if @ is present and not followed by = */
   1783         if (tmpLocaleID!=NULL && keywordAssign==NULL) {
   1784             int32_t posixVariantSize;
   1785             /* Add missing '_' if needed */
   1786             if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
   1787                 do {
   1788                     if(len<nameCapacity) {
   1789                         name[len]='_';
   1790                     }
   1791                     ++len;
   1792                     ++fieldCount;
   1793                 } while(fieldCount<2);
   1794             }
   1795             posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
   1796                                              (UBool)(variantSize > 0));
   1797             if (posixVariantSize > 0) {
   1798                 if (variant == NULL) {
   1799                     variant = name+len;
   1800                 }
   1801                 len += posixVariantSize;
   1802                 variantSize += posixVariantSize;
   1803             }
   1804         }
   1805 
   1806         /* Handle generic variants first */
   1807         if (variant) {
   1808             for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
   1809                 const char* variantToCompare = VARIANT_MAP[j].variant;
   1810                 int32_t n = (int32_t)uprv_strlen(variantToCompare);
   1811                 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
   1812                 len -= variantLen;
   1813                 if (variantLen > 0) {
   1814                     if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
   1815                         --len;
   1816                     }
   1817                     addKeyword = VARIANT_MAP[j].keyword;
   1818                     addValue = VARIANT_MAP[j].value;
   1819                     break;
   1820                 }
   1821             }
   1822             if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
   1823                 --len;
   1824             }
   1825         }
   1826 
   1827         /* Look up the ID in the canonicalization map */
   1828         for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
   1829             const char* id = CANONICALIZE_MAP[j].id;
   1830             int32_t n = (int32_t)uprv_strlen(id);
   1831             if (len == n && uprv_strncmp(name, id, n) == 0) {
   1832                 if (n == 0 && tmpLocaleID != NULL) {
   1833                     break; /* Don't remap "" if keywords present */
   1834                 }
   1835                 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
   1836                 if (CANONICALIZE_MAP[j].keyword) {
   1837                     addKeyword = CANONICALIZE_MAP[j].keyword;
   1838                     addValue = CANONICALIZE_MAP[j].value;
   1839                 }
   1840                 break;
   1841             }
   1842         }
   1843     }
   1844 
   1845     if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
   1846         if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
   1847             (!separatorIndicator || separatorIndicator > keywordAssign)) {
   1848             if(len<nameCapacity) {
   1849                 name[len]='@';
   1850             }
   1851             ++len;
   1852             ++fieldCount;
   1853             len += _getKeywords(tmpLocaleID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,
   1854                                 addKeyword, addValue, err);
   1855         } else if (addKeyword != NULL) {
   1856             U_ASSERT(addValue != NULL);
   1857             /* inelegant but works -- later make _getKeywords do this? */
   1858             len += _copyCount(name+len, nameCapacity-len, "@");
   1859             len += _copyCount(name+len, nameCapacity-len, addKeyword);
   1860             len += _copyCount(name+len, nameCapacity-len, "=");
   1861             len += _copyCount(name+len, nameCapacity-len, addValue);
   1862         }
   1863     }
   1864 
   1865     if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
   1866         uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
   1867     }
   1868 
   1869     return u_terminateChars(result, resultCapacity, len, err);
   1870 }
   1871 
   1872 /* ### ID parsing API **************************************************/
   1873 
   1874 U_CAPI int32_t  U_EXPORT2
   1875 uloc_getParent(const char*    localeID,
   1876                char* parent,
   1877                int32_t parentCapacity,
   1878                UErrorCode* err)
   1879 {
   1880     const char *lastUnderscore;
   1881     int32_t i;
   1882 
   1883     if (U_FAILURE(*err))
   1884         return 0;
   1885 
   1886     if (localeID == NULL)
   1887         localeID = uloc_getDefault();
   1888 
   1889     lastUnderscore=uprv_strrchr(localeID, '_');
   1890     if(lastUnderscore!=NULL) {
   1891         i=(int32_t)(lastUnderscore-localeID);
   1892     } else {
   1893         i=0;
   1894     }
   1895 
   1896     if(i>0 && parent != localeID) {
   1897         uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
   1898     }
   1899     return u_terminateChars(parent, parentCapacity, i, err);
   1900 }
   1901 
   1902 U_CAPI int32_t U_EXPORT2
   1903 uloc_getLanguage(const char*    localeID,
   1904          char* language,
   1905          int32_t languageCapacity,
   1906          UErrorCode* err)
   1907 {
   1908     /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
   1909     int32_t i=0;
   1910 
   1911     if (err==NULL || U_FAILURE(*err)) {
   1912         return 0;
   1913     }
   1914 
   1915     if(localeID==NULL) {
   1916         localeID=uloc_getDefault();
   1917     }
   1918 
   1919     i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
   1920     return u_terminateChars(language, languageCapacity, i, err);
   1921 }
   1922 
   1923 U_CAPI int32_t U_EXPORT2
   1924 uloc_getScript(const char*    localeID,
   1925          char* script,
   1926          int32_t scriptCapacity,
   1927          UErrorCode* err)
   1928 {
   1929     int32_t i=0;
   1930 
   1931     if(err==NULL || U_FAILURE(*err)) {
   1932         return 0;
   1933     }
   1934 
   1935     if(localeID==NULL) {
   1936         localeID=uloc_getDefault();
   1937     }
   1938 
   1939     /* skip the language */
   1940     ulocimp_getLanguage(localeID, NULL, 0, &localeID);
   1941     if(_isIDSeparator(*localeID)) {
   1942         i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
   1943     }
   1944     return u_terminateChars(script, scriptCapacity, i, err);
   1945 }
   1946 
   1947 U_CAPI int32_t  U_EXPORT2
   1948 uloc_getCountry(const char* localeID,
   1949             char* country,
   1950             int32_t countryCapacity,
   1951             UErrorCode* err)
   1952 {
   1953     int32_t i=0;
   1954 
   1955     if(err==NULL || U_FAILURE(*err)) {
   1956         return 0;
   1957     }
   1958 
   1959     if(localeID==NULL) {
   1960         localeID=uloc_getDefault();
   1961     }
   1962 
   1963     /* Skip the language */
   1964     ulocimp_getLanguage(localeID, NULL, 0, &localeID);
   1965     if(_isIDSeparator(*localeID)) {
   1966         const char *scriptID;
   1967         /* Skip the script if available */
   1968         ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
   1969         if(scriptID != localeID+1) {
   1970             /* Found optional script */
   1971             localeID = scriptID;
   1972         }
   1973         if(_isIDSeparator(*localeID)) {
   1974             i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
   1975         }
   1976     }
   1977     return u_terminateChars(country, countryCapacity, i, err);
   1978 }
   1979 
   1980 U_CAPI int32_t  U_EXPORT2
   1981 uloc_getVariant(const char* localeID,
   1982                 char* variant,
   1983                 int32_t variantCapacity,
   1984                 UErrorCode* err)
   1985 {
   1986     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1987     const char* tmpLocaleID;
   1988     int32_t i=0;
   1989 
   1990     if(err==NULL || U_FAILURE(*err)) {
   1991         return 0;
   1992     }
   1993 
   1994     if (_hasBCP47Extension(localeID)) {
   1995         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
   1996     } else {
   1997         if (localeID==NULL) {
   1998            localeID=uloc_getDefault();
   1999         }
   2000         tmpLocaleID=localeID;
   2001     }
   2002 
   2003     /* Skip the language */
   2004     ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
   2005     if(_isIDSeparator(*tmpLocaleID)) {
   2006         const char *scriptID;
   2007         /* Skip the script if available */
   2008         ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
   2009         if(scriptID != tmpLocaleID+1) {
   2010             /* Found optional script */
   2011             tmpLocaleID = scriptID;
   2012         }
   2013         /* Skip the Country */
   2014         if (_isIDSeparator(*tmpLocaleID)) {
   2015             const char *cntryID;
   2016             ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
   2017             if (cntryID != tmpLocaleID+1) {
   2018                 /* Found optional country */
   2019                 tmpLocaleID = cntryID;
   2020             }
   2021             if(_isIDSeparator(*tmpLocaleID)) {
   2022                 /* If there was no country ID, skip a possible extra IDSeparator */
   2023                 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
   2024                     tmpLocaleID++;
   2025                 }
   2026                 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
   2027             }
   2028         }
   2029     }
   2030 
   2031     /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
   2032     /* if we do not have a variant tag yet then try a POSIX variant after '@' */
   2033 /*
   2034     if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
   2035         i=_getVariant(localeID+1, '@', variant, variantCapacity);
   2036     }
   2037 */
   2038     return u_terminateChars(variant, variantCapacity, i, err);
   2039 }
   2040 
   2041 U_CAPI int32_t  U_EXPORT2
   2042 uloc_getName(const char* localeID,
   2043              char* name,
   2044              int32_t nameCapacity,
   2045              UErrorCode* err)
   2046 {
   2047     return _canonicalize(localeID, name, nameCapacity, 0, err);
   2048 }
   2049 
   2050 U_CAPI int32_t  U_EXPORT2
   2051 uloc_getBaseName(const char* localeID,
   2052                  char* name,
   2053                  int32_t nameCapacity,
   2054                  UErrorCode* err)
   2055 {
   2056     return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
   2057 }
   2058 
   2059 U_CAPI int32_t  U_EXPORT2
   2060 uloc_canonicalize(const char* localeID,
   2061                   char* name,
   2062                   int32_t nameCapacity,
   2063                   UErrorCode* err)
   2064 {
   2065     return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
   2066 }
   2067 
   2068 U_CAPI const char*  U_EXPORT2
   2069 uloc_getISO3Language(const char* localeID)
   2070 {
   2071     int16_t offset;
   2072     char lang[ULOC_LANG_CAPACITY];
   2073     UErrorCode err = U_ZERO_ERROR;
   2074 
   2075     if (localeID == NULL)
   2076     {
   2077         localeID = uloc_getDefault();
   2078     }
   2079     uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
   2080     if (U_FAILURE(err))
   2081         return "";
   2082     offset = _findIndex(LANGUAGES, lang);
   2083     if (offset < 0)
   2084         return "";
   2085     return LANGUAGES_3[offset];
   2086 }
   2087 
   2088 U_CAPI const char*  U_EXPORT2
   2089 uloc_getISO3Country(const char* localeID)
   2090 {
   2091     int16_t offset;
   2092     char cntry[ULOC_LANG_CAPACITY];
   2093     UErrorCode err = U_ZERO_ERROR;
   2094 
   2095     if (localeID == NULL)
   2096     {
   2097         localeID = uloc_getDefault();
   2098     }
   2099     uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
   2100     if (U_FAILURE(err))
   2101         return "";
   2102     offset = _findIndex(COUNTRIES, cntry);
   2103     if (offset < 0)
   2104         return "";
   2105 
   2106     return COUNTRIES_3[offset];
   2107 }
   2108 
   2109 U_CAPI uint32_t  U_EXPORT2
   2110 uloc_getLCID(const char* localeID)
   2111 {
   2112     UErrorCode status = U_ZERO_ERROR;
   2113     char       langID[ULOC_FULLNAME_CAPACITY];
   2114 
   2115     uloc_getLanguage(localeID, langID, sizeof(langID), &status);
   2116     if (U_FAILURE(status)) {
   2117         return 0;
   2118     }
   2119 
   2120     return uprv_convertToLCID(langID, localeID, &status);
   2121 }
   2122 
   2123 U_CAPI int32_t U_EXPORT2
   2124 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
   2125                 UErrorCode *status)
   2126 {
   2127     int32_t length;
   2128     const char *posix = uprv_convertToPosix(hostid, status);
   2129     if (U_FAILURE(*status) || posix == NULL) {
   2130         return 0;
   2131     }
   2132     length = (int32_t)uprv_strlen(posix);
   2133     if (length+1 > localeCapacity) {
   2134         *status = U_BUFFER_OVERFLOW_ERROR;
   2135     }
   2136     else {
   2137         uprv_strcpy(locale, posix);
   2138     }
   2139     return length;
   2140 }
   2141 
   2142 /* ### Default locale **************************************************/
   2143 
   2144 U_CAPI const char*  U_EXPORT2
   2145 uloc_getDefault()
   2146 {
   2147     return locale_get_default();
   2148 }
   2149 
   2150 U_CAPI void  U_EXPORT2
   2151 uloc_setDefault(const char*   newDefaultLocale,
   2152              UErrorCode* err)
   2153 {
   2154     if (U_FAILURE(*err))
   2155         return;
   2156     /* the error code isn't currently used for anything by this function*/
   2157 
   2158     /* propagate change to C++ */
   2159     locale_set_default(newDefaultLocale);
   2160 }
   2161 
   2162 /**
   2163  * Returns a list of all language codes defined in ISO 639.  This is a pointer
   2164  * to an array of pointers to arrays of char.  All of these pointers are owned
   2165  * by ICU-- do not delete them, and do not write through them.  The array is
   2166  * terminated with a null pointer.
   2167  */
   2168 U_CAPI const char* const*  U_EXPORT2
   2169 uloc_getISOLanguages()
   2170 {
   2171     return LANGUAGES;
   2172 }
   2173 
   2174 /**
   2175  * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
   2176  * pointer to an array of pointers to arrays of char.  All of these pointers are
   2177  * owned by ICU-- do not delete them, and do not write through them.  The array is
   2178  * terminated with a null pointer.
   2179  */
   2180 U_CAPI const char* const*  U_EXPORT2
   2181 uloc_getISOCountries()
   2182 {
   2183     return COUNTRIES;
   2184 }
   2185 
   2186 
   2187 /* this function to be moved into cstring.c later */
   2188 static char gDecimal = 0;
   2189 
   2190 static /* U_CAPI */
   2191 double
   2192 /* U_EXPORT2 */
   2193 _uloc_strtod(const char *start, char **end) {
   2194     char *decimal;
   2195     char *myEnd;
   2196     char buf[30];
   2197     double rv;
   2198     if (!gDecimal) {
   2199         char rep[5];
   2200         /* For machines that decide to change the decimal on you,
   2201         and try to be too smart with localization.
   2202         This normally should be just a '.'. */
   2203         sprintf(rep, "%+1.1f", 1.0);
   2204         gDecimal = rep[2];
   2205     }
   2206 
   2207     if(gDecimal == '.') {
   2208         return uprv_strtod(start, end); /* fall through to OS */
   2209     } else {
   2210         uprv_strncpy(buf, start, 29);
   2211         buf[29]=0;
   2212         decimal = uprv_strchr(buf, '.');
   2213         if(decimal) {
   2214             *decimal = gDecimal;
   2215         } else {
   2216             return uprv_strtod(start, end); /* no decimal point */
   2217         }
   2218         rv = uprv_strtod(buf, &myEnd);
   2219         if(end) {
   2220             *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
   2221         }
   2222         return rv;
   2223     }
   2224 }
   2225 
   2226 typedef struct {
   2227     float q;
   2228     int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
   2229     char *locale;
   2230 } _acceptLangItem;
   2231 
   2232 static int32_t U_CALLCONV
   2233 uloc_acceptLanguageCompare(const void *context, const void *a, const void *b)
   2234 {
   2235     const _acceptLangItem *aa = (const _acceptLangItem*)a;
   2236     const _acceptLangItem *bb = (const _acceptLangItem*)b;
   2237 
   2238     int32_t rc = 0;
   2239     if(bb->q < aa->q) {
   2240         rc = -1;  /* A > B */
   2241     } else if(bb->q > aa->q) {
   2242         rc = 1;   /* A < B */
   2243     } else {
   2244         rc = 0;   /* A = B */
   2245     }
   2246 
   2247     if(rc==0) {
   2248         rc = uprv_stricmp(aa->locale, bb->locale);
   2249     }
   2250 
   2251 #if defined(ULOC_DEBUG)
   2252     /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
   2253     aa->locale, aa->q,
   2254     bb->locale, bb->q,
   2255     rc);*/
   2256 #endif
   2257 
   2258     return rc;
   2259 }
   2260 
   2261 /*
   2262 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
   2263 */
   2264 
   2265 U_CAPI int32_t U_EXPORT2
   2266 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
   2267                             const char *httpAcceptLanguage,
   2268                             UEnumeration* availableLocales,
   2269                             UErrorCode *status)
   2270 {
   2271     _acceptLangItem *j;
   2272     _acceptLangItem smallBuffer[30];
   2273     char **strs;
   2274     char tmp[ULOC_FULLNAME_CAPACITY +1];
   2275     int32_t n = 0;
   2276     const char *itemEnd;
   2277     const char *paramEnd;
   2278     const char *s;
   2279     const char *t;
   2280     int32_t res;
   2281     int32_t i;
   2282     int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
   2283     int32_t jSize;
   2284     char *tempstr; /* Use for null pointer check */
   2285 
   2286     j = smallBuffer;
   2287     jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
   2288     if(U_FAILURE(*status)) {
   2289         return -1;
   2290     }
   2291 
   2292     for(s=httpAcceptLanguage;s&&*s;) {
   2293         while(isspace(*s)) /* eat space at the beginning */
   2294             s++;
   2295         itemEnd=uprv_strchr(s,',');
   2296         paramEnd=uprv_strchr(s,';');
   2297         if(!itemEnd) {
   2298             itemEnd = httpAcceptLanguage+l; /* end of string */
   2299         }
   2300         if(paramEnd && paramEnd<itemEnd) {
   2301             /* semicolon (;) is closer than end (,) */
   2302             t = paramEnd+1;
   2303             if(*t=='q') {
   2304                 t++;
   2305             }
   2306             while(isspace(*t)) {
   2307                 t++;
   2308             }
   2309             if(*t=='=') {
   2310                 t++;
   2311             }
   2312             while(isspace(*t)) {
   2313                 t++;
   2314             }
   2315             j[n].q = (float)_uloc_strtod(t,NULL);
   2316         } else {
   2317             /* no semicolon - it's 1.0 */
   2318             j[n].q = 1.0f;
   2319             paramEnd = itemEnd;
   2320         }
   2321         j[n].dummy=0;
   2322         /* eat spaces prior to semi */
   2323         for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
   2324             ;
   2325         /* Check for null pointer from uprv_strndup */
   2326         tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
   2327         if (tempstr == NULL) {
   2328             *status = U_MEMORY_ALLOCATION_ERROR;
   2329             return -1;
   2330         }
   2331         j[n].locale = tempstr;
   2332         uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
   2333         if(strcmp(j[n].locale,tmp)) {
   2334             uprv_free(j[n].locale);
   2335             j[n].locale=uprv_strdup(tmp);
   2336         }
   2337 #if defined(ULOC_DEBUG)
   2338         /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
   2339 #endif
   2340         n++;
   2341         s = itemEnd;
   2342         while(*s==',') { /* eat duplicate commas */
   2343             s++;
   2344         }
   2345         if(n>=jSize) {
   2346             if(j==smallBuffer) {  /* overflowed the small buffer. */
   2347                 j = uprv_malloc(sizeof(j[0])*(jSize*2));
   2348                 if(j!=NULL) {
   2349                     uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
   2350                 }
   2351 #if defined(ULOC_DEBUG)
   2352                 fprintf(stderr,"malloced at size %d\n", jSize);
   2353 #endif
   2354             } else {
   2355                 j = uprv_realloc(j, sizeof(j[0])*jSize*2);
   2356 #if defined(ULOC_DEBUG)
   2357                 fprintf(stderr,"re-alloced at size %d\n", jSize);
   2358 #endif
   2359             }
   2360             jSize *= 2;
   2361             if(j==NULL) {
   2362                 *status = U_MEMORY_ALLOCATION_ERROR;
   2363                 return -1;
   2364             }
   2365         }
   2366     }
   2367     uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
   2368     if(U_FAILURE(*status)) {
   2369         if(j != smallBuffer) {
   2370 #if defined(ULOC_DEBUG)
   2371             fprintf(stderr,"freeing j %p\n", j);
   2372 #endif
   2373             uprv_free(j);
   2374         }
   2375         return -1;
   2376     }
   2377     strs = uprv_malloc((size_t)(sizeof(strs[0])*n));
   2378     /* Check for null pointer */
   2379     if (strs == NULL) {
   2380         uprv_free(j); /* Free to avoid memory leak */
   2381         *status = U_MEMORY_ALLOCATION_ERROR;
   2382         return -1;
   2383     }
   2384     for(i=0;i<n;i++) {
   2385 #if defined(ULOC_DEBUG)
   2386         /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
   2387 #endif
   2388         strs[i]=j[i].locale;
   2389     }
   2390     res =  uloc_acceptLanguage(result, resultAvailable, outResult,
   2391         (const char**)strs, n, availableLocales, status);
   2392     for(i=0;i<n;i++) {
   2393         uprv_free(strs[i]);
   2394     }
   2395     uprv_free(strs);
   2396     if(j != smallBuffer) {
   2397 #if defined(ULOC_DEBUG)
   2398         fprintf(stderr,"freeing j %p\n", j);
   2399 #endif
   2400         uprv_free(j);
   2401     }
   2402     return res;
   2403 }
   2404 
   2405 
   2406 U_CAPI int32_t U_EXPORT2
   2407 uloc_acceptLanguage(char *result, int32_t resultAvailable,
   2408                     UAcceptResult *outResult, const char **acceptList,
   2409                     int32_t acceptListCount,
   2410                     UEnumeration* availableLocales,
   2411                     UErrorCode *status)
   2412 {
   2413     int32_t i,j;
   2414     int32_t len;
   2415     int32_t maxLen=0;
   2416     char tmp[ULOC_FULLNAME_CAPACITY+1];
   2417     const char *l;
   2418     char **fallbackList;
   2419     if(U_FAILURE(*status)) {
   2420         return -1;
   2421     }
   2422     fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount));
   2423     if(fallbackList==NULL) {
   2424         *status = U_MEMORY_ALLOCATION_ERROR;
   2425         return -1;
   2426     }
   2427     for(i=0;i<acceptListCount;i++) {
   2428 #if defined(ULOC_DEBUG)
   2429         fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
   2430 #endif
   2431         while((l=uenum_next(availableLocales, NULL, status))) {
   2432 #if defined(ULOC_DEBUG)
   2433             fprintf(stderr,"  %s\n", l);
   2434 #endif
   2435             len = (int32_t)uprv_strlen(l);
   2436             if(!uprv_strcmp(acceptList[i], l)) {
   2437                 if(outResult) {
   2438                     *outResult = ULOC_ACCEPT_VALID;
   2439                 }
   2440 #if defined(ULOC_DEBUG)
   2441                 fprintf(stderr, "MATCH! %s\n", l);
   2442 #endif
   2443                 if(len>0) {
   2444                     uprv_strncpy(result, l, uprv_min(len, resultAvailable));
   2445                 }
   2446                 for(j=0;j<i;j++) {
   2447                     uprv_free(fallbackList[j]);
   2448                 }
   2449                 uprv_free(fallbackList);
   2450                 return u_terminateChars(result, resultAvailable, len, status);
   2451             }
   2452             if(len>maxLen) {
   2453                 maxLen = len;
   2454             }
   2455         }
   2456         uenum_reset(availableLocales, status);
   2457         /* save off parent info */
   2458         if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
   2459             fallbackList[i] = uprv_strdup(tmp);
   2460         } else {
   2461             fallbackList[i]=0;
   2462         }
   2463     }
   2464 
   2465     for(maxLen--;maxLen>0;maxLen--) {
   2466         for(i=0;i<acceptListCount;i++) {
   2467             if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
   2468 #if defined(ULOC_DEBUG)
   2469                 fprintf(stderr,"Try: [%s]", fallbackList[i]);
   2470 #endif
   2471                 while((l=uenum_next(availableLocales, NULL, status))) {
   2472 #if defined(ULOC_DEBUG)
   2473                     fprintf(stderr,"  %s\n", l);
   2474 #endif
   2475                     len = (int32_t)uprv_strlen(l);
   2476                     if(!uprv_strcmp(fallbackList[i], l)) {
   2477                         if(outResult) {
   2478                             *outResult = ULOC_ACCEPT_FALLBACK;
   2479                         }
   2480 #if defined(ULOC_DEBUG)
   2481                         fprintf(stderr, "fallback MATCH! %s\n", l);
   2482 #endif
   2483                         if(len>0) {
   2484                             uprv_strncpy(result, l, uprv_min(len, resultAvailable));
   2485                         }
   2486                         for(j=0;j<acceptListCount;j++) {
   2487                             uprv_free(fallbackList[j]);
   2488                         }
   2489                         uprv_free(fallbackList);
   2490                         return u_terminateChars(result, resultAvailable, len, status);
   2491                     }
   2492                 }
   2493                 uenum_reset(availableLocales, status);
   2494 
   2495                 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
   2496                     uprv_free(fallbackList[i]);
   2497                     fallbackList[i] = uprv_strdup(tmp);
   2498                 } else {
   2499                     uprv_free(fallbackList[i]);
   2500                     fallbackList[i]=0;
   2501                 }
   2502             }
   2503         }
   2504         if(outResult) {
   2505             *outResult = ULOC_ACCEPT_FAILED;
   2506         }
   2507     }
   2508     for(i=0;i<acceptListCount;i++) {
   2509         uprv_free(fallbackList[i]);
   2510     }
   2511     uprv_free(fallbackList);
   2512     return -1;
   2513 }
   2514 
   2515 /*eof*/
   2516