Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 1997-2012, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *
      7 * File ULOC.CPP
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   04/01/97    aliu        Creation.
     13 *   08/21/98    stephen     JDK 1.2 sync
     14 *   12/08/98    rtg         New Locale implementation and C API
     15 *   03/15/99    damiba      overhaul.
     16 *   04/06/99    stephen     changed setDefault() to realloc and copy
     17 *   06/14/99    stephen     Changed calls to ures_open for new params
     18 *   07/21/99    stephen     Modified setDefault() to propagate to C++
     19 *   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
     20 *                           brought canonicalization code into line with spec
     21 *****************************************************************************/
     22 
     23 /*
     24    POSIX's locale format, from putil.c: [no spaces]
     25 
     26      ll [ _CC ] [ . MM ] [ @ VV]
     27 
     28      l = lang, C = ctry, M = charmap, V = variant
     29 */
     30 
     31 #include "unicode/utypes.h"
     32 #include "unicode/ustring.h"
     33 #include "unicode/uloc.h"
     34 
     35 #include "putilimp.h"
     36 #include "ustr_imp.h"
     37 #include "ulocimp.h"
     38 #include "umutex.h"
     39 #include "cstring.h"
     40 #include "cmemory.h"
     41 #include "ucln_cmn.h"
     42 #include "locmap.h"
     43 #include "uarrsort.h"
     44 #include "uenumimp.h"
     45 #include "uassert.h"
     46 
     47 #include <stdio.h> /* for sprintf */
     48 
     49 /* ### Declarations **************************************************/
     50 
     51 /* Locale stuff from locid.cpp */
     52 U_CFUNC void locale_set_default(const char *id);
     53 U_CFUNC const char *locale_get_default(void);
     54 U_CFUNC int32_t
     55 locale_getKeywords(const char *localeID,
     56             char prev,
     57             char *keywords, int32_t keywordCapacity,
     58             char *values, int32_t valuesCapacity, int32_t *valLen,
     59             UBool valuesToo,
     60             UErrorCode *status);
     61 
     62 /* ### Data tables **************************************************/
     63 
     64 /**
     65  * Table of language codes, both 2- and 3-letter, with preference
     66  * given to 2-letter codes where possible.  Includes 3-letter codes
     67  * that lack a 2-letter equivalent.
     68  *
     69  * This list must be in sorted order.  This list is returned directly
     70  * to the user by some API.
     71  *
     72  * This list must be kept in sync with LANGUAGES_3, with corresponding
     73  * entries matched.
     74  *
     75  * This table should be terminated with a NULL entry, followed by a
     76  * second list, and another NULL entry.  The first list is visible to
     77  * user code when this array is returned by API.  The second list
     78  * contains codes we support, but do not expose through user API.
     79  *
     80  * Notes
     81  *
     82  * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
     83  * include the revisions up to 2001/7/27 *CWB*
     84  *
     85  * The 3 character codes are the terminology codes like RFC 3066.  This
     86  * is compatible with prior ICU codes
     87  *
     88  * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
     89  * table but now at the end of the table because 3 character codes are
     90  * duplicates.  This avoids bad searches going from 3 to 2 character
     91  * codes.
     92  *
     93  * The range qaa-qtz is reserved for local use
     94  */
     95 static const char * const LANGUAGES[] = {
     96     "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",
     97     "afh", "agq", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",
     98     "ang", "anp", "apa",
     99     "ar",  "arc", "arn", "arp", "art", "arw", "as", "asa", "ast",
    100     "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",
    101     "bai", "bal", "ban", "bas", "bat", "be",  "bej",
    102     "bem", "ber", "bez", "bg",  "bh",  "bho", "bi",  "bik", "bin",
    103     "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "brx", "bs",
    104     "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",
    105     "cch", "ce",  "ceb", "cel", "cgg", "ch",  "chb", "chg", "chk", "chm",
    106     "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",
    107     "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",
    108     "cv",  "cy",  "da",  "dak", "dar", "dav", "day", "de",  "del", "den",
    109     "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyo", "dyu",
    110     "dz",  "ebu", "ee",  "efi", "egy", "eka", "el",  "elx", "en",
    111     "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",
    112     "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",
    113     "fr",  "frm", "fro", "frr", "frs", "fur", "fy",
    114     "ga",  "gaa", "gan", "gay", "gba", "gd",  "gem", "gez", "gil",
    115     "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
    116     "grc", "gsw", "gu",  "guz", "gv",  "gwi",
    117     "ha",  "hai", "hak", "haw", "he",  "hi",  "hil", "him",
    118     "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",  "hup", "hy",  "hz",
    119     "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",
    120     "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",
    121     "iu",  "ja",  "jbo", "jgo", "jmc", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",
    122     "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kde", "kea", "kfo", "kg",  "kha", "khi",
    123     "kho", "khq", "ki",  "kj",  "kk",  "kl",  "kln", "km",  "kmb", "kn",
    124     "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks", "ksb", "ksf",
    125     "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad", "lag",
    126     "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",
    127     "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus", "luy",
    128     "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",
    129     "mdf", "mdr", "men", "mer", "mfe", "mg",  "mga", "mgh", "mgo", "mh",  "mi",  "mic", "min",
    130     "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",
    131     "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mua", "mul", "mun",
    132     "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nan", "nap", "naq",
    133     "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",
    134     "niu", "nl",  "nmg", "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub", "nus",
    135     "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",
    136     "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",
    137     "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    138     "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",
    139     "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rof", "rom",
    140     "ru",  "rup", "rw",  "rwk", "sa",  "sad", "sah", "sai", "sal", "sam", "saq",
    141     "sas", "sat", "sbp", "sc",  "scn", "sco", "sd",  "se",  "seh", "sel", "sem", "ses",
    142     "sg",  "sga", "sgn", "shi", "shn", "si",  "sid", "sio", "sit",
    143     "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
    144     "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
    145     "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",
    146     "sv",  "sw",  "swc", "syc", "syr", "ta",  "tai", "te",  "tem", "teo", "ter",
    147     "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",
    148     "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr", "trv",
    149     "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw", "twq",
    150     "ty",  "tyv", "tzm", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",
    151     "uz",  "vai", "ve",  "vi",  "vo",  "vot", "vun", "wa",  "wak",
    152     "wal", "war", "was", "wen", "wo",  "wuu", "xal", "xh",  "xog", "yao", "yap", "yav",
    153     "yi",  "yo",  "ypk", "yue", "za",  "zap", "zbl", "zen", "zh",  "znd",
    154     "zu",  "zun", "zxx", "zza",
    155 NULL,
    156     "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
    157 NULL
    158 };
    159 static const char* const DEPRECATED_LANGUAGES[]={
    160     "in", "iw", "ji", "jw", NULL, NULL
    161 };
    162 static const char* const REPLACEMENT_LANGUAGES[]={
    163     "id", "he", "yi", "jv", NULL, NULL
    164 };
    165 
    166 /**
    167  * Table of 3-letter language codes.
    168  *
    169  * This is a lookup table used to convert 3-letter language codes to
    170  * their 2-letter equivalent, where possible.  It must be kept in sync
    171  * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
    172  * same language as LANGUAGES_3[i].  The commented-out lines are
    173  * copied from LANGUAGES to make eyeballing this baby easier.
    174  *
    175  * Where a 3-letter language code has no 2-letter equivalent, the
    176  * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
    177  *
    178  * This table should be terminated with a NULL entry, followed by a
    179  * second list, and another NULL entry.  The two lists correspond to
    180  * the two lists in LANGUAGES.
    181  */
    182 static const char * const LANGUAGES_3[] = {
    183 /*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
    184     "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
    185 /*  "afh", "agq", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",    */
    186     "afh", "agq", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
    187 /*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "asa", "ast",    */
    188     "ara", "arc", "arn", "arp", "art", "arw", "asm", "asa", "ast",
    189 /*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
    190     "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
    191 /*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
    192     "bai", "bal", "ban", "bas", "bat", "bel", "bej",
    193 /*  "bem", "ber", "bez", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
    194     "bem", "ber", "bez", "bul", "bih", "bho", "bis", "bik", "bin",
    195 /*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "brx", "bs",     */
    196     "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "brx", "bos",
    197 /*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
    198     "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
    199 /*  "cch", "ce",  "ceb", "cel", "cgg", "ch",  "chb", "chg", "chk", "chm",    */
    200     "cch", "che", "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
    201 /*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
    202     "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
    203 /*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
    204     "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
    205 /*  "cv",  "cy",  "da",  "dak", "dar", "dav", "day", "de",  "del", "den",    */
    206     "chv", "cym", "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
    207 /*  "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyo", "dyu",    */
    208     "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum", "div", "dyo", "dyu",
    209 /*  "dz",  "ebu", "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
    210     "dzo", "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
    211 /*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
    212     "enm", "epo", "spa", "est", "eus", "ewo", "fas",
    213 /*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",    */
    214     "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
    215 /*  "fr",  "frm", "fro", "frr", "frs", "fur", "fy",  "ga",  "gaa", "gan", "gay",    */
    216     "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gan", "gay",
    217 /*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
    218     "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
    219 /*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guz", "gv",     */
    220     "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guz", "glv",
    221 /*  "gwi", "ha",  "hai", "hak", "haw", "he",  "hi",  "hil", "him",    */
    222     "gwi", "hau", "hai", "hak", "haw", "heb", "hin", "hil", "him",
    223 /*  "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",  "hup", "hy",  "hz",     */
    224     "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun", "hup", "hye", "her",
    225 /*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
    226     "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
    227 /*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",      */
    228     "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
    229 /*  "iu",  "ja",  "jbo", "jgo", "jmc", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",   */
    230     "iku", "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav", "kat", "kaa", "kab",
    231 /*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kde", "kea", "kfo", "kg",  "kha", "khi",*/
    232     "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kde", "kea", "kfo", "kg",  "kha", "khi",
    233 /*  "kho", "khq", "ki",  "kj",  "kk",  "kl",  "kln", "km",  "kmb", "kn",     */
    234     "kho", "khq", "kik", "kua", "kaz", "kal", "kln", "khm", "kmb", "kan",
    235 /*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",  "ksb", "ksf", */
    236     "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
    237 /*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad", "lag",    */
    238     "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad", "lag",
    239 /*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
    240     "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
    241 /*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus", "luy",   */
    242     "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus", "luy",
    243 /*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
    244     "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
    245 /*  "mdf", "mdr", "men", "mer", "mfe", "mg",  "mga", "mgh", "mgo", "mh",  "mi",  "mic", "min",    */
    246     "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga", "mgh", "mgo", "mah", "mri", "mic", "min",
    247 /*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
    248     "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
    249 /*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mua", "mul", "mun",    */
    250     "mol", "moh", "mos", "mar", "msa", "mlt", "mua", "mul", "mun",
    251 /*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nan", "nap", "naq",   */
    252     "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nan", "nap", "naq",
    253 /*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
    254     "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
    255 /*  "niu", "nl",  "nmg", "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub", "nus",   */
    256     "niu", "nld", "nmg", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub", "nus",
    257 /*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
    258     "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
    259 /*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
    260     "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
    261 /*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
    262     "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    263 /*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
    264     "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
    265 /*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rof", "rom",    */
    266     "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof", "rom",
    267 /*  "ru",  "rup", "rw",  "rwk", "sa",  "sad", "sah", "sai", "sal", "sam", "saq",    */
    268     "rus", "rup", "kin", "rwk", "san", "sad", "sah", "sai", "sal", "sam", "saq",
    269 /*  "sas", "sat", "sbp", "sc",  "scn", "sco", "sd",  "se",  "seh", "sel", "sem", "ses",    */
    270     "sas", "sat", "sbp", "srd", "scn", "sco", "snd", "sme", "seh", "sel", "sem", "ses",
    271 /*  "sg",  "sga", "sgn", "shi", "shn", "si",  "sid", "sio", "sit",    */
    272     "sag", "sga", "sgn", "shi", "shn", "sin", "sid", "sio", "sit",
    273 /*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
    274     "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
    275 /*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
    276     "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
    277 /*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
    278     "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
    279 /*  "sv",  "sw",  "swc", "syc", "syr", "ta",  "tai", "te",  "tem", "teo", "ter",    */
    280     "swe", "swa", "swc", "syc", "syr", "tam", "tai", "tel", "tem", "teo", "ter",
    281 /*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
    282     "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
    283 /*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr", "trv",    */
    284     "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
    285 /*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",  "twq"   */
    286     "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi", "twq",
    287 /*  "ty",  "tyv", "tzm", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
    288     "tah", "tyv", "tzm", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
    289 /*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "vun", "wa",  "wak",    */
    290     "uzb", "vai", "ven", "vie", "vol", "vot", "vun", "wln", "wak",
    291 /*  "wal", "war", "was", "wen", "wo",  "wuu", "xal", "xh",  "xog", "yao", "yap", "yav",   */
    292     "wal", "war", "was", "wen", "wol", "wuu", "xal", "xho", "xog", "yao", "yap", "yav",
    293 /*  "yi",  "yo",  "ypk", "yue", "za",  "zap", "zbl", "zen", "zh",  "znd",    */
    294     "yid", "yor", "ypk", "yue", "zha", "zap", "zbl", "zen", "zho", "znd",
    295 /*  "zu",  "zun", "zxx", "zza",                                         */
    296     "zul", "zun", "zxx", "zza",
    297 NULL,
    298 /*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    299     "ind", "heb", "yid", "jaw", "srp",
    300 NULL
    301 };
    302 
    303 /**
    304  * Table of 2-letter country codes.
    305  *
    306  * This list must be in sorted order.  This list is returned directly
    307  * to the user by some API.
    308  *
    309  * This list must be kept in sync with COUNTRIES_3, with corresponding
    310  * entries matched.
    311  *
    312  * This table should be terminated with a NULL entry, followed by a
    313  * second list, and another NULL entry.  The first list is visible to
    314  * user code when this array is returned by API.  The second list
    315  * contains codes we support, but do not expose through user API.
    316  *
    317  * Notes:
    318  *
    319  * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
    320  * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
    321  * new codes keeping the old ones for compatibility updated to include
    322  * 1999/12/03 revisions *CWB*
    323  *
    324  * RO(ROM) is now RO(ROU) according to
    325  * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
    326  */
    327 static const char * const COUNTRIES[] = {
    328     "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",
    329     "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    330     "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    331     "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",
    332     "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    333     "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    334     "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    335     "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    336     "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    337     "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    338     "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    339     "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    340     "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    341     "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    342     "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    343     "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    344     "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    345     "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    346     "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    347     "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    348     "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    349     "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    350     "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    351     "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    352     "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",
    353     "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    354     "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    355     "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    356     "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    357     "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
    358 NULL,
    359     "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   /* obsolete country codes */
    360 NULL
    361 };
    362 
    363 static const char* const DEPRECATED_COUNTRIES[] ={
    364     "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */
    365 };
    366 static const char* const REPLACEMENT_COUNTRIES[] = {
    367 /*  "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
    368     "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", NULL, NULL  /* replacement country codes */
    369 };
    370 
    371 /**
    372  * Table of 3-letter country codes.
    373  *
    374  * This is a lookup table used to convert 3-letter country codes to
    375  * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
    376  * For all valid i, COUNTRIES[i] must refer to the same country as
    377  * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
    378  * to make eyeballing this baby easier.
    379  *
    380  * This table should be terminated with a NULL entry, followed by a
    381  * second list, and another NULL entry.  The two lists correspond to
    382  * the two lists in COUNTRIES.
    383  */
    384 static const char * const COUNTRIES_3[] = {
    385 /*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",     */
    386     "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
    387 /*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    388     "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
    389 /*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    390     "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
    391 /*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",     */
    392     "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
    393 /*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    394     "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
    395 /*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    396     "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
    397 /*  "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    398     "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
    399 /*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    400     "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
    401 /*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    402     "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
    403 /*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    404     "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
    405 /*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    406     "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
    407 /*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    408     "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
    409 /*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    410     "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
    411 /*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    412     "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
    413 /*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    414     "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
    415 /*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    416     "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
    417 /*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    418     "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
    419 /*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    420     "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
    421 /*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    422     "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
    423 /*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    424     "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
    425 /*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    426     "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
    427 /*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    428     "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
    429 /*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    430     "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
    431 /*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    432     "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
    433 /*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",     */
    434     "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
    435 /*  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    436     "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
    437 /*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    438     "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
    439 /*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    440     "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
    441 /*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    442     "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
    443 /*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    444     "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
    445 NULL,
    446 /*  "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   */
    447     "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
    448 NULL
    449 };
    450 
    451 typedef struct CanonicalizationMap {
    452     const char *id;          /* input ID */
    453     const char *canonicalID; /* canonicalized output ID */
    454     const char *keyword;     /* keyword, or NULL if none */
    455     const char *value;       /* keyword value, or NULL if kw==NULL */
    456 } CanonicalizationMap;
    457 
    458 /**
    459  * A map to canonicalize locale IDs.  This handles a variety of
    460  * different semantic kinds of transformations.
    461  */
    462 static const CanonicalizationMap CANONICALIZE_MAP[] = {
    463     { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
    464     { "c",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
    465     { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
    466     { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
    467     { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
    468     { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
    469     { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
    470     { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
    471     { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
    472     { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
    473     { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
    474     { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
    475     { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
    476     { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
    477     { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
    478     { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
    479     { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
    480     { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
    481     { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
    482     { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
    483     { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
    484     { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
    485     { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
    486     { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
    487     { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
    488     { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
    489     { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
    490     { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
    491     { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
    492     { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
    493     { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
    494     { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
    495     { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
    496     { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
    497     { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
    498     { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
    499     { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
    500     { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
    501     { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
    502     { "zh_GAN",         "gan", NULL, NULL }, /* registered name */
    503     { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
    504     { "zh_HAKKA",       "hak", NULL, NULL }, /* registered name */
    505     { "zh_MIN_NAN",     "nan", NULL, NULL }, /* registered name */
    506     { "zh_WUU",         "wuu", NULL, NULL }, /* registered name */
    507     { "zh_XIANG",       "hsn", NULL, NULL }, /* registered name */
    508     { "zh_YUE",         "yue", NULL, NULL }, /* registered name */
    509 };
    510 
    511 typedef struct VariantMap {
    512     const char *variant;          /* input ID */
    513     const char *keyword;     /* keyword, or NULL if none */
    514     const char *value;       /* keyword value, or NULL if kw==NULL */
    515 } VariantMap;
    516 
    517 static const VariantMap VARIANT_MAP[] = {
    518     { "EURO",   "currency", "EUR" },
    519     { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    520     { "STROKE", "collation", "stroke" }  /* Solaris variant */
    521 };
    522 
    523 /* ### BCP47 Conversion *******************************************/
    524 /* Test if the locale id has BCP47 u extension and does not have '@' */
    525 #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
    526 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
    527 #define _ConvertBCP47(finalID, id, buffer, length,err) \
    528         if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
    529             finalID=id; \
    530         } else { \
    531             finalID=buffer; \
    532         }
    533 /* Gets the size of the shortest subtag in the given localeID. */
    534 static int32_t getShortestSubtagLength(const char *localeID) {
    535     int32_t localeIDLength = uprv_strlen(localeID);
    536     int32_t length = localeIDLength;
    537     int32_t tmpLength = 0;
    538     int32_t i;
    539     UBool reset = TRUE;
    540 
    541     for (i = 0; i < localeIDLength; i++) {
    542         if (localeID[i] != '_' && localeID[i] != '-') {
    543             if (reset) {
    544                 tmpLength = 0;
    545                 reset = FALSE;
    546             }
    547             tmpLength++;
    548         } else {
    549             if (tmpLength != 0 && tmpLength < length) {
    550                 length = tmpLength;
    551             }
    552             reset = TRUE;
    553         }
    554     }
    555 
    556     return length;
    557 }
    558 
    559 /* ### Keywords **************************************************/
    560 
    561 #define ULOC_KEYWORD_BUFFER_LEN 25
    562 #define ULOC_MAX_NO_KEYWORDS 25
    563 
    564 U_CAPI const char * U_EXPORT2
    565 locale_getKeywordsStart(const char *localeID) {
    566     const char *result = NULL;
    567     if((result = uprv_strchr(localeID, '@')) != NULL) {
    568         return result;
    569     }
    570 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    571     else {
    572         /* We do this because the @ sign is variant, and the @ sign used on one
    573         EBCDIC machine won't be compiled the same way on other EBCDIC based
    574         machines. */
    575         static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
    576         const uint8_t *charToFind = ebcdicSigns;
    577         while(*charToFind) {
    578             if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
    579                 return result;
    580             }
    581             charToFind++;
    582         }
    583     }
    584 #endif
    585     return NULL;
    586 }
    587 
    588 /**
    589  * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
    590  * @param keywordName incoming name to be canonicalized
    591  * @param status return status (keyword too long)
    592  * @return length of the keyword name
    593  */
    594 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
    595 {
    596   int32_t i;
    597   int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
    598 
    599   if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
    600     /* keyword name too long for internal buffer */
    601     *status = U_INTERNAL_PROGRAM_ERROR;
    602           return 0;
    603   }
    604 
    605   /* normalize the keyword name */
    606   for(i = 0; i < keywordNameLen; i++) {
    607     buf[i] = uprv_tolower(keywordName[i]);
    608   }
    609   buf[i] = 0;
    610 
    611   return keywordNameLen;
    612 }
    613 
    614 typedef struct {
    615     char keyword[ULOC_KEYWORD_BUFFER_LEN];
    616     int32_t keywordLen;
    617     const char *valueStart;
    618     int32_t valueLen;
    619 } KeywordStruct;
    620 
    621 static int32_t U_CALLCONV
    622 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
    623     const char* leftString = ((const KeywordStruct *)left)->keyword;
    624     const char* rightString = ((const KeywordStruct *)right)->keyword;
    625     return uprv_strcmp(leftString, rightString);
    626 }
    627 
    628 /**
    629  * Both addKeyword and addValue must already be in canonical form.
    630  * Either both addKeyword and addValue are NULL, or neither is NULL.
    631  * If they are not NULL they must be zero terminated.
    632  * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
    633  */
    634 static int32_t
    635 _getKeywords(const char *localeID,
    636              char prev,
    637              char *keywords, int32_t keywordCapacity,
    638              char *values, int32_t valuesCapacity, int32_t *valLen,
    639              UBool valuesToo,
    640              const char* addKeyword,
    641              const char* addValue,
    642              UErrorCode *status)
    643 {
    644     KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
    645 
    646     int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
    647     int32_t numKeywords = 0;
    648     const char* pos = localeID;
    649     const char* equalSign = NULL;
    650     const char* semicolon = NULL;
    651     int32_t i = 0, j, n;
    652     int32_t keywordsLen = 0;
    653     int32_t valuesLen = 0;
    654 
    655     if(prev == '@') { /* start of keyword definition */
    656         /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
    657         do {
    658             UBool duplicate = FALSE;
    659             /* skip leading spaces */
    660             while(*pos == ' ') {
    661                 pos++;
    662             }
    663             if (!*pos) { /* handle trailing "; " */
    664                 break;
    665             }
    666             if(numKeywords == maxKeywords) {
    667                 *status = U_INTERNAL_PROGRAM_ERROR;
    668                 return 0;
    669             }
    670             equalSign = uprv_strchr(pos, '=');
    671             semicolon = uprv_strchr(pos, ';');
    672             /* lack of '=' [foo@currency] is illegal */
    673             /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
    674             if(!equalSign || (semicolon && semicolon<equalSign)) {
    675                 *status = U_INVALID_FORMAT_ERROR;
    676                 return 0;
    677             }
    678             /* need to normalize both keyword and keyword name */
    679             if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
    680                 /* keyword name too long for internal buffer */
    681                 *status = U_INTERNAL_PROGRAM_ERROR;
    682                 return 0;
    683             }
    684             for(i = 0, n = 0; i < equalSign - pos; ++i) {
    685                 if (pos[i] != ' ') {
    686                     keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
    687                 }
    688             }
    689             keywordList[numKeywords].keyword[n] = 0;
    690             keywordList[numKeywords].keywordLen = n;
    691             /* now grab the value part. First we skip the '=' */
    692             equalSign++;
    693             /* then we leading spaces */
    694             while(*equalSign == ' ') {
    695                 equalSign++;
    696             }
    697             keywordList[numKeywords].valueStart = equalSign;
    698 
    699             pos = semicolon;
    700             i = 0;
    701             if(pos) {
    702                 while(*(pos - i - 1) == ' ') {
    703                     i++;
    704                 }
    705                 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
    706                 pos++;
    707             } else {
    708                 i = (int32_t)uprv_strlen(equalSign);
    709                 while(i && equalSign[i-1] == ' ') {
    710                     i--;
    711                 }
    712                 keywordList[numKeywords].valueLen = i;
    713             }
    714             /* If this is a duplicate keyword, then ignore it */
    715             for (j=0; j<numKeywords; ++j) {
    716                 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
    717                     duplicate = TRUE;
    718                     break;
    719                 }
    720             }
    721             if (!duplicate) {
    722                 ++numKeywords;
    723             }
    724         } while(pos);
    725 
    726         /* Handle addKeyword/addValue. */
    727         if (addKeyword != NULL) {
    728             UBool duplicate = FALSE;
    729             U_ASSERT(addValue != NULL);
    730             /* Search for duplicate; if found, do nothing. Explicit keyword
    731                overrides addKeyword. */
    732             for (j=0; j<numKeywords; ++j) {
    733                 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
    734                     duplicate = TRUE;
    735                     break;
    736                 }
    737             }
    738             if (!duplicate) {
    739                 if (numKeywords == maxKeywords) {
    740                     *status = U_INTERNAL_PROGRAM_ERROR;
    741                     return 0;
    742                 }
    743                 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
    744                 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
    745                 keywordList[numKeywords].valueStart = addValue;
    746                 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
    747                 ++numKeywords;
    748             }
    749         } else {
    750             U_ASSERT(addValue == NULL);
    751         }
    752 
    753         /* now we have a list of keywords */
    754         /* we need to sort it */
    755         uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
    756 
    757         /* Now construct the keyword part */
    758         for(i = 0; i < numKeywords; i++) {
    759             if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
    760                 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
    761                 if(valuesToo) {
    762                     keywords[keywordsLen + keywordList[i].keywordLen] = '=';
    763                 } else {
    764                     keywords[keywordsLen + keywordList[i].keywordLen] = 0;
    765                 }
    766             }
    767             keywordsLen += keywordList[i].keywordLen + 1;
    768             if(valuesToo) {
    769                 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
    770                     uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
    771                 }
    772                 keywordsLen += keywordList[i].valueLen;
    773 
    774                 if(i < numKeywords - 1) {
    775                     if(keywordsLen < keywordCapacity) {
    776                         keywords[keywordsLen] = ';';
    777                     }
    778                     keywordsLen++;
    779                 }
    780             }
    781             if(values) {
    782                 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
    783                     uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
    784                     values[valuesLen + keywordList[i].valueLen] = 0;
    785                 }
    786                 valuesLen += keywordList[i].valueLen + 1;
    787             }
    788         }
    789         if(values) {
    790             values[valuesLen] = 0;
    791             if(valLen) {
    792                 *valLen = valuesLen;
    793             }
    794         }
    795         return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
    796     } else {
    797         return 0;
    798     }
    799 }
    800 
    801 U_CFUNC int32_t
    802 locale_getKeywords(const char *localeID,
    803                    char prev,
    804                    char *keywords, int32_t keywordCapacity,
    805                    char *values, int32_t valuesCapacity, int32_t *valLen,
    806                    UBool valuesToo,
    807                    UErrorCode *status) {
    808     return _getKeywords(localeID, prev, keywords, keywordCapacity,
    809                         values, valuesCapacity, valLen, valuesToo,
    810                         NULL, NULL, status);
    811 }
    812 
    813 U_CAPI int32_t U_EXPORT2
    814 uloc_getKeywordValue(const char* localeID,
    815                      const char* keywordName,
    816                      char* buffer, int32_t bufferCapacity,
    817                      UErrorCode* status)
    818 {
    819     const char* startSearchHere = NULL;
    820     const char* nextSeparator = NULL;
    821     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    822     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    823     int32_t i = 0;
    824     int32_t result = 0;
    825 
    826     if(status && U_SUCCESS(*status) && localeID) {
    827       char tempBuffer[ULOC_FULLNAME_CAPACITY];
    828       const char* tmpLocaleID;
    829 
    830       if (_hasBCP47Extension(localeID)) {
    831           _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
    832       } else {
    833           tmpLocaleID=localeID;
    834       }
    835 
    836       startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
    837       if(startSearchHere == NULL) {
    838           /* no keywords, return at once */
    839           return 0;
    840       }
    841 
    842       locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    843       if(U_FAILURE(*status)) {
    844         return 0;
    845       }
    846 
    847       /* find the first keyword */
    848       while(startSearchHere) {
    849           startSearchHere++;
    850           /* skip leading spaces (allowed?) */
    851           while(*startSearchHere == ' ') {
    852               startSearchHere++;
    853           }
    854           nextSeparator = uprv_strchr(startSearchHere, '=');
    855           /* need to normalize both keyword and keyword name */
    856           if(!nextSeparator) {
    857               break;
    858           }
    859           if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
    860               /* keyword name too long for internal buffer */
    861               *status = U_INTERNAL_PROGRAM_ERROR;
    862               return 0;
    863           }
    864           for(i = 0; i < nextSeparator - startSearchHere; i++) {
    865               localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
    866           }
    867           /* trim trailing spaces */
    868           while(startSearchHere[i-1] == ' ') {
    869               i--;
    870               U_ASSERT(i>=0);
    871           }
    872           localeKeywordNameBuffer[i] = 0;
    873 
    874           startSearchHere = uprv_strchr(nextSeparator, ';');
    875 
    876           if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
    877               nextSeparator++;
    878               while(*nextSeparator == ' ') {
    879                   nextSeparator++;
    880               }
    881               /* we actually found the keyword. Copy the value */
    882               if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
    883                   while(*(startSearchHere-1) == ' ') {
    884                       startSearchHere--;
    885                   }
    886                   uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
    887                   result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
    888               } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
    889                   i = (int32_t)uprv_strlen(nextSeparator);
    890                   while(nextSeparator[i - 1] == ' ') {
    891                       i--;
    892                   }
    893                   uprv_strncpy(buffer, nextSeparator, i);
    894                   result = u_terminateChars(buffer, bufferCapacity, i, status);
    895               } else {
    896                   /* give a bigger buffer, please */
    897                   *status = U_BUFFER_OVERFLOW_ERROR;
    898                   if(startSearchHere) {
    899                       result = (int32_t)(startSearchHere - nextSeparator);
    900                   } else {
    901                       result = (int32_t)uprv_strlen(nextSeparator);
    902                   }
    903               }
    904               return result;
    905           }
    906       }
    907     }
    908     return 0;
    909 }
    910 
    911 U_CAPI int32_t U_EXPORT2
    912 uloc_setKeywordValue(const char* keywordName,
    913                      const char* keywordValue,
    914                      char* buffer, int32_t bufferCapacity,
    915                      UErrorCode* status)
    916 {
    917     /* TODO: sorting. removal. */
    918     int32_t keywordNameLen;
    919     int32_t keywordValueLen;
    920     int32_t bufLen;
    921     int32_t needLen = 0;
    922     int32_t foundValueLen;
    923     int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
    924     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    925     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    926     int32_t i = 0;
    927     int32_t rc;
    928     char* nextSeparator = NULL;
    929     char* nextEqualsign = NULL;
    930     char* startSearchHere = NULL;
    931     char* keywordStart = NULL;
    932     char *insertHere = NULL;
    933     if(U_FAILURE(*status)) {
    934         return -1;
    935     }
    936     if(bufferCapacity>1) {
    937         bufLen = (int32_t)uprv_strlen(buffer);
    938     } else {
    939         *status = U_ILLEGAL_ARGUMENT_ERROR;
    940         return 0;
    941     }
    942     if(bufferCapacity<bufLen) {
    943         /* The capacity is less than the length?! Is this NULL terminated? */
    944         *status = U_ILLEGAL_ARGUMENT_ERROR;
    945         return 0;
    946     }
    947     if(keywordValue && !*keywordValue) {
    948         keywordValue = NULL;
    949     }
    950     if(keywordValue) {
    951         keywordValueLen = (int32_t)uprv_strlen(keywordValue);
    952     } else {
    953         keywordValueLen = 0;
    954     }
    955     keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    956     if(U_FAILURE(*status)) {
    957         return 0;
    958     }
    959     startSearchHere = (char*)locale_getKeywordsStart(buffer);
    960     if(startSearchHere == NULL || (startSearchHere[1]==0)) {
    961         if(!keywordValue) { /* no keywords = nothing to remove */
    962             return bufLen;
    963         }
    964 
    965         needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
    966         if(startSearchHere) { /* had a single @ */
    967             needLen--; /* already had the @ */
    968             /* startSearchHere points at the @ */
    969         } else {
    970             startSearchHere=buffer+bufLen;
    971         }
    972         if(needLen >= bufferCapacity) {
    973             *status = U_BUFFER_OVERFLOW_ERROR;
    974             return needLen; /* no change */
    975         }
    976         *startSearchHere = '@';
    977         startSearchHere++;
    978         uprv_strcpy(startSearchHere, keywordNameBuffer);
    979         startSearchHere += keywordNameLen;
    980         *startSearchHere = '=';
    981         startSearchHere++;
    982         uprv_strcpy(startSearchHere, keywordValue);
    983         startSearchHere+=keywordValueLen;
    984         return needLen;
    985     } /* end shortcut - no @ */
    986 
    987     keywordStart = startSearchHere;
    988     /* search for keyword */
    989     while(keywordStart) {
    990         keywordStart++;
    991         /* skip leading spaces (allowed?) */
    992         while(*keywordStart == ' ') {
    993             keywordStart++;
    994         }
    995         nextEqualsign = uprv_strchr(keywordStart, '=');
    996         /* need to normalize both keyword and keyword name */
    997         if(!nextEqualsign) {
    998             break;
    999         }
   1000         if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
   1001             /* keyword name too long for internal buffer */
   1002             *status = U_INTERNAL_PROGRAM_ERROR;
   1003             return 0;
   1004         }
   1005         for(i = 0; i < nextEqualsign - keywordStart; i++) {
   1006             localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
   1007         }
   1008         /* trim trailing spaces */
   1009         while(keywordStart[i-1] == ' ') {
   1010             i--;
   1011         }
   1012         U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN);
   1013         localeKeywordNameBuffer[i] = 0;
   1014 
   1015         nextSeparator = uprv_strchr(nextEqualsign, ';');
   1016         rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
   1017         if(rc == 0) {
   1018             nextEqualsign++;
   1019             while(*nextEqualsign == ' ') {
   1020                 nextEqualsign++;
   1021             }
   1022             /* we actually found the keyword. Change the value */
   1023             if (nextSeparator) {
   1024                 keywordAtEnd = 0;
   1025                 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
   1026             } else {
   1027                 keywordAtEnd = 1;
   1028                 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
   1029             }
   1030             if(keywordValue) { /* adding a value - not removing */
   1031               if(foundValueLen == keywordValueLen) {
   1032                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1033                 return bufLen; /* no change in size */
   1034               } else if(foundValueLen > keywordValueLen) {
   1035                 int32_t delta = foundValueLen - keywordValueLen;
   1036                 if(nextSeparator) { /* RH side */
   1037                   uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
   1038                 }
   1039                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1040                 bufLen -= delta;
   1041                 buffer[bufLen]=0;
   1042                 return bufLen;
   1043               } else { /* FVL < KVL */
   1044                 int32_t delta = keywordValueLen - foundValueLen;
   1045                 if((bufLen+delta) >= bufferCapacity) {
   1046                   *status = U_BUFFER_OVERFLOW_ERROR;
   1047                   return bufLen+delta;
   1048                 }
   1049                 if(nextSeparator) { /* RH side */
   1050                   uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
   1051                 }
   1052                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1053                 bufLen += delta;
   1054                 buffer[bufLen]=0;
   1055                 return bufLen;
   1056               }
   1057             } else { /* removing a keyword */
   1058               if(keywordAtEnd) {
   1059                 /* zero out the ';' or '@' just before startSearchhere */
   1060                 keywordStart[-1] = 0;
   1061                 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
   1062               } else {
   1063                 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
   1064                 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
   1065                 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
   1066               }
   1067             }
   1068         } else if(rc<0){ /* end match keyword */
   1069           /* could insert at this location. */
   1070           insertHere = keywordStart;
   1071         }
   1072         keywordStart = nextSeparator;
   1073     } /* end loop searching */
   1074 
   1075     if(!keywordValue) {
   1076       return bufLen; /* removal of non-extant keyword - no change */
   1077     }
   1078 
   1079     /* we know there is at least one keyword. */
   1080     needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
   1081     if(needLen >= bufferCapacity) {
   1082         *status = U_BUFFER_OVERFLOW_ERROR;
   1083         return needLen; /* no change */
   1084     }
   1085 
   1086     if(insertHere) {
   1087       uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
   1088       keywordStart = insertHere;
   1089     } else {
   1090       keywordStart = buffer+bufLen;
   1091       *keywordStart = ';';
   1092       keywordStart++;
   1093     }
   1094     uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
   1095     keywordStart += keywordNameLen;
   1096     *keywordStart = '=';
   1097     keywordStart++;
   1098     uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
   1099     keywordStart+=keywordValueLen;
   1100     if(insertHere) {
   1101       *keywordStart = ';';
   1102       keywordStart++;
   1103     }
   1104     buffer[needLen]=0;
   1105     return needLen;
   1106 }
   1107 
   1108 /* ### ID parsing implementation **************************************************/
   1109 
   1110 #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))
   1111 
   1112 /*returns TRUE if one of the special prefixes is here (s=string)
   1113   'x-' or 'i-' */
   1114 #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
   1115 
   1116 /* Dot terminates it because of POSIX form  where dot precedes the codepage
   1117  * except for variant
   1118  */
   1119 #define _isTerminator(a)  ((a==0)||(a=='.')||(a=='@'))
   1120 
   1121 static char* _strnchr(const char* str, int32_t len, char c) {
   1122     U_ASSERT(str != 0 && len >= 0);
   1123     while (len-- != 0) {
   1124         char d = *str;
   1125         if (d == c) {
   1126             return (char*) str;
   1127         } else if (d == 0) {
   1128             break;
   1129         }
   1130         ++str;
   1131     }
   1132     return NULL;
   1133 }
   1134 
   1135 /**
   1136  * Lookup 'key' in the array 'list'.  The array 'list' should contain
   1137  * a NULL entry, followed by more entries, and a second NULL entry.
   1138  *
   1139  * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
   1140  * COUNTRIES_3.
   1141  */
   1142 static int16_t _findIndex(const char* const* list, const char* key)
   1143 {
   1144     const char* const* anchor = list;
   1145     int32_t pass = 0;
   1146 
   1147     /* Make two passes through two NULL-terminated arrays at 'list' */
   1148     while (pass++ < 2) {
   1149         while (*list) {
   1150             if (uprv_strcmp(key, *list) == 0) {
   1151                 return (int16_t)(list - anchor);
   1152             }
   1153             list++;
   1154         }
   1155         ++list;     /* skip final NULL *CWB*/
   1156     }
   1157     return -1;
   1158 }
   1159 
   1160 /* count the length of src while copying it to dest; return strlen(src) */
   1161 static inline int32_t
   1162 _copyCount(char *dest, int32_t destCapacity, const char *src) {
   1163     const char *anchor;
   1164     char c;
   1165 
   1166     anchor=src;
   1167     for(;;) {
   1168         if((c=*src)==0) {
   1169             return (int32_t)(src-anchor);
   1170         }
   1171         if(destCapacity<=0) {
   1172             return (int32_t)((src-anchor)+uprv_strlen(src));
   1173         }
   1174         ++src;
   1175         *dest++=c;
   1176         --destCapacity;
   1177     }
   1178 }
   1179 
   1180 U_CFUNC const char*
   1181 uloc_getCurrentCountryID(const char* oldID){
   1182     int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
   1183     if (offset >= 0) {
   1184         return REPLACEMENT_COUNTRIES[offset];
   1185     }
   1186     return oldID;
   1187 }
   1188 U_CFUNC const char*
   1189 uloc_getCurrentLanguageID(const char* oldID){
   1190     int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
   1191     if (offset >= 0) {
   1192         return REPLACEMENT_LANGUAGES[offset];
   1193     }
   1194     return oldID;
   1195 }
   1196 /*
   1197  * the internal functions _getLanguage(), _getCountry(), _getVariant()
   1198  * avoid duplicating code to handle the earlier locale ID pieces
   1199  * in the functions for the later ones by
   1200  * setting the *pEnd pointer to where they stopped parsing
   1201  *
   1202  * TODO try to use this in Locale
   1203  */
   1204 U_CFUNC int32_t
   1205 ulocimp_getLanguage(const char *localeID,
   1206                     char *language, int32_t languageCapacity,
   1207                     const char **pEnd) {
   1208     int32_t i=0;
   1209     int32_t offset;
   1210     char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
   1211 
   1212     /* if it starts with i- or x- then copy that prefix */
   1213     if(_isIDPrefix(localeID)) {
   1214         if(i<languageCapacity) {
   1215             language[i]=(char)uprv_tolower(*localeID);
   1216         }
   1217         if(i<languageCapacity) {
   1218             language[i+1]='-';
   1219         }
   1220         i+=2;
   1221         localeID+=2;
   1222     }
   1223 
   1224     /* copy the language as far as possible and count its length */
   1225     while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
   1226         if(i<languageCapacity) {
   1227             language[i]=(char)uprv_tolower(*localeID);
   1228         }
   1229         if(i<3) {
   1230             U_ASSERT(i>=0);
   1231             lang[i]=(char)uprv_tolower(*localeID);
   1232         }
   1233         i++;
   1234         localeID++;
   1235     }
   1236 
   1237     if(i==3) {
   1238         /* convert 3 character code to 2 character code if possible *CWB*/
   1239         offset=_findIndex(LANGUAGES_3, lang);
   1240         if(offset>=0) {
   1241             i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
   1242         }
   1243     }
   1244 
   1245     if(pEnd!=NULL) {
   1246         *pEnd=localeID;
   1247     }
   1248     return i;
   1249 }
   1250 
   1251 U_CFUNC int32_t
   1252 ulocimp_getScript(const char *localeID,
   1253                   char *script, int32_t scriptCapacity,
   1254                   const char **pEnd)
   1255 {
   1256     int32_t idLen = 0;
   1257 
   1258     if (pEnd != NULL) {
   1259         *pEnd = localeID;
   1260     }
   1261 
   1262     /* copy the second item as far as possible and count its length */
   1263     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
   1264             && uprv_isASCIILetter(localeID[idLen])) {
   1265         idLen++;
   1266     }
   1267 
   1268     /* If it's exactly 4 characters long, then it's a script and not a country. */
   1269     if (idLen == 4) {
   1270         int32_t i;
   1271         if (pEnd != NULL) {
   1272             *pEnd = localeID+idLen;
   1273         }
   1274         if(idLen > scriptCapacity) {
   1275             idLen = scriptCapacity;
   1276         }
   1277         if (idLen >= 1) {
   1278             script[0]=(char)uprv_toupper(*(localeID++));
   1279         }
   1280         for (i = 1; i < idLen; i++) {
   1281             script[i]=(char)uprv_tolower(*(localeID++));
   1282         }
   1283     }
   1284     else {
   1285         idLen = 0;
   1286     }
   1287     return idLen;
   1288 }
   1289 
   1290 U_CFUNC int32_t
   1291 ulocimp_getCountry(const char *localeID,
   1292                    char *country, int32_t countryCapacity,
   1293                    const char **pEnd)
   1294 {
   1295     int32_t idLen=0;
   1296     char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
   1297     int32_t offset;
   1298 
   1299     /* copy the country as far as possible and count its length */
   1300     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
   1301         if(idLen<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
   1302             cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
   1303         }
   1304         idLen++;
   1305     }
   1306 
   1307     /* the country should be either length 2 or 3 */
   1308     if (idLen == 2 || idLen == 3) {
   1309         UBool gotCountry = FALSE;
   1310         /* convert 3 character code to 2 character code if possible *CWB*/
   1311         if(idLen==3) {
   1312             offset=_findIndex(COUNTRIES_3, cnty);
   1313             if(offset>=0) {
   1314                 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
   1315                 gotCountry = TRUE;
   1316             }
   1317         }
   1318         if (!gotCountry) {
   1319             int32_t i = 0;
   1320             for (i = 0; i < idLen; i++) {
   1321                 if (i < countryCapacity) {
   1322                     country[i]=(char)uprv_toupper(localeID[i]);
   1323                 }
   1324             }
   1325         }
   1326         localeID+=idLen;
   1327     } else {
   1328         idLen = 0;
   1329     }
   1330 
   1331     if(pEnd!=NULL) {
   1332         *pEnd=localeID;
   1333     }
   1334 
   1335     return idLen;
   1336 }
   1337 
   1338 /**
   1339  * @param needSeparator if true, then add leading '_' if any variants
   1340  * are added to 'variant'
   1341  */
   1342 static int32_t
   1343 _getVariantEx(const char *localeID,
   1344               char prev,
   1345               char *variant, int32_t variantCapacity,
   1346               UBool needSeparator) {
   1347     int32_t i=0;
   1348 
   1349     /* get one or more variant tags and separate them with '_' */
   1350     if(_isIDSeparator(prev)) {
   1351         /* get a variant string after a '-' or '_' */
   1352         while(!_isTerminator(*localeID)) {
   1353             if (needSeparator) {
   1354                 if (i<variantCapacity) {
   1355                     variant[i] = '_';
   1356                 }
   1357                 ++i;
   1358                 needSeparator = FALSE;
   1359             }
   1360             if(i<variantCapacity) {
   1361                 variant[i]=(char)uprv_toupper(*localeID);
   1362                 if(variant[i]=='-') {
   1363                     variant[i]='_';
   1364                 }
   1365             }
   1366             i++;
   1367             localeID++;
   1368         }
   1369     }
   1370 
   1371     /* if there is no variant tag after a '-' or '_' then look for '@' */
   1372     if(i==0) {
   1373         if(prev=='@') {
   1374             /* keep localeID */
   1375         } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
   1376             ++localeID; /* point after the '@' */
   1377         } else {
   1378             return 0;
   1379         }
   1380         while(!_isTerminator(*localeID)) {
   1381             if (needSeparator) {
   1382                 if (i<variantCapacity) {
   1383                     variant[i] = '_';
   1384                 }
   1385                 ++i;
   1386                 needSeparator = FALSE;
   1387             }
   1388             if(i<variantCapacity) {
   1389                 variant[i]=(char)uprv_toupper(*localeID);
   1390                 if(variant[i]=='-' || variant[i]==',') {
   1391                     variant[i]='_';
   1392                 }
   1393             }
   1394             i++;
   1395             localeID++;
   1396         }
   1397     }
   1398 
   1399     return i;
   1400 }
   1401 
   1402 static int32_t
   1403 _getVariant(const char *localeID,
   1404             char prev,
   1405             char *variant, int32_t variantCapacity) {
   1406     return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
   1407 }
   1408 
   1409 /**
   1410  * Delete ALL instances of a variant from the given list of one or
   1411  * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
   1412  * @param variants the source string of one or more variants,
   1413  * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
   1414  * terminated; if it is, trailing zero will NOT be maintained.
   1415  * @param variantsLen length of variants
   1416  * @param toDelete variant to delete, without separators, e.g.  "EURO"
   1417  * or "PREEURO"; not zero terminated
   1418  * @param toDeleteLen length of toDelete
   1419  * @return number of characters deleted from variants
   1420  */
   1421 static int32_t
   1422 _deleteVariant(char* variants, int32_t variantsLen,
   1423                const char* toDelete, int32_t toDeleteLen)
   1424 {
   1425     int32_t delta = 0; /* number of chars deleted */
   1426     for (;;) {
   1427         UBool flag = FALSE;
   1428         if (variantsLen < toDeleteLen) {
   1429             return delta;
   1430         }
   1431         if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
   1432             (variantsLen == toDeleteLen ||
   1433              (flag=(variants[toDeleteLen] == '_'))))
   1434         {
   1435             int32_t d = toDeleteLen + (flag?1:0);
   1436             variantsLen -= d;
   1437             delta += d;
   1438             if (variantsLen > 0) {
   1439                 uprv_memmove(variants, variants+d, variantsLen);
   1440             }
   1441         } else {
   1442             char* p = _strnchr(variants, variantsLen, '_');
   1443             if (p == NULL) {
   1444                 return delta;
   1445             }
   1446             ++p;
   1447             variantsLen -= (int32_t)(p - variants);
   1448             variants = p;
   1449         }
   1450     }
   1451 }
   1452 
   1453 /* Keyword enumeration */
   1454 
   1455 typedef struct UKeywordsContext {
   1456     char* keywords;
   1457     char* current;
   1458 } UKeywordsContext;
   1459 
   1460 static void U_CALLCONV
   1461 uloc_kw_closeKeywords(UEnumeration *enumerator) {
   1462     uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
   1463     uprv_free(enumerator->context);
   1464     uprv_free(enumerator);
   1465 }
   1466 
   1467 static int32_t U_CALLCONV
   1468 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
   1469     char *kw = ((UKeywordsContext *)en->context)->keywords;
   1470     int32_t result = 0;
   1471     while(*kw) {
   1472         result++;
   1473         kw += uprv_strlen(kw)+1;
   1474     }
   1475     return result;
   1476 }
   1477 
   1478 static const char* U_CALLCONV
   1479 uloc_kw_nextKeyword(UEnumeration* en,
   1480                     int32_t* resultLength,
   1481                     UErrorCode* /*status*/) {
   1482     const char* result = ((UKeywordsContext *)en->context)->current;
   1483     int32_t len = 0;
   1484     if(*result) {
   1485         len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
   1486         ((UKeywordsContext *)en->context)->current += len+1;
   1487     } else {
   1488         result = NULL;
   1489     }
   1490     if (resultLength) {
   1491         *resultLength = len;
   1492     }
   1493     return result;
   1494 }
   1495 
   1496 static void U_CALLCONV
   1497 uloc_kw_resetKeywords(UEnumeration* en,
   1498                       UErrorCode* /*status*/) {
   1499     ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
   1500 }
   1501 
   1502 static const UEnumeration gKeywordsEnum = {
   1503     NULL,
   1504     NULL,
   1505     uloc_kw_closeKeywords,
   1506     uloc_kw_countKeywords,
   1507     uenum_unextDefault,
   1508     uloc_kw_nextKeyword,
   1509     uloc_kw_resetKeywords
   1510 };
   1511 
   1512 U_CAPI UEnumeration* U_EXPORT2
   1513 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
   1514 {
   1515     UKeywordsContext *myContext = NULL;
   1516     UEnumeration *result = NULL;
   1517 
   1518     if(U_FAILURE(*status)) {
   1519         return NULL;
   1520     }
   1521     result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
   1522     /* Null pointer test */
   1523     if (result == NULL) {
   1524         *status = U_MEMORY_ALLOCATION_ERROR;
   1525         return NULL;
   1526     }
   1527     uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
   1528     myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
   1529     if (myContext == NULL) {
   1530         *status = U_MEMORY_ALLOCATION_ERROR;
   1531         uprv_free(result);
   1532         return NULL;
   1533     }
   1534     myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
   1535     uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
   1536     myContext->keywords[keywordListSize] = 0;
   1537     myContext->current = myContext->keywords;
   1538     result->context = myContext;
   1539     return result;
   1540 }
   1541 
   1542 U_CAPI UEnumeration* U_EXPORT2
   1543 uloc_openKeywords(const char* localeID,
   1544                         UErrorCode* status)
   1545 {
   1546     int32_t i=0;
   1547     char keywords[256];
   1548     int32_t keywordsCapacity = 256;
   1549     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1550     const char* tmpLocaleID;
   1551 
   1552     if(status==NULL || U_FAILURE(*status)) {
   1553         return 0;
   1554     }
   1555 
   1556     if (_hasBCP47Extension(localeID)) {
   1557         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
   1558     } else {
   1559         if (localeID==NULL) {
   1560            localeID=uloc_getDefault();
   1561         }
   1562         tmpLocaleID=localeID;
   1563     }
   1564 
   1565     /* Skip the language */
   1566     ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
   1567     if(_isIDSeparator(*tmpLocaleID)) {
   1568         const char *scriptID;
   1569         /* Skip the script if available */
   1570         ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
   1571         if(scriptID != tmpLocaleID+1) {
   1572             /* Found optional script */
   1573             tmpLocaleID = scriptID;
   1574         }
   1575         /* Skip the Country */
   1576         if (_isIDSeparator(*tmpLocaleID)) {
   1577             ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
   1578             if(_isIDSeparator(*tmpLocaleID)) {
   1579                 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
   1580             }
   1581         }
   1582     }
   1583 
   1584     /* keywords are located after '@' */
   1585     if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
   1586         i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
   1587     }
   1588 
   1589     if(i) {
   1590         return uloc_openKeywordList(keywords, i, status);
   1591     } else {
   1592         return NULL;
   1593     }
   1594 }
   1595 
   1596 
   1597 /* bit-flags for 'options' parameter of _canonicalize */
   1598 #define _ULOC_STRIP_KEYWORDS 0x2
   1599 #define _ULOC_CANONICALIZE   0x1
   1600 
   1601 #define OPTION_SET(options, mask) ((options & mask) != 0)
   1602 
   1603 static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
   1604 #define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
   1605 
   1606 /**
   1607  * Canonicalize the given localeID, to level 1 or to level 2,
   1608  * depending on the options.  To specify level 1, pass in options=0.
   1609  * To specify level 2, pass in options=_ULOC_CANONICALIZE.
   1610  *
   1611  * This is the code underlying uloc_getName and uloc_canonicalize.
         *
         * The ID is assembled in a working buffer ("name"): the caller's result
         * buffer when it is non-NULL and holds at least sizeof(localeBuffer)
         * bytes, otherwise the local localeBuffer, which is copied back into
         * result at the end.  "len" counts every character the output needs;
         * most direct stores are guarded by len<nameCapacity, so len may grow
         * beyond the capacity of the working buffer.
         *
         * @param localeID       the ID to process; NULL selects uloc_getDefault()
         *                       (when the BCP47 conversion path is not taken)
         * @param result         destination buffer; may be NULL
         * @param resultCapacity capacity of result
         * @param options        bit set of _ULOC_CANONICALIZE and
         *                       _ULOC_STRIP_KEYWORDS
         * @param err            in/out error code; nothing is done if it already
         *                       indicates a failure
         * @return the value returned by
         *         u_terminateChars(result, resultCapacity, len, err)
   1612  */
   1613 static int32_t
   1614 _canonicalize(const char* localeID,
   1615               char* result,
   1616               int32_t resultCapacity,
   1617               uint32_t options,
   1618               UErrorCode* err) {
   1619     int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
   1620     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1621     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1622     const char* origLocaleID;
   1623     const char* tmpLocaleID;
   1624     const char* keywordAssign = NULL;
   1625     const char* separatorIndicator = NULL;
   1626     const char* addKeyword = NULL;
   1627     const char* addValue = NULL;
   1628     char* name;
   1629     char* variant = NULL; /* pointer into name, or NULL */
   1630 
   1631     if (U_FAILURE(*err)) {
   1632         return 0;
   1633     }
   1634 
   1635     if (_hasBCP47Extension(localeID)) {
   1636         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
   1637     } else {
   1638         if (localeID==NULL) {
   1639            localeID=uloc_getDefault();
   1640         }
   1641         tmpLocaleID=localeID;
   1642     }
   1643 
            /* remembered so the "i-default" test below can look at the start of the ID */
   1644     origLocaleID=tmpLocaleID;
   1645 
   1646     /* if we are doing a full canonicalization, then put results in
   1647        localeBuffer, if necessary; otherwise send them to result. */
   1648     if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
   1649         (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
   1650         name = localeBuffer;
   1651         nameCapacity = (int32_t)sizeof(localeBuffer);
   1652     } else {
   1653         name = result;
   1654         nameCapacity = resultCapacity;
   1655     }
   1656 
   1657     /* get all pieces, one after another, and separate with '_' */
   1658     len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
   1659 
            /* Language part "i-default": substitute the default locale ID.  Nothing
               after the language is parsed in this case, because the script/country/
               variant handling sits in the else branch.
               NOTE(review): uprv_strncpy copies len bytes without comparing len
               against nameCapacity - confirm the default ID always fits. */
   1660     if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
   1661         const char *d = uloc_getDefault();
   1662 
   1663         len = (int32_t)uprv_strlen(d);
   1664 
   1665         if (name != NULL) {
   1666             uprv_strncpy(name, d, len);
   1667         }
   1668     } else if(_isIDSeparator(*tmpLocaleID)) {
   1669         const char *scriptID;
   1670 
   1671         ++fieldCount;
   1672         if(len<nameCapacity) {
   1673             name[len]='_';
   1674         }
   1675         ++len;
   1676 
   1677         scriptSize=ulocimp_getScript(tmpLocaleID+1,
   1678             (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
   1679         if(scriptSize > 0) {
   1680             /* Found optional script */
   1681             tmpLocaleID = scriptID;
   1682             ++fieldCount;
   1683             len+=scriptSize;
   1684             if (_isIDSeparator(*tmpLocaleID)) {
   1685                 /* If there is something else, then we add the _ */
   1686                 if(len<nameCapacity) {
   1687                     name[len]='_';
   1688                 }
   1689                 ++len;
   1690             }
   1691         }
   1692 
   1693         if (_isIDSeparator(*tmpLocaleID)) {
   1694             const char *cntryID;
   1695             int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
   1696                 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
   1697             if (cntrySize > 0) {
   1698                 /* Found optional country */
   1699                 tmpLocaleID = cntryID;
   1700                 len+=cntrySize;
   1701             }
   1702             if(_isIDSeparator(*tmpLocaleID)) {
   1703                 /* If there is something else, then we add the _  if we found country before. */
                        /* NOTE(review): cntrySize does not look like it can be negative,
                           so this test presumably depends only on the character after
                           the separator - confirm against ulocimp_getCountry. */
   1704                 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
   1705                     ++fieldCount;
   1706                     if(len<nameCapacity) {
   1707                         name[len]='_';
   1708                     }
   1709                     ++len;
   1710                 }
   1711 
   1712                 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
   1713                     (len<nameCapacity ? name+len : NULL), nameCapacity-len);
   1714                 if (variantSize > 0) {
   1715                     variant = len<nameCapacity ? name+len : NULL;
   1716                     len += variantSize;
   1717                     tmpLocaleID += variantSize + 1; /* skip '_' and variant */
   1718                 }
   1719             }
   1720         }
   1721     }
   1722 
   1723     /* Copy POSIX-style charset specifier, if any [mr.utf8] */
   1724     if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
   1725         UBool done = FALSE;
   1726         do {
   1727             char c = *tmpLocaleID;
   1728             switch (c) {
   1729             case 0:
   1730             case '@':
   1731                 done = TRUE;
   1732                 break;
   1733             default:
   1734                 if (len<nameCapacity) {
   1735                     name[len] = c;
   1736                 }
   1737                 ++len;
   1738                 ++tmpLocaleID;
   1739                 break;
   1740             }
   1741         } while (!done);
   1742     }
   1743 
   1744     /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
   1745        After this, tmpLocaleID either points to '@' or is NULL */
   1746     if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
   1747         keywordAssign = uprv_strchr(tmpLocaleID, '=');
   1748         separatorIndicator = uprv_strchr(tmpLocaleID, ';');
   1749     }
   1750 
   1751     /* Copy POSIX-style variant, if any [mr@FOO] */
   1752     if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
   1753         tmpLocaleID != NULL && keywordAssign == NULL) {
   1754         for (;;) {
   1755             char c = *tmpLocaleID;
   1756             if (c == 0) {
   1757                 break;
   1758             }
   1759             if (len<nameCapacity) {
   1760                 name[len] = c;
   1761             }
   1762             ++len;
   1763             ++tmpLocaleID;
   1764         }
   1765     }
   1766 
   1767     if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
   1768         /* Handle @FOO variant if @ is present and not followed by = */
   1769         if (tmpLocaleID!=NULL && keywordAssign==NULL) {
   1770             int32_t posixVariantSize;
   1771             /* Add missing '_' if needed */
   1772             if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
   1773                 do {
   1774                     if(len<nameCapacity) {
   1775                         name[len]='_';
   1776                     }
   1777                     ++len;
   1778                     ++fieldCount;
   1779                 } while(fieldCount<2);
   1780             }
                    /* NOTE(review): unlike the calls above, name+len is passed here
                       without the len<nameCapacity test; _getVariantEx only stores
                       while its index is below the capacity it is given. */
   1781             posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
   1782                                              (UBool)(variantSize > 0));
   1783             if (posixVariantSize > 0) {
   1784                 if (variant == NULL) {
   1785                     variant = name+len;
   1786                 }
   1787                 len += posixVariantSize;
   1788                 variantSize += posixVariantSize;
   1789             }
   1790         }
   1791 
   1792         /* Handle generic variants first */
   1793         if (variant) {
                    /* Try each VARIANT_MAP entry; the first one that deletes anything
                       supplies the keyword/value to append and ends the search.  The
                       length handed to _deleteVariant is the smaller of variantSize
                       and nameCapacity-len. */
   1794             for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
   1795                 const char* variantToCompare = VARIANT_MAP[j].variant;
   1796                 int32_t n = (int32_t)uprv_strlen(variantToCompare);
   1797                 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
   1798                 len -= variantLen;
   1799                 if (variantLen > 0) {
   1800                     if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
   1801                         --len;
   1802                     }
   1803                     addKeyword = VARIANT_MAP[j].keyword;
   1804                     addValue = VARIANT_MAP[j].value;
   1805                     break;
   1806                 }
   1807             }
   1808             if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
   1809                 --len;
   1810             }
   1811         }
   1812 
   1813         /* Look up the ID in the canonicalization map */
   1814         for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
   1815             const char* id = CANONICALIZE_MAP[j].id;
   1816             int32_t n = (int32_t)uprv_strlen(id);
   1817             if (len == n && uprv_strncmp(name, id, n) == 0) {
   1818                 if (n == 0 && tmpLocaleID != NULL) {
   1819                     break; /* Don't remap "" if keywords present */
   1820                 }
   1821                 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
   1822                 if (CANONICALIZE_MAP[j].keyword) {
   1823                     addKeyword = CANONICALIZE_MAP[j].keyword;
   1824                     addValue = CANONICALIZE_MAP[j].value;
   1825                 }
   1826                 break;
   1827             }
   1828         }
   1829     }
   1830 
            /* Append keywords unless the caller asked for them to be stripped: either
               the keywords present in the input (together with addKeyword/addValue),
               or just "@addKeyword=addValue" when only a mapped keyword exists. */
   1831     if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
   1832         if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
   1833             (!separatorIndicator || separatorIndicator > keywordAssign)) {
   1834             if(len<nameCapacity) {
   1835                 name[len]='@';
   1836             }
   1837             ++len;
   1838             ++fieldCount;
   1839             len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
   1840                                 NULL, 0, NULL, TRUE, addKeyword, addValue, err);
   1841         } else if (addKeyword != NULL) {
   1842             U_ASSERT(addValue != NULL && len < nameCapacity);
   1843             /* inelegant but works -- later make _getKeywords do this? */
   1844             len += _copyCount(name+len, nameCapacity-len, "@");
   1845             len += _copyCount(name+len, nameCapacity-len, addKeyword);
   1846             len += _copyCount(name+len, nameCapacity-len, "=");
   1847             len += _copyCount(name+len, nameCapacity-len, addValue);
   1848         }
   1849     }
   1850 
            /* The ID was built in localeBuffer: hand as much of it as fits to the caller. */
   1851     if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
   1852         uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
   1853     }
   1854 
   1855     return u_terminateChars(result, resultCapacity, len, err);
   1856 }
   1857 
   1858 /* ### ID parsing API **************************************************/
   1859 
   1860 U_CAPI int32_t  U_EXPORT2
   1861 uloc_getParent(const char*    localeID,
   1862                char* parent,
   1863                int32_t parentCapacity,
   1864                UErrorCode* err)
   1865 {
   1866     const char *lastUnderscore;
   1867     int32_t i;
   1868 
   1869     if (U_FAILURE(*err))
   1870         return 0;
   1871 
   1872     if (localeID == NULL)
   1873         localeID = uloc_getDefault();
   1874 
   1875     lastUnderscore=uprv_strrchr(localeID, '_');
   1876     if(lastUnderscore!=NULL) {
   1877         i=(int32_t)(lastUnderscore-localeID);
   1878     } else {
   1879         i=0;
   1880     }
   1881 
   1882     if(i>0 && parent != localeID) {
   1883         uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
   1884     }
   1885     return u_terminateChars(parent, parentCapacity, i, err);
   1886 }
   1887 
   1888 U_CAPI int32_t U_EXPORT2
   1889 uloc_getLanguage(const char*    localeID,
   1890          char* language,
   1891          int32_t languageCapacity,
   1892          UErrorCode* err)
   1893 {
   1894     /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
   1895     int32_t i=0;
   1896 
   1897     if (err==NULL || U_FAILURE(*err)) {
   1898         return 0;
   1899     }
   1900 
   1901     if(localeID==NULL) {
   1902         localeID=uloc_getDefault();
   1903     }
   1904 
   1905     i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
   1906     return u_terminateChars(language, languageCapacity, i, err);
   1907 }
   1908 
   1909 U_CAPI int32_t U_EXPORT2
   1910 uloc_getScript(const char*    localeID,
   1911          char* script,
   1912          int32_t scriptCapacity,
   1913          UErrorCode* err)
   1914 {
   1915     int32_t i=0;
   1916 
   1917     if(err==NULL || U_FAILURE(*err)) {
   1918         return 0;
   1919     }
   1920 
   1921     if(localeID==NULL) {
   1922         localeID=uloc_getDefault();
   1923     }
   1924 
   1925     /* skip the language */
   1926     ulocimp_getLanguage(localeID, NULL, 0, &localeID);
   1927     if(_isIDSeparator(*localeID)) {
   1928         i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
   1929     }
   1930     return u_terminateChars(script, scriptCapacity, i, err);
   1931 }
   1932 
   1933 U_CAPI int32_t  U_EXPORT2
   1934 uloc_getCountry(const char* localeID,
   1935             char* country,
   1936             int32_t countryCapacity,
   1937             UErrorCode* err)
   1938 {
   1939     int32_t i=0;
   1940 
   1941     if(err==NULL || U_FAILURE(*err)) {
   1942         return 0;
   1943     }
   1944 
   1945     if(localeID==NULL) {
   1946         localeID=uloc_getDefault();
   1947     }
   1948 
   1949     /* Skip the language */
   1950     ulocimp_getLanguage(localeID, NULL, 0, &localeID);
   1951     if(_isIDSeparator(*localeID)) {
   1952         const char *scriptID;
   1953         /* Skip the script if available */
   1954         ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
   1955         if(scriptID != localeID+1) {
   1956             /* Found optional script */
   1957             localeID = scriptID;
   1958         }
   1959         if(_isIDSeparator(*localeID)) {
   1960             i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
   1961         }
   1962     }
   1963     return u_terminateChars(country, countryCapacity, i, err);
   1964 }
   1965 
   1966 U_CAPI int32_t  U_EXPORT2
   1967 uloc_getVariant(const char* localeID,
   1968                 char* variant,
   1969                 int32_t variantCapacity,
   1970                 UErrorCode* err)
   1971 {
   1972     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1973     const char* tmpLocaleID;
   1974     int32_t i=0;
   1975 
   1976     if(err==NULL || U_FAILURE(*err)) {
   1977         return 0;
   1978     }
   1979 
   1980     if (_hasBCP47Extension(localeID)) {
   1981         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
   1982     } else {
   1983         if (localeID==NULL) {
   1984            localeID=uloc_getDefault();
   1985         }
   1986         tmpLocaleID=localeID;
   1987     }
   1988 
   1989     /* Skip the language */
   1990     ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
   1991     if(_isIDSeparator(*tmpLocaleID)) {
   1992         const char *scriptID;
   1993         /* Skip the script if available */
   1994         ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
   1995         if(scriptID != tmpLocaleID+1) {
   1996             /* Found optional script */
   1997             tmpLocaleID = scriptID;
   1998         }
   1999         /* Skip the Country */
   2000         if (_isIDSeparator(*tmpLocaleID)) {
   2001             const char *cntryID;
   2002             ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
   2003             if (cntryID != tmpLocaleID+1) {
   2004                 /* Found optional country */
   2005                 tmpLocaleID = cntryID;
   2006             }
   2007             if(_isIDSeparator(*tmpLocaleID)) {
   2008                 /* If there was no country ID, skip a possible extra IDSeparator */
   2009                 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
   2010                     tmpLocaleID++;
   2011                 }
   2012                 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
   2013             }
   2014         }
   2015     }
   2016 
   2017     /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
   2018     /* if we do not have a variant tag yet then try a POSIX variant after '@' */
   2019 /*
   2020     if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
   2021         i=_getVariant(localeID+1, '@', variant, variantCapacity);
   2022     }
   2023 */
   2024     return u_terminateChars(variant, variantCapacity, i, err);
   2025 }
   2026 
   2027 U_CAPI int32_t  U_EXPORT2
   2028 uloc_getName(const char* localeID,
   2029              char* name,
   2030              int32_t nameCapacity,
   2031              UErrorCode* err)
   2032 {
   2033     return _canonicalize(localeID, name, nameCapacity, 0, err);
   2034 }
   2035 
   2036 U_CAPI int32_t  U_EXPORT2
   2037 uloc_getBaseName(const char* localeID,
   2038                  char* name,
   2039                  int32_t nameCapacity,
   2040                  UErrorCode* err)
   2041 {
   2042     return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
   2043 }
   2044 
   2045 U_CAPI int32_t  U_EXPORT2
   2046 uloc_canonicalize(const char* localeID,
   2047                   char* name,
   2048                   int32_t nameCapacity,
   2049                   UErrorCode* err)
   2050 {
   2051     return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
   2052 }
   2053 
   2054 U_CAPI const char*  U_EXPORT2
   2055 uloc_getISO3Language(const char* localeID)
   2056 {
   2057     int16_t offset;
   2058     char lang[ULOC_LANG_CAPACITY];
   2059     UErrorCode err = U_ZERO_ERROR;
   2060 
   2061     if (localeID == NULL)
   2062     {
   2063         localeID = uloc_getDefault();
   2064     }
   2065     uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
   2066     if (U_FAILURE(err))
   2067         return "";
   2068     offset = _findIndex(LANGUAGES, lang);
   2069     if (offset < 0)
   2070         return "";
   2071     return LANGUAGES_3[offset];
   2072 }
   2073 
   2074 U_CAPI const char*  U_EXPORT2
   2075 uloc_getISO3Country(const char* localeID)
   2076 {
   2077     int16_t offset;
   2078     char cntry[ULOC_LANG_CAPACITY];
   2079     UErrorCode err = U_ZERO_ERROR;
   2080 
   2081     if (localeID == NULL)
   2082     {
   2083         localeID = uloc_getDefault();
   2084     }
   2085     uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
   2086     if (U_FAILURE(err))
   2087         return "";
   2088     offset = _findIndex(COUNTRIES, cntry);
   2089     if (offset < 0)
   2090         return "";
   2091 
   2092     return COUNTRIES_3[offset];
   2093 }
   2094 
   2095 U_CAPI uint32_t  U_EXPORT2
   2096 uloc_getLCID(const char* localeID)
   2097 {
   2098     UErrorCode status = U_ZERO_ERROR;
   2099     char       langID[ULOC_FULLNAME_CAPACITY];
   2100 
   2101     uloc_getLanguage(localeID, langID, sizeof(langID), &status);
   2102     if (U_FAILURE(status)) {
   2103         return 0;
   2104     }
   2105 
   2106     return uprv_convertToLCID(langID, localeID, &status);
   2107 }
   2108 
   2109 U_CAPI int32_t U_EXPORT2
   2110 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
   2111                 UErrorCode *status)
   2112 {
   2113     int32_t length;
   2114     const char *posix = uprv_convertToPosix(hostid, status);
   2115     if (U_FAILURE(*status) || posix == NULL) {
   2116         return 0;
   2117     }
   2118     length = (int32_t)uprv_strlen(posix);
   2119     if (length+1 > localeCapacity) {
   2120         *status = U_BUFFER_OVERFLOW_ERROR;
   2121     }
   2122     else {
   2123         uprv_strcpy(locale, posix);
   2124     }
   2125     return length;
   2126 }
   2127 
   2128 /* ### Default locale **************************************************/
   2129 
   2130 U_CAPI const char*  U_EXPORT2
   2131 uloc_getDefault()
   2132 {
   2133     return locale_get_default();
   2134 }
   2135 
   2136 U_CAPI void  U_EXPORT2
   2137 uloc_setDefault(const char*   newDefaultLocale,
   2138              UErrorCode* err)
   2139 {
   2140     if (U_FAILURE(*err))
   2141         return;
   2142     /* the error code isn't currently used for anything by this function*/
   2143 
   2144     /* propagate change to C++ */
   2145     locale_set_default(newDefaultLocale);
   2146 }
   2147 
   2148 /**
   2149  * Returns a list of all language codes defined in ISO 639.  This is a pointer
   2150  * to an array of pointers to arrays of char.  All of these pointers are owned
   2151  * by ICU-- do not delete them, and do not write through them.  The array is
   2152  * terminated with a null pointer.
   2153  */
   2154 U_CAPI const char* const*  U_EXPORT2
   2155 uloc_getISOLanguages()
   2156 {
   2157     return LANGUAGES;
   2158 }
   2159 
   2160 /**
   2161  * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
   2162  * pointer to an array of pointers to arrays of char.  All of these pointers are
   2163  * owned by ICU-- do not delete them, and do not write through them.  The array is
   2164  * terminated with a null pointer.
   2165  */
   2166 U_CAPI const char* const*  U_EXPORT2
   2167 uloc_getISOCountries()
   2168 {
   2169     return COUNTRIES;
   2170 }
   2171 
   2172 
   2173 /* this function to be moved into cstring.c later */
   2174 static char gDecimal = 0;
   2175 
   2176 static /* U_CAPI */
   2177 double
   2178 /* U_EXPORT2 */
   2179 _uloc_strtod(const char *start, char **end) {
   2180     char *decimal;
   2181     char *myEnd;
   2182     char buf[30];
   2183     double rv;
   2184     if (!gDecimal) {
   2185         char rep[5];
   2186         /* For machines that decide to change the decimal on you,
   2187         and try to be too smart with localization.
   2188         This normally should be just a '.'. */
   2189         sprintf(rep, "%+1.1f", 1.0);
   2190         gDecimal = rep[2];
   2191     }
   2192 
   2193     if(gDecimal == '.') {
   2194         return uprv_strtod(start, end); /* fall through to OS */
   2195     } else {
   2196         uprv_strncpy(buf, start, 29);
   2197         buf[29]=0;
   2198         decimal = uprv_strchr(buf, '.');
   2199         if(decimal) {
   2200             *decimal = gDecimal;
   2201         } else {
   2202             return uprv_strtod(start, end); /* no decimal point */
   2203         }
   2204         rv = uprv_strtod(buf, &myEnd);
   2205         if(end) {
   2206             *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
   2207         }
   2208         return rv;
   2209     }
   2210 }
   2211 
/* One entry of a parsed HTTP Accept-Language list, as built and sorted by
   uloc_acceptLanguageFromHTTP(). */
typedef struct {
    float q;        /* weight taken from the entry's ";q=" parameter; 1.0 when the entry has none */
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
    char *locale;   /* heap-allocated locale ID for the entry; released with uprv_free() */
} _acceptLangItem;
   2217 
   2218 static int32_t U_CALLCONV
   2219 uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
   2220 {
   2221     const _acceptLangItem *aa = (const _acceptLangItem*)a;
   2222     const _acceptLangItem *bb = (const _acceptLangItem*)b;
   2223 
   2224     int32_t rc = 0;
   2225     if(bb->q < aa->q) {
   2226         rc = -1;  /* A > B */
   2227     } else if(bb->q > aa->q) {
   2228         rc = 1;   /* A < B */
   2229     } else {
   2230         rc = 0;   /* A = B */
   2231     }
   2232 
   2233     if(rc==0) {
   2234         rc = uprv_stricmp(aa->locale, bb->locale);
   2235     }
   2236 
   2237 #if defined(ULOC_DEBUG)
   2238     /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
   2239     aa->locale, aa->q,
   2240     bb->locale, bb->q,
   2241     rc);*/
   2242 #endif
   2243 
   2244     return rc;
   2245 }
   2246 
   2247 /*
   2248 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
   2249 */
   2250 
   2251 U_CAPI int32_t U_EXPORT2
   2252 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
   2253                             const char *httpAcceptLanguage,
   2254                             UEnumeration* availableLocales,
   2255                             UErrorCode *status)
   2256 {
   2257     _acceptLangItem *j;
   2258     _acceptLangItem smallBuffer[30];
   2259     char **strs;
   2260     char tmp[ULOC_FULLNAME_CAPACITY +1];
   2261     int32_t n = 0;
   2262     const char *itemEnd;
   2263     const char *paramEnd;
   2264     const char *s;
   2265     const char *t;
   2266     int32_t res;
   2267     int32_t i;
   2268     int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
   2269     int32_t jSize;
   2270     char *tempstr; /* Use for null pointer check */
   2271 
   2272     j = smallBuffer;
   2273     jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
   2274     if(U_FAILURE(*status)) {
   2275         return -1;
   2276     }
   2277 
   2278     for(s=httpAcceptLanguage;s&&*s;) {
   2279         while(isspace(*s)) /* eat space at the beginning */
   2280             s++;
   2281         itemEnd=uprv_strchr(s,',');
   2282         paramEnd=uprv_strchr(s,';');
   2283         if(!itemEnd) {
   2284             itemEnd = httpAcceptLanguage+l; /* end of string */
   2285         }
   2286         if(paramEnd && paramEnd<itemEnd) {
   2287             /* semicolon (;) is closer than end (,) */
   2288             t = paramEnd+1;
   2289             if(*t=='q') {
   2290                 t++;
   2291             }
   2292             while(isspace(*t)) {
   2293                 t++;
   2294             }
   2295             if(*t=='=') {
   2296                 t++;
   2297             }
   2298             while(isspace(*t)) {
   2299                 t++;
   2300             }
   2301             j[n].q = (float)_uloc_strtod(t,NULL);
   2302         } else {
   2303             /* no semicolon - it's 1.0 */
   2304             j[n].q = 1.0f;
   2305             paramEnd = itemEnd;
   2306         }
   2307         j[n].dummy=0;
   2308         /* eat spaces prior to semi */
   2309         for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
   2310             ;
   2311         /* Check for null pointer from uprv_strndup */
   2312         tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
   2313         if (tempstr == NULL) {
   2314             *status = U_MEMORY_ALLOCATION_ERROR;
   2315             return -1;
   2316         }
   2317         j[n].locale = tempstr;
   2318         uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
   2319         if(strcmp(j[n].locale,tmp)) {
   2320             uprv_free(j[n].locale);
   2321             j[n].locale=uprv_strdup(tmp);
   2322         }
   2323 #if defined(ULOC_DEBUG)
   2324         /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
   2325 #endif
   2326         n++;
   2327         s = itemEnd;
   2328         while(*s==',') { /* eat duplicate commas */
   2329             s++;
   2330         }
   2331         if(n>=jSize) {
   2332             if(j==smallBuffer) {  /* overflowed the small buffer. */
   2333                 j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
   2334                 if(j!=NULL) {
   2335                     uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
   2336                 }
   2337 #if defined(ULOC_DEBUG)
   2338                 fprintf(stderr,"malloced at size %d\n", jSize);
   2339 #endif
   2340             } else {
   2341                 j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
   2342 #if defined(ULOC_DEBUG)
   2343                 fprintf(stderr,"re-alloced at size %d\n", jSize);
   2344 #endif
   2345             }
   2346             jSize *= 2;
   2347             if(j==NULL) {
   2348                 *status = U_MEMORY_ALLOCATION_ERROR;
   2349                 return -1;
   2350             }
   2351         }
   2352     }
   2353     uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
   2354     if(U_FAILURE(*status)) {
   2355         if(j != smallBuffer) {
   2356 #if defined(ULOC_DEBUG)
   2357             fprintf(stderr,"freeing j %p\n", j);
   2358 #endif
   2359             uprv_free(j);
   2360         }
   2361         return -1;
   2362     }
   2363     strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
   2364     /* Check for null pointer */
   2365     if (strs == NULL) {
   2366         uprv_free(j); /* Free to avoid memory leak */
   2367         *status = U_MEMORY_ALLOCATION_ERROR;
   2368         return -1;
   2369     }
   2370     for(i=0;i<n;i++) {
   2371 #if defined(ULOC_DEBUG)
   2372         /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
   2373 #endif
   2374         strs[i]=j[i].locale;
   2375     }
   2376     res =  uloc_acceptLanguage(result, resultAvailable, outResult,
   2377         (const char**)strs, n, availableLocales, status);
   2378     for(i=0;i<n;i++) {
   2379         uprv_free(strs[i]);
   2380     }
   2381     uprv_free(strs);
   2382     if(j != smallBuffer) {
   2383 #if defined(ULOC_DEBUG)
   2384         fprintf(stderr,"freeing j %p\n", j);
   2385 #endif
   2386         uprv_free(j);
   2387     }
   2388     return res;
   2389 }
   2390 
   2391 
   2392 U_CAPI int32_t U_EXPORT2
   2393 uloc_acceptLanguage(char *result, int32_t resultAvailable,
   2394                     UAcceptResult *outResult, const char **acceptList,
   2395                     int32_t acceptListCount,
   2396                     UEnumeration* availableLocales,
   2397                     UErrorCode *status)
   2398 {
   2399     int32_t i,j;
   2400     int32_t len;
   2401     int32_t maxLen=0;
   2402     char tmp[ULOC_FULLNAME_CAPACITY+1];
   2403     const char *l;
   2404     char **fallbackList;
   2405     if(U_FAILURE(*status)) {
   2406         return -1;
   2407     }
   2408     fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
   2409     if(fallbackList==NULL) {
   2410         *status = U_MEMORY_ALLOCATION_ERROR;
   2411         return -1;
   2412     }
   2413     for(i=0;i<acceptListCount;i++) {
   2414 #if defined(ULOC_DEBUG)
   2415         fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
   2416 #endif
   2417         while((l=uenum_next(availableLocales, NULL, status))) {
   2418 #if defined(ULOC_DEBUG)
   2419             fprintf(stderr,"  %s\n", l);
   2420 #endif
   2421             len = (int32_t)uprv_strlen(l);
   2422             if(!uprv_strcmp(acceptList[i], l)) {
   2423                 if(outResult) {
   2424                     *outResult = ULOC_ACCEPT_VALID;
   2425                 }
   2426 #if defined(ULOC_DEBUG)
   2427                 fprintf(stderr, "MATCH! %s\n", l);
   2428 #endif
   2429                 if(len>0) {
   2430                     uprv_strncpy(result, l, uprv_min(len, resultAvailable));
   2431                 }
   2432                 for(j=0;j<i;j++) {
   2433                     uprv_free(fallbackList[j]);
   2434                 }
   2435                 uprv_free(fallbackList);
   2436                 return u_terminateChars(result, resultAvailable, len, status);
   2437             }
   2438             if(len>maxLen) {
   2439                 maxLen = len;
   2440             }
   2441         }
   2442         uenum_reset(availableLocales, status);
   2443         /* save off parent info */
   2444         if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
   2445             fallbackList[i] = uprv_strdup(tmp);
   2446         } else {
   2447             fallbackList[i]=0;
   2448         }
   2449     }
   2450 
   2451     for(maxLen--;maxLen>0;maxLen--) {
   2452         for(i=0;i<acceptListCount;i++) {
   2453             if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
   2454 #if defined(ULOC_DEBUG)
   2455                 fprintf(stderr,"Try: [%s]", fallbackList[i]);
   2456 #endif
   2457                 while((l=uenum_next(availableLocales, NULL, status))) {
   2458 #if defined(ULOC_DEBUG)
   2459                     fprintf(stderr,"  %s\n", l);
   2460 #endif
   2461                     len = (int32_t)uprv_strlen(l);
   2462                     if(!uprv_strcmp(fallbackList[i], l)) {
   2463                         if(outResult) {
   2464                             *outResult = ULOC_ACCEPT_FALLBACK;
   2465                         }
   2466 #if defined(ULOC_DEBUG)
   2467                         fprintf(stderr, "fallback MATCH! %s\n", l);
   2468 #endif
   2469                         if(len>0) {
   2470                             uprv_strncpy(result, l, uprv_min(len, resultAvailable));
   2471                         }
   2472                         for(j=0;j<acceptListCount;j++) {
   2473                             uprv_free(fallbackList[j]);
   2474                         }
   2475                         uprv_free(fallbackList);
   2476                         return u_terminateChars(result, resultAvailable, len, status);
   2477                     }
   2478                 }
   2479                 uenum_reset(availableLocales, status);
   2480 
   2481                 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
   2482                     uprv_free(fallbackList[i]);
   2483                     fallbackList[i] = uprv_strdup(tmp);
   2484                 } else {
   2485                     uprv_free(fallbackList[i]);
   2486                     fallbackList[i]=0;
   2487                 }
   2488             }
   2489         }
   2490         if(outResult) {
   2491             *outResult = ULOC_ACCEPT_FAILED;
   2492         }
   2493     }
   2494     for(i=0;i<acceptListCount;i++) {
   2495         uprv_free(fallbackList[i]);
   2496     }
   2497     uprv_free(fallbackList);
   2498     return -1;
   2499 }
   2500 
   2501 /*eof*/
   2502