/*
**********************************************************************
*   Copyright (C) 1997-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File ULOC.CPP
*
* Modification History:
*
*   Date        Name        Description
*   04/01/97    aliu        Creation.
*   08/21/98    stephen     JDK 1.2 sync
*   12/08/98    rtg         New Locale implementation and C API
*   03/15/99    damiba      overhaul.
*   04/06/99    stephen     changed setDefault() to realloc and copy
*   06/14/99    stephen     Changed calls to ures_open for new params
*   07/21/99    stephen     Modified setDefault() to propagate to C++
*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
*                           brought canonicalization code into line with spec
*****************************************************************************/
     22 
     23 /*
     24    POSIX's locale format, from putil.c: [no spaces]
     25 
     26      ll [ _CC ] [ . MM ] [ @ VV]
     27 
     28      l = lang, C = ctry, M = charmap, V = variant
     29 */
     30 
     31 #include "unicode/utypes.h"
     32 #include "unicode/ustring.h"
     33 #include "unicode/uloc.h"
     34 
     35 #include "putilimp.h"
     36 #include "ustr_imp.h"
     37 #include "ulocimp.h"
     38 #include "umutex.h"
     39 #include "cstring.h"
     40 #include "cmemory.h"
     41 #include "ucln_cmn.h"
     42 #include "locmap.h"
     43 #include "uarrsort.h"
     44 #include "uenumimp.h"
     45 #include "uassert.h"
     46 
     47 #include <stdio.h> /* for sprintf */
     48 
     49 /* ### Declarations **************************************************/
     50 
     51 /* Locale stuff from locid.cpp */
     52 U_CFUNC void locale_set_default(const char *id);
     53 U_CFUNC const char *locale_get_default(void);
     54 U_CFUNC int32_t
     55 locale_getKeywords(const char *localeID,
     56             char prev,
     57             char *keywords, int32_t keywordCapacity,
     58             char *values, int32_t valuesCapacity, int32_t *valLen,
     59             UBool valuesToo,
     60             UErrorCode *status);
     61 
     62 /* ### Data tables **************************************************/
     63 
     64 /**
     65  * Table of language codes, both 2- and 3-letter, with preference
     66  * given to 2-letter codes where possible.  Includes 3-letter codes
     67  * that lack a 2-letter equivalent.
     68  *
     69  * This list must be in sorted order.  This list is returned directly
     70  * to the user by some API.
     71  *
     72  * This list must be kept in sync with LANGUAGES_3, with corresponding
     73  * entries matched.
     74  *
     75  * This table should be terminated with a NULL entry, followed by a
     76  * second list, and another NULL entry.  The first list is visible to
     77  * user code when this array is returned by API.  The second list
     78  * contains codes we support, but do not expose through user API.
     79  *
     80  * Notes
     81  *
     82  * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
     83  * include the revisions up to 2001/7/27 *CWB*
     84  *
     85  * The 3 character codes are the terminology codes like RFC 3066.  This
     86  * is compatible with prior ICU codes
     87  *
     88  * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
     89  * table but now at the end of the table because 3 character codes are
     90  * duplicates.  This avoids bad searches going from 3 to 2 character
     91  * codes.
     92  *
     93  * The range qaa-qtz is reserved for local use
     94  */
     95 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
     96 /* ISO639 table version is 20130123 */
     97 static const char * const LANGUAGES[] = {
     98     "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",
     99     "afa", "afh", "agq", "ain", "ak",  "akk", "ale", "alg",
    100     "alt", "am",  "an",  "ang", "anp", "apa", "ar",  "arc",
    101     "arn", "arp", "art", "arw", "as",  "asa", "ast", "ath",
    102     "aus", "av",  "awa", "ay",  "az",
    103     "ba",  "bad", "bai", "bal", "ban", "bas", "bat", "bax",
    104     "bbj", "be",  "bej", "bem", "ber", "bez", "bfd", "bg",
    105     "bh",  "bho", "bi",  "bik", "bin", "bkm", "bla", "bm",
    106     "bn",  "bnt", "bo",  "br",  "bra", "brx", "bs",  "bss",
    107     "btk", "bua", "bug", "bum", "byn", "byv",
    108     "ca",  "cad", "cai", "car", "cau", "cay", "cch", "ce",
    109     "ceb", "cel", "cgg", "ch",  "chb", "chg", "chk", "chm",
    110     "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
    111     "cop", "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",
    112     "csb", "cu",  "cus", "cv",  "cy",
    113     "da",  "dak", "dar", "dav", "day", "de",  "del", "den",
    114     "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
    115     "dv",  "dyo", "dyu", "dz",  "dzg",
    116     "ebu", "ee",  "efi", "egy", "eka", "el",  "elx", "en",
    117     "enm", "eo",  "es",  "et",  "eu",  "ewo",
    118     "fa",  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",
    119     "fo",  "fon", "fr",  "frm", "fro", "frr", "frs", "fur",
    120     "fy",
    121     "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
    122     "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
    123     "grc", "gsw", "gu",  "guz", "gv",  "gwi",
    124     "ha",  "hai", "haw", "he",  "hi",  "hil", "him", "hit",
    125     "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",
    126     "hz",
    127     "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ijo",
    128     "ik",  "ilo", "inc", "ine", "inh", "io",  "ira", "iro",
    129     "is",  "it",  "iu",
    130     "ja",  "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
    131     "ka",  "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
    132     "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg",  "kha",
    133     "khi", "kho", "khq", "ki",  "kj",  "kk",  "kkj", "kl",
    134     "kln", "km",  "kmb", "kn",  "ko",  "kok", "kos", "kpe",
    135     "kr",  "krc", "krl", "kro", "kru", "ks",  "ksb", "ksf",
    136     "ksh", "ku",  "kum", "kut", "kv",  "kw",  "ky",
    137     "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lg",
    138     "li",  "lkt", "ln",  "lo",  "lol", "loz", "lt",  "lu",
    139     "lua", "lui", "lun", "luo", "lus", "luy", "lv",
    140     "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
    141     "mde", "mdf", "mdr", "men", "mer", "mfe", "mg",  "mga",
    142     "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    143     "mkh", "ml",  "mn",  "mnc", "mni", "mno", "mo",  "moh",
    144     "mos", "mr",  "ms",  "mt",  "mua", "mul", "mun", "mus",
    145     "mwl", "mwr", "my",  "mye", "myn", "myv",
    146     "na",  "nah", "nai", "nap", "naq", "nb",  "nd",  "nds",
    147     "ne",  "new", "ng",  "nia", "nic", "niu", "nl",  "nmg",
    148     "nn",  "nnh", "no",  "nog", "non", "nqo", "nr",  "nso",
    149     "nub", "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo",
    150     "nzi",
    151     "oc",  "oj",  "om",  "or",  "os",  "osa", "ota", "oto",
    152     "pa",  "paa", "pag", "pal", "pam", "pap", "pau", "peo",
    153     "phi", "phn", "pi",  "pl",  "pon", "pra", "pro", "ps",
    154     "pt",
    155     "qu",
    156     "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rof",
    157     "rom", "ru",  "rup", "rw",  "rwk",
    158     "sa",  "sad", "sah", "sai", "sal", "sam", "saq", "sas",
    159     "sat", "sba", "sbp", "sc",  "scn", "sco", "sd",  "se",
    160     "see", "seh", "sel", "sem", "ses", "sg",  "sga", "sgn",
    161     "shi", "shn", "shu", "si",  "sid", "sio", "sit",
    162     "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
    163     "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
    164     "srn", "srr", "ss",  "ssa", "ssy", "st",  "su",  "suk",
    165     "sus", "sux", "sv",  "sw",  "swb", "swc", "syc", "syr",
    166     "ta",  "tai", "te",  "tem", "teo", "ter", "tet", "tg",
    167     "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tl",  "tlh",
    168     "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",  "trv",
    169     "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
    170     "twq", "ty",  "tyv", "tzm",
    171     "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    172     "vai", "ve",  "vi",  "vo",  "vot", "vun",
    173     "wa",  "wae", "wak", "wal", "war", "was", "wen", "wo",
    174     "xal", "xh",  "xog",
    175     "yao", "yap", "yav", "ybb", "yi",  "yo",  "ypk", "yue",
    176     "za",  "zap", "zbl", "zen", "zh",  "znd", "zu",  "zun",
    177     "zxx", "zza",
    178 NULL,
    179     "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
    180 NULL
    181 };
    182 
    183 static const char* const DEPRECATED_LANGUAGES[]={
    184     "in", "iw", "ji", "jw", NULL, NULL
    185 };
    186 static const char* const REPLACEMENT_LANGUAGES[]={
    187     "id", "he", "yi", "jv", NULL, NULL
    188 };
    189 
    190 /**
    191  * Table of 3-letter language codes.
    192  *
    193  * This is a lookup table used to convert 3-letter language codes to
    194  * their 2-letter equivalent, where possible.  It must be kept in sync
    195  * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
    196  * same language as LANGUAGES_3[i].  The commented-out lines are
    197  * copied from LANGUAGES to make eyeballing this baby easier.
    198  *
    199  * Where a 3-letter language code has no 2-letter equivalent, the
    200  * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
    201  *
    202  * This table should be terminated with a NULL entry, followed by a
    203  * second list, and another NULL entry.  The two lists correspond to
    204  * the two lists in LANGUAGES.
    205  */
    206 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
    207 /* ISO639 table version is 20130123 */
    208 static const char * const LANGUAGES_3[] = {
    209     "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
    210     "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
    211     "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
    212     "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
    213     "aus", "ava", "awa", "aym", "aze",
    214     "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
    215     "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
    216     "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
    217     "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
    218     "btk", "bua", "bug", "bum", "byn", "byv",
    219     "cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
    220     "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
    221     "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
    222     "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
    223     "csb", "chu", "cus", "chv", "cym",
    224     "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
    225     "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
    226     "div", "dyo", "dyu", "dzo", "dzg",
    227     "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
    228     "enm", "epo", "spa", "est", "eus", "ewo",
    229     "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
    230     "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
    231     "fry",
    232     "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
    233     "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
    234     "grc", "gsw", "guj", "guz", "glv", "gwi",
    235     "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
    236     "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
    237     "her",
    238     "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
    239     "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
    240     "isl", "ita", "iku",
    241     "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
    242     "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
    243     "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
    244     "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
    245     "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
    246     "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
    247     "ksh", "kur", "kum", "kut", "kom", "cor", "kir",
    248     "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
    249     "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
    250     "lua", "lui", "lun", "luo", "lus", "luy", "lav",
    251     "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
    252     "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
    253     "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    254     "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
    255     "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
    256     "mwl", "mwr", "mya", "mye", "myn", "myv",
    257     "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
    258     "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
    259     "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
    260     "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
    261     "nzi",
    262     "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
    263     "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
    264     "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
    265     "por",
    266     "que",
    267     "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
    268     "rom", "rus", "rup", "kin", "rwk",
    269     "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
    270     "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
    271     "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
    272     "shi", "shn", "shu", "sin", "sid", "sio", "sit",
    273     "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
    274     "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
    275     "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
    276     "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
    277     "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
    278     "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
    279     "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
    280     "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
    281     "twq", "tah", "tyv", "tzm",
    282     "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    283     "vai", "ven", "vie", "vol", "vot", "vun",
    284     "wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
    285     "xal", "xho", "xog",
    286     "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
    287     "zha", "zap", "zbl", "zen", "zho", "znd", "zul", "zun",
    288     "zxx", "zza",
    289 NULL,
    290 /*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    291     "ind", "heb", "yid", "jaw", "srp",
    292 NULL
    293 };
    294 
    295 /**
    296  * Table of 2-letter country codes.
    297  *
    298  * This list must be in sorted order.  This list is returned directly
    299  * to the user by some API.
    300  *
    301  * This list must be kept in sync with COUNTRIES_3, with corresponding
    302  * entries matched.
    303  *
    304  * This table should be terminated with a NULL entry, followed by a
    305  * second list, and another NULL entry.  The first list is visible to
    306  * user code when this array is returned by API.  The second list
    307  * contains codes we support, but do not expose through user API.
    308  *
    309  * Notes:
    310  *
    311  * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
    312  * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
    313  * new codes keeping the old ones for compatibility updated to include
    314  * 1999/12/03 revisions *CWB*
    315  *
    316  * RO(ROM) is now RO(ROU) according to
    317  * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
    318  */
    319 static const char * const COUNTRIES[] = {
    320     "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    321     "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    322     "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    323     "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    324     "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    325     "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    326     "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    327     "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    328     "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    329     "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    330     "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    331     "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    332     "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    333     "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    334     "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    335     "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    336     "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    337     "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    338     "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    339     "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    340     "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    341     "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    342     "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    343     "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    344     "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    345     "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    346     "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    347     "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    348     "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    349     "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
    350 NULL,
    351     "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
    352 NULL
    353 };
    354 
    355 static const char* const DEPRECATED_COUNTRIES[] = {
    356     "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
    357 };
    358 static const char* const REPLACEMENT_COUNTRIES[] = {
    359 /*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    360     "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
    361 };
    362 
    363 /**
    364  * Table of 3-letter country codes.
    365  *
    366  * This is a lookup table used to convert 3-letter country codes to
    367  * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
    368  * For all valid i, COUNTRIES[i] must refer to the same country as
    369  * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
    370  * to make eyeballing this baby easier.
    371  *
    372  * This table should be terminated with a NULL entry, followed by a
    373  * second list, and another NULL entry.  The two lists correspond to
    374  * the two lists in COUNTRIES.
    375  */
    376 static const char * const COUNTRIES_3[] = {
    377 /*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    378     "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
    379 /*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    380     "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
    381 /*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    382     "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
    383 /*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
    384     "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
    385 /*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    386     "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
    387 /*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    388     "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
    389 /*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    390     "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
    391 /*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    392     "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
    393 /*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    394     "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
    395 /*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    396     "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
    397 /*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    398     "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
    399 /*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    400     "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
    401 /*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    402     "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
    403 /*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    404     "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
    405 /*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    406     "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
    407 /*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    408     "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
    409 /*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    410     "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
    411 /*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    412     "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
    413 /*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    414     "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
    415 /*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    416     "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
    417 /*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    418     "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
    419 /*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    420     "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
    421 /*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    422     "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
    423 /*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    424     "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
    425 /*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
    426     "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
    427 /*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    428     "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
    429 /*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    430     "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
    431 /*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    432     "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
    433 /*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    434     "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
    435 /*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    436     "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
    437 NULL,
    438 /*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
    439     "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
    440 NULL
    441 };
    442 
    443 typedef struct CanonicalizationMap {
    444     const char *id;          /* input ID */
    445     const char *canonicalID; /* canonicalized output ID */
    446     const char *keyword;     /* keyword, or NULL if none */
    447     const char *value;       /* keyword value, or NULL if kw==NULL */
    448 } CanonicalizationMap;
    449 
    450 /**
    451  * A map to canonicalize locale IDs.  This handles a variety of
    452  * different semantic kinds of transformations.
    453  */
    454 static const CanonicalizationMap CANONICALIZE_MAP[] = {
    455     { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
    456     { "c",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
    457     { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
    458     { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
    459     { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
    460     { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
    461     { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
    462     { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
    463     { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
    464     { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
    465     { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
    466     { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
    467     { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
    468     { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
    469     { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
    470     { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
    471     { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
    472     { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
    473     { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
    474     { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
    475     { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
    476     { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
    477     { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
    478     { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
    479     { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
    480     { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
    481     { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
    482     { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
    483     { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
    484     { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
    485     { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
    486     { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
    487     { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
    488     { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
    489     { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
    490     { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
    491     { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
    492     { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
    493     { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
    494     { "zh_GAN",         "gan", NULL, NULL }, /* registered name */
    495     { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
    496     { "zh_HAKKA",       "hak", NULL, NULL }, /* registered name */
    497     { "zh_MIN_NAN",     "nan", NULL, NULL }, /* registered name */
    498     { "zh_WUU",         "wuu", NULL, NULL }, /* registered name */
    499     { "zh_XIANG",       "hsn", NULL, NULL }, /* registered name */
    500     { "zh_YUE",         "yue", NULL, NULL }, /* registered name */
    501 };
    502 
    503 typedef struct VariantMap {
    504     const char *variant;          /* input ID */
    505     const char *keyword;     /* keyword, or NULL if none */
    506     const char *value;       /* keyword value, or NULL if kw==NULL */
    507 } VariantMap;
    508 
    509 static const VariantMap VARIANT_MAP[] = {
    510     { "EURO",   "currency", "EUR" },
    511     { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    512     { "STROKE", "collation", "stroke" }  /* Solaris variant */
    513 };
    514 
    515 /* ### BCP47 Conversion *******************************************/
    516 /* Test if the locale id has BCP47 u extension and does not have '@' */
    517 #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
    518 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
    519 #define _ConvertBCP47(finalID, id, buffer, length,err) \
    520         if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
    521             finalID=id; \
    522         } else { \
    523             finalID=buffer; \
    524         }
    525 /* Gets the size of the shortest subtag in the given localeID. */
    526 static int32_t getShortestSubtagLength(const char *localeID) {
    527     int32_t localeIDLength = uprv_strlen(localeID);
    528     int32_t length = localeIDLength;
    529     int32_t tmpLength = 0;
    530     int32_t i;
    531     UBool reset = TRUE;
    532 
    533     for (i = 0; i < localeIDLength; i++) {
    534         if (localeID[i] != '_' && localeID[i] != '-') {
    535             if (reset) {
    536                 tmpLength = 0;
    537                 reset = FALSE;
    538             }
    539             tmpLength++;
    540         } else {
    541             if (tmpLength != 0 && tmpLength < length) {
    542                 length = tmpLength;
    543             }
    544             reset = TRUE;
    545         }
    546     }
    547 
    548     return length;
    549 }
    550 
    551 /* ### Keywords **************************************************/
    552 
    553 #define ULOC_KEYWORD_BUFFER_LEN 25
    554 #define ULOC_MAX_NO_KEYWORDS 25
    555 
    556 U_CAPI const char * U_EXPORT2
    557 locale_getKeywordsStart(const char *localeID) {
    558     const char *result = NULL;
    559     if((result = uprv_strchr(localeID, '@')) != NULL) {
    560         return result;
    561     }
    562 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    563     else {
    564         /* We do this because the @ sign is variant, and the @ sign used on one
    565         EBCDIC machine won't be compiled the same way on other EBCDIC based
    566         machines. */
    567         static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
    568         const uint8_t *charToFind = ebcdicSigns;
    569         while(*charToFind) {
    570             if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
    571                 return result;
    572             }
    573             charToFind++;
    574         }
    575     }
    576 #endif
    577     return NULL;
    578 }
    579 
    580 /**
    581  * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
    582  * @param keywordName incoming name to be canonicalized
    583  * @param status return status (keyword too long)
    584  * @return length of the keyword name
    585  */
    586 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
    587 {
    588   int32_t i;
    589   int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
    590 
    591   if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
    592     /* keyword name too long for internal buffer */
    593     *status = U_INTERNAL_PROGRAM_ERROR;
    594           return 0;
    595   }
    596 
    597   /* normalize the keyword name */
    598   for(i = 0; i < keywordNameLen; i++) {
    599     buf[i] = uprv_tolower(keywordName[i]);
    600   }
    601   buf[i] = 0;
    602 
    603   return keywordNameLen;
    604 }
    605 
    606 typedef struct {
    607     char keyword[ULOC_KEYWORD_BUFFER_LEN];
    608     int32_t keywordLen;
    609     const char *valueStart;
    610     int32_t valueLen;
    611 } KeywordStruct;
    612 
    613 static int32_t U_CALLCONV
    614 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
    615     const char* leftString = ((const KeywordStruct *)left)->keyword;
    616     const char* rightString = ((const KeywordStruct *)right)->keyword;
    617     return uprv_strcmp(leftString, rightString);
    618 }
    619 
    620 /**
    621  * Both addKeyword and addValue must already be in canonical form.
    622  * Either both addKeyword and addValue are NULL, or neither is NULL.
    623  * If they are not NULL they must be zero terminated.
    624  * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
    625  */
    626 static int32_t
    627 _getKeywords(const char *localeID,
    628              char prev,
    629              char *keywords, int32_t keywordCapacity,
    630              char *values, int32_t valuesCapacity, int32_t *valLen,
    631              UBool valuesToo,
    632              const char* addKeyword,
    633              const char* addValue,
    634              UErrorCode *status)
    635 {
    636     KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
    637 
    638     int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
    639     int32_t numKeywords = 0;
    640     const char* pos = localeID;
    641     const char* equalSign = NULL;
    642     const char* semicolon = NULL;
    643     int32_t i = 0, j, n;
    644     int32_t keywordsLen = 0;
    645     int32_t valuesLen = 0;
    646 
    647     if(prev == '@') { /* start of keyword definition */
    648         /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
    649         do {
    650             UBool duplicate = FALSE;
    651             /* skip leading spaces */
    652             while(*pos == ' ') {
    653                 pos++;
    654             }
    655             if (!*pos) { /* handle trailing "; " */
    656                 break;
    657             }
    658             if(numKeywords == maxKeywords) {
    659                 *status = U_INTERNAL_PROGRAM_ERROR;
    660                 return 0;
    661             }
    662             equalSign = uprv_strchr(pos, '=');
    663             semicolon = uprv_strchr(pos, ';');
    664             /* lack of '=' [foo@currency] is illegal */
    665             /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
    666             if(!equalSign || (semicolon && semicolon<equalSign)) {
    667                 *status = U_INVALID_FORMAT_ERROR;
    668                 return 0;
    669             }
    670             /* need to normalize both keyword and keyword name */
    671             if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
    672                 /* keyword name too long for internal buffer */
    673                 *status = U_INTERNAL_PROGRAM_ERROR;
    674                 return 0;
    675             }
    676             for(i = 0, n = 0; i < equalSign - pos; ++i) {
    677                 if (pos[i] != ' ') {
    678                     keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
    679                 }
    680             }
    681             keywordList[numKeywords].keyword[n] = 0;
    682             keywordList[numKeywords].keywordLen = n;
    683             /* now grab the value part. First we skip the '=' */
    684             equalSign++;
    685             /* then we leading spaces */
    686             while(*equalSign == ' ') {
    687                 equalSign++;
    688             }
    689             keywordList[numKeywords].valueStart = equalSign;
    690 
    691             pos = semicolon;
    692             i = 0;
    693             if(pos) {
    694                 while(*(pos - i - 1) == ' ') {
    695                     i++;
    696                 }
    697                 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
    698                 pos++;
    699             } else {
    700                 i = (int32_t)uprv_strlen(equalSign);
    701                 while(i && equalSign[i-1] == ' ') {
    702                     i--;
    703                 }
    704                 keywordList[numKeywords].valueLen = i;
    705             }
    706             /* If this is a duplicate keyword, then ignore it */
    707             for (j=0; j<numKeywords; ++j) {
    708                 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
    709                     duplicate = TRUE;
    710                     break;
    711                 }
    712             }
    713             if (!duplicate) {
    714                 ++numKeywords;
    715             }
    716         } while(pos);
    717 
    718         /* Handle addKeyword/addValue. */
    719         if (addKeyword != NULL) {
    720             UBool duplicate = FALSE;
    721             U_ASSERT(addValue != NULL);
    722             /* Search for duplicate; if found, do nothing. Explicit keyword
    723                overrides addKeyword. */
    724             for (j=0; j<numKeywords; ++j) {
    725                 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
    726                     duplicate = TRUE;
    727                     break;
    728                 }
    729             }
    730             if (!duplicate) {
    731                 if (numKeywords == maxKeywords) {
    732                     *status = U_INTERNAL_PROGRAM_ERROR;
    733                     return 0;
    734                 }
    735                 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
    736                 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
    737                 keywordList[numKeywords].valueStart = addValue;
    738                 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
    739                 ++numKeywords;
    740             }
    741         } else {
    742             U_ASSERT(addValue == NULL);
    743         }
    744 
    745         /* now we have a list of keywords */
    746         /* we need to sort it */
    747         uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
    748 
    749         /* Now construct the keyword part */
    750         for(i = 0; i < numKeywords; i++) {
    751             if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
    752                 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
    753                 if(valuesToo) {
    754                     keywords[keywordsLen + keywordList[i].keywordLen] = '=';
    755                 } else {
    756                     keywords[keywordsLen + keywordList[i].keywordLen] = 0;
    757                 }
    758             }
    759             keywordsLen += keywordList[i].keywordLen + 1;
    760             if(valuesToo) {
    761                 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
    762                     uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
    763                 }
    764                 keywordsLen += keywordList[i].valueLen;
    765 
    766                 if(i < numKeywords - 1) {
    767                     if(keywordsLen < keywordCapacity) {
    768                         keywords[keywordsLen] = ';';
    769                     }
    770                     keywordsLen++;
    771                 }
    772             }
    773             if(values) {
    774                 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
    775                     uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
    776                     values[valuesLen + keywordList[i].valueLen] = 0;
    777                 }
    778                 valuesLen += keywordList[i].valueLen + 1;
    779             }
    780         }
    781         if(values) {
    782             values[valuesLen] = 0;
    783             if(valLen) {
    784                 *valLen = valuesLen;
    785             }
    786         }
    787         return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
    788     } else {
    789         return 0;
    790     }
    791 }
    792 
    793 U_CFUNC int32_t
    794 locale_getKeywords(const char *localeID,
    795                    char prev,
    796                    char *keywords, int32_t keywordCapacity,
    797                    char *values, int32_t valuesCapacity, int32_t *valLen,
    798                    UBool valuesToo,
    799                    UErrorCode *status) {
    800     return _getKeywords(localeID, prev, keywords, keywordCapacity,
    801                         values, valuesCapacity, valLen, valuesToo,
    802                         NULL, NULL, status);
    803 }
    804 
    805 U_CAPI int32_t U_EXPORT2
    806 uloc_getKeywordValue(const char* localeID,
    807                      const char* keywordName,
    808                      char* buffer, int32_t bufferCapacity,
    809                      UErrorCode* status)
    810 {
    811     const char* startSearchHere = NULL;
    812     const char* nextSeparator = NULL;
    813     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    814     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    815     int32_t i = 0;
    816     int32_t result = 0;
    817 
    818     if(status && U_SUCCESS(*status) && localeID) {
    819       char tempBuffer[ULOC_FULLNAME_CAPACITY];
    820       const char* tmpLocaleID;
    821 
    822       if (_hasBCP47Extension(localeID)) {
    823           _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
    824       } else {
    825           tmpLocaleID=localeID;
    826       }
    827 
    828       startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
    829       if(startSearchHere == NULL) {
    830           /* no keywords, return at once */
    831           return 0;
    832       }
    833 
    834       locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    835       if(U_FAILURE(*status)) {
    836         return 0;
    837       }
    838 
    839       /* find the first keyword */
    840       while(startSearchHere) {
    841           startSearchHere++;
    842           /* skip leading spaces (allowed?) */
    843           while(*startSearchHere == ' ') {
    844               startSearchHere++;
    845           }
    846           nextSeparator = uprv_strchr(startSearchHere, '=');
    847           /* need to normalize both keyword and keyword name */
    848           if(!nextSeparator) {
    849               break;
    850           }
    851           if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
    852               /* keyword name too long for internal buffer */
    853               *status = U_INTERNAL_PROGRAM_ERROR;
    854               return 0;
    855           }
    856           for(i = 0; i < nextSeparator - startSearchHere; i++) {
    857               localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
    858           }
    859           /* trim trailing spaces */
    860           while(startSearchHere[i-1] == ' ') {
    861               i--;
    862               U_ASSERT(i>=0);
    863           }
    864           localeKeywordNameBuffer[i] = 0;
    865 
    866           startSearchHere = uprv_strchr(nextSeparator, ';');
    867 
    868           if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
    869               nextSeparator++;
    870               while(*nextSeparator == ' ') {
    871                   nextSeparator++;
    872               }
    873               /* we actually found the keyword. Copy the value */
    874               if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
    875                   while(*(startSearchHere-1) == ' ') {
    876                       startSearchHere--;
    877                   }
    878                   uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
    879                   result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
    880               } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
    881                   i = (int32_t)uprv_strlen(nextSeparator);
    882                   while(nextSeparator[i - 1] == ' ') {
    883                       i--;
    884                   }
    885                   uprv_strncpy(buffer, nextSeparator, i);
    886                   result = u_terminateChars(buffer, bufferCapacity, i, status);
    887               } else {
    888                   /* give a bigger buffer, please */
    889                   *status = U_BUFFER_OVERFLOW_ERROR;
    890                   if(startSearchHere) {
    891                       result = (int32_t)(startSearchHere - nextSeparator);
    892                   } else {
    893                       result = (int32_t)uprv_strlen(nextSeparator);
    894                   }
    895               }
    896               return result;
    897           }
    898       }
    899     }
    900     return 0;
    901 }
    902 
    903 U_CAPI int32_t U_EXPORT2
    904 uloc_setKeywordValue(const char* keywordName,
    905                      const char* keywordValue,
    906                      char* buffer, int32_t bufferCapacity,
    907                      UErrorCode* status)
    908 {
    909     /* TODO: sorting. removal. */
    910     int32_t keywordNameLen;
    911     int32_t keywordValueLen;
    912     int32_t bufLen;
    913     int32_t needLen = 0;
    914     int32_t foundValueLen;
    915     int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
    916     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    917     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    918     int32_t i = 0;
    919     int32_t rc;
    920     char* nextSeparator = NULL;
    921     char* nextEqualsign = NULL;
    922     char* startSearchHere = NULL;
    923     char* keywordStart = NULL;
    924     char *insertHere = NULL;
    925     if(U_FAILURE(*status)) {
    926         return -1;
    927     }
    928     if(bufferCapacity>1) {
    929         bufLen = (int32_t)uprv_strlen(buffer);
    930     } else {
    931         *status = U_ILLEGAL_ARGUMENT_ERROR;
    932         return 0;
    933     }
    934     if(bufferCapacity<bufLen) {
    935         /* The capacity is less than the length?! Is this NULL terminated? */
    936         *status = U_ILLEGAL_ARGUMENT_ERROR;
    937         return 0;
    938     }
    939     if(keywordValue && !*keywordValue) {
    940         keywordValue = NULL;
    941     }
    942     if(keywordValue) {
    943         keywordValueLen = (int32_t)uprv_strlen(keywordValue);
    944     } else {
    945         keywordValueLen = 0;
    946     }
    947     keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    948     if(U_FAILURE(*status)) {
    949         return 0;
    950     }
    951     startSearchHere = (char*)locale_getKeywordsStart(buffer);
    952     if(startSearchHere == NULL || (startSearchHere[1]==0)) {
    953         if(!keywordValue) { /* no keywords = nothing to remove */
    954             return bufLen;
    955         }
    956 
    957         needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
    958         if(startSearchHere) { /* had a single @ */
    959             needLen--; /* already had the @ */
    960             /* startSearchHere points at the @ */
    961         } else {
    962             startSearchHere=buffer+bufLen;
    963         }
    964         if(needLen >= bufferCapacity) {
    965             *status = U_BUFFER_OVERFLOW_ERROR;
    966             return needLen; /* no change */
    967         }
    968         *startSearchHere = '@';
    969         startSearchHere++;
    970         uprv_strcpy(startSearchHere, keywordNameBuffer);
    971         startSearchHere += keywordNameLen;
    972         *startSearchHere = '=';
    973         startSearchHere++;
    974         uprv_strcpy(startSearchHere, keywordValue);
    975         startSearchHere+=keywordValueLen;
    976         return needLen;
    977     } /* end shortcut - no @ */
    978 
    979     keywordStart = startSearchHere;
    980     /* search for keyword */
    981     while(keywordStart) {
    982         keywordStart++;
    983         /* skip leading spaces (allowed?) */
    984         while(*keywordStart == ' ') {
    985             keywordStart++;
    986         }
    987         nextEqualsign = uprv_strchr(keywordStart, '=');
    988         /* need to normalize both keyword and keyword name */
    989         if(!nextEqualsign) {
    990             break;
    991         }
    992         if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
    993             /* keyword name too long for internal buffer */
    994             *status = U_INTERNAL_PROGRAM_ERROR;
    995             return 0;
    996         }
    997         for(i = 0; i < nextEqualsign - keywordStart; i++) {
    998             localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
    999         }
   1000         /* trim trailing spaces */
   1001         while(keywordStart[i-1] == ' ') {
   1002             i--;
   1003         }
   1004         U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN);
   1005         localeKeywordNameBuffer[i] = 0;
   1006 
   1007         nextSeparator = uprv_strchr(nextEqualsign, ';');
   1008         rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
   1009         if(rc == 0) {
   1010             nextEqualsign++;
   1011             while(*nextEqualsign == ' ') {
   1012                 nextEqualsign++;
   1013             }
   1014             /* we actually found the keyword. Change the value */
   1015             if (nextSeparator) {
   1016                 keywordAtEnd = 0;
   1017                 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
   1018             } else {
   1019                 keywordAtEnd = 1;
   1020                 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
   1021             }
   1022             if(keywordValue) { /* adding a value - not removing */
   1023               if(foundValueLen == keywordValueLen) {
   1024                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1025                 return bufLen; /* no change in size */
   1026               } else if(foundValueLen > keywordValueLen) {
   1027                 int32_t delta = foundValueLen - keywordValueLen;
   1028                 if(nextSeparator) { /* RH side */
   1029                   uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
   1030                 }
   1031                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1032                 bufLen -= delta;
   1033                 buffer[bufLen]=0;
   1034                 return bufLen;
   1035               } else { /* FVL < KVL */
   1036                 int32_t delta = keywordValueLen - foundValueLen;
   1037                 if((bufLen+delta) >= bufferCapacity) {
   1038                   *status = U_BUFFER_OVERFLOW_ERROR;
   1039                   return bufLen+delta;
   1040                 }
   1041                 if(nextSeparator) { /* RH side */
   1042                   uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
   1043                 }
   1044                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1045                 bufLen += delta;
   1046                 buffer[bufLen]=0;
   1047                 return bufLen;
   1048               }
   1049             } else { /* removing a keyword */
   1050               if(keywordAtEnd) {
   1051                 /* zero out the ';' or '@' just before startSearchhere */
   1052                 keywordStart[-1] = 0;
   1053                 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
   1054               } else {
   1055                 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
   1056                 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
   1057                 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
   1058               }
   1059             }
   1060         } else if(rc<0){ /* end match keyword */
   1061           /* could insert at this location. */
   1062           insertHere = keywordStart;
   1063         }
   1064         keywordStart = nextSeparator;
   1065     } /* end loop searching */
   1066 
   1067     if(!keywordValue) {
   1068       return bufLen; /* removal of non-extant keyword - no change */
   1069     }
   1070 
   1071     /* we know there is at least one keyword. */
   1072     needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
   1073     if(needLen >= bufferCapacity) {
   1074         *status = U_BUFFER_OVERFLOW_ERROR;
   1075         return needLen; /* no change */
   1076     }
   1077 
   1078     if(insertHere) {
   1079       uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
   1080       keywordStart = insertHere;
   1081     } else {
   1082       keywordStart = buffer+bufLen;
   1083       *keywordStart = ';';
   1084       keywordStart++;
   1085     }
   1086     uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
   1087     keywordStart += keywordNameLen;
   1088     *keywordStart = '=';
   1089     keywordStart++;
   1090     uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
   1091     keywordStart+=keywordValueLen;
   1092     if(insertHere) {
   1093       *keywordStart = ';';
   1094       keywordStart++;
   1095     }
   1096     buffer[needLen]=0;
   1097     return needLen;
   1098 }
   1099 
   1100 /* ### ID parsing implementation **************************************************/
   1101 
   1102 #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))
   1103 
   1104 /*returns TRUE if one of the special prefixes is here (s=string)
   1105   'x-' or 'i-' */
   1106 #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
   1107 
   1108 /* Dot terminates it because of POSIX form  where dot precedes the codepage
   1109  * except for variant
   1110  */
   1111 #define _isTerminator(a)  ((a==0)||(a=='.')||(a=='@'))
   1112 
   1113 static char* _strnchr(const char* str, int32_t len, char c) {
   1114     U_ASSERT(str != 0 && len >= 0);
   1115     while (len-- != 0) {
   1116         char d = *str;
   1117         if (d == c) {
   1118             return (char*) str;
   1119         } else if (d == 0) {
   1120             break;
   1121         }
   1122         ++str;
   1123     }
   1124     return NULL;
   1125 }
   1126 
   1127 /**
   1128  * Lookup 'key' in the array 'list'.  The array 'list' should contain
   1129  * a NULL entry, followed by more entries, and a second NULL entry.
   1130  *
   1131  * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
   1132  * COUNTRIES_3.
   1133  */
   1134 static int16_t _findIndex(const char* const* list, const char* key)
   1135 {
   1136     const char* const* anchor = list;
   1137     int32_t pass = 0;
   1138 
   1139     /* Make two passes through two NULL-terminated arrays at 'list' */
   1140     while (pass++ < 2) {
   1141         while (*list) {
   1142             if (uprv_strcmp(key, *list) == 0) {
   1143                 return (int16_t)(list - anchor);
   1144             }
   1145             list++;
   1146         }
   1147         ++list;     /* skip final NULL *CWB*/
   1148     }
   1149     return -1;
   1150 }
   1151 
   1152 /* count the length of src while copying it to dest; return strlen(src) */
   1153 static inline int32_t
   1154 _copyCount(char *dest, int32_t destCapacity, const char *src) {
   1155     const char *anchor;
   1156     char c;
   1157 
   1158     anchor=src;
   1159     for(;;) {
   1160         if((c=*src)==0) {
   1161             return (int32_t)(src-anchor);
   1162         }
   1163         if(destCapacity<=0) {
   1164             return (int32_t)((src-anchor)+uprv_strlen(src));
   1165         }
   1166         ++src;
   1167         *dest++=c;
   1168         --destCapacity;
   1169     }
   1170 }
   1171 
   1172 U_CFUNC const char*
   1173 uloc_getCurrentCountryID(const char* oldID){
   1174     int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
   1175     if (offset >= 0) {
   1176         return REPLACEMENT_COUNTRIES[offset];
   1177     }
   1178     return oldID;
   1179 }
   1180 U_CFUNC const char*
   1181 uloc_getCurrentLanguageID(const char* oldID){
   1182     int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
   1183     if (offset >= 0) {
   1184         return REPLACEMENT_LANGUAGES[offset];
   1185     }
   1186     return oldID;
   1187 }
   1188 /*
   1189  * the internal functions _getLanguage(), _getCountry(), _getVariant()
   1190  * avoid duplicating code to handle the earlier locale ID pieces
   1191  * in the functions for the later ones by
   1192  * setting the *pEnd pointer to where they stopped parsing
   1193  *
   1194  * TODO try to use this in Locale
   1195  */
   1196 U_CFUNC int32_t
   1197 ulocimp_getLanguage(const char *localeID,
   1198                     char *language, int32_t languageCapacity,
   1199                     const char **pEnd) {
   1200     int32_t i=0;
   1201     int32_t offset;
   1202     char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
   1203 
   1204     /* if it starts with i- or x- then copy that prefix */
   1205     if(_isIDPrefix(localeID)) {
   1206         if(i<languageCapacity) {
   1207             language[i]=(char)uprv_tolower(*localeID);
   1208         }
   1209         if(i<languageCapacity) {
   1210             language[i+1]='-';
   1211         }
   1212         i+=2;
   1213         localeID+=2;
   1214     }
   1215 
   1216     /* copy the language as far as possible and count its length */
   1217     while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
   1218         if(i<languageCapacity) {
   1219             language[i]=(char)uprv_tolower(*localeID);
   1220         }
   1221         if(i<3) {
   1222             U_ASSERT(i>=0);
   1223             lang[i]=(char)uprv_tolower(*localeID);
   1224         }
   1225         i++;
   1226         localeID++;
   1227     }
   1228 
   1229     if(i==3) {
   1230         /* convert 3 character code to 2 character code if possible *CWB*/
   1231         offset=_findIndex(LANGUAGES_3, lang);
   1232         if(offset>=0) {
   1233             i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
   1234         }
   1235     }
   1236 
   1237     if(pEnd!=NULL) {
   1238         *pEnd=localeID;
   1239     }
   1240     return i;
   1241 }
   1242 
   1243 U_CFUNC int32_t
   1244 ulocimp_getScript(const char *localeID,
   1245                   char *script, int32_t scriptCapacity,
   1246                   const char **pEnd)
   1247 {
   1248     int32_t idLen = 0;
   1249 
   1250     if (pEnd != NULL) {
   1251         *pEnd = localeID;
   1252     }
   1253 
   1254     /* copy the second item as far as possible and count its length */
   1255     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
   1256             && uprv_isASCIILetter(localeID[idLen])) {
   1257         idLen++;
   1258     }
   1259 
   1260     /* If it's exactly 4 characters long, then it's a script and not a country. */
   1261     if (idLen == 4) {
   1262         int32_t i;
   1263         if (pEnd != NULL) {
   1264             *pEnd = localeID+idLen;
   1265         }
   1266         if(idLen > scriptCapacity) {
   1267             idLen = scriptCapacity;
   1268         }
   1269         if (idLen >= 1) {
   1270             script[0]=(char)uprv_toupper(*(localeID++));
   1271         }
   1272         for (i = 1; i < idLen; i++) {
   1273             script[i]=(char)uprv_tolower(*(localeID++));
   1274         }
   1275     }
   1276     else {
   1277         idLen = 0;
   1278     }
   1279     return idLen;
   1280 }
   1281 
   1282 U_CFUNC int32_t
   1283 ulocimp_getCountry(const char *localeID,
   1284                    char *country, int32_t countryCapacity,
   1285                    const char **pEnd)
   1286 {
   1287     int32_t idLen=0;
   1288     char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
   1289     int32_t offset;
   1290 
   1291     /* copy the country as far as possible and count its length */
   1292     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
   1293         if(idLen<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
   1294             cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
   1295         }
   1296         idLen++;
   1297     }
   1298 
   1299     /* the country should be either length 2 or 3 */
   1300     if (idLen == 2 || idLen == 3) {
   1301         UBool gotCountry = FALSE;
   1302         /* convert 3 character code to 2 character code if possible *CWB*/
   1303         if(idLen==3) {
   1304             offset=_findIndex(COUNTRIES_3, cnty);
   1305             if(offset>=0) {
   1306                 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
   1307                 gotCountry = TRUE;
   1308             }
   1309         }
   1310         if (!gotCountry) {
   1311             int32_t i = 0;
   1312             for (i = 0; i < idLen; i++) {
   1313                 if (i < countryCapacity) {
   1314                     country[i]=(char)uprv_toupper(localeID[i]);
   1315                 }
   1316             }
   1317         }
   1318         localeID+=idLen;
   1319     } else {
   1320         idLen = 0;
   1321     }
   1322 
   1323     if(pEnd!=NULL) {
   1324         *pEnd=localeID;
   1325     }
   1326 
   1327     return idLen;
   1328 }
   1329 
   1330 /**
   1331  * @param needSeparator if true, then add leading '_' if any variants
   1332  * are added to 'variant'
   1333  */
   1334 static int32_t
   1335 _getVariantEx(const char *localeID,
   1336               char prev,
   1337               char *variant, int32_t variantCapacity,
   1338               UBool needSeparator) {
   1339     int32_t i=0;
   1340 
   1341     /* get one or more variant tags and separate them with '_' */
   1342     if(_isIDSeparator(prev)) {
   1343         /* get a variant string after a '-' or '_' */
   1344         while(!_isTerminator(*localeID)) {
   1345             if (needSeparator) {
   1346                 if (i<variantCapacity) {
   1347                     variant[i] = '_';
   1348                 }
   1349                 ++i;
   1350                 needSeparator = FALSE;
   1351             }
   1352             if(i<variantCapacity) {
   1353                 variant[i]=(char)uprv_toupper(*localeID);
   1354                 if(variant[i]=='-') {
   1355                     variant[i]='_';
   1356                 }
   1357             }
   1358             i++;
   1359             localeID++;
   1360         }
   1361     }
   1362 
   1363     /* if there is no variant tag after a '-' or '_' then look for '@' */
   1364     if(i==0) {
   1365         if(prev=='@') {
   1366             /* keep localeID */
   1367         } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
   1368             ++localeID; /* point after the '@' */
   1369         } else {
   1370             return 0;
   1371         }
   1372         while(!_isTerminator(*localeID)) {
   1373             if (needSeparator) {
   1374                 if (i<variantCapacity) {
   1375                     variant[i] = '_';
   1376                 }
   1377                 ++i;
   1378                 needSeparator = FALSE;
   1379             }
   1380             if(i<variantCapacity) {
   1381                 variant[i]=(char)uprv_toupper(*localeID);
   1382                 if(variant[i]=='-' || variant[i]==',') {
   1383                     variant[i]='_';
   1384                 }
   1385             }
   1386             i++;
   1387             localeID++;
   1388         }
   1389     }
   1390 
   1391     return i;
   1392 }
   1393 
   1394 static int32_t
   1395 _getVariant(const char *localeID,
   1396             char prev,
   1397             char *variant, int32_t variantCapacity) {
   1398     return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
   1399 }
   1400 
   1401 /**
   1402  * Delete ALL instances of a variant from the given list of one or
   1403  * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
   1404  * @param variants the source string of one or more variants,
   1405  * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
   1406  * terminated; if it is, trailing zero will NOT be maintained.
   1407  * @param variantsLen length of variants
   1408  * @param toDelete variant to delete, without separators, e.g.  "EURO"
   1409  * or "PREEURO"; not zero terminated
   1410  * @param toDeleteLen length of toDelete
   1411  * @return number of characters deleted from variants
   1412  */
   1413 static int32_t
   1414 _deleteVariant(char* variants, int32_t variantsLen,
   1415                const char* toDelete, int32_t toDeleteLen)
   1416 {
   1417     int32_t delta = 0; /* number of chars deleted */
   1418     for (;;) {
   1419         UBool flag = FALSE;
   1420         if (variantsLen < toDeleteLen) {
   1421             return delta;
   1422         }
   1423         if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
   1424             (variantsLen == toDeleteLen ||
   1425              (flag=(variants[toDeleteLen] == '_'))))
   1426         {
   1427             int32_t d = toDeleteLen + (flag?1:0);
   1428             variantsLen -= d;
   1429             delta += d;
   1430             if (variantsLen > 0) {
   1431                 uprv_memmove(variants, variants+d, variantsLen);
   1432             }
   1433         } else {
   1434             char* p = _strnchr(variants, variantsLen, '_');
   1435             if (p == NULL) {
   1436                 return delta;
   1437             }
   1438             ++p;
   1439             variantsLen -= (int32_t)(p - variants);
   1440             variants = p;
   1441         }
   1442     }
   1443 }
   1444 
   1445 /* Keyword enumeration */
   1446 
/* Per-enumeration state for the keyword enumerator. */
typedef struct UKeywordsContext {
    /* Start of the keyword list: consecutive NUL-terminated strings, walked
       until an empty string is found.  Released with uprv_free() on close. */
    char* keywords;
    /* Next keyword to hand out; an empty string here means the end. */
    char* current;
} UKeywordsContext;
   1451 
   1452 static void U_CALLCONV
   1453 uloc_kw_closeKeywords(UEnumeration *enumerator) {
   1454     uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
   1455     uprv_free(enumerator->context);
   1456     uprv_free(enumerator);
   1457 }
   1458 
   1459 static int32_t U_CALLCONV
   1460 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
   1461     char *kw = ((UKeywordsContext *)en->context)->keywords;
   1462     int32_t result = 0;
   1463     while(*kw) {
   1464         result++;
   1465         kw += uprv_strlen(kw)+1;
   1466     }
   1467     return result;
   1468 }
   1469 
   1470 static const char* U_CALLCONV
   1471 uloc_kw_nextKeyword(UEnumeration* en,
   1472                     int32_t* resultLength,
   1473                     UErrorCode* /*status*/) {
   1474     const char* result = ((UKeywordsContext *)en->context)->current;
   1475     int32_t len = 0;
   1476     if(*result) {
   1477         len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
   1478         ((UKeywordsContext *)en->context)->current += len+1;
   1479     } else {
   1480         result = NULL;
   1481     }
   1482     if (resultLength) {
   1483         *resultLength = len;
   1484     }
   1485     return result;
   1486 }
   1487 
   1488 static void U_CALLCONV
   1489 uloc_kw_resetKeywords(UEnumeration* en,
   1490                       UErrorCode* /*status*/) {
   1491     ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
   1492 }
   1493 
/*
 * Template for keyword enumerations.  uloc_openKeywordList() copies this
 * structure into each new UEnumeration and then sets the copy's `context`.
 * NOTE(review): the two leading NULL slots presumably include that `context`
 * member; the member order is declared in uenumimp.h - confirm there.
 */
static const UEnumeration gKeywordsEnum = {
    NULL,
    NULL,
    uloc_kw_closeKeywords,
    uloc_kw_countKeywords,
    uenum_unextDefault,
    uloc_kw_nextKeyword,
    uloc_kw_resetKeywords
};
   1503 
   1504 U_CAPI UEnumeration* U_EXPORT2
   1505 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
   1506 {
   1507     UKeywordsContext *myContext = NULL;
   1508     UEnumeration *result = NULL;
   1509 
   1510     if(U_FAILURE(*status)) {
   1511         return NULL;
   1512     }
   1513     result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
   1514     /* Null pointer test */
   1515     if (result == NULL) {
   1516         *status = U_MEMORY_ALLOCATION_ERROR;
   1517         return NULL;
   1518     }
   1519     uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
   1520     myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
   1521     if (myContext == NULL) {
   1522         *status = U_MEMORY_ALLOCATION_ERROR;
   1523         uprv_free(result);
   1524         return NULL;
   1525     }
   1526     myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
   1527     uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
   1528     myContext->keywords[keywordListSize] = 0;
   1529     myContext->current = myContext->keywords;
   1530     result->context = myContext;
   1531     return result;
   1532 }
   1533 
   1534 U_CAPI UEnumeration* U_EXPORT2
   1535 uloc_openKeywords(const char* localeID,
   1536                         UErrorCode* status)
   1537 {
   1538     int32_t i=0;
   1539     char keywords[256];
   1540     int32_t keywordsCapacity = 256;
   1541     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1542     const char* tmpLocaleID;
   1543 
   1544     if(status==NULL || U_FAILURE(*status)) {
   1545         return 0;
   1546     }
   1547 
   1548     if (_hasBCP47Extension(localeID)) {
   1549         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
   1550     } else {
   1551         if (localeID==NULL) {
   1552            localeID=uloc_getDefault();
   1553         }
   1554         tmpLocaleID=localeID;
   1555     }
   1556 
   1557     /* Skip the language */
   1558     ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
   1559     if(_isIDSeparator(*tmpLocaleID)) {
   1560         const char *scriptID;
   1561         /* Skip the script if available */
   1562         ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
   1563         if(scriptID != tmpLocaleID+1) {
   1564             /* Found optional script */
   1565             tmpLocaleID = scriptID;
   1566         }
   1567         /* Skip the Country */
   1568         if (_isIDSeparator(*tmpLocaleID)) {
   1569             ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
   1570             if(_isIDSeparator(*tmpLocaleID)) {
   1571                 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
   1572             }
   1573         }
   1574     }
   1575 
   1576     /* keywords are located after '@' */
   1577     if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
   1578         i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
   1579     }
   1580 
   1581     if(i) {
   1582         return uloc_openKeywordList(keywords, i, status);
   1583     } else {
   1584         return NULL;
   1585     }
   1586 }
   1587 
   1588 
   1589 /* bit-flags for 'options' parameter of _canonicalize */
   1590 #define _ULOC_STRIP_KEYWORDS 0x2
   1591 #define _ULOC_CANONICALIZE   0x1
   1592 
   1593 #define OPTION_SET(options, mask) ((options & mask) != 0)
   1594 
   1595 static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
   1596 #define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
   1597 
   1598 /**
   1599  * Canonicalize the given localeID, to level 1 or to level 2,
   1600  * depending on the options.  To specify level 1, pass in options=0.
   1601  * To specify level 2, pass in options=_ULOC_CANONICALIZE.
   1602  *
   1603  * This is the code underlying uloc_getName and uloc_canonicalize.
   1604  */
   1605 static int32_t
   1606 _canonicalize(const char* localeID,
   1607               char* result,
   1608               int32_t resultCapacity,
   1609               uint32_t options,
   1610               UErrorCode* err) {
   1611     int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
   1612     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1613     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1614     const char* origLocaleID;
   1615     const char* tmpLocaleID;
   1616     const char* keywordAssign = NULL;
   1617     const char* separatorIndicator = NULL;
   1618     const char* addKeyword = NULL;
   1619     const char* addValue = NULL;
   1620     char* name;
   1621     char* variant = NULL; /* pointer into name, or NULL */
   1622 
   1623     if (U_FAILURE(*err)) {
   1624         return 0;
   1625     }
   1626 
   1627     if (_hasBCP47Extension(localeID)) {
   1628         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
   1629     } else {
   1630         if (localeID==NULL) {
   1631            localeID=uloc_getDefault();
   1632         }
   1633         tmpLocaleID=localeID;
   1634     }
   1635 
   1636     origLocaleID=tmpLocaleID;
   1637 
   1638     /* if we are doing a full canonicalization, then put results in
   1639        localeBuffer, if necessary; otherwise send them to result. */
   1640     if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
   1641         (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
   1642         name = localeBuffer;
   1643         nameCapacity = (int32_t)sizeof(localeBuffer);
   1644     } else {
   1645         name = result;
   1646         nameCapacity = resultCapacity;
   1647     }
   1648 
   1649     /* get all pieces, one after another, and separate with '_' */
   1650     len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
   1651 
   1652     if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
   1653         const char *d = uloc_getDefault();
   1654 
   1655         len = (int32_t)uprv_strlen(d);
   1656 
   1657         if (name != NULL) {
   1658             uprv_strncpy(name, d, len);
   1659         }
   1660     } else if(_isIDSeparator(*tmpLocaleID)) {
   1661         const char *scriptID;
   1662 
   1663         ++fieldCount;
   1664         if(len<nameCapacity) {
   1665             name[len]='_';
   1666         }
   1667         ++len;
   1668 
   1669         scriptSize=ulocimp_getScript(tmpLocaleID+1,
   1670             (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
   1671         if(scriptSize > 0) {
   1672             /* Found optional script */
   1673             tmpLocaleID = scriptID;
   1674             ++fieldCount;
   1675             len+=scriptSize;
   1676             if (_isIDSeparator(*tmpLocaleID)) {
   1677                 /* If there is something else, then we add the _ */
   1678                 if(len<nameCapacity) {
   1679                     name[len]='_';
   1680                 }
   1681                 ++len;
   1682             }
   1683         }
   1684 
   1685         if (_isIDSeparator(*tmpLocaleID)) {
   1686             const char *cntryID;
   1687             int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
   1688                 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
   1689             if (cntrySize > 0) {
   1690                 /* Found optional country */
   1691                 tmpLocaleID = cntryID;
   1692                 len+=cntrySize;
   1693             }
   1694             if(_isIDSeparator(*tmpLocaleID)) {
   1695                 /* If there is something else, then we add the _  if we found country before. */
   1696                 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
   1697                     ++fieldCount;
   1698                     if(len<nameCapacity) {
   1699                         name[len]='_';
   1700                     }
   1701                     ++len;
   1702                 }
   1703 
   1704                 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
   1705                     (len<nameCapacity ? name+len : NULL), nameCapacity-len);
   1706                 if (variantSize > 0) {
   1707                     variant = len<nameCapacity ? name+len : NULL;
   1708                     len += variantSize;
   1709                     tmpLocaleID += variantSize + 1; /* skip '_' and variant */
   1710                 }
   1711             }
   1712         }
   1713     }
   1714 
   1715     /* Copy POSIX-style charset specifier, if any [mr.utf8] */
   1716     if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
   1717         UBool done = FALSE;
   1718         do {
   1719             char c = *tmpLocaleID;
   1720             switch (c) {
   1721             case 0:
   1722             case '@':
   1723                 done = TRUE;
   1724                 break;
   1725             default:
   1726                 if (len<nameCapacity) {
   1727                     name[len] = c;
   1728                 }
   1729                 ++len;
   1730                 ++tmpLocaleID;
   1731                 break;
   1732             }
   1733         } while (!done);
   1734     }
   1735 
   1736     /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
   1737        After this, tmpLocaleID either points to '@' or is NULL */
   1738     if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
   1739         keywordAssign = uprv_strchr(tmpLocaleID, '=');
   1740         separatorIndicator = uprv_strchr(tmpLocaleID, ';');
   1741     }
   1742 
   1743     /* Copy POSIX-style variant, if any [mr@FOO] */
   1744     if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
   1745         tmpLocaleID != NULL && keywordAssign == NULL) {
   1746         for (;;) {
   1747             char c = *tmpLocaleID;
   1748             if (c == 0) {
   1749                 break;
   1750             }
   1751             if (len<nameCapacity) {
   1752                 name[len] = c;
   1753             }
   1754             ++len;
   1755             ++tmpLocaleID;
   1756         }
   1757     }
   1758 
   1759     if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
   1760         /* Handle @FOO variant if @ is present and not followed by = */
   1761         if (tmpLocaleID!=NULL && keywordAssign==NULL) {
   1762             int32_t posixVariantSize;
   1763             /* Add missing '_' if needed */
   1764             if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
   1765                 do {
   1766                     if(len<nameCapacity) {
   1767                         name[len]='_';
   1768                     }
   1769                     ++len;
   1770                     ++fieldCount;
   1771                 } while(fieldCount<2);
   1772             }
   1773             posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
   1774                                              (UBool)(variantSize > 0));
   1775             if (posixVariantSize > 0) {
   1776                 if (variant == NULL) {
   1777                     variant = name+len;
   1778                 }
   1779                 len += posixVariantSize;
   1780                 variantSize += posixVariantSize;
   1781             }
   1782         }
   1783 
   1784         /* Handle generic variants first */
   1785         if (variant) {
   1786             for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
   1787                 const char* variantToCompare = VARIANT_MAP[j].variant;
   1788                 int32_t n = (int32_t)uprv_strlen(variantToCompare);
   1789                 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
   1790                 len -= variantLen;
   1791                 if (variantLen > 0) {
   1792                     if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
   1793                         --len;
   1794                     }
   1795                     addKeyword = VARIANT_MAP[j].keyword;
   1796                     addValue = VARIANT_MAP[j].value;
   1797                     break;
   1798                 }
   1799             }
   1800             if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
   1801                 --len;
   1802             }
   1803         }
   1804 
   1805         /* Look up the ID in the canonicalization map */
   1806         for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
   1807             const char* id = CANONICALIZE_MAP[j].id;
   1808             int32_t n = (int32_t)uprv_strlen(id);
   1809             if (len == n && uprv_strncmp(name, id, n) == 0) {
   1810                 if (n == 0 && tmpLocaleID != NULL) {
   1811                     break; /* Don't remap "" if keywords present */
   1812                 }
   1813                 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
   1814                 if (CANONICALIZE_MAP[j].keyword) {
   1815                     addKeyword = CANONICALIZE_MAP[j].keyword;
   1816                     addValue = CANONICALIZE_MAP[j].value;
   1817                 }
   1818                 break;
   1819             }
   1820         }
   1821     }
   1822 
   1823     if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
   1824         if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
   1825             (!separatorIndicator || separatorIndicator > keywordAssign)) {
   1826             if(len<nameCapacity) {
   1827                 name[len]='@';
   1828             }
   1829             ++len;
   1830             ++fieldCount;
   1831             len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
   1832                                 NULL, 0, NULL, TRUE, addKeyword, addValue, err);
   1833         } else if (addKeyword != NULL) {
   1834             U_ASSERT(addValue != NULL && len < nameCapacity);
   1835             /* inelegant but works -- later make _getKeywords do this? */
   1836             len += _copyCount(name+len, nameCapacity-len, "@");
   1837             len += _copyCount(name+len, nameCapacity-len, addKeyword);
   1838             len += _copyCount(name+len, nameCapacity-len, "=");
   1839             len += _copyCount(name+len, nameCapacity-len, addValue);
   1840         }
   1841     }
   1842 
   1843     if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
   1844         uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
   1845     }
   1846 
   1847     return u_terminateChars(result, resultCapacity, len, err);
   1848 }
   1849 
   1850 /* ### ID parsing API **************************************************/
   1851 
   1852 U_CAPI int32_t  U_EXPORT2
   1853 uloc_getParent(const char*    localeID,
   1854                char* parent,
   1855                int32_t parentCapacity,
   1856                UErrorCode* err)
   1857 {
   1858     const char *lastUnderscore;
   1859     int32_t i;
   1860 
   1861     if (U_FAILURE(*err))
   1862         return 0;
   1863 
   1864     if (localeID == NULL)
   1865         localeID = uloc_getDefault();
   1866 
   1867     lastUnderscore=uprv_strrchr(localeID, '_');
   1868     if(lastUnderscore!=NULL) {
   1869         i=(int32_t)(lastUnderscore-localeID);
   1870     } else {
   1871         i=0;
   1872     }
   1873 
   1874     if(i>0 && parent != localeID) {
   1875         uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
   1876     }
   1877     return u_terminateChars(parent, parentCapacity, i, err);
   1878 }
   1879 
   1880 U_CAPI int32_t U_EXPORT2
   1881 uloc_getLanguage(const char*    localeID,
   1882          char* language,
   1883          int32_t languageCapacity,
   1884          UErrorCode* err)
   1885 {
   1886     /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
   1887     int32_t i=0;
   1888 
   1889     if (err==NULL || U_FAILURE(*err)) {
   1890         return 0;
   1891     }
   1892 
   1893     if(localeID==NULL) {
   1894         localeID=uloc_getDefault();
   1895     }
   1896 
   1897     i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
   1898     return u_terminateChars(language, languageCapacity, i, err);
   1899 }
   1900 
   1901 U_CAPI int32_t U_EXPORT2
   1902 uloc_getScript(const char*    localeID,
   1903          char* script,
   1904          int32_t scriptCapacity,
   1905          UErrorCode* err)
   1906 {
   1907     int32_t i=0;
   1908 
   1909     if(err==NULL || U_FAILURE(*err)) {
   1910         return 0;
   1911     }
   1912 
   1913     if(localeID==NULL) {
   1914         localeID=uloc_getDefault();
   1915     }
   1916 
   1917     /* skip the language */
   1918     ulocimp_getLanguage(localeID, NULL, 0, &localeID);
   1919     if(_isIDSeparator(*localeID)) {
   1920         i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
   1921     }
   1922     return u_terminateChars(script, scriptCapacity, i, err);
   1923 }
   1924 
   1925 U_CAPI int32_t  U_EXPORT2
   1926 uloc_getCountry(const char* localeID,
   1927             char* country,
   1928             int32_t countryCapacity,
   1929             UErrorCode* err)
   1930 {
   1931     int32_t i=0;
   1932 
   1933     if(err==NULL || U_FAILURE(*err)) {
   1934         return 0;
   1935     }
   1936 
   1937     if(localeID==NULL) {
   1938         localeID=uloc_getDefault();
   1939     }
   1940 
   1941     /* Skip the language */
   1942     ulocimp_getLanguage(localeID, NULL, 0, &localeID);
   1943     if(_isIDSeparator(*localeID)) {
   1944         const char *scriptID;
   1945         /* Skip the script if available */
   1946         ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
   1947         if(scriptID != localeID+1) {
   1948             /* Found optional script */
   1949             localeID = scriptID;
   1950         }
   1951         if(_isIDSeparator(*localeID)) {
   1952             i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
   1953         }
   1954     }
   1955     return u_terminateChars(country, countryCapacity, i, err);
   1956 }
   1957 
   1958 U_CAPI int32_t  U_EXPORT2
   1959 uloc_getVariant(const char* localeID,
   1960                 char* variant,
   1961                 int32_t variantCapacity,
   1962                 UErrorCode* err)
   1963 {
   1964     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1965     const char* tmpLocaleID;
   1966     int32_t i=0;
   1967 
   1968     if(err==NULL || U_FAILURE(*err)) {
   1969         return 0;
   1970     }
   1971 
   1972     if (_hasBCP47Extension(localeID)) {
   1973         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
   1974     } else {
   1975         if (localeID==NULL) {
   1976            localeID=uloc_getDefault();
   1977         }
   1978         tmpLocaleID=localeID;
   1979     }
   1980 
   1981     /* Skip the language */
   1982     ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
   1983     if(_isIDSeparator(*tmpLocaleID)) {
   1984         const char *scriptID;
   1985         /* Skip the script if available */
   1986         ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
   1987         if(scriptID != tmpLocaleID+1) {
   1988             /* Found optional script */
   1989             tmpLocaleID = scriptID;
   1990         }
   1991         /* Skip the Country */
   1992         if (_isIDSeparator(*tmpLocaleID)) {
   1993             const char *cntryID;
   1994             ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
   1995             if (cntryID != tmpLocaleID+1) {
   1996                 /* Found optional country */
   1997                 tmpLocaleID = cntryID;
   1998             }
   1999             if(_isIDSeparator(*tmpLocaleID)) {
   2000                 /* If there was no country ID, skip a possible extra IDSeparator */
   2001                 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
   2002                     tmpLocaleID++;
   2003                 }
   2004                 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
   2005             }
   2006         }
   2007     }
   2008 
   2009     /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
   2010     /* if we do not have a variant tag yet then try a POSIX variant after '@' */
   2011 /*
   2012     if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
   2013         i=_getVariant(localeID+1, '@', variant, variantCapacity);
   2014     }
   2015 */
   2016     return u_terminateChars(variant, variantCapacity, i, err);
   2017 }
   2018 
   2019 U_CAPI int32_t  U_EXPORT2
   2020 uloc_getName(const char* localeID,
   2021              char* name,
   2022              int32_t nameCapacity,
   2023              UErrorCode* err)
   2024 {
   2025     return _canonicalize(localeID, name, nameCapacity, 0, err);
   2026 }
   2027 
   2028 U_CAPI int32_t  U_EXPORT2
   2029 uloc_getBaseName(const char* localeID,
   2030                  char* name,
   2031                  int32_t nameCapacity,
   2032                  UErrorCode* err)
   2033 {
   2034     return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
   2035 }
   2036 
   2037 U_CAPI int32_t  U_EXPORT2
   2038 uloc_canonicalize(const char* localeID,
   2039                   char* name,
   2040                   int32_t nameCapacity,
   2041                   UErrorCode* err)
   2042 {
   2043     return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
   2044 }
   2045 
   2046 U_CAPI const char*  U_EXPORT2
   2047 uloc_getISO3Language(const char* localeID)
   2048 {
   2049     int16_t offset;
   2050     char lang[ULOC_LANG_CAPACITY];
   2051     UErrorCode err = U_ZERO_ERROR;
   2052 
   2053     if (localeID == NULL)
   2054     {
   2055         localeID = uloc_getDefault();
   2056     }
   2057     uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
   2058     if (U_FAILURE(err))
   2059         return "";
   2060     offset = _findIndex(LANGUAGES, lang);
   2061     if (offset < 0)
   2062         return "";
   2063     return LANGUAGES_3[offset];
   2064 }
   2065 
   2066 U_CAPI const char*  U_EXPORT2
   2067 uloc_getISO3Country(const char* localeID)
   2068 {
   2069     int16_t offset;
   2070     char cntry[ULOC_LANG_CAPACITY];
   2071     UErrorCode err = U_ZERO_ERROR;
   2072 
   2073     if (localeID == NULL)
   2074     {
   2075         localeID = uloc_getDefault();
   2076     }
   2077     uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
   2078     if (U_FAILURE(err))
   2079         return "";
   2080     offset = _findIndex(COUNTRIES, cntry);
   2081     if (offset < 0)
   2082         return "";
   2083 
   2084     return COUNTRIES_3[offset];
   2085 }
   2086 
   2087 U_CAPI uint32_t  U_EXPORT2
   2088 uloc_getLCID(const char* localeID)
   2089 {
   2090     UErrorCode status = U_ZERO_ERROR;
   2091     char       langID[ULOC_FULLNAME_CAPACITY];
   2092 
   2093     uloc_getLanguage(localeID, langID, sizeof(langID), &status);
   2094     if (U_FAILURE(status)) {
   2095         return 0;
   2096     }
   2097 
   2098     return uprv_convertToLCID(langID, localeID, &status);
   2099 }
   2100 
   2101 U_CAPI int32_t U_EXPORT2
   2102 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
   2103                 UErrorCode *status)
   2104 {
   2105     int32_t length;
   2106     const char *posix = uprv_convertToPosix(hostid, status);
   2107     if (U_FAILURE(*status) || posix == NULL) {
   2108         return 0;
   2109     }
   2110     length = (int32_t)uprv_strlen(posix);
   2111     if (length+1 > localeCapacity) {
   2112         *status = U_BUFFER_OVERFLOW_ERROR;
   2113     }
   2114     else {
   2115         uprv_strcpy(locale, posix);
   2116     }
   2117     return length;
   2118 }
   2119 
   2120 /* ### Default locale **************************************************/
   2121 
   2122 U_CAPI const char*  U_EXPORT2
   2123 uloc_getDefault()
   2124 {
   2125     return locale_get_default();
   2126 }
   2127 
   2128 U_CAPI void  U_EXPORT2
   2129 uloc_setDefault(const char*   newDefaultLocale,
   2130              UErrorCode* err)
   2131 {
   2132     if (U_FAILURE(*err))
   2133         return;
   2134     /* the error code isn't currently used for anything by this function*/
   2135 
   2136     /* propagate change to C++ */
   2137     locale_set_default(newDefaultLocale);
   2138 }
   2139 
   2140 /**
   2141  * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer
   2142  * to an array of pointers to arrays of char.  All of these pointers are owned
   2143  * by ICU-- do not delete them, and do not write through them.  The array is
   2144  * terminated with a null pointer.
   2145  */
   2146 U_CAPI const char* const*  U_EXPORT2
   2147 uloc_getISOLanguages()
   2148 {
   2149     return LANGUAGES;
   2150 }
   2151 
/**
 * Returns a list of all 2-letter country codes defined in ISO 3166.  This is a
 * pointer to an array of pointers to arrays of char.  All of these pointers are
 * owned by ICU-- do not delete them, and do not write through them.  The array is
 * terminated with a null pointer.
 */
U_CAPI const char* const*  U_EXPORT2
uloc_getISOCountries()
{
    return COUNTRIES;
}
   2163 
   2164 
   2165 /* this function to be moved into cstring.c later */
   2166 static char gDecimal = 0;
   2167 
   2168 static /* U_CAPI */
   2169 double
   2170 /* U_EXPORT2 */
   2171 _uloc_strtod(const char *start, char **end) {
   2172     char *decimal;
   2173     char *myEnd;
   2174     char buf[30];
   2175     double rv;
   2176     if (!gDecimal) {
   2177         char rep[5];
   2178         /* For machines that decide to change the decimal on you,
   2179         and try to be too smart with localization.
   2180         This normally should be just a '.'. */
   2181         sprintf(rep, "%+1.1f", 1.0);
   2182         gDecimal = rep[2];
   2183     }
   2184 
   2185     if(gDecimal == '.') {
   2186         return uprv_strtod(start, end); /* fall through to OS */
   2187     } else {
   2188         uprv_strncpy(buf, start, 29);
   2189         buf[29]=0;
   2190         decimal = uprv_strchr(buf, '.');
   2191         if(decimal) {
   2192             *decimal = gDecimal;
   2193         } else {
   2194             return uprv_strtod(start, end); /* no decimal point */
   2195         }
   2196         rv = uprv_strtod(buf, &myEnd);
   2197         if(end) {
   2198             *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
   2199         }
   2200         return rv;
   2201     }
   2202 }
   2203 
/*
 * One entry parsed out of an HTTP Accept-Language header by
 * uloc_acceptLanguageFromHTTP(); arrays of these are sorted with
 * uloc_acceptLanguageCompare().
 */
typedef struct {
    float q;        /* weight of the entry: the parsed ";q=" value, or 1.0f when none was given */
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort; always set to 0 */
    char *locale;   /* locale ID, allocated with uprv_strndup()/uprv_strdup() and released with uprv_free() */
} _acceptLangItem;
   2209 
   2210 static int32_t U_CALLCONV
   2211 uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
   2212 {
   2213     const _acceptLangItem *aa = (const _acceptLangItem*)a;
   2214     const _acceptLangItem *bb = (const _acceptLangItem*)b;
   2215 
   2216     int32_t rc = 0;
   2217     if(bb->q < aa->q) {
   2218         rc = -1;  /* A > B */
   2219     } else if(bb->q > aa->q) {
   2220         rc = 1;   /* A < B */
   2221     } else {
   2222         rc = 0;   /* A = B */
   2223     }
   2224 
   2225     if(rc==0) {
   2226         rc = uprv_stricmp(aa->locale, bb->locale);
   2227     }
   2228 
   2229 #if defined(ULOC_DEBUG)
   2230     /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
   2231     aa->locale, aa->q,
   2232     bb->locale, bb->q,
   2233     rc);*/
   2234 #endif
   2235 
   2236     return rc;
   2237 }
   2238 
   2239 /*
   2240 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
   2241 */
   2242 
   2243 U_CAPI int32_t U_EXPORT2
   2244 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
   2245                             const char *httpAcceptLanguage,
   2246                             UEnumeration* availableLocales,
   2247                             UErrorCode *status)
   2248 {
   2249     _acceptLangItem *j;
   2250     _acceptLangItem smallBuffer[30];
   2251     char **strs;
   2252     char tmp[ULOC_FULLNAME_CAPACITY +1];
   2253     int32_t n = 0;
   2254     const char *itemEnd;
   2255     const char *paramEnd;
   2256     const char *s;
   2257     const char *t;
   2258     int32_t res;
   2259     int32_t i;
   2260     int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
   2261     int32_t jSize;
   2262     char *tempstr; /* Use for null pointer check */
   2263 
   2264     j = smallBuffer;
   2265     jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
   2266     if(U_FAILURE(*status)) {
   2267         return -1;
   2268     }
   2269 
   2270     for(s=httpAcceptLanguage;s&&*s;) {
   2271         while(isspace(*s)) /* eat space at the beginning */
   2272             s++;
   2273         itemEnd=uprv_strchr(s,',');
   2274         paramEnd=uprv_strchr(s,';');
   2275         if(!itemEnd) {
   2276             itemEnd = httpAcceptLanguage+l; /* end of string */
   2277         }
   2278         if(paramEnd && paramEnd<itemEnd) {
   2279             /* semicolon (;) is closer than end (,) */
   2280             t = paramEnd+1;
   2281             if(*t=='q') {
   2282                 t++;
   2283             }
   2284             while(isspace(*t)) {
   2285                 t++;
   2286             }
   2287             if(*t=='=') {
   2288                 t++;
   2289             }
   2290             while(isspace(*t)) {
   2291                 t++;
   2292             }
   2293             j[n].q = (float)_uloc_strtod(t,NULL);
   2294         } else {
   2295             /* no semicolon - it's 1.0 */
   2296             j[n].q = 1.0f;
   2297             paramEnd = itemEnd;
   2298         }
   2299         j[n].dummy=0;
   2300         /* eat spaces prior to semi */
   2301         for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
   2302             ;
   2303         /* Check for null pointer from uprv_strndup */
   2304         tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
   2305         if (tempstr == NULL) {
   2306             *status = U_MEMORY_ALLOCATION_ERROR;
   2307             return -1;
   2308         }
   2309         j[n].locale = tempstr;
   2310         uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
   2311         if(strcmp(j[n].locale,tmp)) {
   2312             uprv_free(j[n].locale);
   2313             j[n].locale=uprv_strdup(tmp);
   2314         }
   2315 #if defined(ULOC_DEBUG)
   2316         /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
   2317 #endif
   2318         n++;
   2319         s = itemEnd;
   2320         while(*s==',') { /* eat duplicate commas */
   2321             s++;
   2322         }
   2323         if(n>=jSize) {
   2324             if(j==smallBuffer) {  /* overflowed the small buffer. */
   2325                 j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
   2326                 if(j!=NULL) {
   2327                     uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
   2328                 }
   2329 #if defined(ULOC_DEBUG)
   2330                 fprintf(stderr,"malloced at size %d\n", jSize);
   2331 #endif
   2332             } else {
   2333                 j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
   2334 #if defined(ULOC_DEBUG)
   2335                 fprintf(stderr,"re-alloced at size %d\n", jSize);
   2336 #endif
   2337             }
   2338             jSize *= 2;
   2339             if(j==NULL) {
   2340                 *status = U_MEMORY_ALLOCATION_ERROR;
   2341                 return -1;
   2342             }
   2343         }
   2344     }
   2345     uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
   2346     if(U_FAILURE(*status)) {
   2347         if(j != smallBuffer) {
   2348 #if defined(ULOC_DEBUG)
   2349             fprintf(stderr,"freeing j %p\n", j);
   2350 #endif
   2351             uprv_free(j);
   2352         }
   2353         return -1;
   2354     }
   2355     strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
   2356     /* Check for null pointer */
   2357     if (strs == NULL) {
   2358         uprv_free(j); /* Free to avoid memory leak */
   2359         *status = U_MEMORY_ALLOCATION_ERROR;
   2360         return -1;
   2361     }
   2362     for(i=0;i<n;i++) {
   2363 #if defined(ULOC_DEBUG)
   2364         /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
   2365 #endif
   2366         strs[i]=j[i].locale;
   2367     }
   2368     res =  uloc_acceptLanguage(result, resultAvailable, outResult,
   2369         (const char**)strs, n, availableLocales, status);
   2370     for(i=0;i<n;i++) {
   2371         uprv_free(strs[i]);
   2372     }
   2373     uprv_free(strs);
   2374     if(j != smallBuffer) {
   2375 #if defined(ULOC_DEBUG)
   2376         fprintf(stderr,"freeing j %p\n", j);
   2377 #endif
   2378         uprv_free(j);
   2379     }
   2380     return res;
   2381 }
   2382 
   2383 
   2384 U_CAPI int32_t U_EXPORT2
   2385 uloc_acceptLanguage(char *result, int32_t resultAvailable,
   2386                     UAcceptResult *outResult, const char **acceptList,
   2387                     int32_t acceptListCount,
   2388                     UEnumeration* availableLocales,
   2389                     UErrorCode *status)
   2390 {
   2391     int32_t i,j;
   2392     int32_t len;
   2393     int32_t maxLen=0;
   2394     char tmp[ULOC_FULLNAME_CAPACITY+1];
   2395     const char *l;
   2396     char **fallbackList;
   2397     if(U_FAILURE(*status)) {
   2398         return -1;
   2399     }
   2400     fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
   2401     if(fallbackList==NULL) {
   2402         *status = U_MEMORY_ALLOCATION_ERROR;
   2403         return -1;
   2404     }
   2405     for(i=0;i<acceptListCount;i++) {
   2406 #if defined(ULOC_DEBUG)
   2407         fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
   2408 #endif
   2409         while((l=uenum_next(availableLocales, NULL, status))) {
   2410 #if defined(ULOC_DEBUG)
   2411             fprintf(stderr,"  %s\n", l);
   2412 #endif
   2413             len = (int32_t)uprv_strlen(l);
   2414             if(!uprv_strcmp(acceptList[i], l)) {
   2415                 if(outResult) {
   2416                     *outResult = ULOC_ACCEPT_VALID;
   2417                 }
   2418 #if defined(ULOC_DEBUG)
   2419                 fprintf(stderr, "MATCH! %s\n", l);
   2420 #endif
   2421                 if(len>0) {
   2422                     uprv_strncpy(result, l, uprv_min(len, resultAvailable));
   2423                 }
   2424                 for(j=0;j<i;j++) {
   2425                     uprv_free(fallbackList[j]);
   2426                 }
   2427                 uprv_free(fallbackList);
   2428                 return u_terminateChars(result, resultAvailable, len, status);
   2429             }
   2430             if(len>maxLen) {
   2431                 maxLen = len;
   2432             }
   2433         }
   2434         uenum_reset(availableLocales, status);
   2435         /* save off parent info */
   2436         if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
   2437             fallbackList[i] = uprv_strdup(tmp);
   2438         } else {
   2439             fallbackList[i]=0;
   2440         }
   2441     }
   2442 
   2443     for(maxLen--;maxLen>0;maxLen--) {
   2444         for(i=0;i<acceptListCount;i++) {
   2445             if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
   2446 #if defined(ULOC_DEBUG)
   2447                 fprintf(stderr,"Try: [%s]", fallbackList[i]);
   2448 #endif
   2449                 while((l=uenum_next(availableLocales, NULL, status))) {
   2450 #if defined(ULOC_DEBUG)
   2451                     fprintf(stderr,"  %s\n", l);
   2452 #endif
   2453                     len = (int32_t)uprv_strlen(l);
   2454                     if(!uprv_strcmp(fallbackList[i], l)) {
   2455                         if(outResult) {
   2456                             *outResult = ULOC_ACCEPT_FALLBACK;
   2457                         }
   2458 #if defined(ULOC_DEBUG)
   2459                         fprintf(stderr, "fallback MATCH! %s\n", l);
   2460 #endif
   2461                         if(len>0) {
   2462                             uprv_strncpy(result, l, uprv_min(len, resultAvailable));
   2463                         }
   2464                         for(j=0;j<acceptListCount;j++) {
   2465                             uprv_free(fallbackList[j]);
   2466                         }
   2467                         uprv_free(fallbackList);
   2468                         return u_terminateChars(result, resultAvailable, len, status);
   2469                     }
   2470                 }
   2471                 uenum_reset(availableLocales, status);
   2472 
   2473                 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
   2474                     uprv_free(fallbackList[i]);
   2475                     fallbackList[i] = uprv_strdup(tmp);
   2476                 } else {
   2477                     uprv_free(fallbackList[i]);
   2478                     fallbackList[i]=0;
   2479                 }
   2480             }
   2481         }
   2482         if(outResult) {
   2483             *outResult = ULOC_ACCEPT_FAILED;
   2484         }
   2485     }
   2486     for(i=0;i<acceptListCount;i++) {
   2487         uprv_free(fallbackList[i]);
   2488     }
   2489     uprv_free(fallbackList);
   2490     return -1;
   2491 }
   2492 
   2493 /*eof*/
   2494