Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 1997-2013, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *
      7 * File ULOC.CPP
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   04/01/97    aliu        Creation.
     13 *   08/21/98    stephen     JDK 1.2 sync
     14 *   12/08/98    rtg         New Locale implementation and C API
     15 *   03/15/99    damiba      overhaul.
     16 *   04/06/99    stephen     changed setDefault() to realloc and copy
     17 *   06/14/99    stephen     Changed calls to ures_open for new params
     18 *   07/21/99    stephen     Modified setDefault() to propagate to C++
     19 *   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
     20 *                           brought canonicalization code into line with spec
     21 *****************************************************************************/
     22 
     23 /*
     24    POSIX's locale format, from putil.c: [no spaces]
     25 
     26      ll [ _CC ] [ . MM ] [ @ VV]
     27 
     28      l = lang, C = ctry, M = charmap, V = variant
     29 */
     30 
     31 #include "unicode/utypes.h"
     32 #include "unicode/ustring.h"
     33 #include "unicode/uloc.h"
     34 
     35 #include "putilimp.h"
     36 #include "ustr_imp.h"
     37 #include "ulocimp.h"
     38 #include "umutex.h"
     39 #include "cstring.h"
     40 #include "cmemory.h"
     41 #include "ucln_cmn.h"
     42 #include "locmap.h"
     43 #include "uarrsort.h"
     44 #include "uenumimp.h"
     45 #include "uassert.h"
     46 
     47 #include <stdio.h> /* for sprintf */
     48 
     49 /* ### Declarations **************************************************/
     50 
     51 /* Locale stuff from locid.cpp */
     52 U_CFUNC void locale_set_default(const char *id);
     53 U_CFUNC const char *locale_get_default(void);
     54 U_CFUNC int32_t
     55 locale_getKeywords(const char *localeID,
     56             char prev,
     57             char *keywords, int32_t keywordCapacity,
     58             char *values, int32_t valuesCapacity, int32_t *valLen,
     59             UBool valuesToo,
     60             UErrorCode *status);
     61 
     62 /* ### Data tables **************************************************/
     63 
     64 /**
     65  * Table of language codes, both 2- and 3-letter, with preference
     66  * given to 2-letter codes where possible.  Includes 3-letter codes
     67  * that lack a 2-letter equivalent.
     68  *
     69  * This list must be in sorted order.  This list is returned directly
     70  * to the user by some API.
     71  *
     72  * This list must be kept in sync with LANGUAGES_3, with corresponding
     73  * entries matched.
     74  *
     75  * This table should be terminated with a NULL entry, followed by a
     76  * second list, and another NULL entry.  The first list is visible to
     77  * user code when this array is returned by API.  The second list
     78  * contains codes we support, but do not expose through user API.
     79  *
     80  * Notes
     81  *
     82  * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
     83  * include the revisions up to 2001/7/27 *CWB*
     84  *
     85  * The 3 character codes are the terminology codes like RFC 3066.  This
     86  * is compatible with prior ICU codes
     87  *
     88  * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
     89  * table but now at the end of the table because 3 character codes are
     90  * duplicates.  This avoids bad searches going from 3 to 2 character
     91  * codes.
     92  *
     93  * The range qaa-qtz is reserved for local use
     94  */
     95 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
     96 /* ISO639 table version is 20130531 */
     97 static const char * const LANGUAGES[] = {
     98     "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",
     99     "afa", "afh", "agq", "ain", "ak",  "akk", "ale", "alg",
    100     "alt", "am",  "an",  "ang", "anp", "apa", "ar",  "arc",
    101     "arn", "arp", "art", "arw", "as",  "asa", "ast", "ath",
    102     "aus", "av",  "awa", "ay",  "az",
    103     "ba",  "bad", "bai", "bal", "ban", "bas", "bat", "bax",
    104     "bbj", "be",  "bej", "bem", "ber", "bez", "bfd", "bg",
    105     "bh",  "bho", "bi",  "bik", "bin", "bkm", "bla", "bm",
    106     "bn",  "bnt", "bo",  "br",  "bra", "brx", "bs",  "bss",
    107     "btk", "bua", "bug", "bum", "byn", "byv",
    108     "ca",  "cad", "cai", "car", "cau", "cay", "cch", "ce",
    109     "ceb", "cel", "cgg", "ch",  "chb", "chg", "chk", "chm",
    110     "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
    111     "cop", "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",
    112     "csb", "cu",  "cus", "cv",  "cy",
    113     "da",  "dak", "dar", "dav", "day", "de",  "del", "den",
    114     "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
    115     "dv",  "dyo", "dyu", "dz",  "dzg",
    116     "ebu", "ee",  "efi", "egy", "eka", "el",  "elx", "en",
    117     "enm", "eo",  "es",  "et",  "eu",  "ewo",
    118     "fa",  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",
    119     "fo",  "fon", "fr",  "frm", "fro", "frr", "frs", "fur",
    120     "fy",
    121     "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
    122     "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
    123     "grc", "gsw", "gu",  "guz", "gv",  "gwi",
    124     "ha",  "hai", "haw", "he",  "hi",  "hil", "him", "hit",
    125     "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",
    126     "hz",
    127     "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ijo",
    128     "ik",  "ilo", "inc", "ine", "inh", "io",  "ira", "iro",
    129     "is",  "it",  "iu",
    130     "ja",  "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
    131     "ka",  "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
    132     "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg",  "kha",
    133     "khi", "kho", "khq", "ki",  "kj",  "kk",  "kkj", "kl",
    134     "kln", "km",  "kmb", "kn",  "ko",  "kok", "kos", "kpe",
    135     "kr",  "krc", "krl", "kro", "kru", "ks",  "ksb", "ksf",
    136     "ksh", "ku",  "kum", "kut", "kv",  "kw",  "ky",
    137     "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lg",
    138     "li",  "lkt", "ln",  "lo",  "lol", "loz", "lt",  "lu",
    139     "lua", "lui", "lun", "luo", "lus", "luy", "lv",
    140     "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
    141     "mde", "mdf", "mdr", "men", "mer", "mfe", "mg",  "mga",
    142     "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    143     "mkh", "ml",  "mn",  "mnc", "mni", "mno", "mo",  "moh",
    144     "mos", "mr",  "ms",  "mt",  "mua", "mul", "mun", "mus",
    145     "mwl", "mwr", "my",  "mye", "myn", "myv",
    146     "na",  "nah", "nai", "nap", "naq", "nb",  "nd",  "nds",
    147     "ne",  "new", "ng",  "nia", "nic", "niu", "nl",  "nmg",
    148     "nn",  "nnh", "no",  "nog", "non", "nqo", "nr",  "nso",
    149     "nub", "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo",
    150     "nzi",
    151     "oc",  "oj",  "om",  "or",  "os",  "osa", "ota", "oto",
    152     "pa",  "paa", "pag", "pal", "pam", "pap", "pau", "peo",
    153     "phi", "phn", "pi",  "pl",  "pon", "pra", "pro", "ps",
    154     "pt",
    155     "qu",
    156     "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rof",
    157     "rom", "ru",  "rup", "rw",  "rwk",
    158     "sa",  "sad", "sah", "sai", "sal", "sam", "saq", "sas",
    159     "sat", "sba", "sbp", "sc",  "scn", "sco", "sd",  "se",
    160     "see", "seh", "sel", "sem", "ses", "sg",  "sga", "sgn",
    161     "shi", "shn", "shu", "si",  "sid", "sio", "sit",
    162     "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
    163     "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
    164     "srn", "srr", "ss",  "ssa", "ssy", "st",  "su",  "suk",
    165     "sus", "sux", "sv",  "sw",  "swb", "swc", "syc", "syr",
    166     "ta",  "tai", "te",  "tem", "teo", "ter", "tet", "tg",
    167     "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tl",  "tlh",
    168     "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",  "trv",
    169     "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
    170     "twq", "ty",  "tyv", "tzm",
    171     "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    172     "vai", "ve",  "vi",  "vo",  "vot", "vun",
    173     "wa",  "wae", "wak", "wal", "war", "was", "wen", "wo",
    174     "xal", "xh",  "xog",
    175     "yao", "yap", "yav", "ybb", "yi",  "yo",  "ypk", "yue",
    176     "za",  "zap", "zbl", "zen", "zgh", "zh",  "znd", "zu",
    177     "zun", "zxx", "zza",
    178 NULL,
    179     "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
    180 NULL
    181 };
    182 
    183 static const char* const DEPRECATED_LANGUAGES[]={
    184     "in", "iw", "ji", "jw", NULL, NULL
    185 };
    186 static const char* const REPLACEMENT_LANGUAGES[]={
    187     "id", "he", "yi", "jv", NULL, NULL
    188 };
    189 
    190 /**
    191  * Table of 3-letter language codes.
    192  *
    193  * This is a lookup table used to convert 3-letter language codes to
    194  * their 2-letter equivalent, where possible.  It must be kept in sync
    195  * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
    196  * same language as LANGUAGES_3[i].  The commented-out lines are
    197  * copied from LANGUAGES to make eyeballing this baby easier.
    198  *
    199  * Where a 3-letter language code has no 2-letter equivalent, the
    200  * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
    201  *
    202  * This table should be terminated with a NULL entry, followed by a
    203  * second list, and another NULL entry.  The two lists correspond to
    204  * the two lists in LANGUAGES.
    205  */
    206 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
    207 /* ISO639 table version is 20130531 */
    208 static const char * const LANGUAGES_3[] = {
    209     "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
    210     "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
    211     "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
    212     "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
    213     "aus", "ava", "awa", "aym", "aze",
    214     "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
    215     "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
    216     "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
    217     "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
    218     "btk", "bua", "bug", "bum", "byn", "byv",
    219     "cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
    220     "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
    221     "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
    222     "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
    223     "csb", "chu", "cus", "chv", "cym",
    224     "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
    225     "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
    226     "div", "dyo", "dyu", "dzo", "dzg",
    227     "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
    228     "enm", "epo", "spa", "est", "eus", "ewo",
    229     "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
    230     "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
    231     "fry",
    232     "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
    233     "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
    234     "grc", "gsw", "guj", "guz", "glv", "gwi",
    235     "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
    236     "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
    237     "her",
    238     "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
    239     "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
    240     "isl", "ita", "iku",
    241     "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
    242     "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
    243     "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
    244     "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
    245     "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
    246     "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
    247     "ksh", "kur", "kum", "kut", "kom", "cor", "kir",
    248     "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
    249     "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
    250     "lua", "lui", "lun", "luo", "lus", "luy", "lav",
    251     "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
    252     "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
    253     "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    254     "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
    255     "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
    256     "mwl", "mwr", "mya", "mye", "myn", "myv",
    257     "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
    258     "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
    259     "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
    260     "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
    261     "nzi",
    262     "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
    263     "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
    264     "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
    265     "por",
    266     "que",
    267     "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
    268     "rom", "rus", "rup", "kin", "rwk",
    269     "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
    270     "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
    271     "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
    272     "shi", "shn", "shu", "sin", "sid", "sio", "sit",
    273     "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
    274     "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
    275     "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
    276     "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
    277     "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
    278     "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
    279     "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
    280     "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
    281     "twq", "tah", "tyv", "tzm",
    282     "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    283     "vai", "ven", "vie", "vol", "vot", "vun",
    284     "wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
    285     "xal", "xho", "xog",
    286     "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
    287     "zha", "zap", "zbl", "zen", "zgh", "zho", "znd", "zul",
    288     "zun", "zxx", "zza",
    289 NULL,
    290 /*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    291     "ind", "heb", "yid", "jaw", "srp",
    292 NULL
    293 };
    294 
    295 /**
    296  * Table of 2-letter country codes.
    297  *
    298  * This list must be in sorted order.  This list is returned directly
    299  * to the user by some API.
    300  *
    301  * This list must be kept in sync with COUNTRIES_3, with corresponding
    302  * entries matched.
    303  *
    304  * This table should be terminated with a NULL entry, followed by a
    305  * second list, and another NULL entry.  The first list is visible to
    306  * user code when this array is returned by API.  The second list
    307  * contains codes we support, but do not expose through user API.
    308  *
    309  * Notes:
    310  *
    311  * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
    312  * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
    313  * new codes keeping the old ones for compatibility updated to include
    314  * 1999/12/03 revisions *CWB*
    315  *
    316  * RO(ROM) is now RO(ROU) according to
    317  * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
    318  */
    319 static const char * const COUNTRIES[] = {
    320     "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    321     "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    322     "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    323     "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    324     "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    325     "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    326     "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    327     "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    328     "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    329     "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    330     "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    331     "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    332     "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    333     "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    334     "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    335     "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    336     "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    337     "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    338     "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    339     "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    340     "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    341     "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    342     "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    343     "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    344     "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    345     "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    346     "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    347     "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    348     "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    349     "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
    350 NULL,
    351     "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
    352 NULL
    353 };
    354 
    355 static const char* const DEPRECATED_COUNTRIES[] = {
    356     "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
    357 };
    358 static const char* const REPLACEMENT_COUNTRIES[] = {
    359 /*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    360     "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
    361 };
    362 
    363 /**
    364  * Table of 3-letter country codes.
    365  *
    366  * This is a lookup table used to convert 3-letter country codes to
    367  * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
    368  * For all valid i, COUNTRIES[i] must refer to the same country as
    369  * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
    370  * to make eyeballing this baby easier.
    371  *
    372  * This table should be terminated with a NULL entry, followed by a
    373  * second list, and another NULL entry.  The two lists correspond to
    374  * the two lists in COUNTRIES.
    375  */
    376 static const char * const COUNTRIES_3[] = {
    377 /*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    378     "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
    379 /*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    380     "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
    381 /*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    382     "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
    383 /*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
    384     "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
    385 /*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    386     "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
    387 /*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    388     "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
    389 /*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    390     "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
    391 /*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    392     "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
    393 /*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    394     "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
    395 /*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    396     "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
    397 /*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    398     "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
    399 /*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    400     "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
    401 /*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    402     "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
    403 /*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    404     "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
    405 /*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    406     "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
    407 /*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    408     "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
    409 /*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    410     "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
    411 /*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    412     "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
    413 /*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    414     "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
    415 /*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    416     "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
    417 /*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    418     "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
    419 /*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    420     "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
    421 /*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    422     "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
    423 /*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    424     "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
    425 /*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
    426     "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
    427 /*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    428     "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
    429 /*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    430     "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
    431 /*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    432     "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
    433 /*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    434     "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
    435 /*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    436     "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
    437 NULL,
    438 /*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
    439     "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
    440 NULL
    441 };
    442 
    443 typedef struct CanonicalizationMap {
    444     const char *id;          /* input ID */
    445     const char *canonicalID; /* canonicalized output ID */
    446     const char *keyword;     /* keyword, or NULL if none */
    447     const char *value;       /* keyword value, or NULL if kw==NULL */
    448 } CanonicalizationMap;
    449 
    450 /**
    451  * A map to canonicalize locale IDs.  This handles a variety of
    452  * different semantic kinds of transformations.
    453  */
    454 static const CanonicalizationMap CANONICALIZE_MAP[] = {
    455     { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
    456     { "c",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
    457     { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
    458     { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
    459     { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
    460     { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
    461     { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
    462     { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
    463     { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
    464     { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
    465     { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
    466     { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
    467     { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
    468     { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
    469     { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
    470     { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
    471     { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
    472     { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
    473     { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
    474     { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
    475     { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
    476     { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
    477     { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
    478     { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
    479     { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
    480     { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
    481     { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
    482     { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
    483     { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
    484     { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
    485     { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
    486     { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
    487     { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
    488     { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
    489     { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
    490     { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
    491     { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
    492     { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
    493     { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
    494     { "zh_GAN",         "gan", NULL, NULL }, /* registered name */
    495     { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
    496     { "zh_HAKKA",       "hak", NULL, NULL }, /* registered name */
    497     { "zh_MIN_NAN",     "nan", NULL, NULL }, /* registered name */
    498     { "zh_WUU",         "wuu", NULL, NULL }, /* registered name */
    499     { "zh_XIANG",       "hsn", NULL, NULL }, /* registered name */
    500     { "zh_YUE",         "yue", NULL, NULL }, /* registered name */
    501 };
    502 
    503 typedef struct VariantMap {
    504     const char *variant;          /* input ID */
    505     const char *keyword;     /* keyword, or NULL if none */
    506     const char *value;       /* keyword value, or NULL if kw==NULL */
    507 } VariantMap;
    508 
    509 static const VariantMap VARIANT_MAP[] = {
    510     { "EURO",   "currency", "EUR" },
    511     { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    512     { "STROKE", "collation", "stroke" }  /* Solaris variant */
    513 };
    514 
    515 /* ### BCP47 Conversion *******************************************/
    516 /* Test if the locale id has BCP47 u extension and does not have '@' */
    517 #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
    518 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
    519 #define _ConvertBCP47(finalID, id, buffer, length,err) \
    520         if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
    521             finalID=id; \
    522         } else { \
    523             finalID=buffer; \
    524         }
    525 /* Gets the size of the shortest subtag in the given localeID. */
    526 static int32_t getShortestSubtagLength(const char *localeID) {
    527     int32_t localeIDLength = uprv_strlen(localeID);
    528     int32_t length = localeIDLength;
    529     int32_t tmpLength = 0;
    530     int32_t i;
    531     UBool reset = TRUE;
    532 
    533     for (i = 0; i < localeIDLength; i++) {
    534         if (localeID[i] != '_' && localeID[i] != '-') {
    535             if (reset) {
    536                 tmpLength = 0;
    537                 reset = FALSE;
    538             }
    539             tmpLength++;
    540         } else {
    541             if (tmpLength != 0 && tmpLength < length) {
    542                 length = tmpLength;
    543             }
    544             reset = TRUE;
    545         }
    546     }
    547 
    548     return length;
    549 }
    550 
    551 /* ### Keywords **************************************************/
    552 
    553 #define ULOC_KEYWORD_BUFFER_LEN 25
    554 #define ULOC_MAX_NO_KEYWORDS 25
    555 
    556 U_CAPI const char * U_EXPORT2
    557 locale_getKeywordsStart(const char *localeID) {
    558     const char *result = NULL;
    559     if((result = uprv_strchr(localeID, '@')) != NULL) {
    560         return result;
    561     }
    562 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    563     else {
    564         /* We do this because the @ sign is variant, and the @ sign used on one
    565         EBCDIC machine won't be compiled the same way on other EBCDIC based
    566         machines. */
    567         static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
    568         const uint8_t *charToFind = ebcdicSigns;
    569         while(*charToFind) {
    570             if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
    571                 return result;
    572             }
    573             charToFind++;
    574         }
    575     }
    576 #endif
    577     return NULL;
    578 }
    579 
    580 /**
    581  * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
    582  * @param keywordName incoming name to be canonicalized
    583  * @param status return status (keyword too long)
    584  * @return length of the keyword name
    585  */
    586 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
    587 {
    588   int32_t i;
    589   int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
    590 
    591   if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
    592     /* keyword name too long for internal buffer */
    593     *status = U_INTERNAL_PROGRAM_ERROR;
    594           return 0;
    595   }
    596 
    597   /* normalize the keyword name */
    598   for(i = 0; i < keywordNameLen; i++) {
    599     buf[i] = uprv_tolower(keywordName[i]);
    600   }
    601   buf[i] = 0;
    602 
    603   return keywordNameLen;
    604 }
    605 
    606 typedef struct {
    607     char keyword[ULOC_KEYWORD_BUFFER_LEN];
    608     int32_t keywordLen;
    609     const char *valueStart;
    610     int32_t valueLen;
    611 } KeywordStruct;
    612 
    613 static int32_t U_CALLCONV
    614 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
    615     const char* leftString = ((const KeywordStruct *)left)->keyword;
    616     const char* rightString = ((const KeywordStruct *)right)->keyword;
    617     return uprv_strcmp(leftString, rightString);
    618 }
    619 
    620 /**
    621  * Both addKeyword and addValue must already be in canonical form.
    622  * Either both addKeyword and addValue are NULL, or neither is NULL.
    623  * If they are not NULL they must be zero terminated.
    624  * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
    625  */
    626 static int32_t
    627 _getKeywords(const char *localeID,
    628              char prev,
    629              char *keywords, int32_t keywordCapacity,
    630              char *values, int32_t valuesCapacity, int32_t *valLen,
    631              UBool valuesToo,
    632              const char* addKeyword,
    633              const char* addValue,
    634              UErrorCode *status)
    635 {
    636     KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
    637 
    638     int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
    639     int32_t numKeywords = 0;
    640     const char* pos = localeID;
    641     const char* equalSign = NULL;
    642     const char* semicolon = NULL;
    643     int32_t i = 0, j, n;
    644     int32_t keywordsLen = 0;
    645     int32_t valuesLen = 0;
    646 
    647     if(prev == '@') { /* start of keyword definition */
    648         /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
    649         do {
    650             UBool duplicate = FALSE;
    651             /* skip leading spaces */
    652             while(*pos == ' ') {
    653                 pos++;
    654             }
    655             if (!*pos) { /* handle trailing "; " */
    656                 break;
    657             }
    658             if(numKeywords == maxKeywords) {
    659                 *status = U_INTERNAL_PROGRAM_ERROR;
    660                 return 0;
    661             }
    662             equalSign = uprv_strchr(pos, '=');
    663             semicolon = uprv_strchr(pos, ';');
    664             /* lack of '=' [foo@currency] is illegal */
    665             /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
    666             if(!equalSign || (semicolon && semicolon<equalSign)) {
    667                 *status = U_INVALID_FORMAT_ERROR;
    668                 return 0;
    669             }
    670             /* need to normalize both keyword and keyword name */
    671             if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
    672                 /* keyword name too long for internal buffer */
    673                 *status = U_INTERNAL_PROGRAM_ERROR;
    674                 return 0;
    675             }
    676             for(i = 0, n = 0; i < equalSign - pos; ++i) {
    677                 if (pos[i] != ' ') {
    678                     keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
    679                 }
    680             }
    681 
    682             /* zero-length keyword is an error. */
    683             if (n == 0) {
    684                 *status = U_INVALID_FORMAT_ERROR;
    685                 return 0;
    686             }
    687 
    688             keywordList[numKeywords].keyword[n] = 0;
    689             keywordList[numKeywords].keywordLen = n;
    690             /* now grab the value part. First we skip the '=' */
    691             equalSign++;
    692             /* then we leading spaces */
    693             while(*equalSign == ' ') {
    694                 equalSign++;
    695             }
    696 
    697             /* Premature end or zero-length value */
    698             if (!equalSign || equalSign == semicolon) {
    699                 *status = U_INVALID_FORMAT_ERROR;
    700                 return 0;
    701             }
    702 
    703             keywordList[numKeywords].valueStart = equalSign;
    704 
    705             pos = semicolon;
    706             i = 0;
    707             if(pos) {
    708                 while(*(pos - i - 1) == ' ') {
    709                     i++;
    710                 }
    711                 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
    712                 pos++;
    713             } else {
    714                 i = (int32_t)uprv_strlen(equalSign);
    715                 while(i && equalSign[i-1] == ' ') {
    716                     i--;
    717                 }
    718                 keywordList[numKeywords].valueLen = i;
    719             }
    720             /* If this is a duplicate keyword, then ignore it */
    721             for (j=0; j<numKeywords; ++j) {
    722                 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
    723                     duplicate = TRUE;
    724                     break;
    725                 }
    726             }
    727             if (!duplicate) {
    728                 ++numKeywords;
    729             }
    730         } while(pos);
    731 
    732         /* Handle addKeyword/addValue. */
    733         if (addKeyword != NULL) {
    734             UBool duplicate = FALSE;
    735             U_ASSERT(addValue != NULL);
    736             /* Search for duplicate; if found, do nothing. Explicit keyword
    737                overrides addKeyword. */
    738             for (j=0; j<numKeywords; ++j) {
    739                 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
    740                     duplicate = TRUE;
    741                     break;
    742                 }
    743             }
    744             if (!duplicate) {
    745                 if (numKeywords == maxKeywords) {
    746                     *status = U_INTERNAL_PROGRAM_ERROR;
    747                     return 0;
    748                 }
    749                 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
    750                 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
    751                 keywordList[numKeywords].valueStart = addValue;
    752                 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
    753                 ++numKeywords;
    754             }
    755         } else {
    756             U_ASSERT(addValue == NULL);
    757         }
    758 
    759         /* now we have a list of keywords */
    760         /* we need to sort it */
    761         uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
    762 
    763         /* Now construct the keyword part */
    764         for(i = 0; i < numKeywords; i++) {
    765             if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
    766                 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
    767                 if(valuesToo) {
    768                     keywords[keywordsLen + keywordList[i].keywordLen] = '=';
    769                 } else {
    770                     keywords[keywordsLen + keywordList[i].keywordLen] = 0;
    771                 }
    772             }
    773             keywordsLen += keywordList[i].keywordLen + 1;
    774             if(valuesToo) {
    775                 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
    776                     uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
    777                 }
    778                 keywordsLen += keywordList[i].valueLen;
    779 
    780                 if(i < numKeywords - 1) {
    781                     if(keywordsLen < keywordCapacity) {
    782                         keywords[keywordsLen] = ';';
    783                     }
    784                     keywordsLen++;
    785                 }
    786             }
    787             if(values) {
    788                 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
    789                     uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
    790                     values[valuesLen + keywordList[i].valueLen] = 0;
    791                 }
    792                 valuesLen += keywordList[i].valueLen + 1;
    793             }
    794         }
    795         if(values) {
    796             values[valuesLen] = 0;
    797             if(valLen) {
    798                 *valLen = valuesLen;
    799             }
    800         }
    801         return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
    802     } else {
    803         return 0;
    804     }
    805 }
    806 
    807 U_CFUNC int32_t
    808 locale_getKeywords(const char *localeID,
    809                    char prev,
    810                    char *keywords, int32_t keywordCapacity,
    811                    char *values, int32_t valuesCapacity, int32_t *valLen,
    812                    UBool valuesToo,
    813                    UErrorCode *status) {
    814     return _getKeywords(localeID, prev, keywords, keywordCapacity,
    815                         values, valuesCapacity, valLen, valuesToo,
    816                         NULL, NULL, status);
    817 }
    818 
    819 U_CAPI int32_t U_EXPORT2
    820 uloc_getKeywordValue(const char* localeID,
    821                      const char* keywordName,
    822                      char* buffer, int32_t bufferCapacity,
    823                      UErrorCode* status)
    824 {
    825     const char* startSearchHere = NULL;
    826     const char* nextSeparator = NULL;
    827     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    828     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    829     int32_t i = 0;
    830     int32_t result = 0;
    831 
    832     if(status && U_SUCCESS(*status) && localeID) {
    833       char tempBuffer[ULOC_FULLNAME_CAPACITY];
    834       const char* tmpLocaleID;
    835 
    836       if (_hasBCP47Extension(localeID)) {
    837           _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
    838       } else {
    839           tmpLocaleID=localeID;
    840       }
    841 
    842       startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
    843       if(startSearchHere == NULL) {
    844           /* no keywords, return at once */
    845           return 0;
    846       }
    847 
    848       locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    849       if(U_FAILURE(*status)) {
    850         return 0;
    851       }
    852 
    853       /* find the first keyword */
    854       while(startSearchHere) {
    855           startSearchHere++;
    856           /* skip leading spaces (allowed?) */
    857           while(*startSearchHere == ' ') {
    858               startSearchHere++;
    859           }
    860           nextSeparator = uprv_strchr(startSearchHere, '=');
    861           /* need to normalize both keyword and keyword name */
    862           if(!nextSeparator) {
    863               break;
    864           }
    865           if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
    866               /* keyword name too long for internal buffer */
    867               *status = U_INTERNAL_PROGRAM_ERROR;
    868               return 0;
    869           }
    870           for(i = 0; i < nextSeparator - startSearchHere; i++) {
    871               localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
    872           }
    873           /* trim trailing spaces */
    874           while(startSearchHere[i-1] == ' ') {
    875               i--;
    876               U_ASSERT(i>=0);
    877           }
    878           localeKeywordNameBuffer[i] = 0;
    879 
    880           startSearchHere = uprv_strchr(nextSeparator, ';');
    881 
    882           if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
    883               nextSeparator++;
    884               while(*nextSeparator == ' ') {
    885                   nextSeparator++;
    886               }
    887               /* we actually found the keyword. Copy the value */
    888               if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
    889                   while(*(startSearchHere-1) == ' ') {
    890                       startSearchHere--;
    891                   }
    892                   uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
    893                   result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
    894               } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
    895                   i = (int32_t)uprv_strlen(nextSeparator);
    896                   while(nextSeparator[i - 1] == ' ') {
    897                       i--;
    898                   }
    899                   uprv_strncpy(buffer, nextSeparator, i);
    900                   result = u_terminateChars(buffer, bufferCapacity, i, status);
    901               } else {
    902                   /* give a bigger buffer, please */
    903                   *status = U_BUFFER_OVERFLOW_ERROR;
    904                   if(startSearchHere) {
    905                       result = (int32_t)(startSearchHere - nextSeparator);
    906                   } else {
    907                       result = (int32_t)uprv_strlen(nextSeparator);
    908                   }
    909               }
    910               return result;
    911           }
    912       }
    913     }
    914     return 0;
    915 }
    916 
    917 U_CAPI int32_t U_EXPORT2
    918 uloc_setKeywordValue(const char* keywordName,
    919                      const char* keywordValue,
    920                      char* buffer, int32_t bufferCapacity,
    921                      UErrorCode* status)
    922 {
    923     /* TODO: sorting. removal. */
    924     int32_t keywordNameLen;
    925     int32_t keywordValueLen;
    926     int32_t bufLen;
    927     int32_t needLen = 0;
    928     int32_t foundValueLen;
    929     int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
    930     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    931     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    932     int32_t i = 0;
    933     int32_t rc;
    934     char* nextSeparator = NULL;
    935     char* nextEqualsign = NULL;
    936     char* startSearchHere = NULL;
    937     char* keywordStart = NULL;
    938     char *insertHere = NULL;
    939     if(U_FAILURE(*status)) {
    940         return -1;
    941     }
    942     if(bufferCapacity>1) {
    943         bufLen = (int32_t)uprv_strlen(buffer);
    944     } else {
    945         *status = U_ILLEGAL_ARGUMENT_ERROR;
    946         return 0;
    947     }
    948     if(bufferCapacity<bufLen) {
    949         /* The capacity is less than the length?! Is this NULL terminated? */
    950         *status = U_ILLEGAL_ARGUMENT_ERROR;
    951         return 0;
    952     }
    953     if(keywordValue && !*keywordValue) {
    954         keywordValue = NULL;
    955     }
    956     if(keywordValue) {
    957         keywordValueLen = (int32_t)uprv_strlen(keywordValue);
    958     } else {
    959         keywordValueLen = 0;
    960     }
    961     keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    962     if(U_FAILURE(*status)) {
    963         return 0;
    964     }
    965     startSearchHere = (char*)locale_getKeywordsStart(buffer);
    966     if(startSearchHere == NULL || (startSearchHere[1]==0)) {
    967         if(!keywordValue) { /* no keywords = nothing to remove */
    968             return bufLen;
    969         }
    970 
    971         needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
    972         if(startSearchHere) { /* had a single @ */
    973             needLen--; /* already had the @ */
    974             /* startSearchHere points at the @ */
    975         } else {
    976             startSearchHere=buffer+bufLen;
    977         }
    978         if(needLen >= bufferCapacity) {
    979             *status = U_BUFFER_OVERFLOW_ERROR;
    980             return needLen; /* no change */
    981         }
    982         *startSearchHere = '@';
    983         startSearchHere++;
    984         uprv_strcpy(startSearchHere, keywordNameBuffer);
    985         startSearchHere += keywordNameLen;
    986         *startSearchHere = '=';
    987         startSearchHere++;
    988         uprv_strcpy(startSearchHere, keywordValue);
    989         startSearchHere+=keywordValueLen;
    990         return needLen;
    991     } /* end shortcut - no @ */
    992 
    993     keywordStart = startSearchHere;
    994     /* search for keyword */
    995     while(keywordStart) {
    996         keywordStart++;
    997         /* skip leading spaces (allowed?) */
    998         while(*keywordStart == ' ') {
    999             keywordStart++;
   1000         }
   1001         nextEqualsign = uprv_strchr(keywordStart, '=');
   1002         /* need to normalize both keyword and keyword name */
   1003         if(!nextEqualsign) {
   1004             break;
   1005         }
   1006         if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
   1007             /* keyword name too long for internal buffer */
   1008             *status = U_INTERNAL_PROGRAM_ERROR;
   1009             return 0;
   1010         }
   1011         for(i = 0; i < nextEqualsign - keywordStart; i++) {
   1012             localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
   1013         }
   1014         /* trim trailing spaces */
   1015         while(keywordStart[i-1] == ' ') {
   1016             i--;
   1017         }
   1018         U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN);
   1019         localeKeywordNameBuffer[i] = 0;
   1020 
   1021         nextSeparator = uprv_strchr(nextEqualsign, ';');
   1022         rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
   1023         if(rc == 0) {
   1024             nextEqualsign++;
   1025             while(*nextEqualsign == ' ') {
   1026                 nextEqualsign++;
   1027             }
   1028             /* we actually found the keyword. Change the value */
   1029             if (nextSeparator) {
   1030                 keywordAtEnd = 0;
   1031                 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
   1032             } else {
   1033                 keywordAtEnd = 1;
   1034                 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
   1035             }
   1036             if(keywordValue) { /* adding a value - not removing */
   1037               if(foundValueLen == keywordValueLen) {
   1038                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1039                 return bufLen; /* no change in size */
   1040               } else if(foundValueLen > keywordValueLen) {
   1041                 int32_t delta = foundValueLen - keywordValueLen;
   1042                 if(nextSeparator) { /* RH side */
   1043                   uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
   1044                 }
   1045                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1046                 bufLen -= delta;
   1047                 buffer[bufLen]=0;
   1048                 return bufLen;
   1049               } else { /* FVL < KVL */
   1050                 int32_t delta = keywordValueLen - foundValueLen;
   1051                 if((bufLen+delta) >= bufferCapacity) {
   1052                   *status = U_BUFFER_OVERFLOW_ERROR;
   1053                   return bufLen+delta;
   1054                 }
   1055                 if(nextSeparator) { /* RH side */
   1056                   uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
   1057                 }
   1058                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
   1059                 bufLen += delta;
   1060                 buffer[bufLen]=0;
   1061                 return bufLen;
   1062               }
   1063             } else { /* removing a keyword */
   1064               if(keywordAtEnd) {
   1065                 /* zero out the ';' or '@' just before startSearchhere */
   1066                 keywordStart[-1] = 0;
   1067                 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
   1068               } else {
   1069                 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
   1070                 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
   1071                 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
   1072               }
   1073             }
   1074         } else if(rc<0){ /* end match keyword */
   1075           /* could insert at this location. */
   1076           insertHere = keywordStart;
   1077         }
   1078         keywordStart = nextSeparator;
   1079     } /* end loop searching */
   1080 
   1081     if(!keywordValue) {
   1082       return bufLen; /* removal of non-extant keyword - no change */
   1083     }
   1084 
   1085     /* we know there is at least one keyword. */
   1086     needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
   1087     if(needLen >= bufferCapacity) {
   1088         *status = U_BUFFER_OVERFLOW_ERROR;
   1089         return needLen; /* no change */
   1090     }
   1091 
   1092     if(insertHere) {
   1093       uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
   1094       keywordStart = insertHere;
   1095     } else {
   1096       keywordStart = buffer+bufLen;
   1097       *keywordStart = ';';
   1098       keywordStart++;
   1099     }
   1100     uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
   1101     keywordStart += keywordNameLen;
   1102     *keywordStart = '=';
   1103     keywordStart++;
   1104     uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
   1105     keywordStart+=keywordValueLen;
   1106     if(insertHere) {
   1107       *keywordStart = ';';
   1108       keywordStart++;
   1109     }
   1110     buffer[needLen]=0;
   1111     return needLen;
   1112 }
   1113 
   1114 /* ### ID parsing implementation **************************************************/
   1115 
   1116 #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))
   1117 
   1118 /*returns TRUE if one of the special prefixes is here (s=string)
   1119   'x-' or 'i-' */
   1120 #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
   1121 
   1122 /* Dot terminates it because of POSIX form  where dot precedes the codepage
   1123  * except for variant
   1124  */
   1125 #define _isTerminator(a)  ((a==0)||(a=='.')||(a=='@'))
   1126 
   1127 static char* _strnchr(const char* str, int32_t len, char c) {
   1128     U_ASSERT(str != 0 && len >= 0);
   1129     while (len-- != 0) {
   1130         char d = *str;
   1131         if (d == c) {
   1132             return (char*) str;
   1133         } else if (d == 0) {
   1134             break;
   1135         }
   1136         ++str;
   1137     }
   1138     return NULL;
   1139 }
   1140 
   1141 /**
   1142  * Lookup 'key' in the array 'list'.  The array 'list' should contain
   1143  * a NULL entry, followed by more entries, and a second NULL entry.
   1144  *
   1145  * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
   1146  * COUNTRIES_3.
   1147  */
   1148 static int16_t _findIndex(const char* const* list, const char* key)
   1149 {
   1150     const char* const* anchor = list;
   1151     int32_t pass = 0;
   1152 
   1153     /* Make two passes through two NULL-terminated arrays at 'list' */
   1154     while (pass++ < 2) {
   1155         while (*list) {
   1156             if (uprv_strcmp(key, *list) == 0) {
   1157                 return (int16_t)(list - anchor);
   1158             }
   1159             list++;
   1160         }
   1161         ++list;     /* skip final NULL *CWB*/
   1162     }
   1163     return -1;
   1164 }
   1165 
   1166 /* count the length of src while copying it to dest; return strlen(src) */
   1167 static inline int32_t
   1168 _copyCount(char *dest, int32_t destCapacity, const char *src) {
   1169     const char *anchor;
   1170     char c;
   1171 
   1172     anchor=src;
   1173     for(;;) {
   1174         if((c=*src)==0) {
   1175             return (int32_t)(src-anchor);
   1176         }
   1177         if(destCapacity<=0) {
   1178             return (int32_t)((src-anchor)+uprv_strlen(src));
   1179         }
   1180         ++src;
   1181         *dest++=c;
   1182         --destCapacity;
   1183     }
   1184 }
   1185 
   1186 U_CFUNC const char*
   1187 uloc_getCurrentCountryID(const char* oldID){
   1188     int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
   1189     if (offset >= 0) {
   1190         return REPLACEMENT_COUNTRIES[offset];
   1191     }
   1192     return oldID;
   1193 }
   1194 U_CFUNC const char*
   1195 uloc_getCurrentLanguageID(const char* oldID){
   1196     int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
   1197     if (offset >= 0) {
   1198         return REPLACEMENT_LANGUAGES[offset];
   1199     }
   1200     return oldID;
   1201 }
   1202 /*
   1203  * the internal functions _getLanguage(), _getCountry(), _getVariant()
   1204  * avoid duplicating code to handle the earlier locale ID pieces
   1205  * in the functions for the later ones by
   1206  * setting the *pEnd pointer to where they stopped parsing
   1207  *
   1208  * TODO try to use this in Locale
   1209  */
   1210 U_CFUNC int32_t
   1211 ulocimp_getLanguage(const char *localeID,
   1212                     char *language, int32_t languageCapacity,
   1213                     const char **pEnd) {
   1214     int32_t i=0;
   1215     int32_t offset;
   1216     char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
   1217 
   1218     /* if it starts with i- or x- then copy that prefix */
   1219     if(_isIDPrefix(localeID)) {
   1220         if(i<languageCapacity) {
   1221             language[i]=(char)uprv_tolower(*localeID);
   1222         }
   1223         if(i<languageCapacity) {
   1224             language[i+1]='-';
   1225         }
   1226         i+=2;
   1227         localeID+=2;
   1228     }
   1229 
   1230     /* copy the language as far as possible and count its length */
   1231     while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
   1232         if(i<languageCapacity) {
   1233             language[i]=(char)uprv_tolower(*localeID);
   1234         }
   1235         if(i<3) {
   1236             U_ASSERT(i>=0);
   1237             lang[i]=(char)uprv_tolower(*localeID);
   1238         }
   1239         i++;
   1240         localeID++;
   1241     }
   1242 
   1243     if(i==3) {
   1244         /* convert 3 character code to 2 character code if possible *CWB*/
   1245         offset=_findIndex(LANGUAGES_3, lang);
   1246         if(offset>=0) {
   1247             i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
   1248         }
   1249     }
   1250 
   1251     if(pEnd!=NULL) {
   1252         *pEnd=localeID;
   1253     }
   1254     return i;
   1255 }
   1256 
   1257 U_CFUNC int32_t
   1258 ulocimp_getScript(const char *localeID,
   1259                   char *script, int32_t scriptCapacity,
   1260                   const char **pEnd)
   1261 {
   1262     int32_t idLen = 0;
   1263 
   1264     if (pEnd != NULL) {
   1265         *pEnd = localeID;
   1266     }
   1267 
   1268     /* copy the second item as far as possible and count its length */
   1269     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
   1270             && uprv_isASCIILetter(localeID[idLen])) {
   1271         idLen++;
   1272     }
   1273 
   1274     /* If it's exactly 4 characters long, then it's a script and not a country. */
   1275     if (idLen == 4) {
   1276         int32_t i;
   1277         if (pEnd != NULL) {
   1278             *pEnd = localeID+idLen;
   1279         }
   1280         if(idLen > scriptCapacity) {
   1281             idLen = scriptCapacity;
   1282         }
   1283         if (idLen >= 1) {
   1284             script[0]=(char)uprv_toupper(*(localeID++));
   1285         }
   1286         for (i = 1; i < idLen; i++) {
   1287             script[i]=(char)uprv_tolower(*(localeID++));
   1288         }
   1289     }
   1290     else {
   1291         idLen = 0;
   1292     }
   1293     return idLen;
   1294 }
   1295 
   1296 U_CFUNC int32_t
   1297 ulocimp_getCountry(const char *localeID,
   1298                    char *country, int32_t countryCapacity,
   1299                    const char **pEnd)
   1300 {
   1301     int32_t idLen=0;
   1302     char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
   1303     int32_t offset;
   1304 
   1305     /* copy the country as far as possible and count its length */
   1306     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
   1307         if(idLen<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
   1308             cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
   1309         }
   1310         idLen++;
   1311     }
   1312 
   1313     /* the country should be either length 2 or 3 */
   1314     if (idLen == 2 || idLen == 3) {
   1315         UBool gotCountry = FALSE;
   1316         /* convert 3 character code to 2 character code if possible *CWB*/
   1317         if(idLen==3) {
   1318             offset=_findIndex(COUNTRIES_3, cnty);
   1319             if(offset>=0) {
   1320                 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
   1321                 gotCountry = TRUE;
   1322             }
   1323         }
   1324         if (!gotCountry) {
   1325             int32_t i = 0;
   1326             for (i = 0; i < idLen; i++) {
   1327                 if (i < countryCapacity) {
   1328                     country[i]=(char)uprv_toupper(localeID[i]);
   1329                 }
   1330             }
   1331         }
   1332         localeID+=idLen;
   1333     } else {
   1334         idLen = 0;
   1335     }
   1336 
   1337     if(pEnd!=NULL) {
   1338         *pEnd=localeID;
   1339     }
   1340 
   1341     return idLen;
   1342 }
   1343 
   1344 /**
   1345  * @param needSeparator if true, then add leading '_' if any variants
   1346  * are added to 'variant'
   1347  */
   1348 static int32_t
   1349 _getVariantEx(const char *localeID,
   1350               char prev,
   1351               char *variant, int32_t variantCapacity,
   1352               UBool needSeparator) {
   1353     int32_t i=0;
   1354 
   1355     /* get one or more variant tags and separate them with '_' */
   1356     if(_isIDSeparator(prev)) {
   1357         /* get a variant string after a '-' or '_' */
   1358         while(!_isTerminator(*localeID)) {
   1359             if (needSeparator) {
   1360                 if (i<variantCapacity) {
   1361                     variant[i] = '_';
   1362                 }
   1363                 ++i;
   1364                 needSeparator = FALSE;
   1365             }
   1366             if(i<variantCapacity) {
   1367                 variant[i]=(char)uprv_toupper(*localeID);
   1368                 if(variant[i]=='-') {
   1369                     variant[i]='_';
   1370                 }
   1371             }
   1372             i++;
   1373             localeID++;
   1374         }
   1375     }
   1376 
   1377     /* if there is no variant tag after a '-' or '_' then look for '@' */
   1378     if(i==0) {
   1379         if(prev=='@') {
   1380             /* keep localeID */
   1381         } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
   1382             ++localeID; /* point after the '@' */
   1383         } else {
   1384             return 0;
   1385         }
   1386         while(!_isTerminator(*localeID)) {
   1387             if (needSeparator) {
   1388                 if (i<variantCapacity) {
   1389                     variant[i] = '_';
   1390                 }
   1391                 ++i;
   1392                 needSeparator = FALSE;
   1393             }
   1394             if(i<variantCapacity) {
   1395                 variant[i]=(char)uprv_toupper(*localeID);
   1396                 if(variant[i]=='-' || variant[i]==',') {
   1397                     variant[i]='_';
   1398                 }
   1399             }
   1400             i++;
   1401             localeID++;
   1402         }
   1403     }
   1404 
   1405     return i;
   1406 }
   1407 
   1408 static int32_t
   1409 _getVariant(const char *localeID,
   1410             char prev,
   1411             char *variant, int32_t variantCapacity) {
   1412     return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
   1413 }
   1414 
   1415 /**
   1416  * Delete ALL instances of a variant from the given list of one or
   1417  * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
   1418  * @param variants the source string of one or more variants,
   1419  * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
   1420  * terminated; if it is, trailing zero will NOT be maintained.
   1421  * @param variantsLen length of variants
   1422  * @param toDelete variant to delete, without separators, e.g.  "EURO"
   1423  * or "PREEURO"; not zero terminated
   1424  * @param toDeleteLen length of toDelete
   1425  * @return number of characters deleted from variants
   1426  */
   1427 static int32_t
   1428 _deleteVariant(char* variants, int32_t variantsLen,
   1429                const char* toDelete, int32_t toDeleteLen)
   1430 {
   1431     int32_t delta = 0; /* number of chars deleted */
   1432     for (;;) {
   1433         UBool flag = FALSE;
   1434         if (variantsLen < toDeleteLen) {
   1435             return delta;
   1436         }
   1437         if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
   1438             (variantsLen == toDeleteLen ||
   1439              (flag=(variants[toDeleteLen] == '_'))))
   1440         {
   1441             int32_t d = toDeleteLen + (flag?1:0);
   1442             variantsLen -= d;
   1443             delta += d;
   1444             if (variantsLen > 0) {
   1445                 uprv_memmove(variants, variants+d, variantsLen);
   1446             }
   1447         } else {
   1448             char* p = _strnchr(variants, variantsLen, '_');
   1449             if (p == NULL) {
   1450                 return delta;
   1451             }
   1452             ++p;
   1453             variantsLen -= (int32_t)(p - variants);
   1454             variants = p;
   1455         }
   1456     }
   1457 }
   1458 
   1459 /* Keyword enumeration */
   1460 
        /*
         * State kept in UEnumeration.context by the uloc_kw_* callbacks below.
         */
   1461 typedef struct UKeywordsContext {
            /* Heap copy of the keyword list made by uloc_openKeywordList and
               freed by uloc_kw_closeKeywords.  The callbacks walk it as
               consecutive NUL-terminated strings and stop at an empty string. */
   1462     char* keywords;
            /* Iteration cursor into 'keywords': the next string that
               uloc_kw_nextKeyword will return. */
   1463     char* current;
   1464 } UKeywordsContext;
   1465 
   1466 static void U_CALLCONV
   1467 uloc_kw_closeKeywords(UEnumeration *enumerator) {
   1468     uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
   1469     uprv_free(enumerator->context);
   1470     uprv_free(enumerator);
   1471 }
   1472 
   1473 static int32_t U_CALLCONV
   1474 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
   1475     char *kw = ((UKeywordsContext *)en->context)->keywords;
   1476     int32_t result = 0;
   1477     while(*kw) {
   1478         result++;
   1479         kw += uprv_strlen(kw)+1;
   1480     }
   1481     return result;
   1482 }
   1483 
   1484 static const char* U_CALLCONV
   1485 uloc_kw_nextKeyword(UEnumeration* en,
   1486                     int32_t* resultLength,
   1487                     UErrorCode* /*status*/) {
   1488     const char* result = ((UKeywordsContext *)en->context)->current;
   1489     int32_t len = 0;
   1490     if(*result) {
   1491         len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
   1492         ((UKeywordsContext *)en->context)->current += len+1;
   1493     } else {
   1494         result = NULL;
   1495     }
   1496     if (resultLength) {
   1497         *resultLength = len;
   1498     }
   1499     return result;
   1500 }
   1501 
   1502 static void U_CALLCONV
   1503 uloc_kw_resetKeywords(UEnumeration* en,
   1504                       UErrorCode* /*status*/) {
   1505     ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
   1506 }
   1507 
   1508 static const UEnumeration gKeywordsEnum = {
            /* Template that uloc_openKeywordList copies (uprv_memcpy) into every
               enumeration it creates; the copy's 'context' member is then pointed
               at a freshly allocated UKeywordsContext.  The initializer is
               positional, so the order must follow the UEnumeration declaration
               (presumably in uenumimp.h -- confirm before reordering). */
   1509     NULL,
   1510     NULL,
   1511     uloc_kw_closeKeywords,
   1512     uloc_kw_countKeywords,
   1513     uenum_unextDefault,
   1514     uloc_kw_nextKeyword,
   1515     uloc_kw_resetKeywords
   1516 };
   1517 
   1518 U_CAPI UEnumeration* U_EXPORT2
   1519 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
   1520 {
   1521     UKeywordsContext *myContext = NULL;
   1522     UEnumeration *result = NULL;
   1523 
   1524     if(U_FAILURE(*status)) {
   1525         return NULL;
   1526     }
   1527     result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
   1528     /* Null pointer test */
   1529     if (result == NULL) {
   1530         *status = U_MEMORY_ALLOCATION_ERROR;
   1531         return NULL;
   1532     }
   1533     uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
   1534     myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
   1535     if (myContext == NULL) {
   1536         *status = U_MEMORY_ALLOCATION_ERROR;
   1537         uprv_free(result);
   1538         return NULL;
   1539     }
   1540     myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
   1541     uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
   1542     myContext->keywords[keywordListSize] = 0;
   1543     myContext->current = myContext->keywords;
   1544     result->context = myContext;
   1545     return result;
   1546 }
   1547 
   1548 U_CAPI UEnumeration* U_EXPORT2
   1549 uloc_openKeywords(const char* localeID,
   1550                         UErrorCode* status)
   1551 {
   1552     int32_t i=0;
   1553     char keywords[256];
   1554     int32_t keywordsCapacity = 256;
   1555     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1556     const char* tmpLocaleID;
   1557 
   1558     if(status==NULL || U_FAILURE(*status)) {
   1559         return 0;
   1560     }
   1561 
   1562     if (_hasBCP47Extension(localeID)) {
   1563         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
   1564     } else {
   1565         if (localeID==NULL) {
   1566            localeID=uloc_getDefault();
   1567         }
   1568         tmpLocaleID=localeID;
   1569     }
   1570 
   1571     /* Skip the language */
   1572     ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
   1573     if(_isIDSeparator(*tmpLocaleID)) {
   1574         const char *scriptID;
   1575         /* Skip the script if available */
   1576         ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
   1577         if(scriptID != tmpLocaleID+1) {
   1578             /* Found optional script */
   1579             tmpLocaleID = scriptID;
   1580         }
   1581         /* Skip the Country */
   1582         if (_isIDSeparator(*tmpLocaleID)) {
   1583             ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
   1584             if(_isIDSeparator(*tmpLocaleID)) {
   1585                 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
   1586             }
   1587         }
   1588     }
   1589 
   1590     /* keywords are located after '@' */
   1591     if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
   1592         i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
   1593     }
   1594 
   1595     if(i) {
   1596         return uloc_openKeywordList(keywords, i, status);
   1597     } else {
   1598         return NULL;
   1599     }
   1600 }
   1601 
   1602 
   1603 /* bit-flags for 'options' parameter of _canonicalize */
   1604 #define _ULOC_STRIP_KEYWORDS 0x2
   1605 #define _ULOC_CANONICALIZE   0x1
   1606 
   1607 #define OPTION_SET(options, mask) ((options & mask) != 0)
   1608 
   1609 static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
   1610 #define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
   1611 
   1612 /**
   1613  * Canonicalize the given localeID, to level 1 or to level 2,
   1614  * depending on the options.  To specify level 1, pass in options=0.
   1615  * To specify level 2, pass in options=_ULOC_CANONICALIZE.
   1616  *
   1617  * This is the code underlying uloc_getName and uloc_canonicalize.
         * uloc_getBaseName also calls it, with options=_ULOC_STRIP_KEYWORDS.
         *
         * The ID is rebuilt piece by piece (language, script, country, variant,
         * then keywords) into 'name'.  'len' always counts the full length
         * needed, even after the buffer is full; writes are skipped once len
         * reaches nameCapacity.
         *
         * @param localeID the ID to process; NULL means the default locale
         * @param result output buffer, may be NULL
         * @param resultCapacity capacity of result
         * @param options combination of _ULOC_CANONICALIZE and _ULOC_STRIP_KEYWORDS
         * @param err in/out error code; nothing is done if it already indicates failure
         * @return the value of u_terminateChars(result, resultCapacity, len, err)
   1618  */
   1619 static int32_t
   1620 _canonicalize(const char* localeID,
   1621               char* result,
   1622               int32_t resultCapacity,
   1623               uint32_t options,
   1624               UErrorCode* err) {
   1625     int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
   1626     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1627     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1628     const char* origLocaleID;
   1629     const char* tmpLocaleID;
   1630     const char* keywordAssign = NULL;
   1631     const char* separatorIndicator = NULL;
   1632     const char* addKeyword = NULL;
   1633     const char* addValue = NULL;
   1634     char* name;
   1635     char* variant = NULL; /* pointer into name, or NULL */
   1636 
   1637     if (U_FAILURE(*err)) {
   1638         return 0;
   1639     }
   1640 
            /* IDs carrying a BCP 47 extension go through _ConvertBCP47, which is
               given tempBuffer as scratch space; otherwise the ID is used as is. */
   1641     if (_hasBCP47Extension(localeID)) {
   1642         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
   1643     } else {
   1644         if (localeID==NULL) {
   1645            localeID=uloc_getDefault();
   1646         }
   1647         tmpLocaleID=localeID;
   1648     }
   1649 
   1650     origLocaleID=tmpLocaleID;
   1651 
   1652     /* if we are doing a full canonicalization, then put results in
   1653        localeBuffer, if necessary; otherwise send them to result. */
            /* Because the options test is commented out, the scratch buffer is
               chosen for every option value whenever result is NULL or smaller
               than localeBuffer; nameCapacity is therefore never below
               sizeof(localeBuffer). */
   1654     if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
   1655         (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
   1656         name = localeBuffer;
   1657         nameCapacity = (int32_t)sizeof(localeBuffer);
   1658     } else {
   1659         name = result;
   1660         nameCapacity = resultCapacity;
   1661     }
   1662 
   1663     /* get all pieces, one after another, and separate with '_' */
   1664     len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
   1665 
            /* Special case: a 9-character language field on an ID that starts
               with "i-default" is replaced by the default locale ID.
               NOTE(review): the copy below is not bounded by nameCapacity --
               confirm the default locale ID always fits. */
   1666     if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
   1667         const char *d = uloc_getDefault();
   1668 
   1669         len = (int32_t)uprv_strlen(d);
   1670 
   1671         if (name != NULL) {
   1672             uprv_strncpy(name, d, len);
   1673         }
   1674     } else if(_isIDSeparator(*tmpLocaleID)) {
   1675         const char *scriptID;
   1676 
   1677         ++fieldCount;
   1678         if(len<nameCapacity) {
   1679             name[len]='_';
   1680         }
   1681         ++len;
   1682 
                /* From here on the destination passed down is NULL as soon as
                   len has reached nameCapacity. */
   1683         scriptSize=ulocimp_getScript(tmpLocaleID+1,
   1684             (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
   1685         if(scriptSize > 0) {
   1686             /* Found optional script */
   1687             tmpLocaleID = scriptID;
   1688             ++fieldCount;
   1689             len+=scriptSize;
   1690             if (_isIDSeparator(*tmpLocaleID)) {
   1691                 /* If there is something else, then we add the _ */
   1692                 if(len<nameCapacity) {
   1693                     name[len]='_';
   1694                 }
   1695                 ++len;
   1696             }
   1697         }
   1698 
   1699         if (_isIDSeparator(*tmpLocaleID)) {
   1700             const char *cntryID;
   1701             int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
   1702                 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
   1703             if (cntrySize > 0) {
   1704                 /* Found optional country */
   1705                 tmpLocaleID = cntryID;
   1706                 len+=cntrySize;
   1707             }
   1708             if(_isIDSeparator(*tmpLocaleID)) {
   1709                 /* If there is something else, then we add the _  if we found country before. */
                        /* NOTE(review): cntrySize >= 0 also holds when no country
                           was found (size 0), so in practice only the "next
                           character is not another separator" test decides here
                           -- confirm this is intended. */
   1710                 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
   1711                     ++fieldCount;
   1712                     if(len<nameCapacity) {
   1713                         name[len]='_';
   1714                     }
   1715                     ++len;
   1716                 }
   1717 
   1718                 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
   1719                     (len<nameCapacity ? name+len : NULL), nameCapacity-len);
   1720                 if (variantSize > 0) {
                            /* remember where the variant starts inside name, if it
                               was actually written there */
   1721                     variant = len<nameCapacity ? name+len : NULL;
   1722                     len += variantSize;
   1723                     tmpLocaleID += variantSize + 1; /* skip '_' and variant */
   1724                 }
   1725             }
   1726         }
   1727     }
   1728 
   1729     /* Copy POSIX-style charset specifier, if any [mr.utf8] */
            /* Level 1 only: everything from '.' up to '@' or the end is copied verbatim. */
   1730     if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
   1731         UBool done = FALSE;
   1732         do {
   1733             char c = *tmpLocaleID;
   1734             switch (c) {
   1735             case 0:
   1736             case '@':
   1737                 done = TRUE;
   1738                 break;
   1739             default:
   1740                 if (len<nameCapacity) {
   1741                     name[len] = c;
   1742                 }
   1743                 ++len;
   1744                 ++tmpLocaleID;
   1745                 break;
   1746             }
   1747         } while (!done);
   1748     }
   1749 
   1750     /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
   1751        After this, tmpLocaleID either points to '@' or is NULL */
   1752     if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
   1753         keywordAssign = uprv_strchr(tmpLocaleID, '=');
   1754         separatorIndicator = uprv_strchr(tmpLocaleID, ';');
   1755     }
   1756 
   1757     /* Copy POSIX-style variant, if any [mr@FOO] */
            /* Level 1 only, and only when no '=' follows the '@': the rest of the
               ID, including the '@', is copied verbatim. */
   1758     if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
   1759         tmpLocaleID != NULL && keywordAssign == NULL) {
   1760         for (;;) {
   1761             char c = *tmpLocaleID;
   1762             if (c == 0) {
   1763                 break;
   1764             }
   1765             if (len<nameCapacity) {
   1766                 name[len] = c;
   1767             }
   1768             ++len;
   1769             ++tmpLocaleID;
   1770         }
   1771     }
   1772 
            /* Everything in this block applies to level 2 canonicalization only. */
   1773     if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
   1774         /* Handle @FOO variant if @ is present and not followed by = */
   1775         if (tmpLocaleID!=NULL && keywordAssign==NULL) {
   1776             int32_t posixVariantSize;
   1777             /* Add missing '_' if needed */
   1778             if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
   1779                 do {
   1780                     if(len<nameCapacity) {
   1781                         name[len]='_';
   1782                     }
   1783                     ++len;
   1784                     ++fieldCount;
   1785                 } while(fieldCount<2);
   1786             }
                    /* A leading '_' is requested only if a regular variant was
                       already copied, so the POSIX variant is appended to it. */
   1787             posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
   1788                                              (UBool)(variantSize > 0));
   1789             if (posixVariantSize > 0) {
   1790                 if (variant == NULL) {
   1791                     variant = name+len;
   1792                 }
   1793                 len += posixVariantSize;
   1794                 variantSize += posixVariantSize;
   1795             }
   1796         }
   1797 
   1798         /* Handle generic variants first */
                /* Each VARIANT_MAP entry names a variant that is removed from the
                   variant list and replaced by a keyword/value pair; the loop stops
                   at the first entry that removed something.
                   NOTE(review): len already includes the variant at this point, so
                   (nameCapacity-len) is the space left AFTER the variant, not the
                   space it occupies -- confirm the uprv_min() bound is intended. */
   1799         if (variant) {
   1800             for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
   1801                 const char* variantToCompare = VARIANT_MAP[j].variant;
   1802                 int32_t n = (int32_t)uprv_strlen(variantToCompare);
   1803                 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
   1804                 len -= variantLen;
   1805                 if (variantLen > 0) {
   1806                     if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
   1807                         --len;
   1808                     }
   1809                     addKeyword = VARIANT_MAP[j].keyword;
   1810                     addValue = VARIANT_MAP[j].value;
   1811                     break;
   1812                 }
   1813             }
   1814             if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
   1815                 --len;
   1816             }
   1817         }
   1818 
   1819         /* Look up the ID in the canonicalization map */
                /* The whole name built so far must equal a map id exactly; on a hit
                   it is replaced by the canonical ID, and the entry's keyword (if
                   any) replaces one found through VARIANT_MAP. */
   1820         for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
   1821             const char* id = CANONICALIZE_MAP[j].id;
   1822             int32_t n = (int32_t)uprv_strlen(id);
   1823             if (len == n && uprv_strncmp(name, id, n) == 0) {
   1824                 if (n == 0 && tmpLocaleID != NULL) {
   1825                     break; /* Don't remap "" if keywords present */
   1826                 }
   1827                 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
   1828                 if (CANONICALIZE_MAP[j].keyword) {
   1829                     addKeyword = CANONICALIZE_MAP[j].keyword;
   1830                     addValue = CANONICALIZE_MAP[j].value;
   1831                 }
   1832                 break;
   1833             }
   1834         }
   1835     }
   1836 
            /* Keywords: either copy the ID's own keyword list (passing along any
               keyword produced by the maps above), or, if the ID had none, append
               just that keyword as "@key=value". */
   1837     if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
   1838         if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
   1839             (!separatorIndicator || separatorIndicator > keywordAssign)) {
   1840             if(len<nameCapacity) {
   1841                 name[len]='@';
   1842             }
   1843             ++len;
   1844             ++fieldCount;
   1845             len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
   1846                                 NULL, 0, NULL, TRUE, addKeyword, addValue, err);
   1847         } else if (addKeyword != NULL) {
   1848             U_ASSERT(addValue != NULL && len < nameCapacity);
   1849             /* inelegant but works -- later make _getKeywords do this? */
   1850             len += _copyCount(name+len, nameCapacity-len, "@");
   1851             len += _copyCount(name+len, nameCapacity-len, addKeyword);
   1852             len += _copyCount(name+len, nameCapacity-len, "=");
   1853             len += _copyCount(name+len, nameCapacity-len, addValue);
   1854         }
   1855     }
   1856 
            /* If the work was done in the scratch buffer, copy as much as fits
               into the caller's buffer; termination and overflow reporting are
               left to u_terminateChars. */
   1857     if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
   1858         uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
   1859     }
   1860 
   1861     return u_terminateChars(result, resultCapacity, len, err);
   1862 }
   1863 
   1864 /* ### ID parsing API **************************************************/
   1865 
   1866 U_CAPI int32_t  U_EXPORT2
   1867 uloc_getParent(const char*    localeID,
   1868                char* parent,
   1869                int32_t parentCapacity,
   1870                UErrorCode* err)
   1871 {
   1872     const char *lastUnderscore;
   1873     int32_t i;
   1874 
   1875     if (U_FAILURE(*err))
   1876         return 0;
   1877 
   1878     if (localeID == NULL)
   1879         localeID = uloc_getDefault();
   1880 
   1881     lastUnderscore=uprv_strrchr(localeID, '_');
   1882     if(lastUnderscore!=NULL) {
   1883         i=(int32_t)(lastUnderscore-localeID);
   1884     } else {
   1885         i=0;
   1886     }
   1887 
   1888     if(i>0 && parent != localeID) {
   1889         uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
   1890     }
   1891     return u_terminateChars(parent, parentCapacity, i, err);
   1892 }
   1893 
   1894 U_CAPI int32_t U_EXPORT2
   1895 uloc_getLanguage(const char*    localeID,
   1896          char* language,
   1897          int32_t languageCapacity,
   1898          UErrorCode* err)
   1899 {
   1900     /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
   1901     int32_t i=0;
   1902 
   1903     if (err==NULL || U_FAILURE(*err)) {
   1904         return 0;
   1905     }
   1906 
   1907     if(localeID==NULL) {
   1908         localeID=uloc_getDefault();
   1909     }
   1910 
   1911     i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
   1912     return u_terminateChars(language, languageCapacity, i, err);
   1913 }
   1914 
   1915 U_CAPI int32_t U_EXPORT2
   1916 uloc_getScript(const char*    localeID,
   1917          char* script,
   1918          int32_t scriptCapacity,
   1919          UErrorCode* err)
   1920 {
   1921     int32_t i=0;
   1922 
   1923     if(err==NULL || U_FAILURE(*err)) {
   1924         return 0;
   1925     }
   1926 
   1927     if(localeID==NULL) {
   1928         localeID=uloc_getDefault();
   1929     }
   1930 
   1931     /* skip the language */
   1932     ulocimp_getLanguage(localeID, NULL, 0, &localeID);
   1933     if(_isIDSeparator(*localeID)) {
   1934         i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
   1935     }
   1936     return u_terminateChars(script, scriptCapacity, i, err);
   1937 }
   1938 
   1939 U_CAPI int32_t  U_EXPORT2
   1940 uloc_getCountry(const char* localeID,
   1941             char* country,
   1942             int32_t countryCapacity,
   1943             UErrorCode* err)
   1944 {
   1945     int32_t i=0;
   1946 
   1947     if(err==NULL || U_FAILURE(*err)) {
   1948         return 0;
   1949     }
   1950 
   1951     if(localeID==NULL) {
   1952         localeID=uloc_getDefault();
   1953     }
   1954 
   1955     /* Skip the language */
   1956     ulocimp_getLanguage(localeID, NULL, 0, &localeID);
   1957     if(_isIDSeparator(*localeID)) {
   1958         const char *scriptID;
   1959         /* Skip the script if available */
   1960         ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
   1961         if(scriptID != localeID+1) {
   1962             /* Found optional script */
   1963             localeID = scriptID;
   1964         }
   1965         if(_isIDSeparator(*localeID)) {
   1966             i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
   1967         }
   1968     }
   1969     return u_terminateChars(country, countryCapacity, i, err);
   1970 }
   1971 
   1972 U_CAPI int32_t  U_EXPORT2
   1973 uloc_getVariant(const char* localeID,
   1974                 char* variant,
   1975                 int32_t variantCapacity,
   1976                 UErrorCode* err)
   1977 {
   1978     char tempBuffer[ULOC_FULLNAME_CAPACITY];
   1979     const char* tmpLocaleID;
   1980     int32_t i=0;
   1981 
   1982     if(err==NULL || U_FAILURE(*err)) {
   1983         return 0;
   1984     }
   1985 
   1986     if (_hasBCP47Extension(localeID)) {
   1987         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
   1988     } else {
   1989         if (localeID==NULL) {
   1990            localeID=uloc_getDefault();
   1991         }
   1992         tmpLocaleID=localeID;
   1993     }
   1994 
   1995     /* Skip the language */
   1996     ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
   1997     if(_isIDSeparator(*tmpLocaleID)) {
   1998         const char *scriptID;
   1999         /* Skip the script if available */
   2000         ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
   2001         if(scriptID != tmpLocaleID+1) {
   2002             /* Found optional script */
   2003             tmpLocaleID = scriptID;
   2004         }
   2005         /* Skip the Country */
   2006         if (_isIDSeparator(*tmpLocaleID)) {
   2007             const char *cntryID;
   2008             ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
   2009             if (cntryID != tmpLocaleID+1) {
   2010                 /* Found optional country */
   2011                 tmpLocaleID = cntryID;
   2012             }
   2013             if(_isIDSeparator(*tmpLocaleID)) {
   2014                 /* If there was no country ID, skip a possible extra IDSeparator */
   2015                 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
   2016                     tmpLocaleID++;
   2017                 }
   2018                 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
   2019             }
   2020         }
   2021     }
   2022 
   2023     /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
   2024     /* if we do not have a variant tag yet then try a POSIX variant after '@' */
   2025 /*
   2026     if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
   2027         i=_getVariant(localeID+1, '@', variant, variantCapacity);
   2028     }
   2029 */
   2030     return u_terminateChars(variant, variantCapacity, i, err);
   2031 }
   2032 
   2033 U_CAPI int32_t  U_EXPORT2
   2034 uloc_getName(const char* localeID,
   2035              char* name,
   2036              int32_t nameCapacity,
   2037              UErrorCode* err)
   2038 {
   2039     return _canonicalize(localeID, name, nameCapacity, 0, err);
   2040 }
   2041 
   2042 U_CAPI int32_t  U_EXPORT2
   2043 uloc_getBaseName(const char* localeID,
   2044                  char* name,
   2045                  int32_t nameCapacity,
   2046                  UErrorCode* err)
   2047 {
   2048     return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
   2049 }
   2050 
   2051 U_CAPI int32_t  U_EXPORT2
   2052 uloc_canonicalize(const char* localeID,
   2053                   char* name,
   2054                   int32_t nameCapacity,
   2055                   UErrorCode* err)
   2056 {
   2057     return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
   2058 }
   2059 
   2060 U_CAPI const char*  U_EXPORT2
   2061 uloc_getISO3Language(const char* localeID)
   2062 {
   2063     int16_t offset;
   2064     char lang[ULOC_LANG_CAPACITY];
   2065     UErrorCode err = U_ZERO_ERROR;
   2066 
   2067     if (localeID == NULL)
   2068     {
   2069         localeID = uloc_getDefault();
   2070     }
   2071     uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
   2072     if (U_FAILURE(err))
   2073         return "";
   2074     offset = _findIndex(LANGUAGES, lang);
   2075     if (offset < 0)
   2076         return "";
   2077     return LANGUAGES_3[offset];
   2078 }
   2079 
   2080 U_CAPI const char*  U_EXPORT2
   2081 uloc_getISO3Country(const char* localeID)
   2082 {
   2083     int16_t offset;
   2084     char cntry[ULOC_LANG_CAPACITY];
   2085     UErrorCode err = U_ZERO_ERROR;
   2086 
   2087     if (localeID == NULL)
   2088     {
   2089         localeID = uloc_getDefault();
   2090     }
   2091     uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
   2092     if (U_FAILURE(err))
   2093         return "";
   2094     offset = _findIndex(COUNTRIES, cntry);
   2095     if (offset < 0)
   2096         return "";
   2097 
   2098     return COUNTRIES_3[offset];
   2099 }
   2100 
   2101 U_CAPI uint32_t  U_EXPORT2
   2102 uloc_getLCID(const char* localeID)
   2103 {
   2104     UErrorCode status = U_ZERO_ERROR;
   2105     char       langID[ULOC_FULLNAME_CAPACITY];
   2106 
   2107     uloc_getLanguage(localeID, langID, sizeof(langID), &status);
   2108     if (U_FAILURE(status)) {
   2109         return 0;
   2110     }
   2111 
   2112     if (uprv_strchr(localeID, '@')) {
   2113         // uprv_convertToLCID does not support keywords other than collation.
   2114         // Remove all keywords except collation.
   2115         int32_t len;
   2116         char collVal[ULOC_KEYWORDS_CAPACITY];
   2117         char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
   2118 
   2119         len = uloc_getKeywordValue(localeID, "collation", collVal,
   2120             sizeof(collVal)/sizeof(collVal[0]) - 1, &status);
   2121 
   2122         if (U_SUCCESS(status) && len > 0) {
   2123             collVal[len] = 0;
   2124 
   2125             len = uloc_getBaseName(localeID, tmpLocaleID,
   2126                 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - 1, &status);
   2127 
   2128             if (U_SUCCESS(status)) {
   2129                 tmpLocaleID[len] = 0;
   2130 
   2131                 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
   2132                     sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - len - 1, &status);
   2133 
   2134                 if (U_SUCCESS(status)) {
   2135                     tmpLocaleID[len] = 0;
   2136                     return uprv_convertToLCID(langID, tmpLocaleID, &status);
   2137                 }
   2138             }
   2139         }
   2140 
   2141         // fall through - all keywords are simply ignored
   2142         status = U_ZERO_ERROR;
   2143     }
   2144 
   2145     return uprv_convertToLCID(langID, localeID, &status);
   2146 }
   2147 
   2148 U_CAPI int32_t U_EXPORT2
   2149 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
   2150                 UErrorCode *status)
   2151 {
   2152     return uprv_convertToPosix(hostid, locale, localeCapacity, status);
   2153 }
   2154 
   2155 /* ### Default locale **************************************************/
   2156 
   2157 U_CAPI const char*  U_EXPORT2
   2158 uloc_getDefault()
   2159 {
   2160     return locale_get_default();
   2161 }
   2162 
   2163 U_CAPI void  U_EXPORT2
   2164 uloc_setDefault(const char*   newDefaultLocale,
   2165              UErrorCode* err)
   2166 {
   2167     if (U_FAILURE(*err))
   2168         return;
   2169     /* the error code isn't currently used for anything by this function*/
   2170 
   2171     /* propagate change to C++ */
   2172     locale_set_default(newDefaultLocale);
   2173 }
   2174 
   2175 /**
   2176  * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer
   2177  * to an array of pointers to arrays of char.  All of these pointers are owned
   2178  * by ICU-- do not delete them, and do not write through them.  The array is
   2179  * terminated with a null pointer.
   2180  */
   2181 U_CAPI const char* const*  U_EXPORT2
   2182 uloc_getISOLanguages()
   2183 {
   2184     return LANGUAGES;
   2185 }
   2186 
   2187 /**
   2188  * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
   2189  * pointer to an array of pointers to arrays of char.  All of these pointers are
   2190  * owned by ICU-- do not delete them, and do not write through them.  The array is
   2191  * terminated with a null pointer.
   2192  */
   2193 U_CAPI const char* const*  U_EXPORT2
   2194 uloc_getISOCountries()
   2195 {
   2196     return COUNTRIES;
   2197 }
   2198 
   2199 
   2200 /* this function to be moved into cstring.c later */
   2201 static char gDecimal = 0;
   2202 
   2203 static /* U_CAPI */
   2204 double
   2205 /* U_EXPORT2 */
   2206 _uloc_strtod(const char *start, char **end) {
   2207     char *decimal;
   2208     char *myEnd;
   2209     char buf[30];
   2210     double rv;
   2211     if (!gDecimal) {
   2212         char rep[5];
   2213         /* For machines that decide to change the decimal on you,
   2214         and try to be too smart with localization.
   2215         This normally should be just a '.'. */
   2216         sprintf(rep, "%+1.1f", 1.0);
   2217         gDecimal = rep[2];
   2218     }
   2219 
   2220     if(gDecimal == '.') {
   2221         return uprv_strtod(start, end); /* fall through to OS */
   2222     } else {
   2223         uprv_strncpy(buf, start, 29);
   2224         buf[29]=0;
   2225         decimal = uprv_strchr(buf, '.');
   2226         if(decimal) {
   2227             *decimal = gDecimal;
   2228         } else {
   2229             return uprv_strtod(start, end); /* no decimal point */
   2230         }
   2231         rv = uprv_strtod(buf, &myEnd);
   2232         if(end) {
   2233             *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
   2234         }
   2235         return rv;
   2236     }
   2237 }
   2238 
   2239 typedef struct {
   2240     float q;
   2241     int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
   2242     char *locale;
   2243 } _acceptLangItem;
   2244 
   2245 static int32_t U_CALLCONV
   2246 uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
   2247 {
   2248     const _acceptLangItem *aa = (const _acceptLangItem*)a;
   2249     const _acceptLangItem *bb = (const _acceptLangItem*)b;
   2250 
   2251     int32_t rc = 0;
   2252     if(bb->q < aa->q) {
   2253         rc = -1;  /* A > B */
   2254     } else if(bb->q > aa->q) {
   2255         rc = 1;   /* A < B */
   2256     } else {
   2257         rc = 0;   /* A = B */
   2258     }
   2259 
   2260     if(rc==0) {
   2261         rc = uprv_stricmp(aa->locale, bb->locale);
   2262     }
   2263 
   2264 #if defined(ULOC_DEBUG)
   2265     /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
   2266     aa->locale, aa->q,
   2267     bb->locale, bb->q,
   2268     rc);*/
   2269 #endif
   2270 
   2271     return rc;
   2272 }
   2273 
   2274 /*
   2275 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
   2276 */
   2277 
   2278 U_CAPI int32_t U_EXPORT2
   2279 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
   2280                             const char *httpAcceptLanguage,
   2281                             UEnumeration* availableLocales,
   2282                             UErrorCode *status)
   2283 {
   2284     _acceptLangItem *j;
   2285     _acceptLangItem smallBuffer[30];
   2286     char **strs;
   2287     char tmp[ULOC_FULLNAME_CAPACITY +1];
   2288     int32_t n = 0;
   2289     const char *itemEnd;
   2290     const char *paramEnd;
   2291     const char *s;
   2292     const char *t;
   2293     int32_t res;
   2294     int32_t i;
   2295     int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
   2296     int32_t jSize;
   2297     char *tempstr; /* Use for null pointer check */
   2298 
   2299     j = smallBuffer;
   2300     jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
   2301     if(U_FAILURE(*status)) {
   2302         return -1;
   2303     }
   2304 
   2305     for(s=httpAcceptLanguage;s&&*s;) {
   2306         while(isspace(*s)) /* eat space at the beginning */
   2307             s++;
   2308         itemEnd=uprv_strchr(s,',');
   2309         paramEnd=uprv_strchr(s,';');
   2310         if(!itemEnd) {
   2311             itemEnd = httpAcceptLanguage+l; /* end of string */
   2312         }
   2313         if(paramEnd && paramEnd<itemEnd) {
   2314             /* semicolon (;) is closer than end (,) */
   2315             t = paramEnd+1;
   2316             if(*t=='q') {
   2317                 t++;
   2318             }
   2319             while(isspace(*t)) {
   2320                 t++;
   2321             }
   2322             if(*t=='=') {
   2323                 t++;
   2324             }
   2325             while(isspace(*t)) {
   2326                 t++;
   2327             }
   2328             j[n].q = (float)_uloc_strtod(t,NULL);
   2329         } else {
   2330             /* no semicolon - it's 1.0 */
   2331             j[n].q = 1.0f;
   2332             paramEnd = itemEnd;
   2333         }
   2334         j[n].dummy=0;
   2335         /* eat spaces prior to semi */
   2336         for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
   2337             ;
   2338         /* Check for null pointer from uprv_strndup */
   2339         tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
   2340         if (tempstr == NULL) {
   2341             *status = U_MEMORY_ALLOCATION_ERROR;
   2342             return -1;
   2343         }
   2344         j[n].locale = tempstr;
   2345         uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
   2346         if(strcmp(j[n].locale,tmp)) {
   2347             uprv_free(j[n].locale);
   2348             j[n].locale=uprv_strdup(tmp);
   2349         }
   2350 #if defined(ULOC_DEBUG)
   2351         /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
   2352 #endif
   2353         n++;
   2354         s = itemEnd;
   2355         while(*s==',') { /* eat duplicate commas */
   2356             s++;
   2357         }
   2358         if(n>=jSize) {
   2359             if(j==smallBuffer) {  /* overflowed the small buffer. */
   2360                 j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
   2361                 if(j!=NULL) {
   2362                     uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
   2363                 }
   2364 #if defined(ULOC_DEBUG)
   2365                 fprintf(stderr,"malloced at size %d\n", jSize);
   2366 #endif
   2367             } else {
   2368                 j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
   2369 #if defined(ULOC_DEBUG)
   2370                 fprintf(stderr,"re-alloced at size %d\n", jSize);
   2371 #endif
   2372             }
   2373             jSize *= 2;
   2374             if(j==NULL) {
   2375                 *status = U_MEMORY_ALLOCATION_ERROR;
   2376                 return -1;
   2377             }
   2378         }
   2379     }
   2380     uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
   2381     if(U_FAILURE(*status)) {
   2382         if(j != smallBuffer) {
   2383 #if defined(ULOC_DEBUG)
   2384             fprintf(stderr,"freeing j %p\n", j);
   2385 #endif
   2386             uprv_free(j);
   2387         }
   2388         return -1;
   2389     }
   2390     strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
   2391     /* Check for null pointer */
   2392     if (strs == NULL) {
   2393         uprv_free(j); /* Free to avoid memory leak */
   2394         *status = U_MEMORY_ALLOCATION_ERROR;
   2395         return -1;
   2396     }
   2397     for(i=0;i<n;i++) {
   2398 #if defined(ULOC_DEBUG)
   2399         /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
   2400 #endif
   2401         strs[i]=j[i].locale;
   2402     }
   2403     res =  uloc_acceptLanguage(result, resultAvailable, outResult,
   2404         (const char**)strs, n, availableLocales, status);
   2405     for(i=0;i<n;i++) {
   2406         uprv_free(strs[i]);
   2407     }
   2408     uprv_free(strs);
   2409     if(j != smallBuffer) {
   2410 #if defined(ULOC_DEBUG)
   2411         fprintf(stderr,"freeing j %p\n", j);
   2412 #endif
   2413         uprv_free(j);
   2414     }
   2415     return res;
   2416 }
   2417 
   2418 
   2419 U_CAPI int32_t U_EXPORT2
   2420 uloc_acceptLanguage(char *result, int32_t resultAvailable,
   2421                     UAcceptResult *outResult, const char **acceptList,
   2422                     int32_t acceptListCount,
   2423                     UEnumeration* availableLocales,
   2424                     UErrorCode *status)
   2425 {
   2426     int32_t i,j;
   2427     int32_t len;
   2428     int32_t maxLen=0;
   2429     char tmp[ULOC_FULLNAME_CAPACITY+1];
   2430     const char *l;
   2431     char **fallbackList;
   2432     if(U_FAILURE(*status)) {
   2433         return -1;
   2434     }
   2435     fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
   2436     if(fallbackList==NULL) {
   2437         *status = U_MEMORY_ALLOCATION_ERROR;
   2438         return -1;
   2439     }
   2440     for(i=0;i<acceptListCount;i++) {
   2441 #if defined(ULOC_DEBUG)
   2442         fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
   2443 #endif
   2444         while((l=uenum_next(availableLocales, NULL, status))) {
   2445 #if defined(ULOC_DEBUG)
   2446             fprintf(stderr,"  %s\n", l);
   2447 #endif
   2448             len = (int32_t)uprv_strlen(l);
   2449             if(!uprv_strcmp(acceptList[i], l)) {
   2450                 if(outResult) {
   2451                     *outResult = ULOC_ACCEPT_VALID;
   2452                 }
   2453 #if defined(ULOC_DEBUG)
   2454                 fprintf(stderr, "MATCH! %s\n", l);
   2455 #endif
   2456                 if(len>0) {
   2457                     uprv_strncpy(result, l, uprv_min(len, resultAvailable));
   2458                 }
   2459                 for(j=0;j<i;j++) {
   2460                     uprv_free(fallbackList[j]);
   2461                 }
   2462                 uprv_free(fallbackList);
   2463                 return u_terminateChars(result, resultAvailable, len, status);
   2464             }
   2465             if(len>maxLen) {
   2466                 maxLen = len;
   2467             }
   2468         }
   2469         uenum_reset(availableLocales, status);
   2470         /* save off parent info */
   2471         if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
   2472             fallbackList[i] = uprv_strdup(tmp);
   2473         } else {
   2474             fallbackList[i]=0;
   2475         }
   2476     }
   2477 
   2478     for(maxLen--;maxLen>0;maxLen--) {
   2479         for(i=0;i<acceptListCount;i++) {
   2480             if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
   2481 #if defined(ULOC_DEBUG)
   2482                 fprintf(stderr,"Try: [%s]", fallbackList[i]);
   2483 #endif
   2484                 while((l=uenum_next(availableLocales, NULL, status))) {
   2485 #if defined(ULOC_DEBUG)
   2486                     fprintf(stderr,"  %s\n", l);
   2487 #endif
   2488                     len = (int32_t)uprv_strlen(l);
   2489                     if(!uprv_strcmp(fallbackList[i], l)) {
   2490                         if(outResult) {
   2491                             *outResult = ULOC_ACCEPT_FALLBACK;
   2492                         }
   2493 #if defined(ULOC_DEBUG)
   2494                         fprintf(stderr, "fallback MATCH! %s\n", l);
   2495 #endif
   2496                         if(len>0) {
   2497                             uprv_strncpy(result, l, uprv_min(len, resultAvailable));
   2498                         }
   2499                         for(j=0;j<acceptListCount;j++) {
   2500                             uprv_free(fallbackList[j]);
   2501                         }
   2502                         uprv_free(fallbackList);
   2503                         return u_terminateChars(result, resultAvailable, len, status);
   2504                     }
   2505                 }
   2506                 uenum_reset(availableLocales, status);
   2507 
   2508                 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
   2509                     uprv_free(fallbackList[i]);
   2510                     fallbackList[i] = uprv_strdup(tmp);
   2511                 } else {
   2512                     uprv_free(fallbackList[i]);
   2513                     fallbackList[i]=0;
   2514                 }
   2515             }
   2516         }
   2517         if(outResult) {
   2518             *outResult = ULOC_ACCEPT_FAILED;
   2519         }
   2520     }
   2521     for(i=0;i<acceptListCount;i++) {
   2522         uprv_free(fallbackList[i]);
   2523     }
   2524     uprv_free(fallbackList);
   2525     return -1;
   2526 }
   2527 
   2528 /*eof*/
   2529