Home | History | Annotate | Download | only in common
      1 /*
      2  **********************************************************************
      3  *   Copyright (C) 1996-2009, International Business Machines
      4  *   Corporation and others.  All Rights Reserved.
      5  **********************************************************************
      6  *
      7  * Provides functionality for mapping between
      8  * LCID and Posix IDs or ICU locale to codepage
      9  *
     10  * Note: All classes and code in this file are
     11  *       intended for internal use only.
     12  *
     13  * Methods of interest:
     14  *   unsigned long convertToLCID(const char*);
     15  *   const char* convertToPosix(unsigned long);
     16  *
     17  * Kathleen Wilson, 4/30/96
     18  *
     19  *  Date        Name        Description
     20  *  3/11/97     aliu        Fixed off-by-one bug in assignment operator. Added
     21  *                          setId() method and safety check against
     22  *                          MAX_ID_LENGTH.
     23  * 04/23/99     stephen     Added C wrapper for convertToPosix.
     24  * 09/18/00     george      Removed the memory leaks.
     25  * 08/23/01     george      Convert to C
     26  */
     27 
     28 #include "locmap.h"
     29 #include "cstring.h"
     30 
     31 /*
     32  * Note:
     33  * The mapping from Win32 locale ID numbers to POSIX locale strings should
     34  * be the faster one.
     35  *
     36  * Many LCID values come from winnt.h
     37  * Some also come from http://www.microsoft.com/globaldev/reference/lcid-all.mspx
     38  */
     39 
     40 /*
     41 ////////////////////////////////////////////////
     42 //
     43 // Internal Classes for LCID <--> POSIX Mapping
     44 //
     45 /////////////////////////////////////////////////
     46 */
     47 
     48 typedef struct ILcidPosixElement
     49 {
     50     const uint32_t hostID;
     51     const char * const posixID;
     52 } ILcidPosixElement;
     53 
     54 typedef struct ILcidPosixMap
     55 {
     56     const uint32_t numRegions;
     57     const struct ILcidPosixElement* const regionMaps;
     58 } ILcidPosixMap;
     59 
     60 
     61 /*
     62 /////////////////////////////////////////////////
     63 //
     64 // Easy macros to make the LCID <--> POSIX Mapping
     65 //
     66 /////////////////////////////////////////////////
     67 */
     68 
     69 /*
     70  The standard one language/one country mapping for LCID.
     71  The first element must be the language, and the following
     72  elements are the language with the country.
     73  */
     74 #define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
     75 static const ILcidPosixElement languageID[] = { \
     76     {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
     77     {hostID, #posixID}, \
     78 };
     79 
     80 /*
     81  Create the map for the posixID. This macro supposes that the language string
     82  name is the same as the global variable name, and that the first element
     83  in the ILcidPosixElement is just the language.
     84  */
     85 #define ILCID_POSIX_MAP(_posixID) \
     86     {sizeof(_posixID)/sizeof(ILcidPosixElement), _posixID}
     87 
/*
////////////////////////////////////////////
//
// Create the table of LCID to POSIX Mapping
// None of it should be dynamically created.
//
// The locale tables below are file-scope static const arrays,
// so they are initialized entirely from constant data.
//
////////////////////////////////////////////
*/
     99 
/*
 * Each ILCID_POSIX_ELEMENT_ARRAY(id, lang, lang_CC) line defines a two-entry
 * array named lang: {LANGUAGE_LCID(id), "lang"} followed by {id, "lang_CC"}.
 */
ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)

/* Arabic: the language-only entry comes first, then one entry per region. */
static const ILcidPosixElement ar[] = {
    {0x01,   "ar"},
    {0x3801, "ar_AE"},
    {0x3c01, "ar_BH"},
    {0x1401, "ar_DZ"},
    {0x0c01, "ar_EG"},
    {0x0801, "ar_IQ"},
    {0x2c01, "ar_JO"},
    {0x3401, "ar_KW"},
    {0x3001, "ar_LB"},
    {0x1001, "ar_LY"},
    {0x1801, "ar_MA"},
    {0x2001, "ar_OM"},
    {0x4001, "ar_QA"},
    {0x0401, "ar_SA"},
    {0x2801, "ar_SY"},
    {0x1c01, "ar_TN"},
    {0x2401, "ar_YE"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)

/*
 * Azerbaijani: each host ID appears under several POSIX spellings (with and
 * without script/region), so every spelling resolves to the same host ID.
 */
static const ILcidPosixElement az[] = {
    {0x2c,   "az"},
    {0x082c, "az_Cyrl_AZ"},  /* Cyrillic based */
    {0x082c, "az_Cyrl"},  /* Cyrillic based */
    {0x042c, "az_Latn_AZ"}, /* Latin based */
    {0x042c, "az_Latn"}, /* Latin based */
    {0x042c, "az_AZ"} /* Latin based */
};
    134 
ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)

/* Berber: one host ID per script, each listed with and without the region. */
static const ILcidPosixElement ber[] = {
    {0x5f,   "ber"},
    {0x045f, "ber_Arab_DZ"},
    {0x045f, "ber_Arab"},
    {0x085f, "ber_Latn_DZ"},
    {0x085f, "ber_Latn"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)

/* Bengali: language-only entry first, then the regional variants. */
static const ILcidPosixElement bn[] = {
    {0x45,   "bn"},
    {0x0845, "bn_BD"},
    {0x0445, "bn_IN"}
};

/* Tibetan: language-only entry first, then the regional variants. */
static const ILcidPosixElement bo[] = {
    {0x51,   "bo"},
    {0x0851, "bo_BT"},
    {0x0451, "bo_CN"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
ILCID_POSIX_ELEMENT_ARRAY(0x0403, ca, ca_ES)
ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)

/*
 * Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function.
 * Written out by hand because ILCID_POSIX_ELEMENT_ARRAY would name the array "cs".
 */
static const ILcidPosixElement cs_CZ[] = {
    {0x05,   "cs"},
    {0x0405, "cs_CZ"},
};
    170 
ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)

/*
 * German: regional entries plus 0x10407, a host ID wider than 16 bits that is
 * mapped to the phonebook collation keyword under two POSIX spellings.
 */
static const ILcidPosixElement de[] = {
    {0x07,   "de"},
    {0x0c07, "de_AT"},
    {0x0807, "de_CH"},
    {0x0407, "de_DE"},
    {0x1407, "de_LI"},
    {0x1007, "de_LU"},
    {0x10407,"de_DE@collation=phonebook"},  /*This is really de_DE_PHONEBOOK on Windows*/
    {0x10407,"de@collation=phonebook"}  /*This is really de_DE_PHONEBOOK on Windows*/
};

ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
    187 
/*
 * English. Entry order matters here: the en_AS..en_UM aliases reuse the
 * en_US host ID (0x0409) and are deliberately kept after en_US at the end.
 * NOTE(review): presumably so a host ID -> POSIX lookup (not shown in this
 * part of the file) reaches en_US first -- confirm against that lookup.
 */
static const ILcidPosixElement en[] = {
    {0x09,   "en"},
    {0x0c09, "en_AU"},
    {0x2809, "en_BZ"},
    {0x1009, "en_CA"},
    {0x0809, "en_GB"},
    {0x1809, "en_IE"},
    {0x4009, "en_IN"},
    {0x2009, "en_JM"},
    {0x4409, "en_MY"},
    {0x1409, "en_NZ"},
    {0x3409, "en_PH"},
    {0x4809, "en_SG"},
    {0x2C09, "en_TT"},
    {0x0409, "en_US"},
    {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
    {0x2409, "en_VI"},  /* Virgin Islands AKA Caribbean Islands (en_CB). */
    {0x1c09, "en_ZA"},
    {0x3009, "en_ZW"},
    {0x0409, "en_AS"},  /* Alias for en_US. Leave last. */
    {0x0409, "en_GU"},  /* Alias for en_US. Leave last. */
    {0x0409, "en_MH"},  /* Alias for en_US. Leave last. */
    {0x0409, "en_MP"},  /* Alias for en_US. Leave last. */
    {0x0409, "en_UM"}   /* Alias for en_US. Leave last. */
};

/* Single-entry table for host ID 0x7f; the same pair also appears inside en[]. */
static const ILcidPosixElement en_US_POSIX[] = {
    {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
};
    217 
/*
 * Spanish. 0x0c0a is es_ES with the modern sort; 0x040a is mapped to the
 * traditional collation keyword under two POSIX spellings.
 */
static const ILcidPosixElement es[] = {
    {0x0a,   "es"},
    {0x2c0a, "es_AR"},
    {0x400a, "es_BO"},
    {0x340a, "es_CL"},
    {0x240a, "es_CO"},
    {0x140a, "es_CR"},
    {0x1c0a, "es_DO"},
    {0x300a, "es_EC"},
    {0x0c0a, "es_ES"},      /*Modern sort.*/
    {0x100a, "es_GT"},
    {0x480a, "es_HN"},
    {0x080a, "es_MX"},
    {0x4c0a, "es_NI"},
    {0x180a, "es_PA"},
    {0x280a, "es_PE"},
    {0x500a, "es_PR"},
    {0x3c0a, "es_PY"},
    {0x440a, "es_SV"},
    {0x540a, "es_US"},
    {0x380a, "es_UY"},
    {0x200a, "es_VE"},
    {0x040a, "es_ES@collation=traditional"},
    {0x040a, "es@collation=traditional"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
    246 
/*
 * ISO-639 doesn't distinguish between Persian and Dari.
 * fa_AF therefore uses host ID 0x048c, whose low byte (0x8c) differs from
 * the 0x29 used by the other fa entries.
 */
static const ILcidPosixElement fa[] = {
    {0x29,   "fa"},
    {0x0429, "fa_IR"},  /* Persian/Farsi (Iran) */
    {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
};

/* duplicate for roundtripping: a separate table whose first entry is 0x8c */
static const ILcidPosixElement fa_AF[] = {
    {0x8c,   "fa_AF"},  /* Persian/Dari (Afghanistan) */
    {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
};

ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)

/* French: language-only entry first, then one entry per region. */
static const ILcidPosixElement fr[] = {
    {0x0c,   "fr"},
    {0x080c, "fr_BE"},
    {0x0c0c, "fr_CA"},
    {0x240c, "fr_CD"},
    {0x100c, "fr_CH"},
    {0x300c, "fr_CI"},
    {0x2c0c, "fr_CM"},
    {0x040c, "fr_FR"},
    {0x3c0c, "fr_HT"},
    {0x140c, "fr_LU"},
    {0x380c, "fr_MA"},
    {0x180c, "fr_MC"},
    {0x340c, "fr_ML"},
    {0x200c, "fr_RE"},
    {0x280c, "fr_SN"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
    283 
/*
 * This LCID is really two different locales.
 * Both "ga" and "gd" are listed against the language-only host ID 0x3c.
 */
static const ILcidPosixElement ga[] = {
    {0x3c,   "ga"},
    {0x3c,   "gd"},
    {0x083c, "ga_IE"},  /* Gaelic (Ireland) */
    {0x043c, "gd_GB"}   /* Gaelic (Scotland) */
};

ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
ILCID_POSIX_ELEMENT_ARRAY(0x0468, ha, ha_NG) /* ha_Latn_NG? */
ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
    300 
/*
 * This LCID is really four different locales.
 * All entries share the low byte 0x1a; the table mixes hr, bs and sr names,
 * and several host IDs are listed under more than one POSIX spelling.
 */
static const ILcidPosixElement hr[] = {
    {0x1a,   "hr"},
    {0x141a, "bs_Latn_BA"},  /* Bosnian, Bosnia and Herzegovina */
    {0x141a, "bs_Latn"},  /* Bosnian, Bosnia and Herzegovina */
    {0x141a, "bs_BA"},  /* Bosnian, Bosnia and Herzegovina */
    {0x141a, "bs"},     /* Bosnian */
    {0x201a, "bs_Cyrl_BA"},  /* Bosnian, Bosnia and Herzegovina */
    {0x201a, "bs_Cyrl"},  /* Bosnian, Bosnia and Herzegovina */
    {0x101a, "hr_BA"},  /* Croatian in Bosnia */
    {0x041a, "hr_HR"},  /* Croatian*/
    {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
    {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
    {0x081a, "sr_Latn"},    /* It's 0x1a or 0x081a, pick one to make the test program happy. */
    {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
    {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
    {0x0c1a, "sr_Cyrl"},    /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
    {0x0c1a, "sr"}          /* In CLDR sr is sr_Cyrl. */
};

ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
    327 
/* Italian: language-only entry first, then the regional variants. */
static const ILcidPosixElement it[] = {
    {0x10,   "it"},
    {0x0810, "it_CH"},
    {0x0410, "it_IT"}
};

/* Inuktitut: one host ID per script, each listed with and without the region. */
static const ILcidPosixElement iu[] = {
    {0x5d,   "iu"},
    {0x045d, "iu_Cans_CA"},
    {0x045d, "iu_Cans"},
    {0x085d, "iu_Latn_CA"},
    {0x085d, "iu_Latn"}
};

/* iw reuses host ID 0x040d, which is also registered under he. */
ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL)    /*Left in for compatibility*/
ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)

/* Korean: language-only entry first, then the regional variants. */
static const ILcidPosixElement ko[] = {
    {0x12,   "ko"},
    {0x0812, "ko_KP"},
    {0x0412, "ko_KR"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr,  kr_NG)

/* Kashmiri: two host IDs, one mapped without a script and one with Arab. */
static const ILcidPosixElement ks[] = {         /* We could add PK and CN too */
    {0x60,   "ks"},
    {0x0860, "ks_IN"},              /* Documentation doesn't mention script */
    {0x0460, "ks_Arab_IN"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG)   /* Kyrgyz is spoken in Kyrgyzstan */
ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT)   /* TODO: Verify the country */
ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
    374 
/* Mongolian: language-only entry first, then the regional variants. */
static const ILcidPosixElement mn[] = {
    {0x50,   "mn"},
    {0x0850, "mn_CN"},
    {0x0450, "mn_MN"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)

/* Malay: language-only entry first, then the regional variants. */
static const ILcidPosixElement ms[] = {
    {0x3e,   "ms"},
    {0x083e, "ms_BN"},   /* Brunei Darussalam*/
    {0x043e, "ms_MY"}    /* Malaysia*/
};

ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)

/* Nepali: language-only entry first, then the regional variants. */
static const ILcidPosixElement ne[] = {
    {0x61,   "ne"},
    {0x0861, "ne_IN"},   /* India*/
    {0x0461, "ne_NP"}    /* Nepal*/
};

/* Dutch: language-only entry first, then the regional variants. */
static const ILcidPosixElement nl[] = {
    {0x13,   "nl"},
    {0x0813, "nl_BE"},
    {0x0413, "nl_NL"}
};

/*
 * The "no" locale split into nb and nn.  By default in ICU, "no" is nb.
 * The array is named no, but its language-only entry (0x14) carries "nb".
 * Entry order within each host ID group is significant (see "Keep first").
 */
static const ILcidPosixElement no[] = {
    {0x14,   "nb"},     /* really nb */
    {0x0414, "nb_NO"},  /* really nb_NO. Keep first in the 414 list. */
    {0x0414, "no"},     /* really nb_NO */
    {0x0414, "no_NO"},  /* really nb_NO */
    {0x0814, "nn_NO"},  /* really nn_NO. Keep first in the 814 list.  */
    {0x0814, "nn"},     /* It's 0x14 or 0x814, pick one to make the test program happy. */
    {0x0814, "no_NO_NY"}/* really nn_NO */
};

ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA)   /* TODO: Verify the ISO-639 code */
ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
ILCID_POSIX_ELEMENT_ARRAY(0x0472, om, om_ET)    /* TODO: Verify the country */
    420 
/*
 * Declared as or_IN to get around compiler errors.
 * Written out by hand because ILCID_POSIX_ELEMENT_ARRAY would name the array "or".
 */
static const ILcidPosixElement or_IN[] = {
    {0x48,   "or"},
    {0x0448, "or_IN"},
};

/* Punjabi: language-only entry first, then the regional variants. */
static const ILcidPosixElement pa[] = {
    {0x46,   "pa"},
    {0x0446, "pa_IN"},
    {0x0846, "pa_PK"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)

/* Portuguese: language-only entry first, then the regional variants. */
static const ILcidPosixElement pt[] = {
    {0x16,   "pt"},
    {0x0416, "pt_BR"},
    {0x0816, "pt_PT"}
};

/* Quechua: language-only entry first, then the regional variants. */
static const ILcidPosixElement qu[] = {
    {0x6b,   "qu"},
    {0x046b, "qu_BO"},
    {0x086b, "qu_EC"},
    {0x0C6b, "qu_PE"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x0486, qut, qut_GT) /* qut is an ISO-639-3 code */
ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
ILCID_POSIX_ELEMENT_ARRAY(0x0418, ro, ro_RO)

/* Single-entry table pairing host ID 0 with the "root" locale name. */
static const ILcidPosixElement root[] = {
    {0x00,   "root"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x0419, ru, ru_RU)
ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)

/* Sindhi: language-only entry first, then the regional variants. */
static const ILcidPosixElement sd[] = {
    {0x59,   "sd"},
    {0x0459, "sd_IN"},
    {0x0859, "sd_PK"}
};
    467 
/*
 * Sami: besides the se entries, this table also holds the sma, smj, smn and
 * sms names, whose host IDs share the same 0x3b low byte.
 */
static const ILcidPosixElement se[] = {
    {0x3b,   "se"},
    {0x0c3b, "se_FI"},
    {0x043b, "se_NO"},
    {0x083b, "se_SE"},
    {0x183b, "sma_NO"},
    {0x1c3b, "sma_SE"},
    {0x103b, "smj_NO"},
    {0x143b, "smj_SE"},
    {0x243b, "smn_FI"},
    {0x203b, "sms_FI"},
};

ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
ILCID_POSIX_ELEMENT_ARRAY(0x0477, so, so_ET)    /* TODO: Verify the country */
ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)

/* Swedish: language-only entry first, then the regional variants. */
static const ILcidPosixElement sv[] = {
    {0x1d,   "sv"},
    {0x081d, "sv_FI"},
    {0x041d, "sv_SE"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
ILCID_POSIX_ELEMENT_ARRAY(0x0449, ta, ta_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0428, tg, tg_TJ) /* Cyrillic based by default */
ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)

/* Tigrigna: language-only entry first, then the regional variants. */
static const ILcidPosixElement ti[] = {
    {0x73,   "ti"},
    {0x0873, "ti_ER"},
    {0x0473, "ti_ET"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
ILCID_POSIX_ELEMENT_ARRAY(0x0432, tn, tn_BW)
ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
ILCID_POSIX_ELEMENT_ARRAY(0x0480, ug, ug_CN)
ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
    512 
/* Urdu: language-only entry first, then the regional variants. */
static const ILcidPosixElement ur[] = {
    {0x20,   "ur"},
    {0x0820, "ur_IN"},
    {0x0420, "ur_PK"}
};

/*
 * Uzbek: one host ID per script; the script-less uz_UZ is listed with the
 * Cyrillic host ID.
 */
static const ILcidPosixElement uz[] = {
    {0x43,   "uz"},
    {0x0843, "uz_Cyrl_UZ"},  /* Cyrillic based */
    {0x0843, "uz_Cyrl"},  /* Cyrillic based */
    {0x0843, "uz_UZ"},  /* Cyrillic based */
    {0x0443, "uz_Latn_UZ"}, /* Latin based */
    {0x0443, "uz_Latn"} /* Latin based */
};

ILCID_POSIX_ELEMENT_ARRAY(0x0433, ve, ve_ZA)    /* TODO: Verify the country */
ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)

/* Sorbian: wen_DE and hsb_DE share host ID 0x042E; dsb_DE has its own. */
static const ILcidPosixElement wen[] = {
    {0x2E,   "wen"},
    {0x042E, "wen_DE"},
    {0x042E, "hsb_DE"},
    {0x082E, "dsb_DE"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)

/*
 * Chinese: each host ID is listed with and without the script subtag.
 * Host IDs wider than 16 bits (0x2xxxx, 0x3xxxx) select alternate sort
 * orders; note that zh_Hant_TW and zh_TW each appear under both 0x0404 and
 * 0x30404, so the first (0x0404) entry is the one an exact-name search hits.
 */
static const ILcidPosixElement zh[] = {
    {0x04,   "zh"},
    {0x0804, "zh_Hans_CN"},
    {0x0804, "zh_Hans"},
    {0x0804, "zh_CN"},
    {0x0c04, "zh_Hant_HK"},
    {0x0c04, "zh_HK"},
    {0x1404, "zh_Hant_MO"},
    {0x1404, "zh_MO"},
    {0x1004, "zh_Hans_SG"},
    {0x1004, "zh_SG"},
    {0x0404, "zh_Hant_TW"},
    {0x0404, "zh_Hant"},
    {0x0404, "zh_TW"},
    {0x30404,"zh_Hant_TW"},     /* Bopomofo order */
    {0x30404,"zh_TW"},          /* Bopomofo order */
    {0x20404,"zh_Hant_TW@collation=stroke"},
    {0x20404,"zh_TW@collation=stroke"},
    {0x20804,"zh_Hans_CN@collation=stroke"},
    {0x20804,"zh_CN@collation=stroke"}
};

ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
    565 
/*
 * Master list of the per-language tables defined above: one ILcidPosixMap
 * {entry count, table} per language, built with ILCID_POSIX_MAP.
 * The trailing comment on each row gives the language and the low byte of
 * its host IDs.
 *
 * This must be static and grouped by LCID.
 */

/* LCIDs whose languages have non-existent ISO-639-2 codes */
/*
0x466   Edo
0x467   Fulfulde - Nigeria
0x486   K'iche - Guatemala   (mapped below through the ISO-639-3 code qut)
0x430   Sutu
*/
static const ILcidPosixMap gPosixIDmap[] = {
    ILCID_POSIX_MAP(af),    /*  af  Afrikaans                 0x36 */
    ILCID_POSIX_MAP(am),    /*  am  Amharic                   0x5e */
    ILCID_POSIX_MAP(ar),    /*  ar  Arabic                    0x01 */
    ILCID_POSIX_MAP(arn),   /*  arn Araucanian/Mapudungun     0x7a */
    ILCID_POSIX_MAP(as),    /*  as  Assamese                  0x4d */
    ILCID_POSIX_MAP(az),    /*  az  Azerbaijani               0x2c */
    ILCID_POSIX_MAP(ba),    /*  ba  Bashkir                   0x6d */
    ILCID_POSIX_MAP(be),    /*  be  Belarusian                0x23 */
    ILCID_POSIX_MAP(ber),   /*  ber Berber/Tamazight          0x5f */
    ILCID_POSIX_MAP(bg),    /*  bg  Bulgarian                 0x02 */
    ILCID_POSIX_MAP(bn),    /*  bn  Bengali; Bangla           0x45 */
    ILCID_POSIX_MAP(bo),    /*  bo  Tibetan                   0x51 */
    ILCID_POSIX_MAP(br),    /*  br  Breton                    0x7e */
    ILCID_POSIX_MAP(ca),    /*  ca  Catalan                   0x03 */
    ILCID_POSIX_MAP(chr),   /*  chr Cherokee                  0x5c */
    ILCID_POSIX_MAP(co),    /*  co  Corsican                  0x83 */
    ILCID_POSIX_MAP(cs_CZ), /*  cs  Czech                     0x05 */
    ILCID_POSIX_MAP(cy),    /*  cy  Welsh                     0x52 */
    ILCID_POSIX_MAP(da),    /*  da  Danish                    0x06 */
    ILCID_POSIX_MAP(de),    /*  de  German                    0x07 */
    ILCID_POSIX_MAP(dv),    /*  dv  Divehi                    0x65 */
    ILCID_POSIX_MAP(el),    /*  el  Greek                     0x08 */
    ILCID_POSIX_MAP(en),    /*  en  English                   0x09 */
    ILCID_POSIX_MAP(en_US_POSIX), /*    invariant             0x7f */
    ILCID_POSIX_MAP(es),    /*  es  Spanish                   0x0a */
    ILCID_POSIX_MAP(et),    /*  et  Estonian                  0x25 */
    ILCID_POSIX_MAP(eu),    /*  eu  Basque                    0x2d */
    ILCID_POSIX_MAP(fa),    /*  fa  Persian/Farsi             0x29 */
    ILCID_POSIX_MAP(fa_AF), /*  fa  Persian/Dari              0x8c */
    ILCID_POSIX_MAP(fi),    /*  fi  Finnish                   0x0b */
    ILCID_POSIX_MAP(fil),   /*  fil Filipino                  0x64 */
    ILCID_POSIX_MAP(fo),    /*  fo  Faroese                   0x38 */
    ILCID_POSIX_MAP(fr),    /*  fr  French                    0x0c */
    ILCID_POSIX_MAP(fy),    /*  fy  Frisian                   0x62 */
    ILCID_POSIX_MAP(ga),    /*  *   Gaelic (Ireland,Scotland) 0x3c */
    ILCID_POSIX_MAP(gl),    /*  gl  Galician                  0x56 */
    ILCID_POSIX_MAP(gn),    /*  gn  Guarani                   0x74 */
    ILCID_POSIX_MAP(gsw),   /*  gsw Alemanic/Alsatian/Swiss German 0x84 */
    ILCID_POSIX_MAP(gu),    /*  gu  Gujarati                  0x47 */
    ILCID_POSIX_MAP(ha),    /*  ha  Hausa                     0x68 */
    ILCID_POSIX_MAP(haw),   /*  haw Hawaiian                  0x75 */
    ILCID_POSIX_MAP(he),    /*  he  Hebrew (formerly iw)      0x0d */
    ILCID_POSIX_MAP(hi),    /*  hi  Hindi                     0x39 */
    ILCID_POSIX_MAP(hr),    /*  *   Croatian and others       0x1a */
    ILCID_POSIX_MAP(hu),    /*  hu  Hungarian                 0x0e */
    ILCID_POSIX_MAP(hy),    /*  hy  Armenian                  0x2b */
    ILCID_POSIX_MAP(id),    /*  id  Indonesian (formerly in)  0x21 */
    ILCID_POSIX_MAP(ig),    /*  ig  Igbo                      0x70 */
    ILCID_POSIX_MAP(ii),    /*  ii  Sichuan Yi                0x78 */
    ILCID_POSIX_MAP(is),    /*  is  Icelandic                 0x0f */
    ILCID_POSIX_MAP(it),    /*  it  Italian                   0x10 */
    ILCID_POSIX_MAP(iu),    /*  iu  Inuktitut                 0x5d */
    ILCID_POSIX_MAP(iw),    /*  iw  Hebrew                    0x0d */
    ILCID_POSIX_MAP(ja),    /*  ja  Japanese                  0x11 */
    ILCID_POSIX_MAP(ka),    /*  ka  Georgian                  0x37 */
    ILCID_POSIX_MAP(kk),    /*  kk  Kazakh                    0x3f */
    ILCID_POSIX_MAP(kl),    /*  kl  Kalaallisut               0x6f */
    ILCID_POSIX_MAP(km),    /*  km  Khmer                     0x53 */
    ILCID_POSIX_MAP(kn),    /*  kn  Kannada                   0x4b */
    ILCID_POSIX_MAP(ko),    /*  ko  Korean                    0x12 */
    ILCID_POSIX_MAP(kok),   /*  kok Konkani                   0x57 */
    ILCID_POSIX_MAP(kr),    /*  kr  Kanuri                    0x71 */
    ILCID_POSIX_MAP(ks),    /*  ks  Kashmiri                  0x60 */
    ILCID_POSIX_MAP(ky),    /*  ky  Kyrgyz                    0x40 */
    ILCID_POSIX_MAP(lb),    /*  lb  Luxembourgish             0x6e */
    ILCID_POSIX_MAP(la),    /*  la  Latin                     0x76 */
    ILCID_POSIX_MAP(lo),    /*  lo  Lao                       0x54 */
    ILCID_POSIX_MAP(lt),    /*  lt  Lithuanian                0x27 */
    ILCID_POSIX_MAP(lv),    /*  lv  Latvian, Lettish          0x26 */
    ILCID_POSIX_MAP(mi),    /*  mi  Maori                     0x81 */
    ILCID_POSIX_MAP(mk),    /*  mk  Macedonian                0x2f */
    ILCID_POSIX_MAP(ml),    /*  ml  Malayalam                 0x4c */
    ILCID_POSIX_MAP(mn),    /*  mn  Mongolian                 0x50 */
    ILCID_POSIX_MAP(mni),   /*  mni Manipuri                  0x58 */
    ILCID_POSIX_MAP(moh),   /*  moh Mohawk                    0x7c */
    ILCID_POSIX_MAP(mr),    /*  mr  Marathi                   0x4e */
    ILCID_POSIX_MAP(ms),    /*  ms  Malay                     0x3e */
    ILCID_POSIX_MAP(mt),    /*  mt  Maltese                   0x3a */
    ILCID_POSIX_MAP(my),    /*  my  Burmese                   0x55 */
/*    ILCID_POSIX_MAP(nb),    //  no  Norwegian                 0x14 */
    ILCID_POSIX_MAP(ne),    /*  ne  Nepali                    0x61 */
    ILCID_POSIX_MAP(nl),    /*  nl  Dutch                     0x13 */
/*    ILCID_POSIX_MAP(nn),    //  no  Norwegian                 0x14 */
    ILCID_POSIX_MAP(no),    /*  *   Norwegian                 0x14 */
    ILCID_POSIX_MAP(nso),   /*  nso Sotho, Northern (Sepedi dialect) 0x6c */
    ILCID_POSIX_MAP(oc),    /*  oc  Occitan                   0x82 */
    ILCID_POSIX_MAP(om),    /*  om  Oromo                     0x72 */
    ILCID_POSIX_MAP(or_IN), /*  or  Oriya                     0x48 */
    ILCID_POSIX_MAP(pa),    /*  pa  Punjabi                   0x46 */
    ILCID_POSIX_MAP(pl),    /*  pl  Polish                    0x15 */
    ILCID_POSIX_MAP(ps),    /*  ps  Pashto                    0x63 */
    ILCID_POSIX_MAP(pt),    /*  pt  Portuguese                0x16 */
    ILCID_POSIX_MAP(qu),    /*  qu  Quechua                   0x6B */
    ILCID_POSIX_MAP(qut),   /*  qut K'iche                    0x86 */
    ILCID_POSIX_MAP(rm),    /*  rm  Raeto-Romance/Romansh     0x17 */
    ILCID_POSIX_MAP(ro),    /*  ro  Romanian                  0x18 */
    ILCID_POSIX_MAP(root),  /*  root                          0x00 */
    ILCID_POSIX_MAP(ru),    /*  ru  Russian                   0x19 */
    ILCID_POSIX_MAP(rw),    /*  rw  Kinyarwanda               0x87 */
    ILCID_POSIX_MAP(sa),    /*  sa  Sanskrit                  0x4f */
    ILCID_POSIX_MAP(sah),   /*  sah Yakut                     0x85 */
    ILCID_POSIX_MAP(sd),    /*  sd  Sindhi                    0x59 */
    ILCID_POSIX_MAP(se),    /*  se  Sami                      0x3b */
/*    ILCID_POSIX_MAP(sh),    //  sh  Serbo-Croatian            0x1a */
    ILCID_POSIX_MAP(si),    /*  si  Sinhalese                 0x5b */
    ILCID_POSIX_MAP(sk),    /*  sk  Slovak                    0x1b */
    ILCID_POSIX_MAP(sl),    /*  sl  Slovenian                 0x24 */
    ILCID_POSIX_MAP(so),    /*  so  Somali                    0x77 */
    ILCID_POSIX_MAP(sq),    /*  sq  Albanian                  0x1c */
/*    ILCID_POSIX_MAP(sr),    //  sr  Serbian                   0x1a */
    ILCID_POSIX_MAP(sv),    /*  sv  Swedish                   0x1d */
    ILCID_POSIX_MAP(sw),    /*  sw  Swahili                   0x41 */
    ILCID_POSIX_MAP(syr),   /*  syr Syriac                    0x5A */
    ILCID_POSIX_MAP(ta),    /*  ta  Tamil                     0x49 */
    ILCID_POSIX_MAP(te),    /*  te  Telugu                    0x4a */
    ILCID_POSIX_MAP(tg),    /*  tg  Tajik                     0x28 */
    ILCID_POSIX_MAP(th),    /*  th  Thai                      0x1e */
    ILCID_POSIX_MAP(ti),    /*  ti  Tigrigna                  0x73 */
    ILCID_POSIX_MAP(tk),    /*  tk  Turkmen                   0x42 */
    ILCID_POSIX_MAP(tn),    /*  tn  Tswana                    0x32 */
    ILCID_POSIX_MAP(tr),    /*  tr  Turkish                   0x1f */
    ILCID_POSIX_MAP(tt),    /*  tt  Tatar                     0x44 */
    ILCID_POSIX_MAP(ug),    /*  ug  Uighur                    0x80 */
    ILCID_POSIX_MAP(uk),    /*  uk  Ukrainian                 0x22 */
    ILCID_POSIX_MAP(ur),    /*  ur  Urdu                      0x20 */
    ILCID_POSIX_MAP(uz),    /*  uz  Uzbek                     0x43 */
    ILCID_POSIX_MAP(ve),    /*  ve  Venda                     0x33 */
    ILCID_POSIX_MAP(vi),    /*  vi  Vietnamese                0x2a */
    ILCID_POSIX_MAP(wen),   /*  wen Sorbian                   0x2e */
    ILCID_POSIX_MAP(wo),    /*  wo  Wolof                     0x88 */
    ILCID_POSIX_MAP(xh),    /*  xh  Xhosa                     0x34 */
    ILCID_POSIX_MAP(yo),    /*  yo  Yoruba                    0x6a */
    ILCID_POSIX_MAP(zh),    /*  zh  Chinese                   0x04 */
    ILCID_POSIX_MAP(zu),    /*  zu  Zulu                      0x35 */
};
    711 
    712 static const uint32_t gLocaleCount = sizeof(gPosixIDmap)/sizeof(ILcidPosixMap);
    713 
    714 /**
    715  * Do not call this function. It is called by hostID.
    716  * The function is not private because this struct must stay as a C struct,
    717  * and this is an internal class.
    718  */
    719 static int32_t
    720 idCmp(const char* id1, const char* id2)
    721 {
    722     int32_t diffIdx = 0;
    723     while (*id1 == *id2 && *id1 != 0) {
    724         diffIdx++;
    725         id1++;
    726         id2++;
    727     }
    728     return diffIdx;
    729 }
    730 
    731 /**
    732  * Searches for a Windows LCID
    733  *
    734  * @param posixid the Posix style locale id.
    735  * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
    736  *               no equivalent Windows LCID.
    737  * @return the LCID
    738  */
    739 static uint32_t
    740 getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
    741 {
    742     int32_t bestIdx = 0;
    743     int32_t bestIdxDiff = 0;
    744     int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
    745     uint32_t idx;
    746 
    747     for (idx = 0; idx < this_0->numRegions; idx++ ) {
    748         int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
    749         if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
    750             if (posixIDlen == sameChars) {
    751                 /* Exact match */
    752                 return this_0->regionMaps[idx].hostID;
    753             }
    754             bestIdxDiff = sameChars;
    755             bestIdx = idx;
    756         }
    757     }
    758     /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
    759     /* We also have to make sure that sid and si and similar string subsets don't match. */
    760     if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
    761         && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
    762     {
    763         *status = U_USING_FALLBACK_WARNING;
    764         return this_0->regionMaps[bestIdx].hostID;
    765     }
    766 
    767     /*no match found */
    768     *status = U_ILLEGAL_ARGUMENT_ERROR;
    769     return this_0->regionMaps->hostID;
    770 }
    771 
    772 static const char*
    773 getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
    774 {
    775     uint32_t i;
    776     for (i = 0; i <= this_0->numRegions; i++)
    777     {
    778         if (this_0->regionMaps[i].hostID == hostID)
    779         {
    780             return this_0->regionMaps[i].posixID;
    781         }
    782     }
    783 
    784     /* If you get here, then no matching region was found,
    785        so return the language id with the wild card region. */
    786     return this_0->regionMaps[0].posixID;
    787 }
    788 
    789 /*
    790 //////////////////////////////////////
    791 //
    792 // LCID --> POSIX
    793 //
    794 /////////////////////////////////////
    795 */
    796 
    797 U_CAPI const char *
    798 uprv_convertToPosix(uint32_t hostid, UErrorCode* status)
    799 {
    800     uint16_t langID = LANGUAGE_LCID(hostid);
    801     uint32_t localeIndex;
    802 
    803     for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++)
    804     {
    805         if (langID == gPosixIDmap[localeIndex].regionMaps->hostID)
    806         {
    807             return getPosixID(&gPosixIDmap[localeIndex], hostid);
    808         }
    809     }
    810 
    811     /* no match found */
    812     *status = U_ILLEGAL_ARGUMENT_ERROR;
    813     return NULL;
    814 }
    815 
    816 /*
    817 //////////////////////////////////////
    818 //
    819 // POSIX --> LCID
    820 // This should only be called from uloc_getLCID.
    821 // The locale ID must be in canonical form.
    822 // langID is separate so that this file doesn't depend on the uloc_* API.
    823 //
    824 /////////////////////////////////////
    825 */
    826 
    827 U_CAPI uint32_t
    828 uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
    829 {
    830 
    831     uint32_t   low    = 0;
    832     uint32_t   high   = gLocaleCount;
    833     uint32_t   mid    = high;
    834     uint32_t   oldmid = 0;
    835     int32_t    compVal;
    836 
    837     uint32_t   value         = 0;
    838     uint32_t   fallbackValue = (uint32_t)-1;
    839     UErrorCode myStatus;
    840     uint32_t   idx;
    841 
    842     /* Check for incomplete id. */
    843     if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
    844         return 0;
    845     }
    846 
    847     /*Binary search for the map entry for normal cases */
    848 
    849     while (high > low)  /*binary search*/{
    850 
    851         mid = (high+low) >> 1; /*Finds median*/
    852 
    853         if (mid == oldmid)
    854             break;
    855 
    856         compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
    857         if (compVal < 0){
    858             high = mid;
    859         }
    860         else if (compVal > 0){
    861             low = mid;
    862         }
    863         else /*we found it*/{
    864             return getHostID(&gPosixIDmap[mid], posixID, status);
    865         }
    866         oldmid = mid;
    867     }
    868 
    869     /*
    870      * Sometimes we can't do a binary search on posixID because some LCIDs
    871      * go to different locales.  We hit one of those special cases.
    872      */
    873     for (idx = 0; idx < gLocaleCount; idx++ ) {
    874         myStatus = U_ZERO_ERROR;
    875         value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
    876         if (myStatus == U_ZERO_ERROR) {
    877             return value;
    878         }
    879         else if (myStatus == U_USING_FALLBACK_WARNING) {
    880             fallbackValue = value;
    881         }
    882     }
    883 
    884     if (fallbackValue != (uint32_t)-1) {
    885         *status = U_USING_FALLBACK_WARNING;
    886         return fallbackValue;
    887     }
    888 
    889     /* no match found */
    890     *status = U_ILLEGAL_ARGUMENT_ERROR;
    891     return 0;   /* return international (root) */
    892 }
    893 
    894