Home | History | Annotate | Download | only in common
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4  **********************************************************************
      5  *   Copyright (C) 1996-2016, International Business Machines
      6  *   Corporation and others.  All Rights Reserved.
      7  **********************************************************************
      8  *
      9  * Provides functionality for mapping between
     10  * LCID and Posix IDs or ICU locale to codepage
     11  *
     12  * Note: All classes and code in this file are
     13  *       intended for internal use only.
     14  *
     15  * Methods of interest:
     16  *   unsigned long convertToLCID(const char*);
     17  *   const char* convertToPosix(unsigned long);
     18  *
     19  * Kathleen Wilson, 4/30/96
     20  *
     21  *  Date        Name        Description
     22  *  3/11/97     aliu        Fixed off-by-one bug in assignment operator. Added
     23  *                          setId() method and safety check against
     24  *                          MAX_ID_LENGTH.
     25  * 04/23/99     stephen     Added C wrapper for convertToPosix.
     26  * 09/18/00     george      Removed the memory leaks.
     27  * 08/23/01     george      Convert to C
     28  */
     29 
     30 #include "locmap.h"
     31 #include "cstring.h"
     32 #include "cmemory.h"
     33 #include "unicode/uloc.h"
     34 
     35 #if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
     36 /*
     37  * TODO: It seems like we should widen this to
     38  * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
     39  * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
     40  * but those use gcc and won't have defined(_MSC_VER).
     41  * We might need to #include some Windows header and test for some version macro from there.
     42  * Or call some Windows function and see what it returns.
     43  */
     44 #define USE_WINDOWS_LCID_MAPPING_API
     45 #include <windows.h>
     46 #include <winnls.h>
     47 #endif
     48 
     49 /*
     50  * Note:
     51  * The mapping from Win32 locale ID numbers to POSIX locale strings should
     52  * be the faster one.
     53  *
     54  * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
     55  * [MS-LCID] Windows Language Code Identifier (LCID) Reference
     56  */
     57 
     58 /*
     59 ////////////////////////////////////////////////
     60 //
     61 // Internal Classes for LCID <--> POSIX Mapping
     62 //
     63 /////////////////////////////////////////////////
     64 */
     65 
     66 typedef struct ILcidPosixElement
     67 {
     68     const uint32_t hostID;
     69     const char * const posixID;
     70 } ILcidPosixElement;
     71 
     72 typedef struct ILcidPosixMap
     73 {
     74     const uint32_t numRegions;
     75     const struct ILcidPosixElement* const regionMaps;
     76 } ILcidPosixMap;
     77 
     78 
     79 /*
     80 /////////////////////////////////////////////////
     81 //
     82 // Easy macros to make the LCID <--> POSIX Mapping
     83 //
     84 /////////////////////////////////////////////////
     85 */
     86 
     87 /**
     88  * The standard one language/one country mapping for LCID.
     89  * The first element must be the language, and the following
     90  * elements are the language with the country.
     91  * @param hostID LCID in host format such as 0x044d
     92  * @param languageID posix ID of just the language such as 'de'
     93  * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
     94  */
     95 #define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
     96 static const ILcidPosixElement locmap_ ## languageID [] = { \
     97     {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
     98     {hostID, #posixID}, \
     99 };
    100 
    101 /**
    102  * Define a subtable by ID
    103  * @param id the POSIX ID, either a language or language_TERRITORY
    104  */
    105 #define ILCID_POSIX_SUBTABLE(id) \
    106 static const ILcidPosixElement locmap_ ## id [] =
    107 
    108 
    109 /**
    110  * Create the map for the posixID. This macro supposes that the language string
    111  * name is the same as the global variable name, and that the first element
    112  * in the ILcidPosixElement is just the language.
    113  * @param _posixID the full POSIX ID for this entry.
    114  */
    115 #define ILCID_POSIX_MAP(_posixID) \
    116     {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}
    117 
    118 /*
    119 ////////////////////////////////////////////
    120 //
    121 // Create the table of LCID to POSIX Mapping
    122 // None of it should be dynamically created.
    123 //
    124 // Keep static locale variables inside the function so that
    125 // it can be created properly during static init.
    126 //
    127 // Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier
    128 //       (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
    129 //
    130 //       Microsoft is moving away from LCID in favor of locale name as of Vista.  This table needs to be
    131 //       maintained for support of older Windows version.
    132 //       Update: Windows 7 (091130)
    133 //
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
//       @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
//       called from uloc_getLCID(), keywords other than collation are already removed. If we really need
//       to support other keywords in this mapping data, we must update the implementation.
    138 ////////////////////////////////////////////
    139 */
    140 
// TODO: On Windows this table would ideally list only the exceptions rather than every locale, since
// LocaleNameToLCID and LCIDToLocaleName already provide about 90% of these mappings.
    143 
    144 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
    145 
    146 ILCID_POSIX_SUBTABLE(ar) {
    147     {0x01,   "ar"},
    148     {0x3801, "ar_AE"},
    149     {0x3c01, "ar_BH"},
    150     {0x1401, "ar_DZ"},
    151     {0x0c01, "ar_EG"},
    152     {0x0801, "ar_IQ"},
    153     {0x2c01, "ar_JO"},
    154     {0x3401, "ar_KW"},
    155     {0x3001, "ar_LB"},
    156     {0x1001, "ar_LY"},
    157     {0x1801, "ar_MA"},
    158     {0x1801, "ar_MO"},
    159     {0x2001, "ar_OM"},
    160     {0x4001, "ar_QA"},
    161     {0x0401, "ar_SA"},
    162     {0x2801, "ar_SY"},
    163     {0x1c01, "ar_TN"},
    164     {0x2401, "ar_YE"}
    165 };
    166 
    167 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
    168 ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
    169 ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
    170 
    171 ILCID_POSIX_SUBTABLE(az) {
    172     {0x2c,   "az"},
    173     {0x082c, "az_Cyrl_AZ"},  /* Cyrillic based */
    174     {0x742c, "az_Cyrl"},  /* Cyrillic based */
    175     {0x042c, "az_Latn_AZ"}, /* Latin based */
    176     {0x782c, "az_Latn"}, /* Latin based */
    177     {0x042c, "az_AZ"} /* Latin based */
    178 };
    179 
    180 ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
    181 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
    182 
    183 /*ILCID_POSIX_SUBTABLE(ber) {
    184     {0x5f,   "ber"},
    185     {0x045f, "ber_Arab_DZ"},
    186     {0x045f, "ber_Arab"},
    187     {0x085f, "ber_Latn_DZ"},
    188     {0x085f, "ber_Latn"}
    189 };*/
    190 
    191 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
    192 
    193 ILCID_POSIX_SUBTABLE(bin) {
    194     {0x66, "bin"},
    195     {0x0466, "bin_NG"}
    196 };
    197 
    198 ILCID_POSIX_SUBTABLE(bn) {
    199     {0x45,   "bn"},
    200     {0x0845, "bn_BD"},
    201     {0x0445, "bn_IN"}
    202 };
    203 
    204 ILCID_POSIX_SUBTABLE(bo) {
    205     {0x51,   "bo"},
    206     {0x0851, "bo_BT"},
    207     {0x0451, "bo_CN"},
    208     {0x0c51, "dz_BT"}
    209 };
    210 
    211 ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
    212 
    213 ILCID_POSIX_SUBTABLE(ca) {
    214     {0x03,   "ca"},
    215     {0x0403, "ca_ES"},
    216     {0x0803, "ca_ES_VALENCIA"}
    217 };
    218 
    219 ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
    220 
    221 ILCID_POSIX_SUBTABLE(chr) {
    222     {0x05c,  "chr"},
    223     {0x7c5c, "chr_Cher"},
    224     {0x045c, "chr_Cher_US"},
    225     {0x045c, "chr_US"}
    226 };
    227 
    228 // ICU has chosen different names for these.
    229 ILCID_POSIX_SUBTABLE(ckb) {
    230     {0x92,   "ckb"},
    231     {0x7c92, "ckb_Arab"},
    232     {0x0492, "ckb_Arab_IQ"}
    233 };
    234 
    235 /* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
    236 ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
    237 
    238 ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
    239 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
    240 
    241 // Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
    242 ILCID_POSIX_SUBTABLE(de) {
    243     {0x07,   "de"},
    244     {0x0c07, "de_AT"},
    245     {0x0807, "de_CH"},
    246     {0x0407, "de_DE"},
    247     {0x1407, "de_LI"},
    248     {0x1007, "de_LU"},
    249     {0x10407,"de_DE@collation=phonebook"},  /*This is really de_DE_PHONEBOOK on Windows*/
    250     {0x10407,"de@collation=phonebook"}  /*This is really de_DE_PHONEBOOK on Windows*/
    251 };
    252 
    253 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
    254 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
    255 
    256 // Windows uses an empty string for 'invariant'
    257 ILCID_POSIX_SUBTABLE(en) {
    258     {0x09,   "en"},
    259     {0x0c09, "en_AU"},
    260     {0x2809, "en_BZ"},
    261     {0x1009, "en_CA"},
    262     {0x0809, "en_GB"},
    263     {0x3c09, "en_HK"},
    264     {0x3809, "en_ID"},
    265     {0x1809, "en_IE"},
    266     {0x4009, "en_IN"},
    267     {0x2009, "en_JM"},
    268     {0x4409, "en_MY"},
    269     {0x1409, "en_NZ"},
    270     {0x3409, "en_PH"},
    271     {0x4809, "en_SG"},
    272     {0x2C09, "en_TT"},
    273     {0x0409, "en_US"},
    274     {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
    275     {0x2409, "en_029"},
    276     {0x1c09, "en_ZA"},
    277     {0x3009, "en_ZW"},
    278     {0x2409, "en_VI"},  /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
    279     {0x0409, "en_AS"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
    280     {0x0409, "en_GU"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
    281     {0x0409, "en_MH"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
    282     {0x0409, "en_MP"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
    283     {0x0409, "en_UM"}   /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
    284 };
    285 
    286 ILCID_POSIX_SUBTABLE(en_US_POSIX) {
    287     {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
    288 };
    289 
    290 // Windows doesn't know POSIX or BCP47 Unicode traditional sort names
    291 ILCID_POSIX_SUBTABLE(es) {
    292     {0x0a,   "es"},
    293     {0x2c0a, "es_AR"},
    294     {0x400a, "es_BO"},
    295     {0x340a, "es_CL"},
    296     {0x240a, "es_CO"},
    297     {0x140a, "es_CR"},
    298     {0x5c0a, "es_CU"},
    299     {0x1c0a, "es_DO"},
    300     {0x300a, "es_EC"},
    301     {0x0c0a, "es_ES"},      /*Modern sort.*/
    302     {0x100a, "es_GT"},
    303     {0x480a, "es_HN"},
    304     {0x080a, "es_MX"},
    305     {0x4c0a, "es_NI"},
    306     {0x180a, "es_PA"},
    307     {0x280a, "es_PE"},
    308     {0x500a, "es_PR"},
    309     {0x3c0a, "es_PY"},
    310     {0x440a, "es_SV"},
    311     {0x540a, "es_US"},
    312     {0x380a, "es_UY"},
    313     {0x200a, "es_VE"},
    314     {0x580a, "es_419"},
    315     {0x040a, "es_ES@collation=traditional"},
    316     {0x040a, "es@collation=traditional"}        // Windows will treat this as es-ES@collation=traditional
    317 };
    318 
    319 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
    320 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
    321 
    322 /* ISO-639 doesn't distinguish between Persian and Dari.*/
    323 ILCID_POSIX_SUBTABLE(fa) {
    324     {0x29,   "fa"},
    325     {0x0429, "fa_IR"},  /* Persian/Farsi (Iran) */
    326     {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
    327 };
    328 
    329 
    330 /* duplicate for roundtripping */
    331 ILCID_POSIX_SUBTABLE(fa_AF) {
    332     {0x8c,   "fa_AF"},  /* Persian/Dari (Afghanistan) */
    333     {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
    334 };
    335 
    336 ILCID_POSIX_SUBTABLE(ff) {
    337     {0x67,   "ff"},
    338     {0x7c67, "ff_Latn"},
    339     {0x0867, "ff_Latn_SN"},
    340     {0x0467, "ff_NG"}
    341 };
    342 
    343 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
    344 ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
    345 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
    346 
    347 ILCID_POSIX_SUBTABLE(fr) {
    348     {0x0c,   "fr"},
    349     {0x080c, "fr_BE"},
    350     {0x0c0c, "fr_CA"},
    351     {0x240c, "fr_CD"},
    352     {0x240c, "fr_CG"},
    353     {0x100c, "fr_CH"},
    354     {0x300c, "fr_CI"},
    355     {0x2c0c, "fr_CM"},
    356     {0x040c, "fr_FR"},
    357     {0x3c0c, "fr_HT"},
    358     {0x140c, "fr_LU"},
    359     {0x380c, "fr_MA"},
    360     {0x180c, "fr_MC"},
    361     {0x340c, "fr_ML"},
    362     {0x200c, "fr_RE"},
    363     {0x280c, "fr_SN"},
    364     {0xe40c, "fr_015"},
    365     {0x1c0c, "fr_029"}
    366 };
    367 
    368 ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
    369 
    370 ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
    371 
    372 ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
    373     {0x3c,   "ga"},
    374     {0x083c, "ga_IE"},
    375     {0x043c, "gd_GB"}
    376 };
    377 
    378 ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
    379     {0x91,   "gd"},
    380     {0x0491, "gd_GB"}
    381 };
    382 
    383 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
    384 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
    385 ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
    386 ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
    387 
    388 ILCID_POSIX_SUBTABLE(ha) {
    389     {0x68,   "ha"},
    390     {0x7c68, "ha_Latn"},
    391     {0x0468, "ha_Latn_NG"},
    392 };
    393 
    394 ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
    395 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
    396 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
    397 
    398 /* This LCID is really four different locales.*/
    399 ILCID_POSIX_SUBTABLE(hr) {
    400     {0x1a,   "hr"},
    401     {0x141a, "bs_Latn_BA"},  /* Bosnian, Bosnia and Herzegovina */
    402     {0x681a, "bs_Latn"},  /* Bosnian, Bosnia and Herzegovina */
    403     {0x141a, "bs_BA"},  /* Bosnian, Bosnia and Herzegovina */
    404     {0x781a, "bs"},     /* Bosnian */
    405     {0x201a, "bs_Cyrl_BA"},  /* Bosnian, Bosnia and Herzegovina */
    406     {0x641a, "bs_Cyrl"},  /* Bosnian, Bosnia and Herzegovina */
    407     {0x101a, "hr_BA"},  /* Croatian in Bosnia */
    408     {0x041a, "hr_HR"},  /* Croatian*/
    409     {0x2c1a, "sr_Latn_ME"},
    410     {0x241a, "sr_Latn_RS"},
    411     {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
    412     {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
    413     {0x701a, "sr_Latn"},    /* It's 0x1a or 0x081a, pick one to make the test program happy. */
    414     {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
    415     {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
    416     {0x301a, "sr_Cyrl_ME"},
    417     {0x281a, "sr_Cyrl_RS"},
    418     {0x6c1a, "sr_Cyrl"},    /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
    419     {0x7c1a, "sr"}          /* In CLDR sr is sr_Cyrl. */
    420 };
    421 
    422 ILCID_POSIX_SUBTABLE(hsb) {
    423     {0x2E,   "hsb"},
    424     {0x042E, "hsb_DE"},
    425     {0x082E, "dsb_DE"},
    426     {0x7C2E, "dsb"},
    427 };
    428 
    429 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
    430 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
    431 
    432 ILCID_POSIX_SUBTABLE(ibb) {
    433     {0x69, "ibb"},
    434     {0x0469, "ibb_NG"}
    435 };
    436 
    437 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
    438 ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
    439 ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
    440 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
    441 
    442 ILCID_POSIX_SUBTABLE(it) {
    443     {0x10,   "it"},
    444     {0x0810, "it_CH"},
    445     {0x0410, "it_IT"}
    446 };
    447 
    448 ILCID_POSIX_SUBTABLE(iu) {
    449     {0x5d,   "iu"},
    450     {0x045d, "iu_Cans_CA"},
    451     {0x785d, "iu_Cans"},
    452     {0x085d, "iu_Latn_CA"},
    453     {0x7c5d, "iu_Latn"}
    454 };
    455 
    456 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL)    /*Left in for compatibility*/
    457 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
    458 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
    459 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
    460 ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
    461 ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
    462 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
    463 
    464 ILCID_POSIX_SUBTABLE(ko) {
    465     {0x12,   "ko"},
    466     {0x0812, "ko_KP"},
    467     {0x0412, "ko_KR"}
    468 };
    469 
    470 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
    471 ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr,  kr_NG)
    472 
    473 ILCID_POSIX_SUBTABLE(ks) {         /* We could add PK and CN too */
    474     {0x60,   "ks"},
    475     {0x0460, "ks_Arab_IN"},
    476     {0x0860, "ks_Deva_IN"}
    477 };
    478 
    479 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG)   /* Kyrgyz is spoken in Kyrgyzstan */
    480 
    481 ILCID_POSIX_SUBTABLE(la) {
    482     {0x76,   "la"},
    483     {0x0476, "la_001"},
    484     {0x0476, "la_IT"}       /*Left in for compatibility*/
    485 };
    486 
    487 ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
    488 ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
    489 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
    490 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
    491 ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
    492 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
    493 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
    494 
    495 ILCID_POSIX_SUBTABLE(mn) {
    496     {0x50,   "mn"},
    497     {0x0450, "mn_MN"},
    498     {0x7c50, "mn_Mong"},
    499     {0x0850, "mn_Mong_CN"},
    500     {0x0850, "mn_CN"},
    501     {0x7850, "mn_Cyrl"},
    502     {0x0c50, "mn_Mong_MN"}
    503 };
    504 
    505 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
    506 ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
    507 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
    508 
    509 ILCID_POSIX_SUBTABLE(ms) {
    510     {0x3e,   "ms"},
    511     {0x083e, "ms_BN"},   /* Brunei Darussalam*/
    512     {0x043e, "ms_MY"}    /* Malaysia*/
    513 };
    514 
    515 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
    516 ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
    517 
    518 ILCID_POSIX_SUBTABLE(ne) {
    519     {0x61,   "ne"},
    520     {0x0861, "ne_IN"},   /* India*/
    521     {0x0461, "ne_NP"}    /* Nepal*/
    522 };
    523 
    524 ILCID_POSIX_SUBTABLE(nl) {
    525     {0x13,   "nl"},
    526     {0x0813, "nl_BE"},
    527     {0x0413, "nl_NL"}
    528 };
    529 
    530 /* The "no" locale split into nb and nn.  By default in ICU, "no" is nb.*/
    531 // TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
    532 ILCID_POSIX_SUBTABLE(no) {
    533     {0x14,   "no"},     /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */
    534     {0x7c14, "nb"},     /* really nb */
    535     {0x0414, "nb_NO"},  /* really nb_NO. Keep first in the 414 list. */
    536     {0x0414, "no_NO"},  /* really nb_NO */
    537     {0x0814, "nn_NO"},  /* really nn_NO. Keep first in the 814 list.  */
    538     {0x7814, "nn"},     /* It's 0x14 or 0x814, pick one to make the test program happy. */
    539     {0x0814, "no_NO_NY"}/* really nn_NO */
    540 };
    541 
    542 ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA)   /* TODO: Verify the ISO-639 code */
    543 ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
    544 
    545 ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
    546     {0x72,   "om"},
    547     {0x0472, "om_ET"},
    548     {0x0472, "gaz_ET"}
    549 };
    550 
    551 /* Declared as or_IN to get around compiler errors*/
    552 ILCID_POSIX_SUBTABLE(or_IN) {
    553     {0x48,   "or"},
    554     {0x0448, "or_IN"},
    555 };
    556 
    557 ILCID_POSIX_SUBTABLE(pa) {
    558     {0x46,   "pa"},
    559     {0x0446, "pa_IN"},
    560     {0x0846, "pa_Arab_PK"},
    561     {0x0846, "pa_PK"}
    562 };
    563 
    564 ILCID_POSIX_SUBTABLE(pap) {
    565     {0x79, "pap"},
    566     {0x0479, "pap_029"},
    567     {0x0479, "pap_AN"}     /*Left in for compatibility*/
    568 };
    569 
    570 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
    571 ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
    572 
    573 ILCID_POSIX_SUBTABLE(pt) {
    574     {0x16,   "pt"},
    575     {0x0416, "pt_BR"},
    576     {0x0816, "pt_PT"}
    577 };
    578 
    579 ILCID_POSIX_SUBTABLE(qu) {
    580     {0x6b,   "qu"},
    581     {0x046b, "qu_BO"},
    582     {0x086b, "qu_EC"},
    583     {0x0C6b, "qu_PE"},
    584     {0x046b, "quz_BO"},
    585     {0x086b, "quz_EC"},
    586     {0x0C6b, "quz_PE"}
    587 };
    588 
    589 ILCID_POSIX_SUBTABLE(quc) {
    590     {0x93,   "quc"},
    591     {0x0493, "quc_CO"},
    592     /*
    593         "quc_Latn_GT" is an exceptional case. Language ID of "quc"
    594         is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
    595         under the group of "qut". "qut" is a retired ISO 639-3 language
    596         code for West Central Quiche, and merged to "quc".
    597         It looks Windows previously reserved "qut" for K'iche', but,
    598         decided to use "quc" when adding a locale for K'iche' (Guatemala).
    599 
    600         This data structure used here assumes language ID bits in
    601         LCID is unique for alphabetic language code. But this is not true
    602         for "quc_Latn_GT". If we don't have the data below, LCID look up
    603         by alphabetic locale ID (POSIX) will fail. The same entry is found
    604         under "qut" below, which is required for reverse look up.
    605     */
    606     {0x0486, "quc_Latn_GT"}
    607 };
    608 
    609 ILCID_POSIX_SUBTABLE(qut) {
    610     {0x86,   "qut"},
    611     {0x0486, "qut_GT"},
    612     /*
    613         See the note in "quc" above.
    614     */
    615     {0x0486, "quc_Latn_GT"}
    616 };
    617 
    618 ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
    619 
    620 ILCID_POSIX_SUBTABLE(ro) {
    621     {0x18,   "ro"},
    622     {0x0418, "ro_RO"},
    623     {0x0818, "ro_MD"}
    624 };
    625 
    626 // TODO: This is almost certainly 'wrong'.  0 in Windows is a synonym for LOCALE_USER_DEFAULT.
    627 // More likely this is a similar concept to the Windows 0x7f Invariant locale ""
    628 // (Except that it's not invariant in ICU)
    629 ILCID_POSIX_SUBTABLE(root) {
    630     {0x00,   "root"}
    631 };
    632 
    633 ILCID_POSIX_SUBTABLE(ru) {
    634     {0x19,   "ru"},
    635     {0x0419, "ru_RU"},
    636     {0x0819, "ru_MD"}
    637 };
    638 
    639 ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
    640 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
    641 ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
    642 
    643 ILCID_POSIX_SUBTABLE(sd) {
    644     {0x59,   "sd"},
    645     {0x0459, "sd_Deva_IN"},
    646     {0x0459, "sd_IN"},
    647     {0x0859, "sd_Arab_PK"},
    648     {0x0859, "sd_PK"},
    649     {0x7c59, "sd_Arab"}
    650 };
    651 
    652 ILCID_POSIX_SUBTABLE(se) {
    653     {0x3b,   "se"},
    654     {0x0c3b, "se_FI"},
    655     {0x043b, "se_NO"},
    656     {0x083b, "se_SE"},
    657     {0x783b, "sma"},
    658     {0x183b, "sma_NO"},
    659     {0x1c3b, "sma_SE"},
    660     {0x7c3b, "smj"},
    661     {0x703b, "smn"},
    662     {0x743b, "sms"},
    663     {0x103b, "smj_NO"},
    664     {0x143b, "smj_SE"},
    665     {0x243b, "smn_FI"},
    666     {0x203b, "sms_FI"},
    667 };
    668 
    669 ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
    670 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
    671 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
    672 
    673 ILCID_POSIX_SUBTABLE(so) {
    674     {0x77,   "so"},
    675     {0x0477, "so_SO"}
    676 };
    677 
    678 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
    679 ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
    680 
    681 ILCID_POSIX_SUBTABLE(sv) {
    682     {0x1d,   "sv"},
    683     {0x081d, "sv_FI"},
    684     {0x041d, "sv_SE"}
    685 };
    686 
    687 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
    688 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
    689 
    690 ILCID_POSIX_SUBTABLE(ta) {
    691     {0x49,   "ta"},
    692     {0x0449, "ta_IN"},
    693     {0x0849, "ta_LK"}
    694 };
    695 
    696 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
    697 
    698 /* Cyrillic based by default */
    699 ILCID_POSIX_SUBTABLE(tg) {
    700     {0x28,   "tg"},
    701     {0x7c28, "tg_Cyrl"},
    702     {0x0428, "tg_Cyrl_TJ"}
    703 };
    704 
    705 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
    706 
    707 ILCID_POSIX_SUBTABLE(ti) {
    708     {0x73,   "ti"},
    709     {0x0873, "ti_ER"},
    710     {0x0473, "ti_ET"}
    711 };
    712 
    713 ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
    714 
    715 ILCID_POSIX_SUBTABLE(tn) {
    716     {0x32,   "tn"},
    717     {0x0832, "tn_BW"},
    718     {0x0432, "tn_ZA"}
    719 };
    720 
    721 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
    722 ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
    723 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
    724 
    725 ILCID_POSIX_SUBTABLE(tzm) {
    726     {0x5f,   "tzm"},
    727     {0x7c5f, "tzm_Latn"},
    728     {0x085f, "tzm_Latn_DZ"},
    729     {0x105f, "tzm_Tfng_MA"},
    730     {0x045f, "tzm_Arab_MA"},
    731     {0x045f, "tmz"}
    732 };
    733 
    734 ILCID_POSIX_SUBTABLE(ug) {
    735     {0x80,   "ug"},
    736     {0x0480, "ug_CN"},
    737     {0x0480, "ug_Arab_CN"}
    738 };
    739 
    740 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
    741 
    742 ILCID_POSIX_SUBTABLE(ur) {
    743     {0x20,   "ur"},
    744     {0x0820, "ur_IN"},
    745     {0x0420, "ur_PK"}
    746 };
    747 
    748 ILCID_POSIX_SUBTABLE(uz) {
    749     {0x43,   "uz"},
    750     {0x0843, "uz_Cyrl_UZ"},  /* Cyrillic based */
    751     {0x7843, "uz_Cyrl"},  /* Cyrillic based */
    752     {0x0843, "uz_UZ"},  /* Cyrillic based */
    753     {0x0443, "uz_Latn_UZ"}, /* Latin based */
    754     {0x7c43, "uz_Latn"} /* Latin based */
    755 };
    756 
    757 ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
    758     {0x33,   "ve"},
    759     {0x0433, "ve_ZA"},
    760     {0x0433, "ven_ZA"}
    761 };
    762 
    763 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
    764 ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
    765 ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
    766 
    767 ILCID_POSIX_SUBTABLE(yi) {
    768     {0x003d, "yi"},
    769     {0x043d, "yi_001"}
    770 };
    771 
    772 ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
    773 
    774 // Windows & ICU tend to different names for some of these
    775 // TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
    776 ILCID_POSIX_SUBTABLE(zh) {
    777     {0x0004, "zh_Hans"},
    778     {0x7804, "zh"},
    779     {0x0804, "zh_CN"},
    780     {0x0804, "zh_Hans_CN"},
    781     {0x0c04, "zh_Hant_HK"},
    782     {0x0c04, "zh_HK"},
    783     {0x1404, "zh_Hant_MO"},
    784     {0x1404, "zh_MO"},
    785     {0x1004, "zh_Hans_SG"},
    786     {0x1004, "zh_SG"},
    787     {0x0404, "zh_Hant_TW"},
    788     {0x7c04, "zh_Hant"},
    789     {0x0404, "zh_TW"},
    790     {0x30404,"zh_Hant_TW"},     /* Bopomofo order */
    791     {0x30404,"zh_TW"},          /* Bopomofo order */
    792     {0x20004,"zh@collation=stroke"},
    793     {0x20404,"zh_Hant@collation=stroke"},
    794     {0x20404,"zh_Hant_TW@collation=stroke"},
    795     {0x20404,"zh_TW@collation=stroke"},
    796     {0x20804,"zh_Hans@collation=stroke"},
    797     {0x20804,"zh_Hans_CN@collation=stroke"},
    798     {0x20804,"zh_CN@collation=stroke"}
    799     // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
    800 };
    801 
    802 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
    803 
    804 /* This must be static and grouped by LCID. */
    805 static const ILcidPosixMap gPosixIDmap[] = {
    806     ILCID_POSIX_MAP(af),    /*  af  Afrikaans                 0x36 */
    807     ILCID_POSIX_MAP(am),    /*  am  Amharic                   0x5e */
    808     ILCID_POSIX_MAP(ar),    /*  ar  Arabic                    0x01 */
    809     ILCID_POSIX_MAP(arn),   /*  arn Araucanian/Mapudungun     0x7a */
    810     ILCID_POSIX_MAP(as),    /*  as  Assamese                  0x4d */
    811     ILCID_POSIX_MAP(az),    /*  az  Azerbaijani               0x2c */
    812     ILCID_POSIX_MAP(ba),    /*  ba  Bashkir                   0x6d */
    813     ILCID_POSIX_MAP(be),    /*  be  Belarusian                0x23 */
    814 /*    ILCID_POSIX_MAP(ber),     ber Berber/Tamazight          0x5f */
    815     ILCID_POSIX_MAP(bg),    /*  bg  Bulgarian                 0x02 */
    816     ILCID_POSIX_MAP(bin),   /*  bin Edo                       0x66 */
    817     ILCID_POSIX_MAP(bn),    /*  bn  Bengali; Bangla           0x45 */
    818     ILCID_POSIX_MAP(bo),    /*  bo  Tibetan                   0x51 */
    819     ILCID_POSIX_MAP(br),    /*  br  Breton                    0x7e */
    820     ILCID_POSIX_MAP(ca),    /*  ca  Catalan                   0x03 */
    821     ILCID_POSIX_MAP(chr),   /*  chr Cherokee                  0x5c */
    822     ILCID_POSIX_MAP(ckb),   /*  ckb Sorani (Central Kurdish)  0x92 */
    823     ILCID_POSIX_MAP(co),    /*  co  Corsican                  0x83 */
    824     ILCID_POSIX_MAP(cs),    /*  cs  Czech                     0x05 */
    825     ILCID_POSIX_MAP(cy),    /*  cy  Welsh                     0x52 */
    826     ILCID_POSIX_MAP(da),    /*  da  Danish                    0x06 */
    827     ILCID_POSIX_MAP(de),    /*  de  German                    0x07 */
    828     ILCID_POSIX_MAP(dv),    /*  dv  Divehi                    0x65 */
    829     ILCID_POSIX_MAP(el),    /*  el  Greek                     0x08 */
    830     ILCID_POSIX_MAP(en),    /*  en  English                   0x09 */
    831     ILCID_POSIX_MAP(en_US_POSIX), /*    invariant             0x7f */
    832     ILCID_POSIX_MAP(es),    /*  es  Spanish                   0x0a */
    833     ILCID_POSIX_MAP(et),    /*  et  Estonian                  0x25 */
    834     ILCID_POSIX_MAP(eu),    /*  eu  Basque                    0x2d */
    835     ILCID_POSIX_MAP(fa),    /*  fa  Persian/Farsi             0x29 */
    836     ILCID_POSIX_MAP(fa_AF), /*  fa  Persian/Dari              0x8c */
    837     ILCID_POSIX_MAP(ff),    /*  ff  Fula                      0x67 */
    838     ILCID_POSIX_MAP(fi),    /*  fi  Finnish                   0x0b */
    839     ILCID_POSIX_MAP(fil),   /*  fil Filipino                  0x64 */
    840     ILCID_POSIX_MAP(fo),    /*  fo  Faroese                   0x38 */
    841     ILCID_POSIX_MAP(fr),    /*  fr  French                    0x0c */
    842     ILCID_POSIX_MAP(fuv),   /*  fuv Fulfulde - Nigeria        0x67 */
    843     ILCID_POSIX_MAP(fy),    /*  fy  Frisian                   0x62 */
    844     ILCID_POSIX_MAP(ga),    /*  *   Gaelic (Ireland,Scotland) 0x3c */
    845     ILCID_POSIX_MAP(gd),    /*  gd  Gaelic (United Kingdom)   0x91 */
    846     ILCID_POSIX_MAP(gl),    /*  gl  Galician                  0x56 */
    847     ILCID_POSIX_MAP(gn),    /*  gn  Guarani                   0x74 */
    848     ILCID_POSIX_MAP(gsw),   /*  gsw Alemanic/Alsatian/Swiss German 0x84 */
    849     ILCID_POSIX_MAP(gu),    /*  gu  Gujarati                  0x47 */
    850     ILCID_POSIX_MAP(ha),    /*  ha  Hausa                     0x68 */
    851     ILCID_POSIX_MAP(haw),   /*  haw Hawaiian                  0x75 */
    852     ILCID_POSIX_MAP(he),    /*  he  Hebrew (formerly iw)      0x0d */
    853     ILCID_POSIX_MAP(hi),    /*  hi  Hindi                     0x39 */
    854     ILCID_POSIX_MAP(hr),    /*  *   Croatian and others       0x1a */
    855     ILCID_POSIX_MAP(hsb),   /*  hsb Upper Sorbian             0x2e */
    856     ILCID_POSIX_MAP(hu),    /*  hu  Hungarian                 0x0e */
    857     ILCID_POSIX_MAP(hy),    /*  hy  Armenian                  0x2b */
    858     ILCID_POSIX_MAP(ibb),   /*  ibb Ibibio - Nigeria          0x69 */
    859     ILCID_POSIX_MAP(id),    /*  id  Indonesian (formerly in)  0x21 */
    860     ILCID_POSIX_MAP(ig),    /*  ig  Igbo                      0x70 */
    861     ILCID_POSIX_MAP(ii),    /*  ii  Sichuan Yi                0x78 */
    862     ILCID_POSIX_MAP(is),    /*  is  Icelandic                 0x0f */
    863     ILCID_POSIX_MAP(it),    /*  it  Italian                   0x10 */
    864     ILCID_POSIX_MAP(iu),    /*  iu  Inuktitut                 0x5d */
    865     ILCID_POSIX_MAP(iw),    /*  iw  Hebrew                    0x0d */
    866     ILCID_POSIX_MAP(ja),    /*  ja  Japanese                  0x11 */
    867     ILCID_POSIX_MAP(ka),    /*  ka  Georgian                  0x37 */
    868     ILCID_POSIX_MAP(kk),    /*  kk  Kazakh                    0x3f */
    869     ILCID_POSIX_MAP(kl),    /*  kl  Kalaallisut               0x6f */
    870     ILCID_POSIX_MAP(km),    /*  km  Khmer                     0x53 */
    871     ILCID_POSIX_MAP(kn),    /*  kn  Kannada                   0x4b */
    872     ILCID_POSIX_MAP(ko),    /*  ko  Korean                    0x12 */
    873     ILCID_POSIX_MAP(kok),   /*  kok Konkani                   0x57 */
    874     ILCID_POSIX_MAP(kr),    /*  kr  Kanuri                    0x71 */
    875     ILCID_POSIX_MAP(ks),    /*  ks  Kashmiri                  0x60 */
    876     ILCID_POSIX_MAP(ky),    /*  ky  Kyrgyz                    0x40 */
    877     ILCID_POSIX_MAP(lb),    /*  lb  Luxembourgish             0x6e */
    878     ILCID_POSIX_MAP(la),    /*  la  Latin                     0x76 */
    879     ILCID_POSIX_MAP(lo),    /*  lo  Lao                       0x54 */
    880     ILCID_POSIX_MAP(lt),    /*  lt  Lithuanian                0x27 */
    881     ILCID_POSIX_MAP(lv),    /*  lv  Latvian, Lettish          0x26 */
    882     ILCID_POSIX_MAP(mi),    /*  mi  Maori                     0x81 */
    883     ILCID_POSIX_MAP(mk),    /*  mk  Macedonian                0x2f */
    884     ILCID_POSIX_MAP(ml),    /*  ml  Malayalam                 0x4c */
    885     ILCID_POSIX_MAP(mn),    /*  mn  Mongolian                 0x50 */
    886     ILCID_POSIX_MAP(mni),   /*  mni Manipuri                  0x58 */
    887     ILCID_POSIX_MAP(moh),   /*  moh Mohawk                    0x7c */
    888     ILCID_POSIX_MAP(mr),    /*  mr  Marathi                   0x4e */
    889     ILCID_POSIX_MAP(ms),    /*  ms  Malay                     0x3e */
    890     ILCID_POSIX_MAP(mt),    /*  mt  Maltese                   0x3a */
    891     ILCID_POSIX_MAP(my),    /*  my  Burmese                   0x55 */
    892 /*    ILCID_POSIX_MAP(nb),    //  no  Norwegian                 0x14 */
    893     ILCID_POSIX_MAP(ne),    /*  ne  Nepali                    0x61 */
    894     ILCID_POSIX_MAP(nl),    /*  nl  Dutch                     0x13 */
    895 /*    ILCID_POSIX_MAP(nn),    //  no  Norwegian                 0x14 */
    896     ILCID_POSIX_MAP(no),    /*  *   Norwegian                 0x14 */
    897     ILCID_POSIX_MAP(nso),   /*  nso Sotho, Northern (Sepedi dialect) 0x6c */
    898     ILCID_POSIX_MAP(oc),    /*  oc  Occitan                   0x82 */
    899     ILCID_POSIX_MAP(om),    /*  om  Oromo                     0x72 */
    900     ILCID_POSIX_MAP(or_IN), /*  or  Oriya                     0x48 */
    901     ILCID_POSIX_MAP(pa),    /*  pa  Punjabi                   0x46 */
    902     ILCID_POSIX_MAP(pap),   /*  pap Papiamentu                0x79 */
    903     ILCID_POSIX_MAP(pl),    /*  pl  Polish                    0x15 */
    904     ILCID_POSIX_MAP(ps),    /*  ps  Pashto                    0x63 */
    905     ILCID_POSIX_MAP(pt),    /*  pt  Portuguese                0x16 */
    906     ILCID_POSIX_MAP(qu),    /*  qu  Quechua                   0x6B */
    907     ILCID_POSIX_MAP(quc),   /*  quc K'iche                    0x93 */
    908     ILCID_POSIX_MAP(qut),   /*  qut K'iche                    0x86 */
    909     ILCID_POSIX_MAP(rm),    /*  rm  Raeto-Romance/Romansh     0x17 */
    910     ILCID_POSIX_MAP(ro),    /*  ro  Romanian                  0x18 */
    911     ILCID_POSIX_MAP(root),  /*  root                          0x00 */
    912     ILCID_POSIX_MAP(ru),    /*  ru  Russian                   0x19 */
    913     ILCID_POSIX_MAP(rw),    /*  rw  Kinyarwanda               0x87 */
    914     ILCID_POSIX_MAP(sa),    /*  sa  Sanskrit                  0x4f */
    915     ILCID_POSIX_MAP(sah),   /*  sah Yakut                     0x85 */
    916     ILCID_POSIX_MAP(sd),    /*  sd  Sindhi                    0x59 */
    917     ILCID_POSIX_MAP(se),    /*  se  Sami                      0x3b */
    918 /*    ILCID_POSIX_MAP(sh),    //  sh  Serbo-Croatian            0x1a */
    919     ILCID_POSIX_MAP(si),    /*  si  Sinhalese                 0x5b */
    920     ILCID_POSIX_MAP(sk),    /*  sk  Slovak                    0x1b */
    921     ILCID_POSIX_MAP(sl),    /*  sl  Slovenian                 0x24 */
    922     ILCID_POSIX_MAP(so),    /*  so  Somali                    0x77 */
    923     ILCID_POSIX_MAP(sq),    /*  sq  Albanian                  0x1c */
    924 /*    ILCID_POSIX_MAP(sr),    //  sr  Serbian                   0x1a */
    925     ILCID_POSIX_MAP(st),    /*  st  Sutu                      0x30 */
    926     ILCID_POSIX_MAP(sv),    /*  sv  Swedish                   0x1d */
    927     ILCID_POSIX_MAP(sw),    /*  sw  Swahili                   0x41 */
    928     ILCID_POSIX_MAP(syr),   /*  syr Syriac                    0x5A */
    929     ILCID_POSIX_MAP(ta),    /*  ta  Tamil                     0x49 */
    930     ILCID_POSIX_MAP(te),    /*  te  Telugu                    0x4a */
    931     ILCID_POSIX_MAP(tg),    /*  tg  Tajik                     0x28 */
    932     ILCID_POSIX_MAP(th),    /*  th  Thai                      0x1e */
    933     ILCID_POSIX_MAP(ti),    /*  ti  Tigrigna                  0x73 */
    934     ILCID_POSIX_MAP(tk),    /*  tk  Turkmen                   0x42 */
    935     ILCID_POSIX_MAP(tn),    /*  tn  Tswana                    0x32 */
    936     ILCID_POSIX_MAP(tr),    /*  tr  Turkish                   0x1f */
    937     ILCID_POSIX_MAP(ts),    /*  ts  Tsonga                    0x31 */
    938     ILCID_POSIX_MAP(tt),    /*  tt  Tatar                     0x44 */
    939     ILCID_POSIX_MAP(tzm),   /*  tzm Tamazight                 0x5f */
    940     ILCID_POSIX_MAP(ug),    /*  ug  Uighur                    0x80 */
    941     ILCID_POSIX_MAP(uk),    /*  uk  Ukrainian                 0x22 */
    942     ILCID_POSIX_MAP(ur),    /*  ur  Urdu                      0x20 */
    943     ILCID_POSIX_MAP(uz),    /*  uz  Uzbek                     0x43 */
    944     ILCID_POSIX_MAP(ve),    /*  ve  Venda                     0x33 */
    945     ILCID_POSIX_MAP(vi),    /*  vi  Vietnamese                0x2a */
    946     ILCID_POSIX_MAP(wo),    /*  wo  Wolof                     0x88 */
    947     ILCID_POSIX_MAP(xh),    /*  xh  Xhosa                     0x34 */
    948     ILCID_POSIX_MAP(yi),    /*  yi  Yiddish                   0x3d */
    949     ILCID_POSIX_MAP(yo),    /*  yo  Yoruba                    0x6a */
    950     ILCID_POSIX_MAP(zh),    /*  zh  Chinese                   0x04 */
    951     ILCID_POSIX_MAP(zu),    /*  zu  Zulu                      0x35 */
    952 };
    953 
    954 static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
    955 
    956 /**
    957  * Do not call this function. It is called by hostID.
    958  * The function is not private because this struct must stay as a C struct,
    959  * and this is an internal class.
    960  */
    961 static int32_t
    962 idCmp(const char* id1, const char* id2)
    963 {
    964     int32_t diffIdx = 0;
    965     while (*id1 == *id2 && *id1 != 0) {
    966         diffIdx++;
    967         id1++;
    968         id2++;
    969     }
    970     return diffIdx;
    971 }
    972 
    973 /**
    974  * Searches for a Windows LCID
    975  *
    976  * @param posixid the Posix style locale id.
    977  * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
    978  *               no equivalent Windows LCID.
    979  * @return the LCID
    980  */
    981 static uint32_t
    982 getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
    983 {
    984     int32_t bestIdx = 0;
    985     int32_t bestIdxDiff = 0;
    986     int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
    987     uint32_t idx;
    988 
    989     for (idx = 0; idx < this_0->numRegions; idx++ ) {
    990         int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
    991         if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
    992             if (posixIDlen == sameChars) {
    993                 /* Exact match */
    994                 return this_0->regionMaps[idx].hostID;
    995             }
    996             bestIdxDiff = sameChars;
    997             bestIdx = idx;
    998         }
    999     }
   1000     /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
   1001     /* We also have to make sure that sid and si and similar string subsets don't match. */
   1002     if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
   1003         && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
   1004     {
   1005         *status = U_USING_FALLBACK_WARNING;
   1006         return this_0->regionMaps[bestIdx].hostID;
   1007     }
   1008 
   1009     /*no match found */
   1010     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1011     return this_0->regionMaps->hostID;
   1012 }
   1013 
   1014 static const char*
   1015 getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
   1016 {
   1017     uint32_t i;
   1018     for (i = 0; i <= this_0->numRegions; i++)
   1019     {
   1020         if (this_0->regionMaps[i].hostID == hostID)
   1021         {
   1022             return this_0->regionMaps[i].posixID;
   1023         }
   1024     }
   1025 
   1026     /* If you get here, then no matching region was found,
   1027        so return the language id with the wild card region. */
   1028     return this_0->regionMaps[0].posixID;
   1029 }
   1030 
   1031 /*
   1032 //////////////////////////////////////
   1033 //
   1034 // LCID --> POSIX
   1035 //
   1036 /////////////////////////////////////
   1037 */
   1038 #ifdef USE_WINDOWS_LCID_MAPPING_API
   1039 /*
   1040  * Various language tags needs to be changed:
   1041  * quz -> qu
   1042  * prs -> fa
   1043  */
   1044 #define FIX_LANGUAGE_ID_TAG(buffer, len) \
   1045     if (len >= 3) { \
   1046         if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {\
   1047             buffer[2] = 0; \
   1048             uprv_strcat(buffer, buffer+3); \
   1049         } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {\
   1050             buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0; \
   1051             uprv_strcat(buffer, buffer+3); \
   1052         } \
   1053     }
   1054 
   1055 #endif
   1056 U_CAPI int32_t
   1057 uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
   1058 {
   1059     uint16_t langID;
   1060     uint32_t localeIndex;
   1061     UBool bLookup = TRUE;
   1062     const char *pPosixID = NULL;
   1063 
   1064 #ifdef USE_WINDOWS_LCID_MAPPING_API
   1065     char locName[LOCALE_NAME_MAX_LENGTH] = {};      // ICU name can't be longer than Windows name
   1066 
   1067     // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
   1068     // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
   1069     // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
   1070     // use the Windows API to resolve locale ID for this specific case.
   1071     if ((hostid & 0x3FF) != 0x92) {
   1072         int32_t tmpLen = 0;
   1073         UChar windowsLocaleName[LOCALE_NAME_MAX_LENGTH];  // ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH
   1074 
   1075         // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
   1076         tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
   1077         if (tmpLen > 1) {
   1078             int32_t i = 0;
   1079             // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
   1080             bLookup = FALSE;
   1081             for (i = 0; i < UPRV_LENGTHOF(locName); i++)
   1082             {
   1083                 locName[i] = (char)(windowsLocaleName[i]);
   1084 
   1085                 // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
   1086                 // In such cases, we need special mapping data found in the hardcoded table
   1087                 // in this source file.
   1088                 if (windowsLocaleName[i] == L'_')
   1089                 {
   1090                     // Keep the base locale, without variant
   1091                     // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
   1092                     locName[i] = '\0';
   1093                     tmpLen = i;
   1094                     bLookup = TRUE;
   1095                     break;
   1096                 }
   1097                 else if (windowsLocaleName[i] == L'-')
   1098                 {
   1099                     // Windows names use -, ICU uses _
   1100                     locName[i] = '_';
   1101                 }
   1102                 else if (windowsLocaleName[i] == L'\0')
   1103                 {
   1104                     // No point in doing more work than necessary
   1105                     break;
   1106                 }
   1107             }
   1108             // TODO: Need to understand this better, why isn't it an alias?
   1109             FIX_LANGUAGE_ID_TAG(locName, tmpLen);
   1110             pPosixID = locName;
   1111         }
   1112     }
   1113 #endif // USE_WINDOWS_LCID_MAPPING_API
   1114 
   1115     if (bLookup) {
   1116         const char *pCandidate = NULL;
   1117         langID = LANGUAGE_LCID(hostid);
   1118 
   1119         for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
   1120             if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
   1121                 pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
   1122                 break;
   1123             }
   1124         }
   1125 
   1126         /* On Windows, when locale name has a variant, we still look up the hardcoded table.
   1127            If a match in the hardcoded table is longer than the Windows locale name without
   1128            variant, we use the one as the result */
   1129         if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
   1130             pPosixID = pCandidate;
   1131         }
   1132     }
   1133 
   1134     if (pPosixID) {
   1135         int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
   1136         int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
   1137         uprv_memcpy(posixID, pPosixID, copyLen);
   1138         if (resLen < posixIDCapacity) {
   1139             posixID[resLen] = 0;
   1140             if (*status == U_STRING_NOT_TERMINATED_WARNING) {
   1141                 *status = U_ZERO_ERROR;
   1142             }
   1143         } else if (resLen == posixIDCapacity) {
   1144             *status = U_STRING_NOT_TERMINATED_WARNING;
   1145         } else {
   1146             *status = U_BUFFER_OVERFLOW_ERROR;
   1147         }
   1148         return resLen;
   1149     }
   1150 
   1151     /* no match found */
   1152     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1153     return -1;
   1154 }
   1155 
   1156 /*
   1157 //////////////////////////////////////
   1158 //
   1159 // POSIX --> LCID
   1160 // This should only be called from uloc_getLCID.
   1161 // The locale ID must be in canonical form.
   1162 //
   1163 /////////////////////////////////////
   1164 */
   1165 U_CAPI uint32_t
   1166 uprv_convertToLCIDPlatform(const char* localeID)
   1167 {
   1168     // The purpose of this function is to leverage native platform name->lcid
   1169     // conversion functionality when available.
   1170 #ifdef USE_WINDOWS_LCID_MAPPING_API
   1171     DWORD nameLCIDFlags = 0;
   1172     UErrorCode myStatus = U_ZERO_ERROR;
   1173 
   1174     // First check for a Windows name->LCID match, fall through to catch
   1175     // ICU special cases, but Windows may know it already.
   1176 #if LOCALE_ALLOW_NEUTRAL_NAMES
   1177     nameLCIDFlags = LOCALE_ALLOW_NEUTRAL_NAMES;
   1178 #endif /* LOCALE_ALLOW_NEUTRAL_NAMES */
   1179 
   1180     int32_t len;
   1181     char collVal[ULOC_KEYWORDS_CAPACITY] = {};
   1182     char baseName[ULOC_FULLNAME_CAPACITY] = {};
   1183     const char * mylocaleID = localeID;
   1184 
   1185     // Check any for keywords.
   1186     if (uprv_strchr(localeID, '@'))
   1187     {
   1188         len = uloc_getKeywordValue(localeID, "collation", collVal, UPRV_LENGTHOF(collVal) - 1, &myStatus);
   1189         if (U_SUCCESS(myStatus) && len > 0)
   1190         {
   1191             // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
   1192             return 0;
   1193         }
   1194         else
   1195         {
   1196             // If the locale ID contains keywords other than collation, just use the base name.
   1197             len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, &myStatus);
   1198 
   1199             if (U_SUCCESS(myStatus) && len > 0)
   1200             {
   1201                 baseName[len] = 0;
   1202                 mylocaleID = baseName;
   1203             }
   1204         }
   1205     }
   1206 
   1207     char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
   1208     // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
   1209     (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus);
   1210 
   1211     if (U_SUCCESS(myStatus))
   1212     {
   1213         // Need it to be UTF-16, not 8-bit
   1214         wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
   1215         int32_t i;
   1216         for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
   1217         {
   1218             if (asciiBCP47Tag[i] == '\0')
   1219             {
   1220                 break;
   1221             }
   1222             else
   1223             {
   1224                 // Copy the character
   1225                 bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);
   1226             }
   1227         }
   1228 
   1229         if (i < (UPRV_LENGTHOF(bcp47Tag) - 1))
   1230         {
   1231             // Ensure it's null terminated
   1232             bcp47Tag[i] = L'\0';
   1233             LCID lcid = LocaleNameToLCID(bcp47Tag, nameLCIDFlags);
   1234             if (lcid > 0)
   1235             {
   1236                 // Found LCID from windows, return that one, unless its completely ambiguous
   1237                 // LOCALE_USER_DEFAULT and transients are OK because they will round trip
   1238                 // for this process.
   1239                 if (lcid != LOCALE_CUSTOM_UNSPECIFIED)
   1240                 {
   1241                     return lcid;
   1242                 }
   1243             }
   1244         }
   1245     }
   1246 #else
   1247     (void)localeID; // Suppress unused variable warning.
   1248 #endif /* USE_WINDOWS_LCID_MAPPING_API */
   1249 
   1250     // No found, or not implemented on platforms without native name->lcid conversion
   1251     return 0;
   1252 }
   1253 
   1254 U_CAPI uint32_t
   1255 uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
   1256 {
   1257     // This function does the table lookup when native platform name->lcid conversion isn't available,
   1258     // or for locales that don't follow patterns the platform expects.
   1259     uint32_t   low    = 0;
   1260     uint32_t   high   = gLocaleCount;
   1261     uint32_t   mid;
   1262     uint32_t   oldmid = 0;
   1263     int32_t    compVal;
   1264 
   1265     uint32_t   value         = 0;
   1266     uint32_t   fallbackValue = (uint32_t)-1;
   1267     UErrorCode myStatus;
   1268     uint32_t   idx;
   1269 
   1270     /* Check for incomplete id. */
   1271     if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
   1272         return 0;
   1273     }
   1274 
   1275     /*Binary search for the map entry for normal cases */
   1276 
   1277     while (high > low)  /*binary search*/{
   1278 
   1279         mid = (high+low) >> 1; /*Finds median*/
   1280 
   1281         if (mid == oldmid)
   1282             break;
   1283 
   1284         compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
   1285         if (compVal < 0){
   1286             high = mid;
   1287         }
   1288         else if (compVal > 0){
   1289             low = mid;
   1290         }
   1291         else /*we found it*/{
   1292             return getHostID(&gPosixIDmap[mid], posixID, status);
   1293         }
   1294         oldmid = mid;
   1295     }
   1296 
   1297     /*
   1298      * Sometimes we can't do a binary search on posixID because some LCIDs
   1299      * go to different locales.  We hit one of those special cases.
   1300      */
   1301     for (idx = 0; idx < gLocaleCount; idx++ ) {
   1302         myStatus = U_ZERO_ERROR;
   1303         value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
   1304         if (myStatus == U_ZERO_ERROR) {
   1305             return value;
   1306         }
   1307         else if (myStatus == U_USING_FALLBACK_WARNING) {
   1308             fallbackValue = value;
   1309         }
   1310     }
   1311 
   1312     if (fallbackValue != (uint32_t)-1) {
   1313         *status = U_USING_FALLBACK_WARNING;
   1314         return fallbackValue;
   1315     }
   1316 
   1317     /* no match found */
   1318     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1319     return 0;   /* return international (root) */
   1320 }
   1321