Home | History | Annotate | Download | only in common
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2009-2015, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 */
      9 
     10 #include "unicode/utypes.h"
     11 #include "unicode/ures.h"
     12 #include "unicode/putil.h"
     13 #include "unicode/uloc.h"
     14 #include "ustr_imp.h"
     15 #include "charstr.h"
     16 #include "cmemory.h"
     17 #include "cstring.h"
     18 #include "putilimp.h"
     19 #include "uinvchar.h"
     20 #include "ulocimp.h"
     21 #include "uvector.h"
     22 #include "uassert.h"
     23 
     24 
/*
 * struct holding a single variant.
 * Entries form a singly linked list chained through `next`;
 * a NULL `next` marks the last entry.
 */
typedef struct VariantListEntry {
    const char              *variant;   /* variant subtag text; presumably not owned by the entry -- TODO confirm */
    struct VariantListEntry *next;      /* following entry, or NULL at the tail */
} VariantListEntry;
     30 
/*
 * struct holding a single attribute value.
 * Entries form a singly linked list chained through `next`;
 * a NULL `next` marks the last entry.
 */
typedef struct AttributeListEntry {
    const char              *attribute;   /* attribute text; presumably not owned by the entry -- TODO confirm */
    struct AttributeListEntry *next;      /* following entry, or NULL at the tail */
} AttributeListEntry;
     36 
/*
 * struct holding a single extension as a key/value pair.
 * Entries form a singly linked list chained through `next`;
 * a NULL `next` marks the last entry.
 */
typedef struct ExtensionListEntry {
    const char                  *key;     /* extension key; the list is ordered and de-duplicated on this field */
    const char                  *value;   /* value associated with the key */
    struct ExtensionListEntry   *next;    /* following entry, or NULL at the tail */
} ExtensionListEntry;
     43 
/* Number of extlang slots kept in a ULanguageTag. */
#define MAXEXTLANG 3

/*
 * Parsed representation of a language tag.
 * _initializeULanguageTag() sets the string fields to the empty string,
 * the extlang slots to NULL, and both lists to NULL.
 */
typedef struct ULanguageTag {
    char                *buf;   /* holding parsed subtags */
    const char          *language;              /* language subtag */
    const char          *extlang[MAXEXTLANG];   /* extended language subtags; unused slots are NULL */
    const char          *script;                /* script subtag */
    const char          *region;                /* region subtag */
    VariantListEntry    *variants;              /* head of the variant list, or NULL */
    ExtensionListEntry  *extensions;            /* head of the extension list, or NULL */
    const char          *privateuse;            /* private use part */
    const char          *grandfathered;         /* grandfathered tag, if any */
} ULanguageTag;
     56 
/* Characters with a special role in a BCP 47 language tag. */
#define MINLEN 2            /* NOTE(review): presumably the shortest accepted tag length -- confirm at the use site */
#define SEP '-'             /* separator between subtags */
#define PRIVATEUSE 'x'      /* singleton reserved for private use */
#define LDMLEXT 'u'         /* singleton used for the Unicode locale (LDML) extension */

/* Separators as they appear in an ICU locale ID (e.g. "en_US@key=type;key2=type2"). */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/* Character classification helpers; note that ISNUMERIC evaluates its argument twice. */
#define ISALPHA(c) uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')

/* Fixed strings shared by the conversion code in this file. */
static const char EMPTY[] = "";
static const char LANG_UND[] = "und";   /* written out when no usable language is available */
static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";
static const char POSIX_KEY[] = "va";
static const char POSIX_VALUE[] = "posix";
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";

/* Length of LANG_UND, excluding the terminating NUL. */
#define LANG_UND_LEN 3
     81 
     82 /*
     83  Updated on 2018-09-12 from
     84  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
     85 
 This table has 2 parts. The first part, for grandfathered tags, is generated by
 the following script from the IANA language tag registry.
     88 
     89  curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
     90  egrep -A 7 'Type: grandfathered' | \
     91  egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
     92  awk -n '/Tag/ {printf("    \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
     93  tr 'A-Z' 'a-z'
     94 
     95 
 The 2nd part is made of five ICU-specific entries. They are kept for
 backward compatibility for now, even though the registry gives them no
 preferred value. They may have to be removed for strict BCP 47 compliance.
     99 
    100 */
/*
 * Flat array of string pairs: each even index holds a grandfathered tag
 * (lower case) and the following odd index holds its replacement.
 */
static const char* const GRANDFATHERED[] = {
/*  grandfathered   preferred */
    "art-lojban",   "jbo",
    "en-gb-oed",    "en-gb-oxendict",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",

    // Grandfathered tags with no preferred value in the IANA
    // registry. Kept for now for backward compatibility
    // because ICU has mapped them this way.
    "cel-gaulish",  "xtg-x-cel-gaulish",
    "i-default",    "en-x-i-default",
    "i-enochian",   "und-x-i-enochian",
    "i-mingo",      "see-x-i-mingo",
    "zh-min",       "nan-x-zh-min",
};
    134 
    135 /*
    136  Updated on 2018-09-12 from
    137  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
    138 
 The table lists redundant tags that have a preferred value in the IANA language tag registry.
    140  It's generated with the following command:
    141 
    142  curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
    143  grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
    144  awk -n '/Tag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
    145  tr 'A-Z' 'a-z'
    146 
 In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
 the variant tag 'hepburn-heploc' has the preferred subtag 'alalc97'.
    149 */
    150 
// Flat array of string pairs: each even index holds a redundant tag
// (lower case) and the following odd index holds its preferred value.
static const char* const REDUNDANT[] = {
//  redundant       preferred
    "sgn-br",       "bzs",
    "sgn-co",       "csn",
    "sgn-de",       "gsg",
    "sgn-dk",       "dsl",
    "sgn-es",       "ssp",
    "sgn-fr",       "fsl",
    "sgn-gb",       "bfi",
    "sgn-gr",       "gss",
    "sgn-ie",       "isg",
    "sgn-it",       "ise",
    "sgn-jp",       "jsl",
    "sgn-mx",       "mfs",
    "sgn-ni",       "ncs",
    "sgn-nl",       "dse",
    "sgn-no",       "nsl",
    "sgn-pt",       "psr",
    "sgn-se",       "swl",
    "sgn-us",       "ase",
    "sgn-za",       "sfs",
    "zh-cmn",       "cmn",
    "zh-cmn-hans",  "cmn-hans",
    "zh-cmn-hant",  "cmn-hant",
    "zh-gan",       "gan",
    "zh-wuu",       "wuu",
    "zh-yue",       "yue",

    // variant tag with preferred value
    "ja-latn-hepburn-heploc", "ja-latn-alalc97",
};
    182 
    183 /*
    184   Updated on 2018-09-12 from
    185   https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
    186 
    187   grep 'Type: language' -A 7 language-subtag-registry  | egrep 'Subtag|Prefe' | \
    188   grep -B1 'Preferred' | grep -v '^--' | \
    189   awk -n '/Subtag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
    190 
    191   Make sure that 2-letter language subtags come before 3-letter subtags.
    192 */
/*
 * Flat array of pairs: each even index holds a deprecated language subtag
 * and the following odd index holds its replacement. Every entry is at most
 * three letters plus the terminating NUL, hence the [4] element size.
 * The lookup in _appendLanguageToLanguageTag() stops at the first entry that
 * is longer than the input, so all 2-letter entries must stay ahead of the
 * 3-letter ones.
 */
static const char DEPRECATEDLANGS[][4] = {
/*  deprecated  new */
    "in",       "id",
    "iw",       "he",
    "ji",       "yi",
    "jw",       "jv",
    "mo",       "ro",
    "aam",       "aas",
    "adp",       "dz",
    "aue",       "ktz",
    "ayx",       "nun",
    "bgm",       "bcg",
    "bjd",       "drl",
    "ccq",       "rki",
    "cjr",       "mom",
    "cka",       "cmr",
    "cmk",       "xch",
    "coy",       "pij",
    "cqu",       "quh",
    "drh",       "khk",
    "drw",       "prs",
    "gav",       "dev",
    "gfx",       "vaj",
    "ggn",       "gvr",
    "gti",       "nyc",
    "guv",       "duz",
    "hrr",       "jal",
    "ibi",       "opa",
    "ilw",       "gal",
    "jeg",       "oyb",
    "kgc",       "tdf",
    "kgh",       "kml",
    "koj",       "kwv",
    "krm",       "bmf",
    "ktr",       "dtp",
    "kvs",       "gdj",
    "kwq",       "yam",
    "kxe",       "tvd",
    "kzj",       "dtp",
    "kzt",       "dtp",
    "lii",       "raq",
    "lmm",       "rmx",
    "meg",       "cir",
    "mst",       "mry",
    "mwj",       "vaj",
    "myt",       "mry",
    "nad",       "xny",
    "ncp",       "kdz",
    "nnx",       "ngv",
    "nts",       "pij",
    "oun",       "vaj",
    "pcr",       "adx",
    "pmc",       "huw",
    "pmu",       "phr",
    "ppa",       "bfy",
    "ppr",       "lcq",
    "pry",       "prt",
    "puz",       "pub",
    "sca",       "hle",
    "skk",       "oyb",
    "tdu",       "dtp",
    "thc",       "tpo",
    "thx",       "oyb",
    "tie",       "ras",
    "tkk",       "twm",
    "tlw",       "weo",
    "tmp",       "tyj",
    "tne",       "kak",
    "tnf",       "prs",
    "tsf",       "taj",
    "uok",       "ema",
    "xba",       "cax",
    "xia",       "acn",
    "xkh",       "waw",
    "xsj",       "suj",
    "ybd",       "rki",
    "yma",       "lrr",
    "ymt",       "mtm",
    "yos",       "zom",
    "yuu",       "yug",
};
    274 
    275 /*
    276   Updated on 2018-04-24 from
    277 
    278   curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
    279   grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
    280   grep -B1 'Preferred' | \
    281   awk -n '/Subtag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
    282 */
/*
 * Flat array of pairs: each even index holds a deprecated region subtag
 * and the following odd index holds its replacement. Every entry is two
 * letters plus the terminating NUL, hence the [3] element size.
 */
static const char DEPRECATEDREGIONS[][3] = {
/*  deprecated  new */
    "BU",       "MM",
    "DD",       "DE",
    "FX",       "FR",
    "TP",       "TL",
    "YD",       "YE",
    "ZR",       "CD",
};
    292 
    293 /*
    294 * -------------------------------------------------
    295 *
    296 * These ultag_ functions may be exposed as APIs later
    297 *
    298 * -------------------------------------------------
    299 */
    300 
/*
 * Forward declarations of the file-local ultag_ functions.
 * The prototypes wrapped in "#if 0" are compiled out.
 */

/* Parses tag (tagLen characters) into a ULanguageTag; reports through parsedLen and status. */
static ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);

/* Releases a ULanguageTag obtained from ultag_parse(). */
static void
ultag_close(ULanguageTag* langtag);

/* Accessors for the individual parts of a parsed tag. */
static const char*
ultag_getLanguage(const ULanguageTag* langtag);

#if 0
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag);
#endif

static const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtlangSize(const ULanguageTag* langtag);

static const char*
ultag_getScript(const ULanguageTag* langtag);

static const char*
ultag_getRegion(const ULanguageTag* langtag);

static const char*
ultag_getVariant(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getVariantsSize(const ULanguageTag* langtag);

static const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);

static const char*
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtensionsSize(const ULanguageTag* langtag);

static const char*
ultag_getPrivateUse(const ULanguageTag* langtag);

#if 0
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag);
#endif
    349 
    350 namespace {
    351 
    352 // Helper class to memory manage CharString objects.
    353 // Only ever stack-allocated, does not need to inherit UMemory.
    354 class CharStringPool {
    355 public:
    356     CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {}
    357     ~CharStringPool() = default;
    358 
    359     CharStringPool(const CharStringPool&) = delete;
    360     CharStringPool& operator=(const CharStringPool&) = delete;
    361 
    362     icu::CharString* create() {
    363         if (U_FAILURE(status)) {
    364             return nullptr;
    365         }
    366         icu::CharString* const obj = new icu::CharString;
    367         if (obj == nullptr) {
    368             status = U_MEMORY_ALLOCATION_ERROR;
    369             return nullptr;
    370         }
    371         pool.addElement(obj, status);
    372         if (U_FAILURE(status)) {
    373             delete obj;
    374             return nullptr;
    375         }
    376         return obj;
    377     }
    378 
    379 private:
    380     static void U_CALLCONV deleter(void* obj) {
    381         delete static_cast<icu::CharString*>(obj);
    382     }
    383 
    384     UErrorCode status;
    385     icu::UVector pool;
    386 };
    387 
    388 }  // namespace
    389 
    390 /*
    391 * -------------------------------------------------
    392 *
    393 * Language subtag syntax validation functions
    394 *
    395 * -------------------------------------------------
    396 */
    397 
    398 static UBool
    399 _isAlphaString(const char* s, int32_t len) {
    400     int32_t i;
    401     for (i = 0; i < len; i++) {
    402         if (!ISALPHA(*(s + i))) {
    403             return FALSE;
    404         }
    405     }
    406     return TRUE;
    407 }
    408 
    409 static UBool
    410 _isNumericString(const char* s, int32_t len) {
    411     int32_t i;
    412     for (i = 0; i < len; i++) {
    413         if (!ISNUMERIC(*(s + i))) {
    414             return FALSE;
    415         }
    416     }
    417     return TRUE;
    418 }
    419 
    420 static UBool
    421 _isAlphaNumericString(const char* s, int32_t len) {
    422     int32_t i;
    423     for (i = 0; i < len; i++) {
    424         if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
    425             return FALSE;
    426         }
    427     }
    428     return TRUE;
    429 }
    430 
    431 static UBool
    432 _isLanguageSubtag(const char* s, int32_t len) {
    433     /*
    434      * language      = 2*3ALPHA            ; shortest ISO 639 code
    435      *                 ["-" extlang]       ; sometimes followed by
    436      *                                     ;   extended language subtags
    437      *               / 4ALPHA              ; or reserved for future use
    438      *               / 5*8ALPHA            ; or registered language subtag
    439      */
    440     if (len < 0) {
    441         len = (int32_t)uprv_strlen(s);
    442     }
    443     if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
    444         return TRUE;
    445     }
    446     return FALSE;
    447 }
    448 
    449 static UBool
    450 _isExtlangSubtag(const char* s, int32_t len) {
    451     /*
    452      * extlang       = 3ALPHA              ; selected ISO 639 codes
    453      *                 *2("-" 3ALPHA)      ; permanently reserved
    454      */
    455     if (len < 0) {
    456         len = (int32_t)uprv_strlen(s);
    457     }
    458     if (len == 3 && _isAlphaString(s, len)) {
    459         return TRUE;
    460     }
    461     return FALSE;
    462 }
    463 
    464 static UBool
    465 _isScriptSubtag(const char* s, int32_t len) {
    466     /*
    467      * script        = 4ALPHA              ; ISO 15924 code
    468      */
    469     if (len < 0) {
    470         len = (int32_t)uprv_strlen(s);
    471     }
    472     if (len == 4 && _isAlphaString(s, len)) {
    473         return TRUE;
    474     }
    475     return FALSE;
    476 }
    477 
    478 static UBool
    479 _isRegionSubtag(const char* s, int32_t len) {
    480     /*
    481      * region        = 2ALPHA              ; ISO 3166-1 code
    482      *               / 3DIGIT              ; UN M.49 code
    483      */
    484     if (len < 0) {
    485         len = (int32_t)uprv_strlen(s);
    486     }
    487     if (len == 2 && _isAlphaString(s, len)) {
    488         return TRUE;
    489     }
    490     if (len == 3 && _isNumericString(s, len)) {
    491         return TRUE;
    492     }
    493     return FALSE;
    494 }
    495 
    496 static UBool
    497 _isVariantSubtag(const char* s, int32_t len) {
    498     /*
    499      * variant       = 5*8alphanum         ; registered variants
    500      *               / (DIGIT 3alphanum)
    501      */
    502     if (len < 0) {
    503         len = (int32_t)uprv_strlen(s);
    504     }
    505     if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
    506         return TRUE;
    507     }
    508     if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
    509         return TRUE;
    510     }
    511     return FALSE;
    512 }
    513 
    514 static UBool
    515 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
    516     /*
    517      * variant       = 1*8alphanum         ; registered variants
    518      *               / (DIGIT 3alphanum)
    519      */
    520     if (len < 0) {
    521         len = (int32_t)uprv_strlen(s);
    522     }
    523     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
    524         return TRUE;
    525     }
    526     return FALSE;
    527 }
    528 
    529 static UBool
    530 _isExtensionSingleton(const char* s, int32_t len) {
    531     /*
    532      * extension     = singleton 1*("-" (2*8alphanum))
    533      */
    534     if (len < 0) {
    535         len = (int32_t)uprv_strlen(s);
    536     }
    537     if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
    538         return TRUE;
    539     }
    540     return FALSE;
    541 }
    542 
    543 static UBool
    544 _isExtensionSubtag(const char* s, int32_t len) {
    545     /*
    546      * extension     = singleton 1*("-" (2*8alphanum))
    547      */
    548     if (len < 0) {
    549         len = (int32_t)uprv_strlen(s);
    550     }
    551     if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
    552         return TRUE;
    553     }
    554     return FALSE;
    555 }
    556 
    557 static UBool
    558 _isExtensionSubtags(const char* s, int32_t len) {
    559     const char *p = s;
    560     const char *pSubtag = NULL;
    561 
    562     if (len < 0) {
    563         len = (int32_t)uprv_strlen(s);
    564     }
    565 
    566     while ((p - s) < len) {
    567         if (*p == SEP) {
    568             if (pSubtag == NULL) {
    569                 return FALSE;
    570             }
    571             if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
    572                 return FALSE;
    573             }
    574             pSubtag = NULL;
    575         } else if (pSubtag == NULL) {
    576             pSubtag = p;
    577         }
    578         p++;
    579     }
    580     if (pSubtag == NULL) {
    581         return FALSE;
    582     }
    583     return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
    584 }
    585 
    586 static UBool
    587 _isPrivateuseValueSubtag(const char* s, int32_t len) {
    588     /*
    589      * privateuse    = "x" 1*("-" (1*8alphanum))
    590      */
    591     if (len < 0) {
    592         len = (int32_t)uprv_strlen(s);
    593     }
    594     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
    595         return TRUE;
    596     }
    597     return FALSE;
    598 }
    599 
    600 static UBool
    601 _isPrivateuseValueSubtags(const char* s, int32_t len) {
    602     const char *p = s;
    603     const char *pSubtag = NULL;
    604 
    605     if (len < 0) {
    606         len = (int32_t)uprv_strlen(s);
    607     }
    608 
    609     while ((p - s) < len) {
    610         if (*p == SEP) {
    611             if (pSubtag == NULL) {
    612                 return FALSE;
    613             }
    614             if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
    615                 return FALSE;
    616             }
    617             pSubtag = NULL;
    618         } else if (pSubtag == NULL) {
    619             pSubtag = p;
    620         }
    621         p++;
    622     }
    623     if (pSubtag == NULL) {
    624         return FALSE;
    625     }
    626     return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
    627 }
    628 
    629 U_CFUNC UBool
    630 ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
    631     if (len < 0) {
    632         len = (int32_t)uprv_strlen(s);
    633     }
    634     if (len == 2 && _isAlphaNumericString(s, len)) {
    635         return TRUE;
    636     }
    637     return FALSE;
    638 }
    639 
    640 U_CFUNC UBool
    641 ultag_isUnicodeLocaleType(const char*s, int32_t len) {
    642     const char* p;
    643     int32_t subtagLen = 0;
    644 
    645     if (len < 0) {
    646         len = (int32_t)uprv_strlen(s);
    647     }
    648 
    649     for (p = s; len > 0; p++, len--) {
    650         if (*p == SEP) {
    651             if (subtagLen < 3) {
    652                 return FALSE;
    653             }
    654             subtagLen = 0;
    655         } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
    656             subtagLen++;
    657             if (subtagLen > 8) {
    658                 return FALSE;
    659             }
    660         } else {
    661             return FALSE;
    662         }
    663     }
    664 
    665     return (subtagLen >= 3);
    666 }
    667 /*
    668 * -------------------------------------------------
    669 *
    670 * Helper functions
    671 *
    672 * -------------------------------------------------
    673 */
    674 
    675 static UBool
    676 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
    677     UBool bAdded = TRUE;
    678 
    679     if (*first == NULL) {
    680         var->next = NULL;
    681         *first = var;
    682     } else {
    683         VariantListEntry *prev, *cur;
    684         int32_t cmp;
    685 
    686         /* variants order should be preserved */
    687         prev = NULL;
    688         cur = *first;
    689         while (TRUE) {
    690             if (cur == NULL) {
    691                 prev->next = var;
    692                 var->next = NULL;
    693                 break;
    694             }
    695 
    696             /* Checking for duplicate variant */
    697             cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
    698             if (cmp == 0) {
    699                 /* duplicated variant */
    700                 bAdded = FALSE;
    701                 break;
    702             }
    703             prev = cur;
    704             cur = cur->next;
    705         }
    706     }
    707 
    708     return bAdded;
    709 }
    710 
    711 static UBool
    712 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
    713     UBool bAdded = TRUE;
    714 
    715     if (*first == NULL) {
    716         attr->next = NULL;
    717         *first = attr;
    718     } else {
    719         AttributeListEntry *prev, *cur;
    720         int32_t cmp;
    721 
    722         /* reorder variants in alphabetical order */
    723         prev = NULL;
    724         cur = *first;
    725         while (TRUE) {
    726             if (cur == NULL) {
    727                 prev->next = attr;
    728                 attr->next = NULL;
    729                 break;
    730             }
    731             cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
    732             if (cmp < 0) {
    733                 if (prev == NULL) {
    734                     *first = attr;
    735                 } else {
    736                     prev->next = attr;
    737                 }
    738                 attr->next = cur;
    739                 break;
    740             }
    741             if (cmp == 0) {
    742                 /* duplicated variant */
    743                 bAdded = FALSE;
    744                 break;
    745             }
    746             prev = cur;
    747             cur = cur->next;
    748         }
    749     }
    750 
    751     return bAdded;
    752 }
    753 
    754 
    755 static UBool
    756 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
    757     UBool bAdded = TRUE;
    758 
    759     if (*first == NULL) {
    760         ext->next = NULL;
    761         *first = ext;
    762     } else {
    763         ExtensionListEntry *prev, *cur;
    764         int32_t cmp;
    765 
    766         /* reorder variants in alphabetical order */
    767         prev = NULL;
    768         cur = *first;
    769         while (TRUE) {
    770             if (cur == NULL) {
    771                 prev->next = ext;
    772                 ext->next = NULL;
    773                 break;
    774             }
    775             if (localeToBCP) {
    776                 /* special handling for locale to bcp conversion */
    777                 int32_t len, curlen;
    778 
    779                 len = (int32_t)uprv_strlen(ext->key);
    780                 curlen = (int32_t)uprv_strlen(cur->key);
    781 
    782                 if (len == 1 && curlen == 1) {
    783                     if (*(ext->key) == *(cur->key)) {
    784                         cmp = 0;
    785                     } else if (*(ext->key) == PRIVATEUSE) {
    786                         cmp = 1;
    787                     } else if (*(cur->key) == PRIVATEUSE) {
    788                         cmp = -1;
    789                     } else {
    790                         cmp = *(ext->key) - *(cur->key);
    791                     }
    792                 } else if (len == 1) {
    793                     cmp = *(ext->key) - LDMLEXT;
    794                 } else if (curlen == 1) {
    795                     cmp = LDMLEXT - *(cur->key);
    796                 } else {
    797                     cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
    798                     /* Both are u extension keys - we need special handling for 'attribute' */
    799                     if (cmp != 0) {
    800                         if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
    801                             cmp = 1;
    802                         } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
    803                             cmp = -1;
    804                         }
    805                     }
    806                 }
    807             } else {
    808                 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
    809             }
    810             if (cmp < 0) {
    811                 if (prev == NULL) {
    812                     *first = ext;
    813                 } else {
    814                     prev->next = ext;
    815                 }
    816                 ext->next = cur;
    817                 break;
    818             }
    819             if (cmp == 0) {
    820                 /* duplicated extension key */
    821                 bAdded = FALSE;
    822                 break;
    823             }
    824             prev = cur;
    825             cur = cur->next;
    826         }
    827     }
    828 
    829     return bAdded;
    830 }
    831 
    832 static void
    833 _initializeULanguageTag(ULanguageTag* langtag) {
    834     int32_t i;
    835 
    836     langtag->buf = NULL;
    837 
    838     langtag->language = EMPTY;
    839     for (i = 0; i < MAXEXTLANG; i++) {
    840         langtag->extlang[i] = NULL;
    841     }
    842 
    843     langtag->script = EMPTY;
    844     langtag->region = EMPTY;
    845 
    846     langtag->variants = NULL;
    847     langtag->extensions = NULL;
    848 
    849     langtag->grandfathered = EMPTY;
    850     langtag->privateuse = EMPTY;
    851 }
    852 
    853 static int32_t
    854 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
    855     char buf[ULOC_LANG_CAPACITY];
    856     UErrorCode tmpStatus = U_ZERO_ERROR;
    857     int32_t len, i;
    858     int32_t reslen = 0;
    859 
    860     if (U_FAILURE(*status)) {
    861         return 0;
    862     }
    863 
    864     len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
    865     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
    866         if (strict) {
    867             *status = U_ILLEGAL_ARGUMENT_ERROR;
    868             return 0;
    869         }
    870         len = 0;
    871     }
    872 
    873     /* Note: returned language code is in lower case letters */
    874 
    875     if (len == 0) {
    876         if (reslen < capacity) {
    877             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
    878         }
    879         reslen += LANG_UND_LEN;
    880     } else if (!_isLanguageSubtag(buf, len)) {
    881             /* invalid language code */
    882         if (strict) {
    883             *status = U_ILLEGAL_ARGUMENT_ERROR;
    884             return 0;
    885         }
    886         if (reslen < capacity) {
    887             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
    888         }
    889         reslen += LANG_UND_LEN;
    890     } else {
    891         /* resolve deprecated */
    892         for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
    893             // 2-letter deprecated subtags are listede before 3-letter
    894             // ones in DEPRECATEDLANGS[]. Get out of loop on coming
    895             // across the 1st 3-letter subtag, if the input is a 2-letter code.
    896             // to avoid continuing to try when there's no match.
    897             if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break;
    898             if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
    899                 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
    900                 len = (int32_t)uprv_strlen(buf);
    901                 break;
    902             }
    903         }
    904         if (reslen < capacity) {
    905             uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
    906         }
    907         reslen += len;
    908     }
    909     u_terminateChars(appendAt, capacity, reslen, status);
    910     return reslen;
    911 }
    912 
    913 static int32_t
    914 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
    915     char buf[ULOC_SCRIPT_CAPACITY];
    916     UErrorCode tmpStatus = U_ZERO_ERROR;
    917     int32_t len;
    918     int32_t reslen = 0;
    919 
    920     if (U_FAILURE(*status)) {
    921         return 0;
    922     }
    923 
    924     len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
    925     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
    926         if (strict) {
    927             *status = U_ILLEGAL_ARGUMENT_ERROR;
    928         }
    929         return 0;
    930     }
    931 
    932     if (len > 0) {
    933         if (!_isScriptSubtag(buf, len)) {
    934             /* invalid script code */
    935             if (strict) {
    936                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    937             }
    938             return 0;
    939         } else {
    940             if (reslen < capacity) {
    941                 *(appendAt + reslen) = SEP;
    942             }
    943             reslen++;
    944             if (reslen < capacity) {
    945                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
    946             }
    947             reslen += len;
    948         }
    949     }
    950     u_terminateChars(appendAt, capacity, reslen, status);
    951     return reslen;
    952 }
    953 
    954 static int32_t
    955 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
    956     char buf[ULOC_COUNTRY_CAPACITY];
    957     UErrorCode tmpStatus = U_ZERO_ERROR;
    958     int32_t len;
    959     int32_t reslen = 0;
    960 
    961     if (U_FAILURE(*status)) {
    962         return 0;
    963     }
    964 
    965     len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
    966     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
    967         if (strict) {
    968             *status = U_ILLEGAL_ARGUMENT_ERROR;
    969         }
    970         return 0;
    971     }
    972 
    973     if (len > 0) {
    974         if (!_isRegionSubtag(buf, len)) {
    975             /* invalid region code */
    976             if (strict) {
    977                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    978             }
    979             return 0;
    980         } else {
    981             if (reslen < capacity) {
    982                 *(appendAt + reslen) = SEP;
    983             }
    984             reslen++;
    985            /* resolve deprecated */
    986             for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) {
    987                 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) {
    988                     uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]);
    989                     len = (int32_t)uprv_strlen(buf);
    990                     break;
    991                 }
    992             }
    993 
    994             if (reslen < capacity) {
    995                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
    996             }
    997             reslen += len;
    998         }
    999     }
   1000     u_terminateChars(appendAt, capacity, reslen, status);
   1001     return reslen;
   1002 }
   1003 
   1004 static int32_t
   1005 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
   1006     char buf[ULOC_FULLNAME_CAPACITY];
   1007     UErrorCode tmpStatus = U_ZERO_ERROR;
   1008     int32_t len, i;
   1009     int32_t reslen = 0;
   1010 
   1011     if (U_FAILURE(*status)) {
   1012         return 0;
   1013     }
   1014 
   1015     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
   1016     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1017         if (strict) {
   1018             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1019         }
   1020         return 0;
   1021     }
   1022 
   1023     if (len > 0) {
   1024         char *p, *pVar;
   1025         UBool bNext = TRUE;
   1026         VariantListEntry *var;
   1027         VariantListEntry *varFirst = NULL;
   1028 
   1029         pVar = NULL;
   1030         p = buf;
   1031         while (bNext) {
   1032             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
   1033                 if (*p == 0) {
   1034                     bNext = FALSE;
   1035                 } else {
   1036                     *p = 0; /* terminate */
   1037                 }
   1038                 if (pVar == NULL) {
   1039                     if (strict) {
   1040                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1041                         break;
   1042                     }
   1043                     /* ignore empty variant */
   1044                 } else {
   1045                     /* ICU uses upper case letters for variants, but
   1046                        the canonical format is lowercase in BCP47 */
   1047                     for (i = 0; *(pVar + i) != 0; i++) {
   1048                         *(pVar + i) = uprv_tolower(*(pVar + i));
   1049                     }
   1050 
   1051                     /* validate */
   1052                     if (_isVariantSubtag(pVar, -1)) {
   1053                         if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) {
   1054                             /* emit the variant to the list */
   1055                             var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
   1056                             if (var == NULL) {
   1057                                 *status = U_MEMORY_ALLOCATION_ERROR;
   1058                                 break;
   1059                             }
   1060                             var->variant = pVar;
   1061                             if (!_addVariantToList(&varFirst, var)) {
   1062                                 /* duplicated variant */
   1063                                 uprv_free(var);
   1064                                 if (strict) {
   1065                                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1066                                     break;
   1067                                 }
   1068                             }
   1069                         } else {
   1070                             /* Special handling for POSIX variant, need to remember that we had it and then */
   1071                             /* treat it like an extension later. */
   1072                             *hadPosix = TRUE;
   1073                         }
   1074                     } else if (strict) {
   1075                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1076                         break;
   1077                     } else if (_isPrivateuseValueSubtag(pVar, -1)) {
   1078                         /* Handle private use subtags separately */
   1079                         break;
   1080                     }
   1081                 }
   1082                 /* reset variant starting position */
   1083                 pVar = NULL;
   1084             } else if (pVar == NULL) {
   1085                 pVar = p;
   1086             }
   1087             p++;
   1088         }
   1089 
   1090         if (U_SUCCESS(*status)) {
   1091             if (varFirst != NULL) {
   1092                 int32_t varLen;
   1093 
   1094                 /* write out validated/normalized variants to the target */
   1095                 var = varFirst;
   1096                 while (var != NULL) {
   1097                     if (reslen < capacity) {
   1098                         *(appendAt + reslen) = SEP;
   1099                     }
   1100                     reslen++;
   1101                     varLen = (int32_t)uprv_strlen(var->variant);
   1102                     if (reslen < capacity) {
   1103                         uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
   1104                     }
   1105                     reslen += varLen;
   1106                     var = var->next;
   1107                 }
   1108             }
   1109         }
   1110 
   1111         /* clean up */
   1112         var = varFirst;
   1113         while (var != NULL) {
   1114             VariantListEntry *tmpVar = var->next;
   1115             uprv_free(var);
   1116             var = tmpVar;
   1117         }
   1118 
   1119         if (U_FAILURE(*status)) {
   1120             return 0;
   1121         }
   1122     }
   1123 
   1124     u_terminateChars(appendAt, capacity, reslen, status);
   1125     return reslen;
   1126 }
   1127 
   1128 static int32_t
   1129 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
   1130     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
   1131     int32_t attrBufLength = 0;
   1132     UEnumeration *keywordEnum = NULL;
   1133     int32_t reslen = 0;
   1134 
   1135     keywordEnum = uloc_openKeywords(localeID, status);
   1136     if (U_FAILURE(*status) && !hadPosix) {
   1137         uenum_close(keywordEnum);
   1138         return 0;
   1139     }
   1140     if (keywordEnum != NULL || hadPosix) {
   1141         /* reorder extensions */
   1142         int32_t len;
   1143         const char *key;
   1144         ExtensionListEntry *firstExt = NULL;
   1145         ExtensionListEntry *ext;
   1146         AttributeListEntry *firstAttr = NULL;
   1147         AttributeListEntry *attr;
   1148         char *attrValue;
   1149         CharStringPool extBufPool;
   1150         const char *bcpKey=nullptr, *bcpValue=nullptr;
   1151         UErrorCode tmpStatus = U_ZERO_ERROR;
   1152         int32_t keylen;
   1153         UBool isBcpUExt;
   1154 
   1155         while (TRUE) {
   1156             icu::CharString buf;
   1157             key = uenum_next(keywordEnum, NULL, status);
   1158             if (key == NULL) {
   1159                 break;
   1160             }
   1161             char* buffer;
   1162             int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
   1163 
   1164             for (;;) {
   1165                 buffer = buf.getAppendBuffer(
   1166                         /*minCapacity=*/resultCapacity,
   1167                         /*desiredCapacityHint=*/resultCapacity,
   1168                         resultCapacity,
   1169                         tmpStatus);
   1170 
   1171                 if (U_FAILURE(tmpStatus)) {
   1172                     break;
   1173                 }
   1174 
   1175                 len = uloc_getKeywordValue(
   1176                         localeID, key, buffer, resultCapacity, &tmpStatus);
   1177 
   1178                 if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
   1179                     break;
   1180                 }
   1181 
   1182                 resultCapacity = len;
   1183                 tmpStatus = U_ZERO_ERROR;
   1184             }
   1185 
   1186             if (U_FAILURE(tmpStatus)) {
   1187                 if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
   1188                     *status = U_MEMORY_ALLOCATION_ERROR;
   1189                     break;
   1190                 }
   1191                 if (strict) {
   1192                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1193                     break;
   1194                 }
   1195                 /* ignore this keyword */
   1196                 tmpStatus = U_ZERO_ERROR;
   1197                 continue;
   1198             }
   1199 
   1200             buf.append(buffer, len, tmpStatus);
   1201             if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1202                 tmpStatus = U_ZERO_ERROR;  // Terminators provided by CharString.
   1203             }
   1204 
   1205             keylen = (int32_t)uprv_strlen(key);
   1206             isBcpUExt = (keylen > 1);
   1207 
   1208             /* special keyword used for representing Unicode locale attributes */
   1209             if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
   1210                 if (len > 0) {
   1211                     int32_t i = 0;
   1212                     while (TRUE) {
   1213                         attrBufLength = 0;
   1214                         for (; i < len; i++) {
   1215                             if (buf[i] != '-') {
   1216                                 attrBuf[attrBufLength++] = buf[i];
   1217                             } else {
   1218                                 i++;
   1219                                 break;
   1220                             }
   1221                         }
   1222                         if (attrBufLength > 0) {
   1223                             attrBuf[attrBufLength] = 0;
   1224 
   1225                         } else if (i >= len){
   1226                             break;
   1227                         }
   1228 
   1229                         /* create AttributeListEntry */
   1230                         attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
   1231                         if (attr == NULL) {
   1232                             *status = U_MEMORY_ALLOCATION_ERROR;
   1233                             break;
   1234                         }
   1235                         attrValue = (char*)uprv_malloc(attrBufLength + 1);
   1236                         if (attrValue == NULL) {
   1237                             *status = U_MEMORY_ALLOCATION_ERROR;
   1238                             break;
   1239                         }
   1240                         uprv_strcpy(attrValue, attrBuf);
   1241                         attr->attribute = attrValue;
   1242 
   1243                         if (!_addAttributeToList(&firstAttr, attr)) {
   1244                             uprv_free(attr);
   1245                             uprv_free(attrValue);
   1246                             if (strict) {
   1247                                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1248                                 break;
   1249                             }
   1250                         }
   1251                     }
   1252                     /* for a place holder ExtensionListEntry */
   1253                     bcpKey = LOCALE_ATTRIBUTE_KEY;
   1254                     bcpValue = NULL;
   1255                 }
   1256             } else if (isBcpUExt) {
   1257                 bcpKey = uloc_toUnicodeLocaleKey(key);
   1258                 if (bcpKey == NULL) {
   1259                     if (strict) {
   1260                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1261                         break;
   1262                     }
   1263                     continue;
   1264                 }
   1265 
   1266                 /* we've checked buf is null-terminated above */
   1267                 bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
   1268                 if (bcpValue == NULL) {
   1269                     if (strict) {
   1270                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1271                         break;
   1272                     }
   1273                     continue;
   1274                 }
   1275                 if (bcpValue == buf.data()) {
   1276                     /*
   1277                     When uloc_toUnicodeLocaleType(key, buf) returns the
   1278                     input value as is, the value is well-formed, but has
   1279                     no known mapping. This implementation normalizes the
   1280                     value to lower case
   1281                     */
   1282                     icu::CharString* extBuf = extBufPool.create();
   1283                     if (extBuf == nullptr) {
   1284                         *status = U_MEMORY_ALLOCATION_ERROR;
   1285                         break;
   1286                     }
   1287                     int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
   1288                     int32_t resultCapacity;
   1289                     char* pExtBuf = extBuf->getAppendBuffer(
   1290                             /*minCapacity=*/bcpValueLen,
   1291                             /*desiredCapacityHint=*/bcpValueLen,
   1292                             resultCapacity,
   1293                             tmpStatus);
   1294                     if (U_FAILURE(tmpStatus)) {
   1295                         *status = tmpStatus;
   1296                         break;
   1297                     }
   1298 
   1299                     uprv_strcpy(pExtBuf, bcpValue);
   1300                     T_CString_toLowerCase(pExtBuf);
   1301 
   1302                     extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
   1303                     if (U_FAILURE(tmpStatus)) {
   1304                         *status = tmpStatus;
   1305                         break;
   1306                     }
   1307 
   1308                     bcpValue = extBuf->data();
   1309                 }
   1310             } else {
   1311                 if (*key == PRIVATEUSE) {
   1312                     if (!_isPrivateuseValueSubtags(buf.data(), len)) {
   1313                         if (strict) {
   1314                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1315                             break;
   1316                         }
   1317                         continue;
   1318                     }
   1319                 } else {
   1320                     if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) {
   1321                         if (strict) {
   1322                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1323                             break;
   1324                         }
   1325                         continue;
   1326                     }
   1327                 }
   1328                 bcpKey = key;
   1329                 icu::CharString* extBuf = extBufPool.create();
   1330                 if (extBuf == nullptr) {
   1331                     *status = U_MEMORY_ALLOCATION_ERROR;
   1332                     break;
   1333                 }
   1334                 extBuf->append(buf.data(), len, tmpStatus);
   1335                 if (U_FAILURE(tmpStatus)) {
   1336                     *status = tmpStatus;
   1337                     break;
   1338                 }
   1339                 bcpValue = extBuf->data();
   1340             }
   1341 
   1342             /* create ExtensionListEntry */
   1343             ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1344             if (ext == NULL) {
   1345                 *status = U_MEMORY_ALLOCATION_ERROR;
   1346                 break;
   1347             }
   1348             ext->key = bcpKey;
   1349             ext->value = bcpValue;
   1350 
   1351             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
   1352                 uprv_free(ext);
   1353                 if (strict) {
   1354                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1355                     break;
   1356                 }
   1357             }
   1358         }
   1359 
   1360         /* Special handling for POSIX variant - add the keywords for POSIX */
   1361         if (hadPosix) {
   1362             /* create ExtensionListEntry for POSIX */
   1363             ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1364             if (ext == NULL) {
   1365                 *status = U_MEMORY_ALLOCATION_ERROR;
   1366                 goto cleanup;
   1367             }
   1368             ext->key = POSIX_KEY;
   1369             ext->value = POSIX_VALUE;
   1370 
   1371             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
   1372                 uprv_free(ext);
   1373             }
   1374         }
   1375 
   1376         if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
   1377             UBool startLDMLExtension = FALSE;
   1378             for (ext = firstExt; ext; ext = ext->next) {
   1379                 if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
   1380                     /* first LDML u singlton extension */
   1381                    if (reslen < capacity) {
   1382                        *(appendAt + reslen) = SEP;
   1383                    }
   1384                    reslen++;
   1385                    if (reslen < capacity) {
   1386                        *(appendAt + reslen) = LDMLEXT;
   1387                    }
   1388                    reslen++;
   1389 
   1390                    startLDMLExtension = TRUE;
   1391                 }
   1392 
   1393                 /* write out the sorted BCP47 attributes, extensions and private use */
   1394                 if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
   1395                     /* write the value for the attributes */
   1396                     for (attr = firstAttr; attr; attr = attr->next) {
   1397                         if (reslen < capacity) {
   1398                             *(appendAt + reslen) = SEP;
   1399                         }
   1400                         reslen++;
   1401                         len = (int32_t)uprv_strlen(attr->attribute);
   1402                         if (reslen < capacity) {
   1403                             uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
   1404                         }
   1405                         reslen += len;
   1406                     }
   1407                 } else {
   1408                     if (reslen < capacity) {
   1409                         *(appendAt + reslen) = SEP;
   1410                     }
   1411                     reslen++;
   1412                     len = (int32_t)uprv_strlen(ext->key);
   1413                     if (reslen < capacity) {
   1414                         uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
   1415                     }
   1416                     reslen += len;
   1417                     if (reslen < capacity) {
   1418                         *(appendAt + reslen) = SEP;
   1419                     }
   1420                     reslen++;
   1421                     len = (int32_t)uprv_strlen(ext->value);
   1422                     if (reslen < capacity) {
   1423                         uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
   1424                     }
   1425                     reslen += len;
   1426                 }
   1427             }
   1428         }
   1429 cleanup:
   1430         /* clean up */
   1431         ext = firstExt;
   1432         while (ext != NULL) {
   1433             ExtensionListEntry *tmpExt = ext->next;
   1434             uprv_free(ext);
   1435             ext = tmpExt;
   1436         }
   1437 
   1438         attr = firstAttr;
   1439         while (attr != NULL) {
   1440             AttributeListEntry *tmpAttr = attr->next;
   1441             char *pValue = (char *)attr->attribute;
   1442             uprv_free(pValue);
   1443             uprv_free(attr);
   1444             attr = tmpAttr;
   1445         }
   1446 
   1447         uenum_close(keywordEnum);
   1448 
   1449         if (U_FAILURE(*status)) {
   1450             return 0;
   1451         }
   1452     }
   1453 
   1454     return u_terminateChars(appendAt, capacity, reslen, status);
   1455 }
   1456 
   1457 /**
   1458  * Append keywords parsed from LDML extension value
   1459  * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
   1460  * Note: char* buf is used for storing keywords
   1461  */
   1462 static void
   1463 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
   1464     const char *pTag;   /* beginning of current subtag */
   1465     const char *pKwds;  /* beginning of key-type pairs */
   1466     UBool variantExists = *posixVariant;
   1467 
   1468     ExtensionListEntry *kwdFirst = NULL;    /* first LDML keyword */
   1469     ExtensionListEntry *kwd, *nextKwd;
   1470 
   1471     AttributeListEntry *attrFirst = NULL;   /* first attribute */
   1472     AttributeListEntry *attr, *nextAttr;
   1473 
   1474     int32_t len;
   1475     int32_t bufIdx = 0;
   1476 
   1477     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1478     int32_t attrBufIdx = 0;
   1479 
   1480     /* Reset the posixVariant value */
   1481     *posixVariant = FALSE;
   1482 
   1483     pTag = ldmlext;
   1484     pKwds = NULL;
   1485 
   1486     /* Iterate through u extension attributes */
   1487     while (*pTag) {
   1488         /* locate next separator char */
   1489         for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
   1490 
   1491         if (ultag_isUnicodeLocaleKey(pTag, len)) {
   1492             pKwds = pTag;
   1493             break;
   1494         }
   1495 
   1496         /* add this attribute to the list */
   1497         attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
   1498         if (attr == NULL) {
   1499             *status = U_MEMORY_ALLOCATION_ERROR;
   1500             goto cleanup;
   1501         }
   1502 
   1503         if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
   1504             uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
   1505             attrBuf[attrBufIdx + len] = 0;
   1506             attr->attribute = &attrBuf[attrBufIdx];
   1507             attrBufIdx += (len + 1);
   1508         } else {
   1509             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1510             uprv_free(attr);
   1511             goto cleanup;
   1512         }
   1513 
   1514         if (!_addAttributeToList(&attrFirst, attr)) {
   1515             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1516             uprv_free(attr);
   1517             goto cleanup;
   1518         }
   1519 
   1520         /* next tag */
   1521         pTag += len;
   1522         if (*pTag) {
   1523             /* next to the separator */
   1524             pTag++;
   1525         }
   1526     }
   1527 
   1528     if (attrFirst) {
   1529         /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
   1530 
   1531         if (attrBufIdx > bufSize) {
   1532             /* attrBufIdx == <total length of attribute subtag> + 1 */
   1533             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1534             goto cleanup;
   1535         }
   1536 
   1537         kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1538         if (kwd == NULL) {
   1539             *status = U_MEMORY_ALLOCATION_ERROR;
   1540             goto cleanup;
   1541         }
   1542 
   1543         kwd->key = LOCALE_ATTRIBUTE_KEY;
   1544         kwd->value = buf;
   1545 
   1546         /* attribute subtags sorted in alphabetical order as type */
   1547         attr = attrFirst;
   1548         while (attr != NULL) {
   1549             nextAttr = attr->next;
   1550 
   1551             /* buffer size check is done above */
   1552             if (attr != attrFirst) {
   1553                 *(buf + bufIdx) = SEP;
   1554                 bufIdx++;
   1555             }
   1556 
   1557             len = static_cast<int32_t>(uprv_strlen(attr->attribute));
   1558             uprv_memcpy(buf + bufIdx, attr->attribute, len);
   1559             bufIdx += len;
   1560 
   1561             attr = nextAttr;
   1562         }
   1563         *(buf + bufIdx) = 0;
   1564         bufIdx++;
   1565 
   1566         if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1567             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1568             uprv_free(kwd);
   1569             goto cleanup;
   1570         }
   1571 
   1572         /* once keyword entry is created, delete the attribute list */
   1573         attr = attrFirst;
   1574         while (attr != NULL) {
   1575             nextAttr = attr->next;
   1576             uprv_free(attr);
   1577             attr = nextAttr;
   1578         }
   1579         attrFirst = NULL;
   1580     }
   1581 
   1582     if (pKwds) {
   1583         const char *pBcpKey = NULL;     /* u extenstion key subtag */
   1584         const char *pBcpType = NULL;    /* beginning of u extension type subtag(s) */
   1585         int32_t bcpKeyLen = 0;
   1586         int32_t bcpTypeLen = 0;
   1587         UBool isDone = FALSE;
   1588 
   1589         pTag = pKwds;
   1590         /* BCP47 representation of LDML key/type pairs */
   1591         while (!isDone) {
   1592             const char *pNextBcpKey = NULL;
   1593             int32_t nextBcpKeyLen = 0;
   1594             UBool emitKeyword = FALSE;
   1595 
   1596             if (*pTag) {
   1597                 /* locate next separator char */
   1598                 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
   1599 
   1600                 if (ultag_isUnicodeLocaleKey(pTag, len)) {
   1601                     if (pBcpKey) {
   1602                         emitKeyword = TRUE;
   1603                         pNextBcpKey = pTag;
   1604                         nextBcpKeyLen = len;
   1605                     } else {
   1606                         pBcpKey = pTag;
   1607                         bcpKeyLen = len;
   1608                     }
   1609                 } else {
   1610                     U_ASSERT(pBcpKey != NULL);
   1611                     /* within LDML type subtags */
   1612                     if (pBcpType) {
   1613                         bcpTypeLen += (len + 1);
   1614                     } else {
   1615                         pBcpType = pTag;
   1616                         bcpTypeLen = len;
   1617                     }
   1618                 }
   1619 
   1620                 /* next tag */
   1621                 pTag += len;
   1622                 if (*pTag) {
   1623                     /* next to the separator */
   1624                     pTag++;
   1625                 }
   1626             } else {
   1627                 /* processing last one */
   1628                 emitKeyword = TRUE;
   1629                 isDone = TRUE;
   1630             }
   1631 
   1632             if (emitKeyword) {
   1633                 const char *pKey = NULL;    /* LDML key */
   1634                 const char *pType = NULL;   /* LDML type */
   1635 
   1636                 char bcpKeyBuf[9];          /* BCP key length is always 2 for now */
   1637 
   1638                 U_ASSERT(pBcpKey != NULL);
   1639 
   1640                 if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
   1641                     /* the BCP key is invalid */
   1642                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1643                     goto cleanup;
   1644                 }
   1645 
   1646                 uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
   1647                 bcpKeyBuf[bcpKeyLen] = 0;
   1648 
   1649                 /* u extension key to LDML key */
   1650                 pKey = uloc_toLegacyKey(bcpKeyBuf);
   1651                 if (pKey == NULL) {
   1652                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1653                     goto cleanup;
   1654                 }
   1655                 if (pKey == bcpKeyBuf) {
   1656                     /*
   1657                     The key returned by toLegacyKey points to the input buffer.
   1658                     We normalize the result key to lower case.
   1659                     */
   1660                     T_CString_toLowerCase(bcpKeyBuf);
   1661                     if (bufSize - bufIdx - 1 >= bcpKeyLen) {
   1662                         uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
   1663                         pKey = buf + bufIdx;
   1664                         bufIdx += bcpKeyLen;
   1665                         *(buf + bufIdx) = 0;
   1666                         bufIdx++;
   1667                     } else {
   1668                         *status = U_BUFFER_OVERFLOW_ERROR;
   1669                         goto cleanup;
   1670                     }
   1671                 }
   1672 
   1673                 if (pBcpType) {
   1674                     char bcpTypeBuf[128];       /* practically long enough even considering multiple subtag type */
   1675                     if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) {
   1676                         /* the BCP type is too long */
   1677                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1678                         goto cleanup;
   1679                     }
   1680 
   1681                     uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
   1682                     bcpTypeBuf[bcpTypeLen] = 0;
   1683 
   1684                     /* BCP type to locale type */
   1685                     pType = uloc_toLegacyType(pKey, bcpTypeBuf);
   1686                     if (pType == NULL) {
   1687                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1688                         goto cleanup;
   1689                     }
   1690                     if (pType == bcpTypeBuf) {
   1691                         /*
   1692                         The type returned by toLegacyType points to the input buffer.
   1693                         We normalize the result type to lower case.
   1694                         */
   1695                         /* normalize to lower case */
   1696                         T_CString_toLowerCase(bcpTypeBuf);
   1697                         if (bufSize - bufIdx - 1 >= bcpTypeLen) {
   1698                             uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
   1699                             pType = buf + bufIdx;
   1700                             bufIdx += bcpTypeLen;
   1701                             *(buf + bufIdx) = 0;
   1702                             bufIdx++;
   1703                         } else {
   1704                             *status = U_BUFFER_OVERFLOW_ERROR;
   1705                             goto cleanup;
   1706                         }
   1707                     }
   1708                 } else {
   1709                     /* typeless - default type value is "yes" */
   1710                     pType = LOCALE_TYPE_YES;
   1711                 }
   1712 
   1713                 /* Special handling for u-va-posix, since we want to treat this as a variant,
   1714                    not as a keyword */
   1715                 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
   1716                     *posixVariant = TRUE;
   1717                 } else {
   1718                     /* create an ExtensionListEntry for this keyword */
   1719                     kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1720                     if (kwd == NULL) {
   1721                         *status = U_MEMORY_ALLOCATION_ERROR;
   1722                         goto cleanup;
   1723                     }
   1724 
   1725                     kwd->key = pKey;
   1726                     kwd->value = pType;
   1727 
   1728                     if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1729                         // duplicate keyword is allowed, Only the first
   1730                         // is honored.
   1731                         uprv_free(kwd);
   1732                     }
   1733                 }
   1734 
   1735                 pBcpKey = pNextBcpKey;
   1736                 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
   1737                 pBcpType = NULL;
   1738                 bcpTypeLen = 0;
   1739             }
   1740         }
   1741     }
   1742 
   1743     kwd = kwdFirst;
   1744     while (kwd != NULL) {
   1745         nextKwd = kwd->next;
   1746         _addExtensionToList(appendTo, kwd, FALSE);
   1747         kwd = nextKwd;
   1748     }
   1749 
   1750     return;
   1751 
   1752 cleanup:
   1753     attr = attrFirst;
   1754     while (attr != NULL) {
   1755         nextAttr = attr->next;
   1756         uprv_free(attr);
   1757         attr = nextAttr;
   1758     }
   1759 
   1760     kwd = kwdFirst;
   1761     while (kwd != NULL) {
   1762         nextKwd = kwd->next;
   1763         uprv_free(kwd);
   1764         kwd = nextKwd;
   1765     }
   1766 }
   1767 
   1768 
   1769 static int32_t
   1770 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
            /*
             * Writes the extensions and the private use part of a parsed language
             * tag in locale-ID keyword syntax:
             *     [_POSIX] LOCALE_EXT_SEP key LOCALE_KEY_TYPE_SEP type
             *              { LOCALE_KEYWORD_SEP key LOCALE_KEY_TYPE_SEP type }
             *
             * langtag   parsed tag supplying extensions, private use and variants
             * appendAt  destination; never more than `capacity` bytes are stored
             * capacity  size of the destination in bytes (also used as the size of
             *           the scratch buffer for converted LDML keys/types)
             * status    in/out error code; the function is a no-op if it already
             *           holds a failure
             *
             * Returns 0 on failure; otherwise the value of u_terminateChars() for
             * the full, untruncated length (reslen keeps counting after the
             * destination is full, so callers can preflight).
             */
   1771     int32_t reslen = 0;
   1772     int32_t i, n;
   1773     int32_t len;
   1774     ExtensionListEntry *kwdFirst = NULL;
   1775     ExtensionListEntry *kwd;
   1776     const char *key, *type;
   1777     char *kwdBuf = NULL;
   1778     int32_t kwdBufLength = capacity;
   1779     UBool posixVariant = FALSE;
   1780
   1781     if (U_FAILURE(*status)) {
   1782         return 0;
   1783     }
   1784
            /*
             * Scratch storage handed to _appendLDMLExtensionAsKeywords(). Keyword
             * entries created there may point into it, so it is released only
             * after the keywords have been written out.
             */
   1785     kwdBuf = (char*)uprv_malloc(kwdBufLength);
   1786     if (kwdBuf == NULL) {
   1787         *status = U_MEMORY_ALLOCATION_ERROR;
   1788         return 0;
   1789     }
   1790
   1796     n = ultag_getExtensionsSize(langtag);
   1797
   1798     /* resolve locale keywords and reordering keys */
   1799     for (i = 0; i < n; i++) {
   1800         key = ultag_getExtensionKey(langtag, i);
   1801         type = ultag_getExtensionValue(langtag, i);
   1802         if (*key == LDMLEXT) {
                    /*
                     * posixVariant is in/out for the LDML converter: going in it
                     * says "the tag already has variants", coming back it says
                     * "u-va-posix was found and must be written as _POSIX".
                     * Seed it only here. Seeding it unconditionally made a tag
                     * with variants but no LDML extension reach the _POSIX step
                     * below with the flag still TRUE.
                     */
                    if (ultag_getVariantsSize(langtag)) {
                        posixVariant = TRUE;
                    }
   1803             _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
   1804             if (U_FAILURE(*status)) {
   1805                 break;
   1806             }
   1807         } else {
                    /* any other extension becomes a keyword as is: singleton = value */
   1808             kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1809             if (kwd == NULL) {
   1810                 *status = U_MEMORY_ALLOCATION_ERROR;
   1811                 break;
   1812             }
   1813             kwd->key = key;
   1814             kwd->value = type;
   1815             if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
                        /* the list did not take ownership - release the entry here */
   1816                 uprv_free(kwd);
   1817                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1818                 break;
   1819             }
   1820         }
   1821     }
   1822
   1823     if (U_SUCCESS(*status)) {
   1824         type = ultag_getPrivateUse(langtag);
   1825         if ((int32_t)uprv_strlen(type) > 0) {
   1826             /* add private use as a keyword */
   1827             kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1828             if (kwd == NULL) {
   1829                 *status = U_MEMORY_ALLOCATION_ERROR;
   1830             } else {
   1831                 kwd->key = PRIVATEUSE_KEY;
   1832                 kwd->value = type;
   1833                 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1834                     uprv_free(kwd);
   1835                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1836                 }
   1837             }
   1838         }
   1839     }
   1840
   1841     /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
   1842
   1843     if (U_SUCCESS(*status) && posixVariant) {
   1844         len = (int32_t) uprv_strlen(_POSIX);
   1845         if (reslen < capacity) {
   1846             uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
   1847         }
   1848         reslen += len;
   1849     }
   1850
   1851     if (U_SUCCESS(*status) && kwdFirst != NULL) {
   1852         /* write out the sorted keywords */
                /* every store is guarded by reslen < capacity; reslen itself always advances */
   1853         UBool firstValue = TRUE;
   1854         kwd = kwdFirst;
   1855         do {
   1856             if (reslen < capacity) {
   1857                 if (firstValue) {
   1858                     /* '@' */
   1859                     *(appendAt + reslen) = LOCALE_EXT_SEP;
   1860                     firstValue = FALSE;
   1861                 } else {
   1862                     /* ';' */
   1863                     *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
   1864                 }
   1865             }
   1866             reslen++;
   1867
   1868             /* key */
   1869             len = (int32_t)uprv_strlen(kwd->key);
   1870             if (reslen < capacity) {
   1871                 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
   1872             }
   1873             reslen += len;
   1874
   1875             /* '=' */
   1876             if (reslen < capacity) {
   1877                 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
   1878             }
   1879             reslen++;
   1880
   1881             /* type */
   1882             len = (int32_t)uprv_strlen(kwd->value);
   1883             if (reslen < capacity) {
   1884                 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
   1885             }
   1886             reslen += len;
   1887
   1888             kwd = kwd->next;
   1889         } while (kwd);
   1890     }
   1891
   1892     /* clean up */
            /* runs on success and on failure: the list entries and the scratch buffer are owned here */
   1893     kwd = kwdFirst;
   1894     while (kwd != NULL) {
   1895         ExtensionListEntry *tmpKwd = kwd->next;
   1896         uprv_free(kwd);
   1897         kwd = tmpKwd;
   1898     }
   1899
   1900     uprv_free(kwdBuf);
   1901
   1902     if (U_FAILURE(*status)) {
   1903         return 0;
   1904     }
   1905
   1906     return u_terminateChars(appendAt, capacity, reslen, status);
   1907 }
   1908 
   1909 static int32_t
   1910 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
            /*
             * Appends the locale's variant subtags that are not written as BCP 47
             * variants as a private use sequence:
             *     SEP 'x' SEP PRIVUSE_VARIANT_PREFIX SEP value { SEP value }
             * The sequence starts at the first variant piece that is a valid
             * private use value but not a valid variant subtag; every valid piece
             * after it is appended too. Pieces are lowercased first.
             *
             * localeID  locale whose variant is read with uloc_getVariant()
             * appendAt  destination; never more than `capacity` bytes are stored
             * capacity  size of the destination in bytes
             * strict    TRUE: an unreadable variant or an invalid piece sets
             *           U_ILLEGAL_ARGUMENT_ERROR; FALSE: conversion just stops and
             *           what was collected so far is kept
             * hadPosix  unused
             * status    in/out error code; no-op if it already holds a failure
             *
             * Returns the full length of the sequence (also when it does not fit,
             * so callers can preflight), or 0 on failure. Termination / overflow
             * reporting is delegated to u_terminateChars().
             */
   1911     (void)hadPosix;
   1912     char buf[ULOC_FULLNAME_CAPACITY];
            /*
             * The sequence is assembled here first so that appendAt stays
             * untouched when a strict-mode error is found halfway through.
             * It can be longer than the variant itself: one separator per piece
             * plus the one-time "x", prefix and separators. The 32 spare bytes
             * are meant to cover that growth (assumes the prefix is short - it is
             * defined outside this function), and `limit` below bounds every
             * store by this array's size as well as by the caller's capacity.
             */
   1913     char tmpAppend[ULOC_FULLNAME_CAPACITY + 32];
   1914     UErrorCode tmpStatus = U_ZERO_ERROR;
   1915     int32_t len, i;
   1916     int32_t reslen = 0;
            /* number of bytes that may really be stored into tmpAppend */
            int32_t limit = uprv_min(capacity, (int32_t)sizeof(tmpAppend));
   1917
   1918     if (U_FAILURE(*status)) {
   1919         return 0;
   1920     }
   1921
   1922     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
            /* an unterminated result is rejected too: the loop below relies on the NUL */
   1923     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1924         if (strict) {
   1925             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1926         }
   1927         return 0;
   1928     }
   1929
   1930     if (len > 0) {
   1931         char *p, *pPriv;
   1932         UBool bNext = TRUE;
   1933         UBool firstValue = TRUE;
   1934         UBool writeValue;
   1935
                /* pPriv marks the start of the piece being scanned, NULL between pieces */
   1936         pPriv = NULL;
   1937         p = buf;
   1938         while (bNext) {
   1939             writeValue = FALSE;
   1940             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
   1941                 if (*p == 0) {
   1942                     bNext = FALSE;
   1943                 } else {
   1944                     *p = 0; /* terminate */
   1945                 }
   1946                 if (pPriv != NULL) {
   1947                     /* Private use in the canonical format is lowercase in BCP47 */
   1948                     for (i = 0; *(pPriv + i) != 0; i++) {
   1949                         *(pPriv + i) = uprv_tolower(*(pPriv + i));
   1950                     }
   1951
   1952                     /* validate */
   1953                     if (_isPrivateuseValueSubtag(pPriv, -1)) {
   1954                         if (firstValue) {
                                    /* leading pieces that are valid variants are skipped here */
   1955                             if (!_isVariantSubtag(pPriv, -1)) {
   1956                                 writeValue = TRUE;
   1957                             }
   1958                         } else {
   1959                             writeValue = TRUE;
   1960                         }
   1961                     } else if (strict) {
   1962                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1963                         break;
   1964                     } else {
   1965                         break;
   1966                     }
   1967
                            /*
                             * Stores are guarded by `limit`, but reslen always
                             * advances, so it ends up as the true length even
                             * when the output is truncated.
                             */
   1968                     if (writeValue) {
   1969                         if (reslen < limit) {
   1970                             tmpAppend[reslen] = SEP;
   1971                         }
                                reslen++;
   1972
   1973                         if (firstValue) {
   1974                             if (reslen < limit) {
   1975                                 tmpAppend[reslen] = *PRIVATEUSE_KEY;
   1976                             }
                                    reslen++;
   1977
   1978                             if (reslen < limit) {
   1979                                 tmpAppend[reslen] = SEP;
   1980                             }
                                    reslen++;
   1981
   1982                             len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
   1983                             if (reslen < limit) {
   1984                                 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, limit - reslen));
   1985                             }
   1986                             reslen += len;
   1987
   1988                             if (reslen < limit) {
   1989                                 tmpAppend[reslen] = SEP;
   1990                             }
                                    reslen++;
   1991
   1992                             firstValue = FALSE;
   1993                         }
   1994
   1995                         len = (int32_t)uprv_strlen(pPriv);
   1996                         if (reslen < limit) {
   1997                             uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, limit - reslen));
   1998                         }
   1999                         reslen += len;
   2000                     }
   2001                 }
   2002                 /* reset private use starting position */
   2003                 pPriv = NULL;
   2004             } else if (pPriv == NULL) {
   2005                 pPriv = p;
   2006             }
   2007             p++;
   2008         }
   2009
   2010         if (U_FAILURE(*status)) {
   2011             return 0;
   2012         }
   2013     }
   2014
   2015     if (U_SUCCESS(*status)) {
                /*
                 * Hand over everything that was stored: min(reslen, limit) bytes.
                 * The previous length, min(reslen, capacity - reslen), copied too
                 * little whenever the result was longer than half the capacity,
                 * and nothing at all when it filled the buffer exactly.
                 */
   2016         len = uprv_min(reslen, limit);
   2017         if (len > 0) {
   2018             uprv_memcpy(appendAt, tmpAppend, len);
   2019         }
   2020     }
   2021
   2022     u_terminateChars(appendAt, capacity, reslen, status);
   2023
   2024     return reslen;
   2025 }
   2026 
   2027 /*
   2028 * -------------------------------------------------
   2029 *
   2030 * ultag_ functions
   2031 *
   2032 * -------------------------------------------------
   2033 */
   2034 
   2035 /* Bit flags used by the parser (ultag_parse): each bit set in its `next` mask names a kind of subtag that is acceptable at the current position */
   2036 #define LANG 0x0001 /* language subtag */
   2037 #define EXTL 0x0002 /* extlang subtag */
   2038 #define SCRT 0x0004 /* script subtag */
   2039 #define REGN 0x0008 /* region subtag */
   2040 #define VART 0x0010 /* variant subtag */
   2041 #define EXTS 0x0020 /* extension singleton (starts a new extension) */
   2042 #define EXTV 0x0040 /* extension value subtag (continues the current extension) */
   2043 #define PRIV 0x0080 /* private use singleton and the value subtags after it */
   2044 
   2045 /**
   2046  * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing
   2047  * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ )
   2048  * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above.
   2049  */
   2050 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
   2051 #pragma optimize( "", off )
   2052 #endif
   2053 
   2054 static ULanguageTag*
   2055 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
            /*
             * Parses a language tag into a newly allocated ULanguageTag.
             *
             * tag        tag text
             * tagLen     length of tag, or negative to use uprv_strlen(tag)
             * parsedLen  optional (may be NULL); reset to 0 on entry and, when
             *            parsing ran, set to the number of input bytes accepted
             * status     in/out error code; NULL is returned at once if it
             *            already holds a failure
             *
             * Returns the tag, to be released with ultag_close(), or NULL with
             * U_MEMORY_ALLOCATION_ERROR when an allocation fails. An unacceptable
             * subtag is not an error: parsing simply stops there and the subtags
             * collected so far are returned.
             *
             * The tag works on a private copy of the input (t->buf). Subtags are
             * carved out of that copy in place: separators are overwritten with
             * NUL and letter case is normalized.
             */
   2056     ULanguageTag *t;
   2057     char *tagBuf;
   2058     int16_t next;
   2059     char *pSubtag, *pNext, *pLastGoodPosition;
   2060     int32_t subtagLen;
   2061     int32_t extlangIdx;
   2062     ExtensionListEntry *pExtension;
   2063     char *pExtValueSubtag, *pExtValueSubtagEnd;
   2064     int32_t i;
   2065     UBool privateuseVar = FALSE;
   2066     int32_t grandfatheredLen = 0;
   2067
   2068     if (parsedLen != NULL) {
   2069         *parsedLen = 0;
   2070     }
   2071
   2072     if (U_FAILURE(*status)) {
   2073         return NULL;
   2074     }
   2075
   2076     if (tagLen < 0) {
   2077         tagLen = (int32_t)uprv_strlen(tag);
   2078     }
   2079
   2080     /* copy the entire string */
   2081     tagBuf = (char*)uprv_malloc(tagLen + 1);
   2082     if (tagBuf == NULL) {
   2083         *status = U_MEMORY_ALLOCATION_ERROR;
   2084         return NULL;
   2085     }
   2086     uprv_memcpy(tagBuf, tag, tagLen);
   2087     *(tagBuf + tagLen) = 0;
   2088
   2089     /* create a ULanguageTag */
   2090     t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
   2091     if (t == NULL) {
   2092         uprv_free(tagBuf);
   2093         *status = U_MEMORY_ALLOCATION_ERROR;
   2094         return NULL;
   2095     }
   2096     _initializeULanguageTag(t);
            /* from here on t owns tagBuf; ultag_close(t) releases it */
   2097     t->buf = tagBuf;
   2098
   2099     if (tagLen < MINLEN) {
   2100         /* the input tag is too short - return empty ULanguageTag */
   2101         return t;
   2102     }
   2103
   2104     /* check if the tag is grandfathered */
            /* GRANDFATHERED holds pairs: [i] = grandfathered tag, [i + 1] = its replacement */
   2105     for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
   2106         if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
   2107             int32_t newTagLength;
   2108
   2109             grandfatheredLen = tagLen;  /* back up for output parsedLen */
   2110             newTagLength = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
   2111             if (tagLen < newTagLength) {
   2112                 uprv_free(tagBuf);
                        /*
                         * Drop the stale pointer right away: if the allocation
                         * below fails, ultag_close(t) would otherwise free the
                         * old buffer a second time.
                         */
                        t->buf = NULL;
   2113                 tagBuf = (char*)uprv_malloc(newTagLength + 1);
   2114                 if (tagBuf == NULL) {
   2115                     *status = U_MEMORY_ALLOCATION_ERROR;
   2116                     ultag_close(t);
   2117                     return NULL;
   2118                 }
   2119                 t->buf = tagBuf;
   2120                 tagLen = newTagLength;
   2121             }
   2122             uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
   2123             break;
   2124         }
   2125     }
   2126
   2127     size_t parsedLenDelta = 0;
   2128     if (grandfatheredLen == 0) {
                /* REDUNDANT holds pairs too: [i] = redundant tag, [i + 1] = preferred tag */
   2129         for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
   2130             const char* redundantTag = REDUNDANT[i];
   2131             size_t redundantTagLen = uprv_strlen(redundantTag);
   2132             // The preferred tag for a redundant tag is always shorter than redundant
   2133             // tag. A redundant tag may or may not be followed by other subtags.
   2134             // (i.e. "zh-yue" or "zh-yue-u-co-pinyin").
   2135             if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) {
   2136                 const char* redundantTagEnd = tagBuf + redundantTagLen;
   2137                 if (*redundantTagEnd  == '\0' || *redundantTagEnd == SEP) {
   2138                     const char* preferredTag = REDUNDANT[i + 1];
   2139                     size_t preferredTagLen = uprv_strlen(preferredTag);
   2140                     uprv_strncpy(t->buf, preferredTag, preferredTagLen);
   2141                     if (*redundantTagEnd == SEP) {
                                /* slide the remaining subtags (and the NUL) down behind the preferred tag */
   2142                         uprv_memmove(tagBuf + preferredTagLen,
   2143                                      redundantTagEnd,
   2144                                      tagLen - redundantTagLen + 1);
   2145                     } else {
   2146                         tagBuf[preferredTagLen] = '\0';
   2147                     }
   2148                     // parsedLen should be the length of the input
   2149                     // before redundantTag is replaced by preferredTag.
   2150                     // Save the delta to add it back later.
   2151                     parsedLenDelta = redundantTagLen - preferredTagLen;
   2152                     break;
   2153                 }
   2154             }
   2155         }
   2156     }
   2157
   2158     /*
   2159      * langtag      =   language
   2160      *                  ["-" script]
   2161      *                  ["-" region]
   2162      *                  *("-" variant)
   2163      *                  *("-" extension)
   2164      *                  ["-" privateuse]
   2165      */
   2166
            /*
             * `next` is the set of subtag kinds acceptable at the current
             * position; pLastGoodPosition is the end of the last accepted subtag
             * and is what parsedLen is computed from.
             */
   2167     next = LANG | PRIV;
   2168     pNext = pLastGoodPosition = tagBuf;
   2169     extlangIdx = 0;
   2170     pExtension = NULL;
   2171     pExtValueSubtag = NULL;
   2172     pExtValueSubtagEnd = NULL;
   2173
   2174     while (pNext) {
   2175         char *pSep;
   2176
   2177         pSubtag = pNext;
   2178
   2179         /* locate next separator char */
   2180         pSep = pSubtag;
   2181         while (*pSep) {
   2182             if (*pSep == SEP) {
   2183                 break;
   2184             }
   2185             pSep++;
   2186         }
   2187         if (*pSep == 0) {
   2188             /* last subtag */
   2189             pNext = NULL;
   2190         } else {
   2191             pNext = pSep + 1;
   2192         }
   2193         subtagLen = (int32_t)(pSep - pSubtag);
   2194
   2195         if (next & LANG) {
   2196             if (_isLanguageSubtag(pSubtag, subtagLen)) {
   2197                 *pSep = 0;  /* terminate */
   2198                 // TODO: move deprecated language code handling here.
   2199                 t->language = T_CString_toLowerCase(pSubtag);
   2200
   2201                 pLastGoodPosition = pSep;
   2202                 next = SCRT | REGN | VART | EXTS | PRIV;
                        /* extlang subtags are only accepted after a language of at most 3 characters */
   2203                 if (subtagLen <= 3)
   2204                   next |= EXTL;
   2205                 continue;
   2206             }
   2207         }
   2208         if (next & EXTL) {
   2209             if (_isExtlangSubtag(pSubtag, subtagLen)) {
   2210                 *pSep = 0;
   2211                 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
   2212
   2213                 pLastGoodPosition = pSep;
                        /* stop offering EXTL once the extlang array is full */
   2214                 if (extlangIdx < MAXEXTLANG) {
   2215                     next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
   2216                 } else {
   2217                     next = SCRT | REGN | VART | EXTS | PRIV;
   2218                 }
   2219                 continue;
   2220             }
   2221         }
   2222         if (next & SCRT) {
   2223             if (_isScriptSubtag(pSubtag, subtagLen)) {
   2224                 char *p = pSubtag;
   2225
   2226                 *pSep = 0;
   2227
   2228                 /* to title case */
   2229                 *p = uprv_toupper(*p);
   2230                 p++;
   2231                 for (; *p; p++) {
   2232                     *p = uprv_tolower(*p);
   2233                 }
   2234
   2235                 t->script = pSubtag;
   2236
   2237                 pLastGoodPosition = pSep;
   2238                 next = REGN | VART | EXTS | PRIV;
   2239                 continue;
   2240             }
   2241         }
   2242         if (next & REGN) {
   2243             if (_isRegionSubtag(pSubtag, subtagLen)) {
   2244                 *pSep = 0;
   2245                 // TODO: move deprecated region code handling here.
   2246                 t->region = T_CString_toUpperCase(pSubtag);
   2247
   2248                 pLastGoodPosition = pSep;
   2249                 next = VART | EXTS | PRIV;
   2250                 continue;
   2251             }
   2252         }
   2253         if (next & VART) {
                    /* privateuseVar is set once the private use variant prefix has been seen (see PRIV below) */
   2254             if (_isVariantSubtag(pSubtag, subtagLen) ||
   2255                (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
   2256                 VariantListEntry *var;
   2257                 UBool isAdded;
   2258
   2259                 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
   2260                 if (var == NULL) {
   2261                     *status = U_MEMORY_ALLOCATION_ERROR;
   2262                     goto error;
   2263                 }
   2264                 *pSep = 0;
   2265                 var->variant = T_CString_toUpperCase(pSubtag);
   2266                 isAdded = _addVariantToList(&(t->variants), var);
   2267                 if (!isAdded) {
   2268                     /* duplicated variant entry */
   2269                     uprv_free(var);
   2270                     break;
   2271                 }
   2272                 pLastGoodPosition = pSep;
   2273                 next = VART | EXTS | PRIV;
   2274                 continue;
   2275             }
   2276         }
   2277         if (next & EXTS) {
   2278             if (_isExtensionSingleton(pSubtag, subtagLen)) {
                        /* a new singleton closes the extension collected so far, if any */
   2279                 if (pExtension != NULL) {
   2280                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   2281                         /* the previous extension is incomplete */
   2282                         uprv_free(pExtension);
   2283                         pExtension = NULL;
   2284                         break;
   2285                     }
   2286
   2287                     /* terminate the previous extension value */
   2288                     *pExtValueSubtagEnd = 0;
   2289                     pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   2290
   2291                     /* insert the extension to the list */
   2292                     if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   2293                         pLastGoodPosition = pExtValueSubtagEnd;
   2294                     } else {
   2295                         /* stop parsing here */
   2296                         uprv_free(pExtension);
   2297                         pExtension = NULL;
   2298                         break;
   2299                     }
   2300                 }
   2301
   2302                 /* create a new extension */
   2303                 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   2304                 if (pExtension == NULL) {
   2305                     *status = U_MEMORY_ALLOCATION_ERROR;
   2306                     goto error;
   2307                 }
   2308                 *pSep = 0;
   2309                 pExtension->key = T_CString_toLowerCase(pSubtag);
   2310                 pExtension->value = NULL;   /* will be set later */
   2311
   2312                 /*
   2313                  * reset the start and the end location of extension value
   2314                  * subtags for this extension
   2315                  */
   2316                 pExtValueSubtag = NULL;
   2317                 pExtValueSubtagEnd = NULL;
   2318
                        /* a singleton must be followed by at least one value subtag */
   2319                 next = EXTV;
   2320                 continue;
   2321             }
   2322         }
   2323         if (next & EXTV) {
   2324             if (_isExtensionSubtag(pSubtag, subtagLen)) {
   2325                 if (pExtValueSubtag == NULL) {
   2326                     /* if the start position of this extension's value is not set yet,
   2327                         this one is the first value subtag */
   2328                     pExtValueSubtag = pSubtag;
   2329                 }
   2330
   2331                 /* Mark the end of this subtag */
                        /* the separator is left in place so a multi-subtag value stays one string */
   2332                 pExtValueSubtagEnd = pSep;
   2333                 next = EXTS | EXTV | PRIV;
   2334
   2335                 continue;
   2336             }
   2337         }
   2338         if (next & PRIV) {
   2339             if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
   2340                 char *pPrivuseVal;
   2341
   2342                 if (pExtension != NULL) {
   2343                     /* Process the last extension */
   2344                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   2345                         /* the previous extension is incomplete */
   2346                         uprv_free(pExtension);
   2347                         pExtension = NULL;
   2348                         break;
   2349                     } else {
   2350                         /* terminate the previous extension value */
   2351                         *pExtValueSubtagEnd = 0;
   2352                         pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   2353
   2354                         /* insert the extension to the list */
   2355                         if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   2356                             pLastGoodPosition = pExtValueSubtagEnd;
   2357                             pExtension = NULL;
   2358                         } else {
   2359                         /* stop parsing here */
   2360                             uprv_free(pExtension);
   2361                             pExtension = NULL;
   2362                             break;
   2363                         }
   2364                     }
   2365                 }
   2366
   2367                 /* The rest of part will be private use value subtags */
   2368                 if (pNext == NULL) {
   2369                     /* empty private use subtag */
   2370                     break;
   2371                 }
   2372                 /* back up the private use value start position */
   2373                 pPrivuseVal = pNext;
   2374
   2375                 /* validate private use value subtags */
   2376                 while (pNext) {
   2377                     pSubtag = pNext;
   2378                     pSep = pSubtag;
   2379                     while (*pSep) {
   2380                         if (*pSep == SEP) {
   2381                             break;
   2382                         }
   2383                         pSep++;
   2384                     }
   2385                     if (*pSep == 0) {
   2386                         /* last subtag */
   2387                         pNext = NULL;
   2388                     } else {
   2389                         pNext = pSep + 1;
   2390                     }
   2391                     subtagLen = (int32_t)(pSep - pSubtag);
   2392
                            /* the variant prefix switches the outer loop back to variant parsing */
   2393                     if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
   2394                         *pSep = 0;
   2395                         next = VART;
   2396                         privateuseVar = TRUE;
   2397                         break;
   2398                     } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
   2399                         pLastGoodPosition = pSep;
   2400                     } else {
   2401                         break;
   2402                     }
   2403                 }
   2404
   2405                 if (next == VART) {
   2406                     continue;
   2407                 }
   2408
                        /* keep the private use value only if at least one value subtag was accepted */
   2409                 if (pLastGoodPosition - pPrivuseVal > 0) {
   2410                     *pLastGoodPosition = 0;
   2411                     t->privateuse = T_CString_toLowerCase(pPrivuseVal);
   2412                 }
   2413                 /* No more subtags, exiting the parse loop */
   2414                 break;
   2415             }
   2416             break;
   2417         }
   2418
   2419         /* If we fell through here, it means this subtag is illegal - quit parsing */
   2420         break;
   2421     }
   2422
   2423     if (pExtension != NULL) {
   2424         /* Process the last extension */
   2425         if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   2426             /* the previous extension is incomplete */
   2427             uprv_free(pExtension);
   2428         } else {
   2429             /* terminate the previous extension value */
   2430             *pExtValueSubtagEnd = 0;
   2431             pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   2432             /* insert the extension to the list */
   2433             if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   2434                 pLastGoodPosition = pExtValueSubtagEnd;
   2435             } else {
   2436                 uprv_free(pExtension);
   2437             }
   2438         }
   2439     }
   2440
   2441     if (parsedLen != NULL) {
                /* a grandfathered tag counts as consumed in full; otherwise add back the redundant-tag shrinkage */
   2442         *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen :
   2443             (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
   2444     }
   2445
   2446     return t;
   2447
   2448 error:
            /* allocation failure inside the loop: release the tag and everything it owns */
   2449     ultag_close(t);
   2450     return NULL;
   2451 }
   2452 
   2453 /**
   2454 * Ticket #12705 - Turn optimization back on.
   2455 */
   2456 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
   2457 #pragma optimize( "", on )
   2458 #endif
   2459 
   2460 static void
   2461 ultag_close(ULanguageTag* langtag) {
   2462 
   2463     if (langtag == NULL) {
   2464         return;
   2465     }
   2466 
   2467     uprv_free(langtag->buf);
   2468 
   2469     if (langtag->variants) {
   2470         VariantListEntry *curVar = langtag->variants;
   2471         while (curVar) {
   2472             VariantListEntry *nextVar = curVar->next;
   2473             uprv_free(curVar);
   2474             curVar = nextVar;
   2475         }
   2476     }
   2477 
   2478     if (langtag->extensions) {
   2479         ExtensionListEntry *curExt = langtag->extensions;
   2480         while (curExt) {
   2481             ExtensionListEntry *nextExt = curExt->next;
   2482             uprv_free(curExt);
   2483             curExt = nextExt;
   2484         }
   2485     }
   2486 
   2487     uprv_free(langtag);
   2488 }
   2489 
   2490 static const char*
   2491 ultag_getLanguage(const ULanguageTag* langtag) {
   2492     return langtag->language;
   2493 }
   2494 
   2495 #if 0
   2496 static const char*
   2497 ultag_getJDKLanguage(const ULanguageTag* langtag) {
   2498     int32_t i;
   2499     for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
   2500         if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
   2501             return DEPRECATEDLANGS[i + 1];
   2502         }
   2503     }
   2504     return langtag->language;
   2505 }
   2506 #endif
   2507 
   2508 static const char*
   2509 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
   2510     if (idx >= 0 && idx < MAXEXTLANG) {
   2511         return langtag->extlang[idx];
   2512     }
   2513     return NULL;
   2514 }
   2515 
   2516 static int32_t
   2517 ultag_getExtlangSize(const ULanguageTag* langtag) {
   2518     int32_t size = 0;
   2519     int32_t i;
   2520     for (i = 0; i < MAXEXTLANG; i++) {
   2521         if (langtag->extlang[i]) {
   2522             size++;
   2523         }
   2524     }
   2525     return size;
   2526 }
   2527 
   2528 static const char*
   2529 ultag_getScript(const ULanguageTag* langtag) {
   2530     return langtag->script;
   2531 }
   2532 
   2533 static const char*
   2534 ultag_getRegion(const ULanguageTag* langtag) {
   2535     return langtag->region;
   2536 }
   2537 
   2538 static const char*
   2539 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
   2540     const char *var = NULL;
   2541     VariantListEntry *cur = langtag->variants;
   2542     int32_t i = 0;
   2543     while (cur) {
   2544         if (i == idx) {
   2545             var = cur->variant;
   2546             break;
   2547         }
   2548         cur = cur->next;
   2549         i++;
   2550     }
   2551     return var;
   2552 }
   2553 
   2554 static int32_t
   2555 ultag_getVariantsSize(const ULanguageTag* langtag) {
   2556     int32_t size = 0;
   2557     VariantListEntry *cur = langtag->variants;
   2558     while (TRUE) {
   2559         if (cur == NULL) {
   2560             break;
   2561         }
   2562         size++;
   2563         cur = cur->next;
   2564     }
   2565     return size;
   2566 }
   2567 
   2568 static const char*
   2569 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
   2570     const char *key = NULL;
   2571     ExtensionListEntry *cur = langtag->extensions;
   2572     int32_t i = 0;
   2573     while (cur) {
   2574         if (i == idx) {
   2575             key = cur->key;
   2576             break;
   2577         }
   2578         cur = cur->next;
   2579         i++;
   2580     }
   2581     return key;
   2582 }
   2583 
   2584 static const char*
   2585 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
   2586     const char *val = NULL;
   2587     ExtensionListEntry *cur = langtag->extensions;
   2588     int32_t i = 0;
   2589     while (cur) {
   2590         if (i == idx) {
   2591             val = cur->value;
   2592             break;
   2593         }
   2594         cur = cur->next;
   2595         i++;
   2596     }
   2597     return val;
   2598 }
   2599 
   2600 static int32_t
   2601 ultag_getExtensionsSize(const ULanguageTag* langtag) {
   2602     int32_t size = 0;
   2603     ExtensionListEntry *cur = langtag->extensions;
   2604     while (TRUE) {
   2605         if (cur == NULL) {
   2606             break;
   2607         }
   2608         size++;
   2609         cur = cur->next;
   2610     }
   2611     return size;
   2612 }
   2613 
   2614 static const char*
   2615 ultag_getPrivateUse(const ULanguageTag* langtag) {
   2616     return langtag->privateuse;
   2617 }
   2618 
   2619 #if 0
   2620 static const char*
   2621 ultag_getGrandfathered(const ULanguageTag* langtag) {
   2622     return langtag->grandfathered;
   2623 }
   2624 #endif
   2625 
   2626 
   2627 /*
   2628 * -------------------------------------------------
   2629 *
   2630 * Locale/BCP47 conversion APIs, exposed as uloc_*
   2631 *
   2632 * -------------------------------------------------
   2633 */
   2634 U_CAPI int32_t U_EXPORT2
   2635 uloc_toLanguageTag(const char* localeID,
   2636                    char* langtag,
   2637                    int32_t langtagCapacity,
   2638                    UBool strict,
   2639                    UErrorCode* status) {
   2640     icu::CharString canonical;
   2641     int32_t reslen;
   2642     UErrorCode tmpStatus = U_ZERO_ERROR;
   2643     UBool hadPosix = FALSE;
   2644     const char* pKeywordStart;
   2645 
   2646     /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
   2647     int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID));
   2648     if (resultCapacity > 0) {
   2649         char* buffer;
   2650 
   2651         for (;;) {
   2652             buffer = canonical.getAppendBuffer(
   2653                     /*minCapacity=*/resultCapacity,
   2654                     /*desiredCapacityHint=*/resultCapacity,
   2655                     resultCapacity,
   2656                     tmpStatus);
   2657 
   2658             if (U_FAILURE(tmpStatus)) {
   2659                 *status = tmpStatus;
   2660                 return 0;
   2661             }
   2662 
   2663             reslen =
   2664                 uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
   2665 
   2666             if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
   2667                 break;
   2668             }
   2669 
   2670             resultCapacity = reslen;
   2671             tmpStatus = U_ZERO_ERROR;
   2672         }
   2673 
   2674         if (U_FAILURE(tmpStatus)) {
   2675             *status = U_ILLEGAL_ARGUMENT_ERROR;
   2676             return 0;
   2677         }
   2678 
   2679         canonical.append(buffer, reslen, tmpStatus);
   2680         if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   2681             tmpStatus = U_ZERO_ERROR;  // Terminators provided by CharString.
   2682         }
   2683 
   2684         if (U_FAILURE(tmpStatus)) {
   2685             *status = tmpStatus;
   2686             return 0;
   2687         }
   2688     }
   2689 
   2690     reslen = 0;
   2691 
   2692     /* For handling special case - private use only tag */
   2693     pKeywordStart = locale_getKeywordsStart(canonical.data());
   2694     if (pKeywordStart == canonical.data()) {
   2695         UEnumeration *kwdEnum;
   2696         int kwdCnt = 0;
   2697         UBool done = FALSE;
   2698 
   2699         kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus);
   2700         if (kwdEnum != NULL) {
   2701             kwdCnt = uenum_count(kwdEnum, &tmpStatus);
   2702             if (kwdCnt == 1) {
   2703                 const char *key;
   2704                 int32_t len = 0;
   2705 
   2706                 key = uenum_next(kwdEnum, &len, &tmpStatus);
   2707                 if (len == 1 && *key == PRIVATEUSE) {
   2708                     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   2709                     buf[0] = PRIVATEUSE;
   2710                     buf[1] = SEP;
   2711                     len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
   2712                     if (U_SUCCESS(tmpStatus)) {
   2713                         if (_isPrivateuseValueSubtags(&buf[2], len)) {
   2714                             /* return private use only tag */
   2715                             reslen = len + 2;
   2716                             uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
   2717                             u_terminateChars(langtag, langtagCapacity, reslen, status);
   2718                             done = TRUE;
   2719                         } else if (strict) {
   2720                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   2721                             done = TRUE;
   2722                         }
   2723                         /* if not strict mode, then "und" will be returned */
   2724                     } else {
   2725                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   2726                         done = TRUE;
   2727                     }
   2728                 }
   2729             }
   2730             uenum_close(kwdEnum);
   2731             if (done) {
   2732                 return reslen;
   2733             }
   2734         }
   2735     }
   2736 
   2737     reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status);
   2738     reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
   2739     reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
   2740     reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
   2741     reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
   2742     reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
   2743 
   2744     return reslen;
   2745 }
   2746 
   2747 
   2748 U_CAPI int32_t U_EXPORT2
   2749 uloc_forLanguageTag(const char* langtag,
   2750                     char* localeID,
   2751                     int32_t localeIDCapacity,
   2752                     int32_t* parsedLength,
   2753                     UErrorCode* status) {
   2754     return ulocimp_forLanguageTag(
   2755             langtag,
   2756             -1,
   2757             localeID,
   2758             localeIDCapacity,
   2759             parsedLength,
   2760             status);
   2761 }
   2762 
   2763 
   2764 U_CAPI int32_t U_EXPORT2
   2765 ulocimp_forLanguageTag(const char* langtag,
   2766                        int32_t tagLen,
   2767                        char* localeID,
   2768                        int32_t localeIDCapacity,
   2769                        int32_t* parsedLength,
   2770                        UErrorCode* status) {
   2771     ULanguageTag *lt;
   2772     int32_t reslen = 0;
   2773     const char *subtag, *p;
   2774     int32_t len;
   2775     int32_t i, n;
   2776     UBool noRegion = TRUE;
   2777 
   2778     lt = ultag_parse(langtag, tagLen, parsedLength, status);
   2779     if (U_FAILURE(*status)) {
   2780         return 0;
   2781     }
   2782 
   2783     /* language */
   2784     subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
   2785     if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
   2786         len = (int32_t)uprv_strlen(subtag);
   2787         if (len > 0) {
   2788             if (reslen < localeIDCapacity) {
   2789                 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
   2790             }
   2791             reslen += len;
   2792         }
   2793     }
   2794 
   2795     /* script */
   2796     subtag = ultag_getScript(lt);
   2797     len = (int32_t)uprv_strlen(subtag);
   2798     if (len > 0) {
   2799         if (reslen < localeIDCapacity) {
   2800             *(localeID + reslen) = LOCALE_SEP;
   2801         }
   2802         reslen++;
   2803 
   2804         /* write out the script in title case */
   2805         p = subtag;
   2806         while (*p) {
   2807             if (reslen < localeIDCapacity) {
   2808                 if (p == subtag) {
   2809                     *(localeID + reslen) = uprv_toupper(*p);
   2810                 } else {
   2811                     *(localeID + reslen) = *p;
   2812                 }
   2813             }
   2814             reslen++;
   2815             p++;
   2816         }
   2817     }
   2818 
   2819     /* region */
   2820     subtag = ultag_getRegion(lt);
   2821     len = (int32_t)uprv_strlen(subtag);
   2822     if (len > 0) {
   2823         if (reslen < localeIDCapacity) {
   2824             *(localeID + reslen) = LOCALE_SEP;
   2825         }
   2826         reslen++;
   2827         /* write out the retion in upper case */
   2828         p = subtag;
   2829         while (*p) {
   2830             if (reslen < localeIDCapacity) {
   2831                 *(localeID + reslen) = uprv_toupper(*p);
   2832             }
   2833             reslen++;
   2834             p++;
   2835         }
   2836         noRegion = FALSE;
   2837     }
   2838 
   2839     /* variants */
   2840     n = ultag_getVariantsSize(lt);
   2841     if (n > 0) {
   2842         if (noRegion) {
   2843             if (reslen < localeIDCapacity) {
   2844                 *(localeID + reslen) = LOCALE_SEP;
   2845             }
   2846             reslen++;
   2847         }
   2848 
   2849         for (i = 0; i < n; i++) {
   2850             subtag = ultag_getVariant(lt, i);
   2851             if (reslen < localeIDCapacity) {
   2852                 *(localeID + reslen) = LOCALE_SEP;
   2853             }
   2854             reslen++;
   2855             /* write out the variant in upper case */
   2856             p = subtag;
   2857             while (*p) {
   2858                 if (reslen < localeIDCapacity) {
   2859                     *(localeID + reslen) = uprv_toupper(*p);
   2860                 }
   2861                 reslen++;
   2862                 p++;
   2863             }
   2864         }
   2865     }
   2866 
   2867     /* keywords */
   2868     n = ultag_getExtensionsSize(lt);
   2869     subtag = ultag_getPrivateUse(lt);
   2870     if (n > 0 || uprv_strlen(subtag) > 0) {
   2871         if (reslen == 0 && n > 0) {
   2872             /* need a language */
   2873             if (reslen < localeIDCapacity) {
   2874                 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
   2875             }
   2876             reslen += LANG_UND_LEN;
   2877         }
   2878         len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
   2879         reslen += len;
   2880     }
   2881 
   2882     ultag_close(lt);
   2883     return u_terminateChars(localeID, localeIDCapacity, reslen, status);
   2884 }
   2885 
   2886 
   2887