Home | History | Annotate | Download | only in common
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2009-2015, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 */
      9 
     10 #include "unicode/utypes.h"
     11 #include "unicode/ures.h"
     12 #include "unicode/putil.h"
     13 #include "unicode/uloc.h"
     14 #include "ustr_imp.h"
     15 #include "cmemory.h"
     16 #include "cstring.h"
     17 #include "putilimp.h"
     18 #include "uinvchar.h"
     19 #include "ulocimp.h"
     20 #include "uassert.h"
     21 
     22 
     /*
      * Node of a singly linked list of variant subtags.
      * The node stores only a pointer to the variant text; it does not copy it.
      */
     typedef struct VariantListEntry VariantListEntry;
     struct VariantListEntry {
         const char       *variant;  /* text of this variant subtag */
         VariantListEntry *next;     /* following node, or NULL at the tail */
     };
     28 
     /*
      * Node of a singly linked list of attribute values.
      * The node stores only a pointer to the attribute text; it does not copy it.
      */
     typedef struct AttributeListEntry AttributeListEntry;
     struct AttributeListEntry {
         const char         *attribute;  /* text of this attribute value */
         AttributeListEntry *next;       /* following node, or NULL at the tail */
     };
     34 
     /*
      * Node of a singly linked list of extensions, each a key/value pair.
      * The node stores only pointers to the key and value text.
      */
     typedef struct ExtensionListEntry ExtensionListEntry;
     struct ExtensionListEntry {
         const char         *key;    /* extension key (singleton or keyword key) */
         const char         *value;  /* value associated with the key */
         ExtensionListEntry *next;   /* following node, or NULL at the tail */
     };
     41 
     42 #define MAXEXTLANG 3
     43 typedef struct ULanguageTag {
     44     char                *buf;   /* holding parsed subtags */
     45     const char          *language;
     46     const char          *extlang[MAXEXTLANG];
     47     const char          *script;
     48     const char          *region;
     49     VariantListEntry    *variants;
     50     ExtensionListEntry  *extensions;
     51     const char          *privateuse;
     52     const char          *grandfathered;
     53 } ULanguageTag;
     54 
     /* Separators and character-class helpers used by the subtag validators and appenders in this file. */
     55 #define MINLEN 2 /* not referenced in the visible part of this file; presumably a minimum subtag length -- confirm */
     56 #define SEP '-' /* subtag separator inside a language tag */
     57 #define PRIVATEUSE 'x' /* private use singleton; rejected by _isExtensionSingleton() */
     58 #define LDMLEXT 'u' /* singleton that multi-letter keys are ordered as in _addExtensionToList() */
     59 
     60 #define LOCALE_SEP '_' /* accepted next to SEP as a variant separator in _appendVariantsToLanguageTag() */
     61 #define LOCALE_EXT_SEP '@' /* usage not visible in this part of the file */
     62 #define LOCALE_KEYWORD_SEP ';' /* usage not visible in this part of the file */
     63 #define LOCALE_KEY_TYPE_SEP '=' /* usage not visible in this part of the file */
     64 
     65 #define ISALPHA(c) uprv_isASCIILetter(c) /* forwards to uprv_isASCIILetter() */
     66 #define ISNUMERIC(c) ((c)>='0' && (c)<='9') /* ASCII digit test; evaluates its argument twice */
     67 
     /* String constants shared by the functions in this file. */
     68 static const char EMPTY[] = ""; /* default for the string fields of ULanguageTag */
     69 static const char LANG_UND[] = "und"; /* written when the language is missing or invalid */
     70 static const char PRIVATEUSE_KEY[] = "x"; /* usage not visible in this part of the file */
     71 static const char _POSIX[] = "_POSIX"; /* usage not visible in this part of the file */
     72 static const char POSIX_KEY[] = "va"; /* usage not visible in this part of the file */
     73 static const char POSIX_VALUE[] = "posix"; /* lowercased variant that _appendVariantsToLanguageTag() reports via hadPosix */
     74 static const char LOCALE_ATTRIBUTE_KEY[] = "attribute"; /* key given special ordering in _addExtensionToList() */
     75 static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant"; /* usage not visible in this part of the file */
     76 static const char LOCALE_TYPE_YES[] = "yes"; /* usage not visible in this part of the file */
     77 
     78 #define LANG_UND_LEN 3 /* used as the copy length for LANG_UND, so it must equal strlen(LANG_UND) */
     79 
     /*
      * Flat table of string pairs: even indexes hold a grandfathered tag and the
      * following odd index holds its preferred replacement. The table ends with a
      * NULL, NULL pair. The code that reads it is not visible in this part of the file.
      */
     80 static const char* const GRANDFATHERED[] = {
     81 /*  grandfathered   preferred */
     82     "art-lojban",   "jbo",
     83     "cel-gaulish",  "xtg-x-cel-gaulish",
     84     "en-GB-oed",    "en-GB-x-oed",
     85     "i-ami",        "ami",
     86     "i-bnn",        "bnn",
     87     "i-default",    "en-x-i-default",
     88     "i-enochian",   "und-x-i-enochian",
     89     "i-hak",        "hak",
     90     "i-klingon",    "tlh",
     91     "i-lux",        "lb",
     92     "i-mingo",      "see-x-i-mingo",
     93     "i-navajo",     "nv",
     94     "i-pwn",        "pwn",
     95     "i-tao",        "tao",
     96     "i-tay",        "tay",
     97     "i-tsu",        "tsu",
     98     "no-bok",       "nb",
     99     "no-nyn",       "nn",
    100     "sgn-be-fr",    "sfb",
    101     "sgn-be-nl",    "vgt",
    102     "sgn-ch-de",    "sgg",
    103     "zh-guoyu",     "cmn",
    104     "zh-hakka",     "hak",
    105     "zh-min",       "nan-x-zh-min",
    106     "zh-min-nan",   "nan",
    107     "zh-xiang",     "hsn",
    108     NULL,           NULL /* end-of-table marker pair */
    109 };
    110 
    /*
     * Flat table of language code pairs: even indexes hold a deprecated code and
     * the following odd index holds its replacement. Each entry is a char[4].
     * _appendLanguageToLanguageTag() walks it two entries at a time; there is no
     * terminator, the length comes from UPRV_LENGTHOF.
     */
    111 static const char DEPRECATEDLANGS[][4] = {
    112 /*  deprecated  new */
    113     "iw",       "he",
    114     "ji",       "yi",
    115     "in",       "id"
    116 };
    117 
    118 /*
    119 * -------------------------------------------------
    120 *
    121 * These ultag_ functions may be exposed as APIs later
    122 *
    123 * -------------------------------------------------
    124 */
    125 
    /*
     * Forward declarations of the file-local ultag_ parser and accessors.
     * Their definitions are not visible in this part of the file; the notes
     * below are inferred from the names and signatures only.
     */

    /* Presumably parses tag (tagLen chars) into a new ULanguageTag and reports progress in parsedLen -- confirm against the definition. */
    126 static ULanguageTag*
    127 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
    128 
    /* Presumably releases a tag returned by ultag_parse() -- confirm against the definition. */
    129 static void
    130 ultag_close(ULanguageTag* langtag);
    131 
    132 static const char*
    133 ultag_getLanguage(const ULanguageTag* langtag);
    134 
    /* Compiled out. */
    135 #if 0
    136 static const char*
    137 ultag_getJDKLanguage(const ULanguageTag* langtag);
    138 #endif
    139 
    /* Indexed accessor; pairs with ultag_getExtlangSize(). */
    140 static const char*
    141 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
    142 
    143 static int32_t
    144 ultag_getExtlangSize(const ULanguageTag* langtag);
    145 
    146 static const char*
    147 ultag_getScript(const ULanguageTag* langtag);
    148 
    149 static const char*
    150 ultag_getRegion(const ULanguageTag* langtag);
    151 
    /* Indexed accessor; pairs with ultag_getVariantsSize(). */
    152 static const char*
    153 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
    154 
    155 static int32_t
    156 ultag_getVariantsSize(const ULanguageTag* langtag);
    157 
    /* Indexed accessors; pair with ultag_getExtensionsSize(). */
    158 static const char*
    159 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
    160 
    161 static const char*
    162 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
    163 
    164 static int32_t
    165 ultag_getExtensionsSize(const ULanguageTag* langtag);
    166 
    167 static const char*
    168 ultag_getPrivateUse(const ULanguageTag* langtag);
    169 
    /* Compiled out. */
    170 #if 0
    171 static const char*
    172 ultag_getGrandfathered(const ULanguageTag* langtag);
    173 #endif
    174 
    175 /*
    176 * -------------------------------------------------
    177 *
    178 * Language subtag syntax validation functions
    179 *
    180 * -------------------------------------------------
    181 */
    182 
    183 static UBool
    184 _isAlphaString(const char* s, int32_t len) {
    185     int32_t i;
    186     for (i = 0; i < len; i++) {
    187         if (!ISALPHA(*(s + i))) {
    188             return FALSE;
    189         }
    190     }
    191     return TRUE;
    192 }
    193 
    194 static UBool
    195 _isNumericString(const char* s, int32_t len) {
    196     int32_t i;
    197     for (i = 0; i < len; i++) {
    198         if (!ISNUMERIC(*(s + i))) {
    199             return FALSE;
    200         }
    201     }
    202     return TRUE;
    203 }
    204 
    205 static UBool
    206 _isAlphaNumericString(const char* s, int32_t len) {
    207     int32_t i;
    208     for (i = 0; i < len; i++) {
    209         if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
    210             return FALSE;
    211         }
    212     }
    213     return TRUE;
    214 }
    215 
    216 static UBool
    217 _isLanguageSubtag(const char* s, int32_t len) {
    218     /*
    219      * language      = 2*3ALPHA            ; shortest ISO 639 code
    220      *                 ["-" extlang]       ; sometimes followed by
    221      *                                     ;   extended language subtags
    222      *               / 4ALPHA              ; or reserved for future use
    223      *               / 5*8ALPHA            ; or registered language subtag
    224      */
    225     if (len < 0) {
    226         len = (int32_t)uprv_strlen(s);
    227     }
    228     if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
    229         return TRUE;
    230     }
    231     return FALSE;
    232 }
    233 
    234 static UBool
    235 _isExtlangSubtag(const char* s, int32_t len) {
    236     /*
    237      * extlang       = 3ALPHA              ; selected ISO 639 codes
    238      *                 *2("-" 3ALPHA)      ; permanently reserved
    239      */
    240     if (len < 0) {
    241         len = (int32_t)uprv_strlen(s);
    242     }
    243     if (len == 3 && _isAlphaString(s, len)) {
    244         return TRUE;
    245     }
    246     return FALSE;
    247 }
    248 
    249 static UBool
    250 _isScriptSubtag(const char* s, int32_t len) {
    251     /*
    252      * script        = 4ALPHA              ; ISO 15924 code
    253      */
    254     if (len < 0) {
    255         len = (int32_t)uprv_strlen(s);
    256     }
    257     if (len == 4 && _isAlphaString(s, len)) {
    258         return TRUE;
    259     }
    260     return FALSE;
    261 }
    262 
    263 static UBool
    264 _isRegionSubtag(const char* s, int32_t len) {
    265     /*
    266      * region        = 2ALPHA              ; ISO 3166-1 code
    267      *               / 3DIGIT              ; UN M.49 code
    268      */
    269     if (len < 0) {
    270         len = (int32_t)uprv_strlen(s);
    271     }
    272     if (len == 2 && _isAlphaString(s, len)) {
    273         return TRUE;
    274     }
    275     if (len == 3 && _isNumericString(s, len)) {
    276         return TRUE;
    277     }
    278     return FALSE;
    279 }
    280 
    281 static UBool
    282 _isVariantSubtag(const char* s, int32_t len) {
    283     /*
    284      * variant       = 5*8alphanum         ; registered variants
    285      *               / (DIGIT 3alphanum)
    286      */
    287     if (len < 0) {
    288         len = (int32_t)uprv_strlen(s);
    289     }
    290     if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
    291         return TRUE;
    292     }
    293     if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
    294         return TRUE;
    295     }
    296     return FALSE;
    297 }
    298 
    299 static UBool
    300 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
    301     /*
    302      * variant       = 1*8alphanum         ; registered variants
    303      *               / (DIGIT 3alphanum)
    304      */
    305     if (len < 0) {
    306         len = (int32_t)uprv_strlen(s);
    307     }
    308     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
    309         return TRUE;
    310     }
    311     return FALSE;
    312 }
    313 
    314 static UBool
    315 _isExtensionSingleton(const char* s, int32_t len) {
    316     /*
    317      * extension     = singleton 1*("-" (2*8alphanum))
    318      */
    319     if (len < 0) {
    320         len = (int32_t)uprv_strlen(s);
    321     }
    322     if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
    323         return TRUE;
    324     }
    325     return FALSE;
    326 }
    327 
    328 static UBool
    329 _isExtensionSubtag(const char* s, int32_t len) {
    330     /*
    331      * extension     = singleton 1*("-" (2*8alphanum))
    332      */
    333     if (len < 0) {
    334         len = (int32_t)uprv_strlen(s);
    335     }
    336     if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
    337         return TRUE;
    338     }
    339     return FALSE;
    340 }
    341 
    342 static UBool
    343 _isExtensionSubtags(const char* s, int32_t len) {
    344     const char *p = s;
    345     const char *pSubtag = NULL;
    346 
    347     if (len < 0) {
    348         len = (int32_t)uprv_strlen(s);
    349     }
    350 
    351     while ((p - s) < len) {
    352         if (*p == SEP) {
    353             if (pSubtag == NULL) {
    354                 return FALSE;
    355             }
    356             if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
    357                 return FALSE;
    358             }
    359             pSubtag = NULL;
    360         } else if (pSubtag == NULL) {
    361             pSubtag = p;
    362         }
    363         p++;
    364     }
    365     if (pSubtag == NULL) {
    366         return FALSE;
    367     }
    368     return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
    369 }
    370 
    371 static UBool
    372 _isPrivateuseValueSubtag(const char* s, int32_t len) {
    373     /*
    374      * privateuse    = "x" 1*("-" (1*8alphanum))
    375      */
    376     if (len < 0) {
    377         len = (int32_t)uprv_strlen(s);
    378     }
    379     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
    380         return TRUE;
    381     }
    382     return FALSE;
    383 }
    384 
    385 static UBool
    386 _isPrivateuseValueSubtags(const char* s, int32_t len) {
    387     const char *p = s;
    388     const char *pSubtag = NULL;
    389 
    390     if (len < 0) {
    391         len = (int32_t)uprv_strlen(s);
    392     }
    393 
    394     while ((p - s) < len) {
    395         if (*p == SEP) {
    396             if (pSubtag == NULL) {
    397                 return FALSE;
    398             }
    399             if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
    400                 return FALSE;
    401             }
    402             pSubtag = NULL;
    403         } else if (pSubtag == NULL) {
    404             pSubtag = p;
    405         }
    406         p++;
    407     }
    408     if (pSubtag == NULL) {
    409         return FALSE;
    410     }
    411     return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
    412 }
    413 
    414 U_CFUNC UBool
    415 ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
    416     if (len < 0) {
    417         len = (int32_t)uprv_strlen(s);
    418     }
    419     if (len == 2 && _isAlphaNumericString(s, len)) {
    420         return TRUE;
    421     }
    422     return FALSE;
    423 }
    424 
    425 U_CFUNC UBool
    426 ultag_isUnicodeLocaleType(const char*s, int32_t len) {
    427     const char* p;
    428     int32_t subtagLen = 0;
    429 
    430     if (len < 0) {
    431         len = (int32_t)uprv_strlen(s);
    432     }
    433 
    434     for (p = s; len > 0; p++, len--) {
    435         if (*p == SEP) {
    436             if (subtagLen < 3) {
    437                 return FALSE;
    438             }
    439             subtagLen = 0;
    440         } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
    441             subtagLen++;
    442             if (subtagLen > 8) {
    443                 return FALSE;
    444             }
    445         } else {
    446             return FALSE;
    447         }
    448     }
    449 
    450     return (subtagLen >= 3);
    451 }
    452 /*
    453 * -------------------------------------------------
    454 *
    455 * Helper functions
    456 *
    457 * -------------------------------------------------
    458 */
    459 
    460 static UBool
    461 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
    462     UBool bAdded = TRUE;
    463 
    464     if (*first == NULL) {
    465         var->next = NULL;
    466         *first = var;
    467     } else {
    468         VariantListEntry *prev, *cur;
    469         int32_t cmp;
    470 
    471         /* variants order should be preserved */
    472         prev = NULL;
    473         cur = *first;
    474         while (TRUE) {
    475             if (cur == NULL) {
    476                 prev->next = var;
    477                 var->next = NULL;
    478                 break;
    479             }
    480 
    481             /* Checking for duplicate variant */
    482             cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
    483             if (cmp == 0) {
    484                 /* duplicated variant */
    485                 bAdded = FALSE;
    486                 break;
    487             }
    488             prev = cur;
    489             cur = cur->next;
    490         }
    491     }
    492 
    493     return bAdded;
    494 }
    495 
    496 static UBool
    497 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
    498     UBool bAdded = TRUE;
    499 
    500     if (*first == NULL) {
    501         attr->next = NULL;
    502         *first = attr;
    503     } else {
    504         AttributeListEntry *prev, *cur;
    505         int32_t cmp;
    506 
    507         /* reorder variants in alphabetical order */
    508         prev = NULL;
    509         cur = *first;
    510         while (TRUE) {
    511             if (cur == NULL) {
    512                 prev->next = attr;
    513                 attr->next = NULL;
    514                 break;
    515             }
    516             cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
    517             if (cmp < 0) {
    518                 if (prev == NULL) {
    519                     *first = attr;
    520                 } else {
    521                     prev->next = attr;
    522                 }
    523                 attr->next = cur;
    524                 break;
    525             }
    526             if (cmp == 0) {
    527                 /* duplicated variant */
    528                 bAdded = FALSE;
    529                 break;
    530             }
    531             prev = cur;
    532             cur = cur->next;
    533         }
    534     }
    535 
    536     return bAdded;
    537 }
    538 
    539 
    540 static UBool
    541 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
    542     UBool bAdded = TRUE;
    543 
    544     if (*first == NULL) {
    545         ext->next = NULL;
    546         *first = ext;
    547     } else {
    548         ExtensionListEntry *prev, *cur;
    549         int32_t cmp;
    550 
    551         /* reorder variants in alphabetical order */
    552         prev = NULL;
    553         cur = *first;
    554         while (TRUE) {
    555             if (cur == NULL) {
    556                 prev->next = ext;
    557                 ext->next = NULL;
    558                 break;
    559             }
    560             if (localeToBCP) {
    561                 /* special handling for locale to bcp conversion */
    562                 int32_t len, curlen;
    563 
    564                 len = (int32_t)uprv_strlen(ext->key);
    565                 curlen = (int32_t)uprv_strlen(cur->key);
    566 
    567                 if (len == 1 && curlen == 1) {
    568                     if (*(ext->key) == *(cur->key)) {
    569                         cmp = 0;
    570                     } else if (*(ext->key) == PRIVATEUSE) {
    571                         cmp = 1;
    572                     } else if (*(cur->key) == PRIVATEUSE) {
    573                         cmp = -1;
    574                     } else {
    575                         cmp = *(ext->key) - *(cur->key);
    576                     }
    577                 } else if (len == 1) {
    578                     cmp = *(ext->key) - LDMLEXT;
    579                 } else if (curlen == 1) {
    580                     cmp = LDMLEXT - *(cur->key);
    581                 } else {
    582                     cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
    583                     /* Both are u extension keys - we need special handling for 'attribute' */
    584                     if (cmp != 0) {
    585                         if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
    586                             cmp = 1;
    587                         } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
    588                             cmp = -1;
    589                         }
    590                     }
    591                 }
    592             } else {
    593                 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
    594             }
    595             if (cmp < 0) {
    596                 if (prev == NULL) {
    597                     *first = ext;
    598                 } else {
    599                     prev->next = ext;
    600                 }
    601                 ext->next = cur;
    602                 break;
    603             }
    604             if (cmp == 0) {
    605                 /* duplicated extension key */
    606                 bAdded = FALSE;
    607                 break;
    608             }
    609             prev = cur;
    610             cur = cur->next;
    611         }
    612     }
    613 
    614     return bAdded;
    615 }
    616 
    617 static void
    618 _initializeULanguageTag(ULanguageTag* langtag) {
    619     int32_t i;
    620 
    621     langtag->buf = NULL;
    622 
    623     langtag->language = EMPTY;
    624     for (i = 0; i < MAXEXTLANG; i++) {
    625         langtag->extlang[i] = NULL;
    626     }
    627 
    628     langtag->script = EMPTY;
    629     langtag->region = EMPTY;
    630 
    631     langtag->variants = NULL;
    632     langtag->extensions = NULL;
    633 
    634     langtag->grandfathered = EMPTY;
    635     langtag->privateuse = EMPTY;
    636 }
    637 
    638 static int32_t
    639 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
    640     char buf[ULOC_LANG_CAPACITY];
    641     UErrorCode tmpStatus = U_ZERO_ERROR;
    642     int32_t len, i;
    643     int32_t reslen = 0;
    644 
    645     if (U_FAILURE(*status)) {
    646         return 0;
    647     }
    648 
    649     len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
    650     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
    651         if (strict) {
    652             *status = U_ILLEGAL_ARGUMENT_ERROR;
    653             return 0;
    654         }
    655         len = 0;
    656     }
    657 
    658     /* Note: returned language code is in lower case letters */
    659 
    660     if (len == 0) {
    661         if (reslen < capacity) {
    662             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
    663         }
    664         reslen += LANG_UND_LEN;
    665     } else if (!_isLanguageSubtag(buf, len)) {
    666             /* invalid language code */
    667         if (strict) {
    668             *status = U_ILLEGAL_ARGUMENT_ERROR;
    669             return 0;
    670         }
    671         if (reslen < capacity) {
    672             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
    673         }
    674         reslen += LANG_UND_LEN;
    675     } else {
    676         /* resolve deprecated */
    677         for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
    678             if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
    679                 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
    680                 len = (int32_t)uprv_strlen(buf);
    681                 break;
    682             }
    683         }
    684         if (reslen < capacity) {
    685             uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
    686         }
    687         reslen += len;
    688     }
    689     u_terminateChars(appendAt, capacity, reslen, status);
    690     return reslen;
    691 }
    692 
    693 static int32_t
    694 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
    695     char buf[ULOC_SCRIPT_CAPACITY];
    696     UErrorCode tmpStatus = U_ZERO_ERROR;
    697     int32_t len;
    698     int32_t reslen = 0;
    699 
    700     if (U_FAILURE(*status)) {
    701         return 0;
    702     }
    703 
    704     len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
    705     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
    706         if (strict) {
    707             *status = U_ILLEGAL_ARGUMENT_ERROR;
    708         }
    709         return 0;
    710     }
    711 
    712     if (len > 0) {
    713         if (!_isScriptSubtag(buf, len)) {
    714             /* invalid script code */
    715             if (strict) {
    716                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    717             }
    718             return 0;
    719         } else {
    720             if (reslen < capacity) {
    721                 *(appendAt + reslen) = SEP;
    722             }
    723             reslen++;
    724 
    725             if (reslen < capacity) {
    726                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
    727             }
    728             reslen += len;
    729         }
    730     }
    731     u_terminateChars(appendAt, capacity, reslen, status);
    732     return reslen;
    733 }
    734 
    735 static int32_t
    736 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
    737     char buf[ULOC_COUNTRY_CAPACITY];
    738     UErrorCode tmpStatus = U_ZERO_ERROR;
    739     int32_t len;
    740     int32_t reslen = 0;
    741 
    742     if (U_FAILURE(*status)) {
    743         return 0;
    744     }
    745 
    746     len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
    747     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
    748         if (strict) {
    749             *status = U_ILLEGAL_ARGUMENT_ERROR;
    750         }
    751         return 0;
    752     }
    753 
    754     if (len > 0) {
    755         if (!_isRegionSubtag(buf, len)) {
    756             /* invalid region code */
    757             if (strict) {
    758                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    759             }
    760             return 0;
    761         } else {
    762             if (reslen < capacity) {
    763                 *(appendAt + reslen) = SEP;
    764             }
    765             reslen++;
    766 
    767             if (reslen < capacity) {
    768                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
    769             }
    770             reslen += len;
    771         }
    772     }
    773     u_terminateChars(appendAt, capacity, reslen, status);
    774     return reslen;
    775 }
    776 
    777 static int32_t
    778 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
    779     char buf[ULOC_FULLNAME_CAPACITY];
    780     UErrorCode tmpStatus = U_ZERO_ERROR;
    781     int32_t len, i;
    782     int32_t reslen = 0;
    783 
    784     if (U_FAILURE(*status)) {
    785         return 0;
    786     }
    787 
    788     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
    789     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
    790         if (strict) {
    791             *status = U_ILLEGAL_ARGUMENT_ERROR;
    792         }
    793         return 0;
    794     }
    795 
    796     if (len > 0) {
    797         char *p, *pVar;
    798         UBool bNext = TRUE;
    799         VariantListEntry *var;
    800         VariantListEntry *varFirst = NULL;
    801 
    802         pVar = NULL;
    803         p = buf;
    804         while (bNext) {
    805             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
    806                 if (*p == 0) {
    807                     bNext = FALSE;
    808                 } else {
    809                     *p = 0; /* terminate */
    810                 }
    811                 if (pVar == NULL) {
    812                     if (strict) {
    813                         *status = U_ILLEGAL_ARGUMENT_ERROR;
    814                         break;
    815                     }
    816                     /* ignore empty variant */
    817                 } else {
    818                     /* ICU uses upper case letters for variants, but
    819                        the canonical format is lowercase in BCP47 */
    820                     for (i = 0; *(pVar + i) != 0; i++) {
    821                         *(pVar + i) = uprv_tolower(*(pVar + i));
    822                     }
    823 
    824                     /* validate */
    825                     if (_isVariantSubtag(pVar, -1)) {
    826                         if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) {
    827                             /* emit the variant to the list */
    828                             var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
    829                             if (var == NULL) {
    830                                 *status = U_MEMORY_ALLOCATION_ERROR;
    831                                 break;
    832                             }
    833                             var->variant = pVar;
    834                             if (!_addVariantToList(&varFirst, var)) {
    835                                 /* duplicated variant */
    836                                 uprv_free(var);
    837                                 if (strict) {
    838                                     *status = U_ILLEGAL_ARGUMENT_ERROR;
    839                                     break;
    840                                 }
    841                             }
    842                         } else {
    843                             /* Special handling for POSIX variant, need to remember that we had it and then */
    844                             /* treat it like an extension later. */
    845                             *hadPosix = TRUE;
    846                         }
    847                     } else if (strict) {
    848                         *status = U_ILLEGAL_ARGUMENT_ERROR;
    849                         break;
    850                     } else if (_isPrivateuseValueSubtag(pVar, -1)) {
    851                         /* Handle private use subtags separately */
    852                         break;
    853                     }
    854                 }
    855                 /* reset variant starting position */
    856                 pVar = NULL;
    857             } else if (pVar == NULL) {
    858                 pVar = p;
    859             }
    860             p++;
    861         }
    862 
    863         if (U_SUCCESS(*status)) {
    864             if (varFirst != NULL) {
    865                 int32_t varLen;
    866 
    867                 /* write out validated/normalized variants to the target */
    868                 var = varFirst;
    869                 while (var != NULL) {
    870                     if (reslen < capacity) {
    871                         *(appendAt + reslen) = SEP;
    872                     }
    873                     reslen++;
    874                     varLen = (int32_t)uprv_strlen(var->variant);
    875                     if (reslen < capacity) {
    876                         uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
    877                     }
    878                     reslen += varLen;
    879                     var = var->next;
    880                 }
    881             }
    882         }
    883 
    884         /* clean up */
    885         var = varFirst;
    886         while (var != NULL) {
    887             VariantListEntry *tmpVar = var->next;
    888             uprv_free(var);
    889             var = tmpVar;
    890         }
    891 
    892         if (U_FAILURE(*status)) {
    893             return 0;
    894         }
    895     }
    896 
    897     u_terminateChars(appendAt, capacity, reslen, status);
    898     return reslen;
    899 }
    900 
    901 static int32_t
    902 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
    903     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
    904     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
    905     int32_t attrBufLength = 0;
    906     UEnumeration *keywordEnum = NULL;
    907     int32_t reslen = 0;
    908 
    909     keywordEnum = uloc_openKeywords(localeID, status);
    910     if (U_FAILURE(*status) && !hadPosix) {
    911         uenum_close(keywordEnum);
    912         return 0;
    913     }
    914     if (keywordEnum != NULL || hadPosix) {
    915         /* reorder extensions */
    916         int32_t len;
    917         const char *key;
    918         ExtensionListEntry *firstExt = NULL;
    919         ExtensionListEntry *ext;
    920         AttributeListEntry *firstAttr = NULL;
    921         AttributeListEntry *attr;
    922         char *attrValue;
    923         char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
    924         char *pExtBuf = extBuf;
    925         int32_t extBufCapacity = sizeof(extBuf);
    926         const char *bcpKey=nullptr, *bcpValue=nullptr;
    927         UErrorCode tmpStatus = U_ZERO_ERROR;
    928         int32_t keylen;
    929         UBool isBcpUExt;
    930 
    931         while (TRUE) {
    932             key = uenum_next(keywordEnum, NULL, status);
    933             if (key == NULL) {
    934                 break;
    935             }
    936             len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
    937             /* buf must be null-terminated */
    938             if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
    939                 if (strict) {
    940                     *status = U_ILLEGAL_ARGUMENT_ERROR;
    941                     break;
    942                 }
    943                 /* ignore this keyword */
    944                 tmpStatus = U_ZERO_ERROR;
    945                 continue;
    946             }
    947 
    948             keylen = (int32_t)uprv_strlen(key);
    949             isBcpUExt = (keylen > 1);
    950 
    951             /* special keyword used for representing Unicode locale attributes */
    952             if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
    953                 if (len > 0) {
    954                     int32_t i = 0;
    955                     while (TRUE) {
    956                         attrBufLength = 0;
    957                         for (; i < len; i++) {
    958                             if (buf[i] != '-') {
    959                                 attrBuf[attrBufLength++] = buf[i];
    960                             } else {
    961                                 i++;
    962                                 break;
    963                             }
    964                         }
    965                         if (attrBufLength > 0) {
    966                             attrBuf[attrBufLength] = 0;
    967 
    968                         } else if (i >= len){
    969                             break;
    970                         }
    971 
    972                         /* create AttributeListEntry */
    973                         attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
    974                         if (attr == NULL) {
    975                             *status = U_MEMORY_ALLOCATION_ERROR;
    976                             break;
    977                         }
    978                         attrValue = (char*)uprv_malloc(attrBufLength + 1);
    979                         if (attrValue == NULL) {
    980                             *status = U_MEMORY_ALLOCATION_ERROR;
    981                             break;
    982                         }
    983                         uprv_strcpy(attrValue, attrBuf);
    984                         attr->attribute = attrValue;
    985 
    986                         if (!_addAttributeToList(&firstAttr, attr)) {
    987                             uprv_free(attr);
    988                             uprv_free(attrValue);
    989                             if (strict) {
    990                                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    991                                 break;
    992                             }
    993                         }
    994                     }
    995                     /* for a place holder ExtensionListEntry */
    996                     bcpKey = LOCALE_ATTRIBUTE_KEY;
    997                     bcpValue = NULL;
    998                 }
    999             } else if (isBcpUExt) {
   1000                 bcpKey = uloc_toUnicodeLocaleKey(key);
   1001                 if (bcpKey == NULL) {
   1002                     if (strict) {
   1003                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1004                         break;
   1005                     }
   1006                     continue;
   1007                 }
   1008 
   1009                 /* we've checked buf is null-terminated above */
   1010                 bcpValue = uloc_toUnicodeLocaleType(key, buf);
   1011                 if (bcpValue == NULL) {
   1012                     if (strict) {
   1013                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1014                         break;
   1015                     }
   1016                     continue;
   1017                 }
   1018                 if (bcpValue == buf) {
   1019                     /*
   1020                     When uloc_toUnicodeLocaleType(key, buf) returns the
   1021                     input value as is, the value is well-formed, but has
   1022                     no known mapping. This implementation normalizes the
   1023                     the value to lower case
   1024                     */
   1025                     int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
   1026                     if (bcpValueLen < extBufCapacity) {
   1027                         uprv_strcpy(pExtBuf, bcpValue);
   1028                         T_CString_toLowerCase(pExtBuf);
   1029 
   1030                         bcpValue = pExtBuf;
   1031 
   1032                         pExtBuf += (bcpValueLen + 1);
   1033                         extBufCapacity -= (bcpValueLen + 1);
   1034                     } else {
   1035                         if (strict) {
   1036                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1037                             break;
   1038                         }
   1039                         continue;
   1040                     }
   1041                 }
   1042             } else {
   1043                 if (*key == PRIVATEUSE) {
   1044                     if (!_isPrivateuseValueSubtags(buf, len)) {
   1045                         if (strict) {
   1046                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1047                             break;
   1048                         }
   1049                         continue;
   1050                     }
   1051                 } else {
   1052                     if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
   1053                         if (strict) {
   1054                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1055                             break;
   1056                         }
   1057                         continue;
   1058                     }
   1059                 }
   1060                 bcpKey = key;
   1061                 if ((len + 1) < extBufCapacity) {
   1062                     uprv_memcpy(pExtBuf, buf, len);
   1063                     bcpValue = pExtBuf;
   1064 
   1065                     pExtBuf += len;
   1066 
   1067                     *pExtBuf = 0;
   1068                     pExtBuf++;
   1069 
   1070                     extBufCapacity -= (len + 1);
   1071                 } else {
   1072                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1073                     break;
   1074                 }
   1075             }
   1076 
   1077             /* create ExtensionListEntry */
   1078             ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1079             if (ext == NULL) {
   1080                 *status = U_MEMORY_ALLOCATION_ERROR;
   1081                 break;
   1082             }
   1083             ext->key = bcpKey;
   1084             ext->value = bcpValue;
   1085 
   1086             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
   1087                 uprv_free(ext);
   1088                 if (strict) {
   1089                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1090                     break;
   1091                 }
   1092             }
   1093         }
   1094 
   1095         /* Special handling for POSIX variant - add the keywords for POSIX */
   1096         if (hadPosix) {
   1097             /* create ExtensionListEntry for POSIX */
   1098             ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1099             if (ext == NULL) {
   1100                 *status = U_MEMORY_ALLOCATION_ERROR;
   1101                 goto cleanup;
   1102             }
   1103             ext->key = POSIX_KEY;
   1104             ext->value = POSIX_VALUE;
   1105 
   1106             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
   1107                 uprv_free(ext);
   1108             }
   1109         }
   1110 
   1111         if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
   1112             UBool startLDMLExtension = FALSE;
   1113             for (ext = firstExt; ext; ext = ext->next) {
   1114                 if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
   1115                     /* first LDML u singlton extension */
   1116                    if (reslen < capacity) {
   1117                        *(appendAt + reslen) = SEP;
   1118                    }
   1119                    reslen++;
   1120                    if (reslen < capacity) {
   1121                        *(appendAt + reslen) = LDMLEXT;
   1122                    }
   1123                    reslen++;
   1124 
   1125                    startLDMLExtension = TRUE;
   1126                 }
   1127 
   1128                 /* write out the sorted BCP47 attributes, extensions and private use */
   1129                 if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
   1130                     /* write the value for the attributes */
   1131                     for (attr = firstAttr; attr; attr = attr->next) {
   1132                         if (reslen < capacity) {
   1133                             *(appendAt + reslen) = SEP;
   1134                         }
   1135                         reslen++;
   1136                         len = (int32_t)uprv_strlen(attr->attribute);
   1137                         if (reslen < capacity) {
   1138                             uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
   1139                         }
   1140                         reslen += len;
   1141                     }
   1142                 } else {
   1143                     if (reslen < capacity) {
   1144                         *(appendAt + reslen) = SEP;
   1145                     }
   1146                     reslen++;
   1147                     len = (int32_t)uprv_strlen(ext->key);
   1148                     if (reslen < capacity) {
   1149                         uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
   1150                     }
   1151                     reslen += len;
   1152                     if (reslen < capacity) {
   1153                         *(appendAt + reslen) = SEP;
   1154                     }
   1155                     reslen++;
   1156                     len = (int32_t)uprv_strlen(ext->value);
   1157                     if (reslen < capacity) {
   1158                         uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
   1159                     }
   1160                     reslen += len;
   1161                 }
   1162             }
   1163         }
   1164 cleanup:
   1165         /* clean up */
   1166         ext = firstExt;
   1167         while (ext != NULL) {
   1168             ExtensionListEntry *tmpExt = ext->next;
   1169             uprv_free(ext);
   1170             ext = tmpExt;
   1171         }
   1172 
   1173         attr = firstAttr;
   1174         while (attr != NULL) {
   1175             AttributeListEntry *tmpAttr = attr->next;
   1176             char *pValue = (char *)attr->attribute;
   1177             uprv_free(pValue);
   1178             uprv_free(attr);
   1179             attr = tmpAttr;
   1180         }
   1181 
   1182         uenum_close(keywordEnum);
   1183 
   1184         if (U_FAILURE(*status)) {
   1185             return 0;
   1186         }
   1187     }
   1188 
   1189     return u_terminateChars(appendAt, capacity, reslen, status);
   1190 }
   1191 
   1192 /**
   1193  * Append keywords parsed from LDML extension value
   1194  * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
   1195  * Note: char* buf is used for storing keywords
   1196  */
   1197 static void
   1198 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
   1199     const char *pTag;   /* beginning of current subtag */
   1200     const char *pKwds;  /* beginning of key-type pairs */
   1201     UBool variantExists = *posixVariant;
   1202 
   1203     ExtensionListEntry *kwdFirst = NULL;    /* first LDML keyword */
   1204     ExtensionListEntry *kwd, *nextKwd;
   1205 
   1206     AttributeListEntry *attrFirst = NULL;   /* first attribute */
   1207     AttributeListEntry *attr, *nextAttr;
   1208 
   1209     int32_t len;
   1210     int32_t bufIdx = 0;
   1211 
   1212     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1213     int32_t attrBufIdx = 0;
   1214 
   1215     /* Reset the posixVariant value */
   1216     *posixVariant = FALSE;
   1217 
   1218     pTag = ldmlext;
   1219     pKwds = NULL;
   1220 
   1221     /* Iterate through u extension attributes */
   1222     while (*pTag) {
   1223         /* locate next separator char */
   1224         for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
   1225 
   1226         if (ultag_isUnicodeLocaleKey(pTag, len)) {
   1227             pKwds = pTag;
   1228             break;
   1229         }
   1230 
   1231         /* add this attribute to the list */
   1232         attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
   1233         if (attr == NULL) {
   1234             *status = U_MEMORY_ALLOCATION_ERROR;
   1235             goto cleanup;
   1236         }
   1237 
   1238         if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
   1239             uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
   1240             attrBuf[attrBufIdx + len] = 0;
   1241             attr->attribute = &attrBuf[attrBufIdx];
   1242             attrBufIdx += (len + 1);
   1243         } else {
   1244             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1245             goto cleanup;
   1246         }
   1247 
   1248         if (!_addAttributeToList(&attrFirst, attr)) {
   1249             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1250             uprv_free(attr);
   1251             goto cleanup;
   1252         }
   1253 
   1254         /* next tag */
   1255         pTag += len;
   1256         if (*pTag) {
   1257             /* next to the separator */
   1258             pTag++;
   1259         }
   1260     }
   1261 
   1262     if (attrFirst) {
   1263         /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
   1264 
   1265         if (attrBufIdx > bufSize) {
   1266             /* attrBufIdx == <total length of attribute subtag> + 1 */
   1267             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1268             goto cleanup;
   1269         }
   1270 
   1271         kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1272         if (kwd == NULL) {
   1273             *status = U_MEMORY_ALLOCATION_ERROR;
   1274             goto cleanup;
   1275         }
   1276 
   1277         kwd->key = LOCALE_ATTRIBUTE_KEY;
   1278         kwd->value = buf;
   1279 
   1280         /* attribute subtags sorted in alphabetical order as type */
   1281         attr = attrFirst;
   1282         while (attr != NULL) {
   1283             nextAttr = attr->next;
   1284 
   1285             /* buffer size check is done above */
   1286             if (attr != attrFirst) {
   1287                 *(buf + bufIdx) = SEP;
   1288                 bufIdx++;
   1289             }
   1290 
   1291             len = static_cast<int32_t>(uprv_strlen(attr->attribute));
   1292             uprv_memcpy(buf + bufIdx, attr->attribute, len);
   1293             bufIdx += len;
   1294 
   1295             attr = nextAttr;
   1296         }
   1297         *(buf + bufIdx) = 0;
   1298         bufIdx++;
   1299 
   1300         if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1301             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1302             uprv_free(kwd);
   1303             goto cleanup;
   1304         }
   1305 
   1306         /* once keyword entry is created, delete the attribute list */
   1307         attr = attrFirst;
   1308         while (attr != NULL) {
   1309             nextAttr = attr->next;
   1310             uprv_free(attr);
   1311             attr = nextAttr;
   1312         }
   1313         attrFirst = NULL;
   1314     }
   1315 
   1316     if (pKwds) {
   1317         const char *pBcpKey = NULL;     /* u extenstion key subtag */
   1318         const char *pBcpType = NULL;    /* beginning of u extension type subtag(s) */
   1319         int32_t bcpKeyLen = 0;
   1320         int32_t bcpTypeLen = 0;
   1321         UBool isDone = FALSE;
   1322 
   1323         pTag = pKwds;
   1324         /* BCP47 representation of LDML key/type pairs */
   1325         while (!isDone) {
   1326             const char *pNextBcpKey = NULL;
   1327             int32_t nextBcpKeyLen = 0;
   1328             UBool emitKeyword = FALSE;
   1329 
   1330             if (*pTag) {
   1331                 /* locate next separator char */
   1332                 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
   1333 
   1334                 if (ultag_isUnicodeLocaleKey(pTag, len)) {
   1335                     if (pBcpKey) {
   1336                         emitKeyword = TRUE;
   1337                         pNextBcpKey = pTag;
   1338                         nextBcpKeyLen = len;
   1339                     } else {
   1340                         pBcpKey = pTag;
   1341                         bcpKeyLen = len;
   1342                     }
   1343                 } else {
   1344                     U_ASSERT(pBcpKey != NULL);
   1345                     /* within LDML type subtags */
   1346                     if (pBcpType) {
   1347                         bcpTypeLen += (len + 1);
   1348                     } else {
   1349                         pBcpType = pTag;
   1350                         bcpTypeLen = len;
   1351                     }
   1352                 }
   1353 
   1354                 /* next tag */
   1355                 pTag += len;
   1356                 if (*pTag) {
   1357                     /* next to the separator */
   1358                     pTag++;
   1359                 }
   1360             } else {
   1361                 /* processing last one */
   1362                 emitKeyword = TRUE;
   1363                 isDone = TRUE;
   1364             }
   1365 
   1366             if (emitKeyword) {
   1367                 const char *pKey = NULL;    /* LDML key */
   1368                 const char *pType = NULL;   /* LDML type */
   1369 
   1370                 char bcpKeyBuf[9];          /* BCP key length is always 2 for now */
   1371 
   1372                 U_ASSERT(pBcpKey != NULL);
   1373 
   1374                 if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
   1375                     /* the BCP key is invalid */
   1376                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1377                     goto cleanup;
   1378                 }
   1379 
   1380                 uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
   1381                 bcpKeyBuf[bcpKeyLen] = 0;
   1382 
   1383                 /* u extension key to LDML key */
   1384                 pKey = uloc_toLegacyKey(bcpKeyBuf);
   1385                 if (pKey == NULL) {
   1386                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1387                     goto cleanup;
   1388                 }
   1389                 if (pKey == bcpKeyBuf) {
   1390                     /*
   1391                     The key returned by toLegacyKey points to the input buffer.
   1392                     We normalize the result key to lower case.
   1393                     */
   1394                     T_CString_toLowerCase(bcpKeyBuf);
   1395                     if (bufSize - bufIdx - 1 >= bcpKeyLen) {
   1396                         uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
   1397                         pKey = buf + bufIdx;
   1398                         bufIdx += bcpKeyLen;
   1399                         *(buf + bufIdx) = 0;
   1400                         bufIdx++;
   1401                     } else {
   1402                         *status = U_BUFFER_OVERFLOW_ERROR;
   1403                         goto cleanup;
   1404                     }
   1405                 }
   1406 
   1407                 if (pBcpType) {
   1408                     char bcpTypeBuf[128];       /* practically long enough even considering multiple subtag type */
   1409                     if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) {
   1410                         /* the BCP type is too long */
   1411                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1412                         goto cleanup;
   1413                     }
   1414 
   1415                     uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
   1416                     bcpTypeBuf[bcpTypeLen] = 0;
   1417 
   1418                     /* BCP type to locale type */
   1419                     pType = uloc_toLegacyType(pKey, bcpTypeBuf);
   1420                     if (pType == NULL) {
   1421                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1422                         goto cleanup;
   1423                     }
   1424                     if (pType == bcpTypeBuf) {
   1425                         /*
   1426                         The type returned by toLegacyType points to the input buffer.
   1427                         We normalize the result type to lower case.
   1428                         */
   1429                         /* normalize to lower case */
   1430                         T_CString_toLowerCase(bcpTypeBuf);
   1431                         if (bufSize - bufIdx - 1 >= bcpTypeLen) {
   1432                             uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
   1433                             pType = buf + bufIdx;
   1434                             bufIdx += bcpTypeLen;
   1435                             *(buf + bufIdx) = 0;
   1436                             bufIdx++;
   1437                         } else {
   1438                             *status = U_BUFFER_OVERFLOW_ERROR;
   1439                             goto cleanup;
   1440                         }
   1441                     }
   1442                 } else {
   1443                     /* typeless - default type value is "yes" */
   1444                     pType = LOCALE_TYPE_YES;
   1445                 }
   1446 
   1447                 /* Special handling for u-va-posix, since we want to treat this as a variant,
   1448                    not as a keyword */
   1449                 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
   1450                     *posixVariant = TRUE;
   1451                 } else {
   1452                     /* create an ExtensionListEntry for this keyword */
   1453                     kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1454                     if (kwd == NULL) {
   1455                         *status = U_MEMORY_ALLOCATION_ERROR;
   1456                         goto cleanup;
   1457                     }
   1458 
   1459                     kwd->key = pKey;
   1460                     kwd->value = pType;
   1461 
   1462                     if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1463                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1464                         uprv_free(kwd);
   1465                         goto cleanup;
   1466                     }
   1467                 }
   1468 
   1469                 pBcpKey = pNextBcpKey;
   1470                 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
   1471                 pBcpType = NULL;
   1472                 bcpTypeLen = 0;
   1473             }
   1474         }
   1475     }
   1476 
   1477     kwd = kwdFirst;
   1478     while (kwd != NULL) {
   1479         nextKwd = kwd->next;
   1480         _addExtensionToList(appendTo, kwd, FALSE);
   1481         kwd = nextKwd;
   1482     }
   1483 
   1484     return;
   1485 
   1486 cleanup:
   1487     attr = attrFirst;
   1488     while (attr != NULL) {
   1489         nextAttr = attr->next;
   1490         uprv_free(attr);
   1491         attr = nextAttr;
   1492     }
   1493 
   1494     kwd = kwdFirst;
   1495     while (kwd != NULL) {
   1496         nextKwd = kwd->next;
   1497         uprv_free(kwd);
   1498         kwd = nextKwd;
   1499     }
   1500 }
   1501 
   1502 
   1503 static int32_t
   1504 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
   1505     int32_t reslen = 0;
   1506     int32_t i, n;
   1507     int32_t len;
   1508     ExtensionListEntry *kwdFirst = NULL;
   1509     ExtensionListEntry *kwd;
   1510     const char *key, *type;
   1511     char *kwdBuf = NULL;
   1512     int32_t kwdBufLength = capacity;
   1513     UBool posixVariant = FALSE;
   1514 
   1515     if (U_FAILURE(*status)) {
   1516         return 0;
   1517     }
   1518 
   1519     kwdBuf = (char*)uprv_malloc(kwdBufLength);
   1520     if (kwdBuf == NULL) {
   1521         *status = U_MEMORY_ALLOCATION_ERROR;
   1522         return 0;
   1523     }
   1524 
   1525     /* Determine if variants already exists */
   1526     if (ultag_getVariantsSize(langtag)) {
   1527         posixVariant = TRUE;
   1528     }
   1529 
   1530     n = ultag_getExtensionsSize(langtag);
   1531 
   1532     /* resolve locale keywords and reordering keys */
   1533     for (i = 0; i < n; i++) {
   1534         key = ultag_getExtensionKey(langtag, i);
   1535         type = ultag_getExtensionValue(langtag, i);
   1536         if (*key == LDMLEXT) {
   1537             _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
   1538             if (U_FAILURE(*status)) {
   1539                 break;
   1540             }
   1541         } else {
   1542             kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1543             if (kwd == NULL) {
   1544                 *status = U_MEMORY_ALLOCATION_ERROR;
   1545                 break;
   1546             }
   1547             kwd->key = key;
   1548             kwd->value = type;
   1549             if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1550                 uprv_free(kwd);
   1551                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1552                 break;
   1553             }
   1554         }
   1555     }
   1556 
   1557     if (U_SUCCESS(*status)) {
   1558         type = ultag_getPrivateUse(langtag);
   1559         if ((int32_t)uprv_strlen(type) > 0) {
   1560             /* add private use as a keyword */
   1561             kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1562             if (kwd == NULL) {
   1563                 *status = U_MEMORY_ALLOCATION_ERROR;
   1564             } else {
   1565                 kwd->key = PRIVATEUSE_KEY;
   1566                 kwd->value = type;
   1567                 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1568                     uprv_free(kwd);
   1569                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1570                 }
   1571             }
   1572         }
   1573     }
   1574 
   1575     /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
   1576 
   1577     if (U_SUCCESS(*status) && posixVariant) {
   1578         len = (int32_t) uprv_strlen(_POSIX);
   1579         if (reslen < capacity) {
   1580             uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
   1581         }
   1582         reslen += len;
   1583     }
   1584 
   1585     if (U_SUCCESS(*status) && kwdFirst != NULL) {
   1586         /* write out the sorted keywords */
   1587         UBool firstValue = TRUE;
   1588         kwd = kwdFirst;
   1589         do {
   1590             if (reslen < capacity) {
   1591                 if (firstValue) {
   1592                     /* '@' */
   1593                     *(appendAt + reslen) = LOCALE_EXT_SEP;
   1594                     firstValue = FALSE;
   1595                 } else {
   1596                     /* ';' */
   1597                     *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
   1598                 }
   1599             }
   1600             reslen++;
   1601 
   1602             /* key */
   1603             len = (int32_t)uprv_strlen(kwd->key);
   1604             if (reslen < capacity) {
   1605                 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
   1606             }
   1607             reslen += len;
   1608 
   1609             /* '=' */
   1610             if (reslen < capacity) {
   1611                 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
   1612             }
   1613             reslen++;
   1614 
   1615             /* type */
   1616             len = (int32_t)uprv_strlen(kwd->value);
   1617             if (reslen < capacity) {
   1618                 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
   1619             }
   1620             reslen += len;
   1621 
   1622             kwd = kwd->next;
   1623         } while (kwd);
   1624     }
   1625 
   1626     /* clean up */
   1627     kwd = kwdFirst;
   1628     while (kwd != NULL) {
   1629         ExtensionListEntry *tmpKwd = kwd->next;
   1630         uprv_free(kwd);
   1631         kwd = tmpKwd;
   1632     }
   1633 
   1634     uprv_free(kwdBuf);
   1635 
   1636     if (U_FAILURE(*status)) {
   1637         return 0;
   1638     }
   1639 
   1640     return u_terminateChars(appendAt, capacity, reslen, status);
   1641 }
   1642 
   1643 static int32_t
   1644 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
   1645     (void)hadPosix;
   1646     char buf[ULOC_FULLNAME_CAPACITY];
   1647     char tmpAppend[ULOC_FULLNAME_CAPACITY];
   1648     UErrorCode tmpStatus = U_ZERO_ERROR;
   1649     int32_t len, i;
   1650     int32_t reslen = 0;
   1651 
   1652     if (U_FAILURE(*status)) {
   1653         return 0;
   1654     }
   1655 
   1656     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
   1657     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1658         if (strict) {
   1659             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1660         }
   1661         return 0;
   1662     }
   1663 
   1664     if (len > 0) {
   1665         char *p, *pPriv;
   1666         UBool bNext = TRUE;
   1667         UBool firstValue = TRUE;
   1668         UBool writeValue;
   1669 
   1670         pPriv = NULL;
   1671         p = buf;
   1672         while (bNext) {
   1673             writeValue = FALSE;
   1674             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
   1675                 if (*p == 0) {
   1676                     bNext = FALSE;
   1677                 } else {
   1678                     *p = 0; /* terminate */
   1679                 }
   1680                 if (pPriv != NULL) {
   1681                     /* Private use in the canonical format is lowercase in BCP47 */
   1682                     for (i = 0; *(pPriv + i) != 0; i++) {
   1683                         *(pPriv + i) = uprv_tolower(*(pPriv + i));
   1684                     }
   1685 
   1686                     /* validate */
   1687                     if (_isPrivateuseValueSubtag(pPriv, -1)) {
   1688                         if (firstValue) {
   1689                             if (!_isVariantSubtag(pPriv, -1)) {
   1690                                 writeValue = TRUE;
   1691                             }
   1692                         } else {
   1693                             writeValue = TRUE;
   1694                         }
   1695                     } else if (strict) {
   1696                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1697                         break;
   1698                     } else {
   1699                         break;
   1700                     }
   1701 
   1702                     if (writeValue) {
   1703                         if (reslen < capacity) {
   1704                             tmpAppend[reslen++] = SEP;
   1705                         }
   1706 
   1707                         if (firstValue) {
   1708                             if (reslen < capacity) {
   1709                                 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
   1710                             }
   1711 
   1712                             if (reslen < capacity) {
   1713                                 tmpAppend[reslen++] = SEP;
   1714                             }
   1715 
   1716                             len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
   1717                             if (reslen < capacity) {
   1718                                 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
   1719                             }
   1720                             reslen += len;
   1721 
   1722                             if (reslen < capacity) {
   1723                                 tmpAppend[reslen++] = SEP;
   1724                             }
   1725 
   1726                             firstValue = FALSE;
   1727                         }
   1728 
   1729                         len = (int32_t)uprv_strlen(pPriv);
   1730                         if (reslen < capacity) {
   1731                             uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
   1732                         }
   1733                         reslen += len;
   1734                     }
   1735                 }
   1736                 /* reset private use starting position */
   1737                 pPriv = NULL;
   1738             } else if (pPriv == NULL) {
   1739                 pPriv = p;
   1740             }
   1741             p++;
   1742         }
   1743 
   1744         if (U_FAILURE(*status)) {
   1745             return 0;
   1746         }
   1747     }
   1748 
   1749     if (U_SUCCESS(*status)) {
   1750         len = reslen;
   1751         if (reslen < capacity) {
   1752             uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
   1753         }
   1754     }
   1755 
   1756     u_terminateChars(appendAt, capacity, reslen, status);
   1757 
   1758     return reslen;
   1759 }
   1760 
   1761 /*
   1762 * -------------------------------------------------
   1763 *
   1764 * ultag_ functions
   1765 *
   1766 * -------------------------------------------------
   1767 */
   1768 
   1769 /* Bit flags used by the parser */
   1770 #define LANG 0x0001
   1771 #define EXTL 0x0002
   1772 #define SCRT 0x0004
   1773 #define REGN 0x0008
   1774 #define VART 0x0010
   1775 #define EXTS 0x0020
   1776 #define EXTV 0x0040
   1777 #define PRIV 0x0080
   1778 
   1779 /**
   1780  * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing
   1781  * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ )
   1782  * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above.
   1783  */
   1784 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
   1785 #pragma optimize( "", off )
   1786 #endif
   1787 
   1788 static ULanguageTag*
   1789 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
   1790     ULanguageTag *t;
   1791     char *tagBuf;
   1792     int16_t next;
   1793     char *pSubtag, *pNext, *pLastGoodPosition;
   1794     int32_t subtagLen;
   1795     int32_t extlangIdx;
   1796     ExtensionListEntry *pExtension;
   1797     char *pExtValueSubtag, *pExtValueSubtagEnd;
   1798     int32_t i;
   1799     UBool privateuseVar = FALSE;
   1800     int32_t grandfatheredLen = 0;
   1801 
   1802     if (parsedLen != NULL) {
   1803         *parsedLen = 0;
   1804     }
   1805 
   1806     if (U_FAILURE(*status)) {
   1807         return NULL;
   1808     }
   1809 
   1810     if (tagLen < 0) {
   1811         tagLen = (int32_t)uprv_strlen(tag);
   1812     }
   1813 
   1814     /* copy the entire string */
   1815     tagBuf = (char*)uprv_malloc(tagLen + 1);
   1816     if (tagBuf == NULL) {
   1817         *status = U_MEMORY_ALLOCATION_ERROR;
   1818         return NULL;
   1819     }
   1820     uprv_memcpy(tagBuf, tag, tagLen);
   1821     *(tagBuf + tagLen) = 0;
   1822 
   1823     /* create a ULanguageTag */
   1824     t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
   1825     if (t == NULL) {
   1826         uprv_free(tagBuf);
   1827         *status = U_MEMORY_ALLOCATION_ERROR;
   1828         return NULL;
   1829     }
   1830     _initializeULanguageTag(t);
   1831     t->buf = tagBuf;
   1832 
   1833     if (tagLen < MINLEN) {
   1834         /* the input tag is too short - return empty ULanguageTag */
   1835         return t;
   1836     }
   1837 
   1838     /* check if the tag is grandfathered */
   1839     for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
   1840         if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
   1841             int32_t newTagLength;
   1842 
   1843             grandfatheredLen = tagLen;  /* back up for output parsedLen */
   1844             newTagLength = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
   1845             if (tagLen < newTagLength) {
   1846                 uprv_free(tagBuf);
   1847                 tagBuf = (char*)uprv_malloc(newTagLength + 1);
   1848                 if (tagBuf == NULL) {
   1849                     *status = U_MEMORY_ALLOCATION_ERROR;
   1850                     ultag_close(t);
   1851                     return NULL;
   1852                 }
   1853                 t->buf = tagBuf;
   1854                 tagLen = newTagLength;
   1855             }
   1856             uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
   1857             break;
   1858         }
   1859     }
   1860 
   1861     /*
   1862      * langtag      =   language
   1863      *                  ["-" script]
   1864      *                  ["-" region]
   1865      *                  *("-" variant)
   1866      *                  *("-" extension)
   1867      *                  ["-" privateuse]
   1868      */
   1869 
   1870     next = LANG | PRIV;
   1871     pNext = pLastGoodPosition = tagBuf;
   1872     extlangIdx = 0;
   1873     pExtension = NULL;
   1874     pExtValueSubtag = NULL;
   1875     pExtValueSubtagEnd = NULL;
   1876 
   1877     while (pNext) {
   1878         char *pSep;
   1879 
   1880         pSubtag = pNext;
   1881 
   1882         /* locate next separator char */
   1883         pSep = pSubtag;
   1884         while (*pSep) {
   1885             if (*pSep == SEP) {
   1886                 break;
   1887             }
   1888             pSep++;
   1889         }
   1890         if (*pSep == 0) {
   1891             /* last subtag */
   1892             pNext = NULL;
   1893         } else {
   1894             pNext = pSep + 1;
   1895         }
   1896         subtagLen = (int32_t)(pSep - pSubtag);
   1897 
   1898         if (next & LANG) {
   1899             if (_isLanguageSubtag(pSubtag, subtagLen)) {
   1900                 *pSep = 0;  /* terminate */
   1901                 t->language = T_CString_toLowerCase(pSubtag);
   1902 
   1903                 pLastGoodPosition = pSep;
   1904                 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
   1905                 continue;
   1906             }
   1907         }
   1908         if (next & EXTL) {
   1909             if (_isExtlangSubtag(pSubtag, subtagLen)) {
   1910                 *pSep = 0;
   1911                 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
   1912 
   1913                 pLastGoodPosition = pSep;
   1914                 if (extlangIdx < 3) {
   1915                     next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
   1916                 } else {
   1917                     next = SCRT | REGN | VART | EXTS | PRIV;
   1918                 }
   1919                 continue;
   1920             }
   1921         }
   1922         if (next & SCRT) {
   1923             if (_isScriptSubtag(pSubtag, subtagLen)) {
   1924                 char *p = pSubtag;
   1925 
   1926                 *pSep = 0;
   1927 
   1928                 /* to title case */
   1929                 *p = uprv_toupper(*p);
   1930                 p++;
   1931                 for (; *p; p++) {
   1932                     *p = uprv_tolower(*p);
   1933                 }
   1934 
   1935                 t->script = pSubtag;
   1936 
   1937                 pLastGoodPosition = pSep;
   1938                 next = REGN | VART | EXTS | PRIV;
   1939                 continue;
   1940             }
   1941         }
   1942         if (next & REGN) {
   1943             if (_isRegionSubtag(pSubtag, subtagLen)) {
   1944                 *pSep = 0;
   1945                 t->region = T_CString_toUpperCase(pSubtag);
   1946 
   1947                 pLastGoodPosition = pSep;
   1948                 next = VART | EXTS | PRIV;
   1949                 continue;
   1950             }
   1951         }
   1952         if (next & VART) {
   1953             if (_isVariantSubtag(pSubtag, subtagLen) ||
   1954                (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
   1955                 VariantListEntry *var;
   1956                 UBool isAdded;
   1957 
   1958                 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
   1959                 if (var == NULL) {
   1960                     *status = U_MEMORY_ALLOCATION_ERROR;
   1961                     goto error;
   1962                 }
   1963                 *pSep = 0;
   1964                 var->variant = T_CString_toUpperCase(pSubtag);
   1965                 isAdded = _addVariantToList(&(t->variants), var);
   1966                 if (!isAdded) {
   1967                     /* duplicated variant entry */
   1968                     uprv_free(var);
   1969                     break;
   1970                 }
   1971                 pLastGoodPosition = pSep;
   1972                 next = VART | EXTS | PRIV;
   1973                 continue;
   1974             }
   1975         }
   1976         if (next & EXTS) {
   1977             if (_isExtensionSingleton(pSubtag, subtagLen)) {
   1978                 if (pExtension != NULL) {
   1979                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   1980                         /* the previous extension is incomplete */
   1981                         uprv_free(pExtension);
   1982                         pExtension = NULL;
   1983                         break;
   1984                     }
   1985 
   1986                     /* terminate the previous extension value */
   1987                     *pExtValueSubtagEnd = 0;
   1988                     pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   1989 
   1990                     /* insert the extension to the list */
   1991                     if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   1992                         pLastGoodPosition = pExtValueSubtagEnd;
   1993                     } else {
   1994                         /* stop parsing here */
   1995                         uprv_free(pExtension);
   1996                         pExtension = NULL;
   1997                         break;
   1998                     }
   1999                 }
   2000 
   2001                 /* create a new extension */
   2002                 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   2003                 if (pExtension == NULL) {
   2004                     *status = U_MEMORY_ALLOCATION_ERROR;
   2005                     goto error;
   2006                 }
   2007                 *pSep = 0;
   2008                 pExtension->key = T_CString_toLowerCase(pSubtag);
   2009                 pExtension->value = NULL;   /* will be set later */
   2010 
   2011                 /*
   2012                  * reset the start and the end location of extension value
   2013                  * subtags for this extension
   2014                  */
   2015                 pExtValueSubtag = NULL;
   2016                 pExtValueSubtagEnd = NULL;
   2017 
   2018                 next = EXTV;
   2019                 continue;
   2020             }
   2021         }
   2022         if (next & EXTV) {
   2023             if (_isExtensionSubtag(pSubtag, subtagLen)) {
   2024                 if (pExtValueSubtag == NULL) {
   2025                     /* if the start postion of this extension's value is not yet,
   2026                         this one is the first value subtag */
   2027                     pExtValueSubtag = pSubtag;
   2028                 }
   2029 
   2030                 /* Mark the end of this subtag */
   2031                 pExtValueSubtagEnd = pSep;
   2032                 next = EXTS | EXTV | PRIV;
   2033 
   2034                 continue;
   2035             }
   2036         }
   2037         if (next & PRIV) {
   2038             if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
   2039                 char *pPrivuseVal;
   2040 
   2041                 if (pExtension != NULL) {
   2042                     /* Process the last extension */
   2043                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   2044                         /* the previous extension is incomplete */
   2045                         uprv_free(pExtension);
   2046                         pExtension = NULL;
   2047                         break;
   2048                     } else {
   2049                         /* terminate the previous extension value */
   2050                         *pExtValueSubtagEnd = 0;
   2051                         pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   2052 
   2053                         /* insert the extension to the list */
   2054                         if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   2055                             pLastGoodPosition = pExtValueSubtagEnd;
   2056                             pExtension = NULL;
   2057                         } else {
   2058                         /* stop parsing here */
   2059                             uprv_free(pExtension);
   2060                             pExtension = NULL;
   2061                             break;
   2062                         }
   2063                     }
   2064                 }
   2065 
   2066                 /* The rest of part will be private use value subtags */
   2067                 if (pNext == NULL) {
   2068                     /* empty private use subtag */
   2069                     break;
   2070                 }
   2071                 /* back up the private use value start position */
   2072                 pPrivuseVal = pNext;
   2073 
   2074                 /* validate private use value subtags */
   2075                 while (pNext) {
   2076                     pSubtag = pNext;
   2077                     pSep = pSubtag;
   2078                     while (*pSep) {
   2079                         if (*pSep == SEP) {
   2080                             break;
   2081                         }
   2082                         pSep++;
   2083                     }
   2084                     if (*pSep == 0) {
   2085                         /* last subtag */
   2086                         pNext = NULL;
   2087                     } else {
   2088                         pNext = pSep + 1;
   2089                     }
   2090                     subtagLen = (int32_t)(pSep - pSubtag);
   2091 
   2092                     if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
   2093                         *pSep = 0;
   2094                         next = VART;
   2095                         privateuseVar = TRUE;
   2096                         break;
   2097                     } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
   2098                         pLastGoodPosition = pSep;
   2099                     } else {
   2100                         break;
   2101                     }
   2102                 }
   2103 
   2104                 if (next == VART) {
   2105                     continue;
   2106                 }
   2107 
   2108                 if (pLastGoodPosition - pPrivuseVal > 0) {
   2109                     *pLastGoodPosition = 0;
   2110                     t->privateuse = T_CString_toLowerCase(pPrivuseVal);
   2111                 }
   2112                 /* No more subtags, exiting the parse loop */
   2113                 break;
   2114             }
   2115             break;
   2116         }
   2117 
   2118         /* If we fell through here, it means this subtag is illegal - quit parsing */
   2119         break;
   2120     }
   2121 
   2122     if (pExtension != NULL) {
   2123         /* Process the last extension */
   2124         if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   2125             /* the previous extension is incomplete */
   2126             uprv_free(pExtension);
   2127         } else {
   2128             /* terminate the previous extension value */
   2129             *pExtValueSubtagEnd = 0;
   2130             pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   2131             /* insert the extension to the list */
   2132             if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   2133                 pLastGoodPosition = pExtValueSubtagEnd;
   2134             } else {
   2135                 uprv_free(pExtension);
   2136             }
   2137         }
   2138     }
   2139 
   2140     if (parsedLen != NULL) {
   2141         *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
   2142     }
   2143 
   2144     return t;
   2145 
   2146 error:
   2147     ultag_close(t);
   2148     return NULL;
   2149 }
   2150 
   2151 /**
   2152 * Ticket #12705 - Turn optimization back on.
   2153 */
   2154 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
   2155 #pragma optimize( "", on )
   2156 #endif
   2157 
   2158 static void
   2159 ultag_close(ULanguageTag* langtag) {
   2160 
   2161     if (langtag == NULL) {
   2162         return;
   2163     }
   2164 
   2165     uprv_free(langtag->buf);
   2166 
   2167     if (langtag->variants) {
   2168         VariantListEntry *curVar = langtag->variants;
   2169         while (curVar) {
   2170             VariantListEntry *nextVar = curVar->next;
   2171             uprv_free(curVar);
   2172             curVar = nextVar;
   2173         }
   2174     }
   2175 
   2176     if (langtag->extensions) {
   2177         ExtensionListEntry *curExt = langtag->extensions;
   2178         while (curExt) {
   2179             ExtensionListEntry *nextExt = curExt->next;
   2180             uprv_free(curExt);
   2181             curExt = nextExt;
   2182         }
   2183     }
   2184 
   2185     uprv_free(langtag);
   2186 }
   2187 
   2188 static const char*
   2189 ultag_getLanguage(const ULanguageTag* langtag) {
   2190     return langtag->language;
   2191 }
   2192 
   2193 #if 0
   2194 static const char*
   2195 ultag_getJDKLanguage(const ULanguageTag* langtag) {
   2196     int32_t i;
   2197     for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
   2198         if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
   2199             return DEPRECATEDLANGS[i + 1];
   2200         }
   2201     }
   2202     return langtag->language;
   2203 }
   2204 #endif
   2205 
   2206 static const char*
   2207 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
   2208     if (idx >= 0 && idx < MAXEXTLANG) {
   2209         return langtag->extlang[idx];
   2210     }
   2211     return NULL;
   2212 }
   2213 
   2214 static int32_t
   2215 ultag_getExtlangSize(const ULanguageTag* langtag) {
   2216     int32_t size = 0;
   2217     int32_t i;
   2218     for (i = 0; i < MAXEXTLANG; i++) {
   2219         if (langtag->extlang[i]) {
   2220             size++;
   2221         }
   2222     }
   2223     return size;
   2224 }
   2225 
   2226 static const char*
   2227 ultag_getScript(const ULanguageTag* langtag) {
   2228     return langtag->script;
   2229 }
   2230 
   2231 static const char*
   2232 ultag_getRegion(const ULanguageTag* langtag) {
   2233     return langtag->region;
   2234 }
   2235 
   2236 static const char*
   2237 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
   2238     const char *var = NULL;
   2239     VariantListEntry *cur = langtag->variants;
   2240     int32_t i = 0;
   2241     while (cur) {
   2242         if (i == idx) {
   2243             var = cur->variant;
   2244             break;
   2245         }
   2246         cur = cur->next;
   2247         i++;
   2248     }
   2249     return var;
   2250 }
   2251 
   2252 static int32_t
   2253 ultag_getVariantsSize(const ULanguageTag* langtag) {
   2254     int32_t size = 0;
   2255     VariantListEntry *cur = langtag->variants;
   2256     while (TRUE) {
   2257         if (cur == NULL) {
   2258             break;
   2259         }
   2260         size++;
   2261         cur = cur->next;
   2262     }
   2263     return size;
   2264 }
   2265 
   2266 static const char*
   2267 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
   2268     const char *key = NULL;
   2269     ExtensionListEntry *cur = langtag->extensions;
   2270     int32_t i = 0;
   2271     while (cur) {
   2272         if (i == idx) {
   2273             key = cur->key;
   2274             break;
   2275         }
   2276         cur = cur->next;
   2277         i++;
   2278     }
   2279     return key;
   2280 }
   2281 
   2282 static const char*
   2283 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
   2284     const char *val = NULL;
   2285     ExtensionListEntry *cur = langtag->extensions;
   2286     int32_t i = 0;
   2287     while (cur) {
   2288         if (i == idx) {
   2289             val = cur->value;
   2290             break;
   2291         }
   2292         cur = cur->next;
   2293         i++;
   2294     }
   2295     return val;
   2296 }
   2297 
   2298 static int32_t
   2299 ultag_getExtensionsSize(const ULanguageTag* langtag) {
   2300     int32_t size = 0;
   2301     ExtensionListEntry *cur = langtag->extensions;
   2302     while (TRUE) {
   2303         if (cur == NULL) {
   2304             break;
   2305         }
   2306         size++;
   2307         cur = cur->next;
   2308     }
   2309     return size;
   2310 }
   2311 
   2312 static const char*
   2313 ultag_getPrivateUse(const ULanguageTag* langtag) {
   2314     return langtag->privateuse;
   2315 }
   2316 
   2317 #if 0
   2318 static const char*
   2319 ultag_getGrandfathered(const ULanguageTag* langtag) {
   2320     return langtag->grandfathered;
   2321 }
   2322 #endif
   2323 
   2324 
   2325 /*
   2326 * -------------------------------------------------
   2327 *
   2328 * Locale/BCP47 conversion APIs, exposed as uloc_*
   2329 *
   2330 * -------------------------------------------------
   2331 */
   2332 U_CAPI int32_t U_EXPORT2
   2333 uloc_toLanguageTag(const char* localeID,
   2334                    char* langtag,
   2335                    int32_t langtagCapacity,
   2336                    UBool strict,
   2337                    UErrorCode* status) {
   2338     /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
   2339     char canonical[256];
   2340     int32_t reslen = 0;
   2341     UErrorCode tmpStatus = U_ZERO_ERROR;
   2342     UBool hadPosix = FALSE;
   2343     const char* pKeywordStart;
   2344 
   2345     /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
   2346     canonical[0] = 0;
   2347     if (uprv_strlen(localeID) > 0) {
   2348         uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
   2349         if (tmpStatus != U_ZERO_ERROR) {
   2350             *status = U_ILLEGAL_ARGUMENT_ERROR;
   2351             return 0;
   2352         }
   2353     }
   2354 
   2355     /* For handling special case - private use only tag */
   2356     pKeywordStart = locale_getKeywordsStart(canonical);
   2357     if (pKeywordStart == canonical) {
   2358         UEnumeration *kwdEnum;
   2359         int kwdCnt = 0;
   2360         UBool done = FALSE;
   2361 
   2362         kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
   2363         if (kwdEnum != NULL) {
   2364             kwdCnt = uenum_count(kwdEnum, &tmpStatus);
   2365             if (kwdCnt == 1) {
   2366                 const char *key;
   2367                 int32_t len = 0;
   2368 
   2369                 key = uenum_next(kwdEnum, &len, &tmpStatus);
   2370                 if (len == 1 && *key == PRIVATEUSE) {
   2371                     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   2372                     buf[0] = PRIVATEUSE;
   2373                     buf[1] = SEP;
   2374                     len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
   2375                     if (U_SUCCESS(tmpStatus)) {
   2376                         if (_isPrivateuseValueSubtags(&buf[2], len)) {
   2377                             /* return private use only tag */
   2378                             reslen = len + 2;
   2379                             uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
   2380                             u_terminateChars(langtag, langtagCapacity, reslen, status);
   2381                             done = TRUE;
   2382                         } else if (strict) {
   2383                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   2384                             done = TRUE;
   2385                         }
   2386                         /* if not strict mode, then "und" will be returned */
   2387                     } else {
   2388                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   2389                         done = TRUE;
   2390                     }
   2391                 }
   2392             }
   2393             uenum_close(kwdEnum);
   2394             if (done) {
   2395                 return reslen;
   2396             }
   2397         }
   2398     }
   2399 
   2400     reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
   2401     reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2402     reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2403     reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
   2404     reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
   2405     reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
   2406 
   2407     return reslen;
   2408 }
   2409 
   2410 
   2411 U_CAPI int32_t U_EXPORT2
   2412 uloc_forLanguageTag(const char* langtag,
   2413                     char* localeID,
   2414                     int32_t localeIDCapacity,
   2415                     int32_t* parsedLength,
   2416                     UErrorCode* status) {
   2417     ULanguageTag *lt;
   2418     int32_t reslen = 0;
   2419     const char *subtag, *p;
   2420     int32_t len;
   2421     int32_t i, n;
   2422     UBool noRegion = TRUE;
   2423 
   2424     lt = ultag_parse(langtag, -1, parsedLength, status);
   2425     if (U_FAILURE(*status)) {
   2426         return 0;
   2427     }
   2428 
   2429     /* language */
   2430     subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
   2431     if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
   2432         len = (int32_t)uprv_strlen(subtag);
   2433         if (len > 0) {
   2434             if (reslen < localeIDCapacity) {
   2435                 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
   2436             }
   2437             reslen += len;
   2438         }
   2439     }
   2440 
   2441     /* script */
   2442     subtag = ultag_getScript(lt);
   2443     len = (int32_t)uprv_strlen(subtag);
   2444     if (len > 0) {
   2445         if (reslen < localeIDCapacity) {
   2446             *(localeID + reslen) = LOCALE_SEP;
   2447         }
   2448         reslen++;
   2449 
   2450         /* write out the script in title case */
   2451         p = subtag;
   2452         while (*p) {
   2453             if (reslen < localeIDCapacity) {
   2454                 if (p == subtag) {
   2455                     *(localeID + reslen) = uprv_toupper(*p);
   2456                 } else {
   2457                     *(localeID + reslen) = *p;
   2458                 }
   2459             }
   2460             reslen++;
   2461             p++;
   2462         }
   2463     }
   2464 
   2465     /* region */
   2466     subtag = ultag_getRegion(lt);
   2467     len = (int32_t)uprv_strlen(subtag);
   2468     if (len > 0) {
   2469         if (reslen < localeIDCapacity) {
   2470             *(localeID + reslen) = LOCALE_SEP;
   2471         }
   2472         reslen++;
   2473         /* write out the retion in upper case */
   2474         p = subtag;
   2475         while (*p) {
   2476             if (reslen < localeIDCapacity) {
   2477                 *(localeID + reslen) = uprv_toupper(*p);
   2478             }
   2479             reslen++;
   2480             p++;
   2481         }
   2482         noRegion = FALSE;
   2483     }
   2484 
   2485     /* variants */
   2486     n = ultag_getVariantsSize(lt);
   2487     if (n > 0) {
   2488         if (noRegion) {
   2489             if (reslen < localeIDCapacity) {
   2490                 *(localeID + reslen) = LOCALE_SEP;
   2491             }
   2492             reslen++;
   2493         }
   2494 
   2495         for (i = 0; i < n; i++) {
   2496             subtag = ultag_getVariant(lt, i);
   2497             if (reslen < localeIDCapacity) {
   2498                 *(localeID + reslen) = LOCALE_SEP;
   2499             }
   2500             reslen++;
   2501             /* write out the variant in upper case */
   2502             p = subtag;
   2503             while (*p) {
   2504                 if (reslen < localeIDCapacity) {
   2505                     *(localeID + reslen) = uprv_toupper(*p);
   2506                 }
   2507                 reslen++;
   2508                 p++;
   2509             }
   2510         }
   2511     }
   2512 
   2513     /* keywords */
   2514     n = ultag_getExtensionsSize(lt);
   2515     subtag = ultag_getPrivateUse(lt);
   2516     if (n > 0 || uprv_strlen(subtag) > 0) {
   2517         if (reslen == 0 && n > 0) {
   2518             /* need a language */
   2519             if (reslen < localeIDCapacity) {
   2520                 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
   2521             }
   2522             reslen += LANG_UND_LEN;
   2523         }
   2524         len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
   2525         reslen += len;
   2526     }
   2527 
   2528     ultag_close(lt);
   2529     return u_terminateChars(localeID, localeIDCapacity, reslen, status);
   2530 }
   2531 
   2532 
   2533