Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2009-2012, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 */
      7 
      8 #include "unicode/utypes.h"
      9 #include "unicode/ures.h"
     10 #include "unicode/putil.h"
     11 #include "unicode/uloc.h"
     12 #include "ustr_imp.h"
     13 #include "cmemory.h"
     14 #include "cstring.h"
     15 #include "putilimp.h"
     16 #include "uinvchar.h"
     17 #include "ulocimp.h"
     18 #include "uassert.h"
     19 
     20 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     21 
     22 /* struct holding a single variant */
     23 typedef struct VariantListEntry {
     24     const char              *variant;
     25     struct VariantListEntry *next;
     26 } VariantListEntry;
     27 
     28 /* struct holding a single attribute value */
     29 typedef struct AttributeListEntry {
     30     const char              *attribute;
     31     struct AttributeListEntry *next;
     32 } AttributeListEntry;
     33 
     34 /* struct holding a single extension */
     35 typedef struct ExtensionListEntry {
     36     const char                  *key;
     37     const char                  *value;
     38     struct ExtensionListEntry   *next;
     39 } ExtensionListEntry;
     40 
     41 #define MAXEXTLANG 3
     42 typedef struct ULanguageTag {
     43     char                *buf;   /* holding parsed subtags */
     44     const char          *language;
     45     const char          *extlang[MAXEXTLANG];
     46     const char          *script;
     47     const char          *region;
     48     VariantListEntry    *variants;
     49     ExtensionListEntry  *extensions;
     50     const char          *privateuse;
     51     const char          *grandfathered;
     52 } ULanguageTag;
     53 
     54 #define MINLEN 2
     55 #define SEP '-'
     56 #define PRIVATEUSE 'x'
     57 #define LDMLEXT 'u'
     58 
     59 #define LOCALE_SEP '_'
     60 #define LOCALE_EXT_SEP '@'
     61 #define LOCALE_KEYWORD_SEP ';'
     62 #define LOCALE_KEY_TYPE_SEP '='
     63 
     64 #define ISALPHA(c) uprv_isASCIILetter(c)
     65 #define ISNUMERIC(c) ((c)>='0' && (c)<='9')
     66 
     67 static const char EMPTY[] = "";
     68 static const char LANG_UND[] = "und";
     69 static const char PRIVATEUSE_KEY[] = "x";
     70 static const char _POSIX[] = "_POSIX";
     71 static const char POSIX_KEY[] = "va";
     72 static const char POSIX_VALUE[] = "posix";
     73 static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
     74 static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
     75 static const char LOCALE_TYPE_YES[] = "yes";
     76 
     77 #define LANG_UND_LEN 3
     78 
     79 static const char* const GRANDFATHERED[] = {
     80 /*  grandfathered   preferred */
     81     "art-lojban",   "jbo",
     82     "cel-gaulish",  "xtg-x-cel-gaulish",
     83     "en-GB-oed",    "en-GB-x-oed",
     84     "i-ami",        "ami",
     85     "i-bnn",        "bnn",
     86     "i-default",    "en-x-i-default",
     87     "i-enochian",   "und-x-i-enochian",
     88     "i-hak",        "hak",
     89     "i-klingon",    "tlh",
     90     "i-lux",        "lb",
     91     "i-mingo",      "see-x-i-mingo",
     92     "i-navajo",     "nv",
     93     "i-pwn",        "pwn",
     94     "i-tao",        "tao",
     95     "i-tay",        "tay",
     96     "i-tsu",        "tsu",
     97     "no-bok",       "nb",
     98     "no-nyn",       "nn",
     99     "sgn-be-fr",    "sfb",
    100     "sgn-be-nl",    "vgt",
    101     "sgn-ch-de",    "sgg",
    102     "zh-guoyu",     "cmn",
    103     "zh-hakka",     "hak",
    104     "zh-min",       "nan-x-zh-min",
    105     "zh-min-nan",   "nan",
    106     "zh-xiang",     "hsn",
    107     NULL,           NULL
    108 };
    109 
    110 static const char DEPRECATEDLANGS[][4] = {
    111 /*  deprecated  new */
    112     "iw",       "he",
    113     "ji",       "yi",
    114     "in",       "id"
    115 };
    116 
    117 /*
    118 * -------------------------------------------------
    119 *
    120 * These ultag_ functions may be exposed as APIs later
    121 *
    122 * -------------------------------------------------
    123 */
    124 
    125 static ULanguageTag*
    126 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
    127 
    128 static void
    129 ultag_close(ULanguageTag* langtag);
    130 
    131 static const char*
    132 ultag_getLanguage(const ULanguageTag* langtag);
    133 
    134 #if 0
    135 static const char*
    136 ultag_getJDKLanguage(const ULanguageTag* langtag);
    137 #endif
    138 
    139 static const char*
    140 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
    141 
    142 static int32_t
    143 ultag_getExtlangSize(const ULanguageTag* langtag);
    144 
    145 static const char*
    146 ultag_getScript(const ULanguageTag* langtag);
    147 
    148 static const char*
    149 ultag_getRegion(const ULanguageTag* langtag);
    150 
    151 static const char*
    152 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
    153 
    154 static int32_t
    155 ultag_getVariantsSize(const ULanguageTag* langtag);
    156 
    157 static const char*
    158 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
    159 
    160 static const char*
    161 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
    162 
    163 static int32_t
    164 ultag_getExtensionsSize(const ULanguageTag* langtag);
    165 
    166 static const char*
    167 ultag_getPrivateUse(const ULanguageTag* langtag);
    168 
    169 #if 0
    170 static const char*
    171 ultag_getGrandfathered(const ULanguageTag* langtag);
    172 #endif
    173 
    174 /*
    175 * -------------------------------------------------
    176 *
    177 * Language subtag syntax validation functions
    178 *
    179 * -------------------------------------------------
    180 */
    181 
    182 static UBool
    183 _isAlphaString(const char* s, int32_t len) {
    184     int32_t i;
    185     for (i = 0; i < len; i++) {
    186         if (!ISALPHA(*(s + i))) {
    187             return FALSE;
    188         }
    189     }
    190     return TRUE;
    191 }
    192 
    193 static UBool
    194 _isNumericString(const char* s, int32_t len) {
    195     int32_t i;
    196     for (i = 0; i < len; i++) {
    197         if (!ISNUMERIC(*(s + i))) {
    198             return FALSE;
    199         }
    200     }
    201     return TRUE;
    202 }
    203 
    204 static UBool
    205 _isAlphaNumericString(const char* s, int32_t len) {
    206     int32_t i;
    207     for (i = 0; i < len; i++) {
    208         if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
    209             return FALSE;
    210         }
    211     }
    212     return TRUE;
    213 }
    214 
    215 static UBool
    216 _isLanguageSubtag(const char* s, int32_t len) {
    217     /*
    218      * language      = 2*3ALPHA            ; shortest ISO 639 code
    219      *                 ["-" extlang]       ; sometimes followed by
    220      *                                     ;   extended language subtags
    221      *               / 4ALPHA              ; or reserved for future use
    222      *               / 5*8ALPHA            ; or registered language subtag
    223      */
    224     if (len < 0) {
    225         len = (int32_t)uprv_strlen(s);
    226     }
    227     if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
    228         return TRUE;
    229     }
    230     return FALSE;
    231 }
    232 
    233 static UBool
    234 _isExtlangSubtag(const char* s, int32_t len) {
    235     /*
    236      * extlang       = 3ALPHA              ; selected ISO 639 codes
    237      *                 *2("-" 3ALPHA)      ; permanently reserved
    238      */
    239     if (len < 0) {
    240         len = (int32_t)uprv_strlen(s);
    241     }
    242     if (len == 3 && _isAlphaString(s, len)) {
    243         return TRUE;
    244     }
    245     return FALSE;
    246 }
    247 
    248 static UBool
    249 _isScriptSubtag(const char* s, int32_t len) {
    250     /*
    251      * script        = 4ALPHA              ; ISO 15924 code
    252      */
    253     if (len < 0) {
    254         len = (int32_t)uprv_strlen(s);
    255     }
    256     if (len == 4 && _isAlphaString(s, len)) {
    257         return TRUE;
    258     }
    259     return FALSE;
    260 }
    261 
    262 static UBool
    263 _isRegionSubtag(const char* s, int32_t len) {
    264     /*
    265      * region        = 2ALPHA              ; ISO 3166-1 code
    266      *               / 3DIGIT              ; UN M.49 code
    267      */
    268     if (len < 0) {
    269         len = (int32_t)uprv_strlen(s);
    270     }
    271     if (len == 2 && _isAlphaString(s, len)) {
    272         return TRUE;
    273     }
    274     if (len == 3 && _isNumericString(s, len)) {
    275         return TRUE;
    276     }
    277     return FALSE;
    278 }
    279 
    280 static UBool
    281 _isVariantSubtag(const char* s, int32_t len) {
    282     /*
    283      * variant       = 5*8alphanum         ; registered variants
    284      *               / (DIGIT 3alphanum)
    285      */
    286     if (len < 0) {
    287         len = (int32_t)uprv_strlen(s);
    288     }
    289     if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
    290         return TRUE;
    291     }
    292     if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
    293         return TRUE;
    294     }
    295     return FALSE;
    296 }
    297 
    298 static UBool
    299 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
    300     /*
    301      * variant       = 1*8alphanum         ; registered variants
    302      *               / (DIGIT 3alphanum)
    303      */
    304     if (len < 0) {
    305         len = (int32_t)uprv_strlen(s);
    306     }
    307     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
    308         return TRUE;
    309     }
    310     return FALSE;
    311 }
    312 
    313 static UBool
    314 _isExtensionSingleton(const char* s, int32_t len) {
    315     /*
    316      * extension     = singleton 1*("-" (2*8alphanum))
    317      */
    318     if (len < 0) {
    319         len = (int32_t)uprv_strlen(s);
    320     }
    321     if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
    322         return TRUE;
    323     }
    324     return FALSE;
    325 }
    326 
    327 static UBool
    328 _isExtensionSubtag(const char* s, int32_t len) {
    329     /*
    330      * extension     = singleton 1*("-" (2*8alphanum))
    331      */
    332     if (len < 0) {
    333         len = (int32_t)uprv_strlen(s);
    334     }
    335     if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
    336         return TRUE;
    337     }
    338     return FALSE;
    339 }
    340 
    341 static UBool
    342 _isExtensionSubtags(const char* s, int32_t len) {
    343     const char *p = s;
    344     const char *pSubtag = NULL;
    345 
    346     if (len < 0) {
    347         len = (int32_t)uprv_strlen(s);
    348     }
    349 
    350     while ((p - s) < len) {
    351         if (*p == SEP) {
    352             if (pSubtag == NULL) {
    353                 return FALSE;
    354             }
    355             if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
    356                 return FALSE;
    357             }
    358             pSubtag = NULL;
    359         } else if (pSubtag == NULL) {
    360             pSubtag = p;
    361         }
    362         p++;
    363     }
    364     if (pSubtag == NULL) {
    365         return FALSE;
    366     }
    367     return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
    368 }
    369 
    370 static UBool
    371 _isPrivateuseValueSubtag(const char* s, int32_t len) {
    372     /*
    373      * privateuse    = "x" 1*("-" (1*8alphanum))
    374      */
    375     if (len < 0) {
    376         len = (int32_t)uprv_strlen(s);
    377     }
    378     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
    379         return TRUE;
    380     }
    381     return FALSE;
    382 }
    383 
    384 static UBool
    385 _isPrivateuseValueSubtags(const char* s, int32_t len) {
    386     const char *p = s;
    387     const char *pSubtag = NULL;
    388 
    389     if (len < 0) {
    390         len = (int32_t)uprv_strlen(s);
    391     }
    392 
    393     while ((p - s) < len) {
    394         if (*p == SEP) {
    395             if (pSubtag == NULL) {
    396                 return FALSE;
    397             }
    398             if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
    399                 return FALSE;
    400             }
    401             pSubtag = NULL;
    402         } else if (pSubtag == NULL) {
    403             pSubtag = p;
    404         }
    405         p++;
    406     }
    407     if (pSubtag == NULL) {
    408         return FALSE;
    409     }
    410     return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
    411 }
    412 
    413 static UBool
    414 _isLDMLKey(const char* s, int32_t len) {
    415     if (len < 0) {
    416         len = (int32_t)uprv_strlen(s);
    417     }
    418     if (len == 2 && _isAlphaNumericString(s, len)) {
    419         return TRUE;
    420     }
    421     return FALSE;
    422 }
    423 
    424 static UBool
    425 _isLDMLType(const char* s, int32_t len) {
    426     if (len < 0) {
    427         len = (int32_t)uprv_strlen(s);
    428     }
    429     if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
    430         return TRUE;
    431     }
    432     return FALSE;
    433 }
    434 
    435 /*
    436 * -------------------------------------------------
    437 *
    438 * Helper functions
    439 *
    440 * -------------------------------------------------
    441 */
    442 
    443 static UBool
    444 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
    445     UBool bAdded = TRUE;
    446 
    447     if (*first == NULL) {
    448         var->next = NULL;
    449         *first = var;
    450     } else {
    451         VariantListEntry *prev, *cur;
    452         int32_t cmp;
    453 
    454         /* variants order should be preserved */
    455         prev = NULL;
    456         cur = *first;
    457         while (TRUE) {
    458             if (cur == NULL) {
    459                 prev->next = var;
    460                 var->next = NULL;
    461                 break;
    462             }
    463 
    464             /* Checking for duplicate variant */
    465             cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
    466             if (cmp == 0) {
    467                 /* duplicated variant */
    468                 bAdded = FALSE;
    469                 break;
    470             }
    471             prev = cur;
    472             cur = cur->next;
    473         }
    474     }
    475 
    476     return bAdded;
    477 }
    478 
    479 static UBool
    480 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
    481     UBool bAdded = TRUE;
    482 
    483     if (*first == NULL) {
    484         attr->next = NULL;
    485         *first = attr;
    486     } else {
    487         AttributeListEntry *prev, *cur;
    488         int32_t cmp;
    489 
    490         /* reorder variants in alphabetical order */
    491         prev = NULL;
    492         cur = *first;
    493         while (TRUE) {
    494             if (cur == NULL) {
    495                 prev->next = attr;
    496                 attr->next = NULL;
    497                 break;
    498             }
    499             cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
    500             if (cmp < 0) {
    501                 if (prev == NULL) {
    502                     *first = attr;
    503                 } else {
    504                     prev->next = attr;
    505                 }
    506                 attr->next = cur;
    507                 break;
    508             }
    509             if (cmp == 0) {
    510                 /* duplicated variant */
    511                 bAdded = FALSE;
    512                 break;
    513             }
    514             prev = cur;
    515             cur = cur->next;
    516         }
    517     }
    518 
    519     return bAdded;
    520 }
    521 
    522 
    523 static UBool
    524 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
    525     UBool bAdded = TRUE;
    526 
    527     if (*first == NULL) {
    528         ext->next = NULL;
    529         *first = ext;
    530     } else {
    531         ExtensionListEntry *prev, *cur;
    532         int32_t cmp;
    533 
    534         /* reorder variants in alphabetical order */
    535         prev = NULL;
    536         cur = *first;
    537         while (TRUE) {
    538             if (cur == NULL) {
    539                 prev->next = ext;
    540                 ext->next = NULL;
    541                 break;
    542             }
    543             if (localeToBCP) {
    544                 /* special handling for locale to bcp conversion */
    545                 int32_t len, curlen;
    546 
    547                 len = (int32_t)uprv_strlen(ext->key);
    548                 curlen = (int32_t)uprv_strlen(cur->key);
    549 
    550                 if (len == 1 && curlen == 1) {
    551                     if (*(ext->key) == *(cur->key)) {
    552                         cmp = 0;
    553                     } else if (*(ext->key) == PRIVATEUSE) {
    554                         cmp = 1;
    555                     } else if (*(cur->key) == PRIVATEUSE) {
    556                         cmp = -1;
    557                     } else {
    558                         cmp = *(ext->key) - *(cur->key);
    559                     }
    560                 } else if (len == 1) {
    561                     cmp = *(ext->key) - LDMLEXT;
    562                 } else if (curlen == 1) {
    563                     cmp = LDMLEXT - *(cur->key);
    564                 } else {
    565                     cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
    566                 }
    567             } else {
    568                 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
    569             }
    570             if (cmp < 0) {
    571                 if (prev == NULL) {
    572                     *first = ext;
    573                 } else {
    574                     prev->next = ext;
    575                 }
    576                 ext->next = cur;
    577                 break;
    578             }
    579             if (cmp == 0) {
    580                 /* duplicated extension key */
    581                 bAdded = FALSE;
    582                 break;
    583             }
    584             prev = cur;
    585             cur = cur->next;
    586         }
    587     }
    588 
    589     return bAdded;
    590 }
    591 
    592 static void
    593 _initializeULanguageTag(ULanguageTag* langtag) {
    594     int32_t i;
    595 
    596     langtag->buf = NULL;
    597 
    598     langtag->language = EMPTY;
    599     for (i = 0; i < MAXEXTLANG; i++) {
    600         langtag->extlang[i] = NULL;
    601     }
    602 
    603     langtag->script = EMPTY;
    604     langtag->region = EMPTY;
    605 
    606     langtag->variants = NULL;
    607     langtag->extensions = NULL;
    608 
    609     langtag->grandfathered = EMPTY;
    610     langtag->privateuse = EMPTY;
    611 }
    612 
    613 #define KEYTYPEDATA     "keyTypeData"
    614 #define KEYMAP          "keyMap"
    615 #define TYPEMAP         "typeMap"
    616 #define TYPEALIAS       "typeAlias"
    617 #define MAX_BCP47_SUBTAG_LEN    9   /* including null terminator */
    618 #define MAX_LDML_KEY_LEN        22
    619 #define MAX_LDML_TYPE_LEN       32
    620 
    621 static int32_t
    622 _ldmlKeyToBCP47(const char* key, int32_t keyLen,
    623                 char* bcpKey, int32_t bcpKeyCapacity,
    624                 UErrorCode *status) {
    625     UResourceBundle *rb;
    626     char keyBuf[MAX_LDML_KEY_LEN];
    627     char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    628     int32_t resultLen = 0;
    629     int32_t i;
    630     UErrorCode tmpStatus = U_ZERO_ERROR;
    631     const UChar *uBcpKey;
    632     int32_t bcpKeyLen;
    633 
    634     if (keyLen < 0) {
    635         keyLen = (int32_t)uprv_strlen(key);
    636     }
    637 
    638     if (keyLen >= sizeof(keyBuf)) {
    639         /* no known valid LDML key exceeding 21 */
    640         *status = U_ILLEGAL_ARGUMENT_ERROR;
    641         return 0;
    642     }
    643 
    644     uprv_memcpy(keyBuf, key, keyLen);
    645     keyBuf[keyLen] = 0;
    646 
    647     /* to lower case */
    648     for (i = 0; i < keyLen; i++) {
    649         keyBuf[i] = uprv_tolower(keyBuf[i]);
    650     }
    651 
    652     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    653     ures_getByKey(rb, KEYMAP, rb, status);
    654 
    655     if (U_FAILURE(*status)) {
    656         ures_close(rb);
    657         return 0;
    658     }
    659 
    660     uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
    661     if (U_SUCCESS(tmpStatus)) {
    662         u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
    663         bcpKeyBuf[bcpKeyLen] = 0;
    664         resultLen = bcpKeyLen;
    665     } else {
    666         if (_isLDMLKey(key, keyLen)) {
    667             uprv_memcpy(bcpKeyBuf, key, keyLen);
    668             bcpKeyBuf[keyLen] = 0;
    669             resultLen = keyLen;
    670         } else {
    671             /* mapping not availabe */
    672             *status = U_ILLEGAL_ARGUMENT_ERROR;
    673         }
    674     }
    675     ures_close(rb);
    676 
    677     if (U_FAILURE(*status)) {
    678         return 0;
    679     }
    680 
    681     uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
    682     return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
    683 }
    684 
    685 static int32_t
    686 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
    687                 char* key, int32_t keyCapacity,
    688                 UErrorCode *status) {
    689     UResourceBundle *rb;
    690     char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    691     int32_t resultLen = 0;
    692     int32_t i;
    693     const char *resKey = NULL;
    694     UResourceBundle *mapData;
    695 
    696     if (bcpKeyLen < 0) {
    697         bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
    698     }
    699 
    700     if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
    701         *status = U_ILLEGAL_ARGUMENT_ERROR;
    702         return 0;
    703     }
    704 
    705     uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
    706     bcpKeyBuf[bcpKeyLen] = 0;
    707 
    708     /* to lower case */
    709     for (i = 0; i < bcpKeyLen; i++) {
    710         bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
    711     }
    712 
    713     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    714     ures_getByKey(rb, KEYMAP, rb, status);
    715     if (U_FAILURE(*status)) {
    716         ures_close(rb);
    717         return 0;
    718     }
    719 
    720     mapData = ures_getNextResource(rb, NULL, status);
    721     while (U_SUCCESS(*status)) {
    722         const UChar *uBcpKey;
    723         char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    724         int32_t tmpBcpKeyLen;
    725 
    726         uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
    727         if (U_FAILURE(*status)) {
    728             break;
    729         }
    730         u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
    731         tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
    732         if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) {
    733             /* found a matching BCP47 key */
    734             resKey = ures_getKey(mapData);
    735             resultLen = (int32_t)uprv_strlen(resKey);
    736             break;
    737         }
    738         if (!ures_hasNext(rb)) {
    739             break;
    740         }
    741         ures_getNextResource(rb, mapData, status);
    742     }
    743     ures_close(mapData);
    744     ures_close(rb);
    745 
    746     if (U_FAILURE(*status)) {
    747         return 0;
    748     }
    749 
    750     if (resKey == NULL) {
    751         resKey = bcpKeyBuf;
    752         resultLen = bcpKeyLen;
    753     }
    754 
    755     uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
    756     return u_terminateChars(key, keyCapacity, resultLen, status);
    757 }
    758 
    759 static int32_t
    760 _ldmlTypeToBCP47(const char* key, int32_t keyLen,
    761                  const char* type, int32_t typeLen,
    762                  char* bcpType, int32_t bcpTypeCapacity,
    763                  UErrorCode *status) {
    764     UResourceBundle *rb, *keyTypeData, *typeMapForKey;
    765     char keyBuf[MAX_LDML_KEY_LEN];
    766     char typeBuf[MAX_LDML_TYPE_LEN];
    767     char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
    768     int32_t resultLen = 0;
    769     int32_t i;
    770     UErrorCode tmpStatus = U_ZERO_ERROR;
    771     const UChar *uBcpType, *uCanonicalType;
    772     int32_t bcpTypeLen, canonicalTypeLen;
    773     UBool isTimezone = FALSE;
    774 
    775     if (keyLen < 0) {
    776         keyLen = (int32_t)uprv_strlen(key);
    777     }
    778     if (keyLen >= sizeof(keyBuf)) {
    779         /* no known valid LDML key exceeding 21 */
    780         *status = U_ILLEGAL_ARGUMENT_ERROR;
    781         return 0;
    782     }
    783     uprv_memcpy(keyBuf, key, keyLen);
    784     keyBuf[keyLen] = 0;
    785 
    786     /* to lower case */
    787     for (i = 0; i < keyLen; i++) {
    788         keyBuf[i] = uprv_tolower(keyBuf[i]);
    789     }
    790     if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
    791         isTimezone = TRUE;
    792     }
    793 
    794     if (typeLen < 0) {
    795         typeLen = (int32_t)uprv_strlen(type);
    796     }
    797     if (typeLen >= sizeof(typeBuf)) {
    798         *status = U_ILLEGAL_ARGUMENT_ERROR;
    799         return 0;
    800     }
    801 
    802     if (isTimezone) {
    803         /* replace '/' with ':' */
    804         for (i = 0; i < typeLen; i++) {
    805             if (*(type + i) == '/') {
    806                 typeBuf[i] = ':';
    807             } else {
    808                 typeBuf[i] = *(type + i);
    809             }
    810         }
    811         typeBuf[typeLen] = 0;
    812         type = &typeBuf[0];
    813     }
    814 
    815     keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
    816     rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
    817     if (U_FAILURE(*status)) {
    818         ures_close(rb);
    819         ures_close(keyTypeData);
    820         return 0;
    821     }
    822 
    823     typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
    824     uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus);
    825     if (U_SUCCESS(tmpStatus)) {
    826         u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
    827         resultLen = bcpTypeLen;
    828     } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
    829         /* is this type alias? */
    830         tmpStatus = U_ZERO_ERROR;
    831         ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
    832         ures_getByKey(rb, keyBuf, rb, &tmpStatus);
    833         uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus);
    834         if (U_SUCCESS(tmpStatus)) {
    835             u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
    836             if (isTimezone) {
    837                 /* replace '/' with ':' */
    838                 for (i = 0; i < canonicalTypeLen; i++) {
    839                     if (typeBuf[i] == '/') {
    840                         typeBuf[i] = ':';
    841                     }
    842                 }
    843             }
    844             typeBuf[canonicalTypeLen] = 0;
    845 
    846             /* look up the canonical type */
    847             uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
    848             if (U_SUCCESS(tmpStatus)) {
    849                 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
    850                 resultLen = bcpTypeLen;
    851             }
    852         }
    853         if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
    854             if (_isLDMLType(type, typeLen)) {
    855                 uprv_memcpy(bcpTypeBuf, type, typeLen);
    856                 resultLen = typeLen;
    857             } else {
    858                 /* mapping not availabe */
    859                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    860             }
    861         }
    862     } else {
    863         *status = tmpStatus;
    864     }
    865     ures_close(rb);
    866     ures_close(typeMapForKey);
    867     ures_close(keyTypeData);
    868 
    869     if (U_FAILURE(*status)) {
    870         return 0;
    871     }
    872 
    873     uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
    874     return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
    875 }
    876 
    877 static int32_t
    878 _bcp47ToLDMLType(const char* key, int32_t keyLen,
    879                  const char* bcpType, int32_t bcpTypeLen,
    880                  char* type, int32_t typeCapacity,
    881                  UErrorCode *status) {
    882     UResourceBundle *rb;
    883     char keyBuf[MAX_LDML_KEY_LEN];
    884     char bcpTypeBuf[ULOC_KEYWORDS_CAPACITY]; /* ensure buffter is large enough for multiple values (e.g. buddhist-greg) */
    885     int32_t resultLen = 0;
    886     int32_t i, typeSize;
    887     const char *resType = NULL;
    888     UResourceBundle *mapData;
    889     UErrorCode tmpStatus = U_ZERO_ERROR;
    890     int32_t copyLen;
    891 
    892     if (keyLen < 0) {
    893         keyLen = (int32_t)uprv_strlen(key);
    894     }
    895 
    896     if (keyLen >= sizeof(keyBuf)) {
    897         /* no known valid LDML key exceeding 21 */
    898         *status = U_ILLEGAL_ARGUMENT_ERROR;
    899         return 0;
    900     }
    901     uprv_memcpy(keyBuf, key, keyLen);
    902     keyBuf[keyLen] = 0;
    903 
    904     /* to lower case */
    905     for (i = 0; i < keyLen; i++) {
    906         keyBuf[i] = uprv_tolower(keyBuf[i]);
    907     }
    908 
    909 
    910     if (bcpTypeLen < 0) {
    911         bcpTypeLen = (int32_t)uprv_strlen(bcpType);
    912     }
    913 
    914     typeSize = 0;
    915     for (i = 0; i < bcpTypeLen; i++) {
    916         if (bcpType[i] == SEP) {
    917             if (typeSize >= MAX_BCP47_SUBTAG_LEN) {
    918                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    919                 return 0;
    920             }
    921             typeSize = 0;
    922         } else {
    923             typeSize++;
    924         }
    925     }
    926 
    927     uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
    928     bcpTypeBuf[bcpTypeLen] = 0;
    929 
    930     /* to lower case */
    931     for (i = 0; i < bcpTypeLen; i++) {
    932         bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
    933     }
    934 
    935     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    936     ures_getByKey(rb, TYPEMAP, rb, status);
    937     if (U_FAILURE(*status)) {
    938         ures_close(rb);
    939         return 0;
    940     }
    941 
    942     ures_getByKey(rb, keyBuf, rb, &tmpStatus);
    943     mapData = ures_getNextResource(rb, NULL, &tmpStatus);
    944     while (U_SUCCESS(tmpStatus)) {
    945         const UChar *uBcpType;
    946         char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
    947         int32_t tmpBcpTypeLen;
    948 
    949         uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
    950         if (U_FAILURE(tmpStatus)) {
    951             break;
    952         }
    953         u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
    954         tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
    955         if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) {
    956             /* found a matching BCP47 type */
    957             resType = ures_getKey(mapData);
    958             resultLen = (int32_t)uprv_strlen(resType);
    959             break;
    960         }
    961         if (!ures_hasNext(rb)) {
    962             break;
    963         }
    964         ures_getNextResource(rb, mapData, &tmpStatus);
    965     }
    966     ures_close(mapData);
    967     ures_close(rb);
    968 
    969     if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
    970         *status = tmpStatus;
    971         return 0;
    972     }
    973 
    974     if (resType == NULL) {
    975         resType = bcpTypeBuf;
    976         resultLen = bcpTypeLen;
    977     }
    978 
    979     copyLen = uprv_min(resultLen, typeCapacity);
    980     uprv_memcpy(type, resType, copyLen);
    981 
    982     if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
    983         for (i = 0; i < copyLen; i++) {
    984             if (*(type + i) == ':') {
    985                 *(type + i) = '/';
    986             }
    987         }
    988     }
    989 
    990     return u_terminateChars(type, typeCapacity, resultLen, status);
    991 }
    992 
    993 static int32_t
    994 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
    995     char buf[ULOC_LANG_CAPACITY];
    996     UErrorCode tmpStatus = U_ZERO_ERROR;
    997     int32_t len, i;
    998     int32_t reslen = 0;
    999 
   1000     if (U_FAILURE(*status)) {
   1001         return 0;
   1002     }
   1003 
   1004     len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
   1005     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1006         if (strict) {
   1007             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1008             return 0;
   1009         }
   1010         len = 0;
   1011     }
   1012 
   1013     /* Note: returned language code is in lower case letters */
   1014 
   1015     if (len == 0) {
   1016         if (reslen < capacity) {
   1017             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
   1018         }
   1019         reslen += LANG_UND_LEN;
   1020     } else if (!_isLanguageSubtag(buf, len)) {
   1021             /* invalid language code */
   1022         if (strict) {
   1023             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1024             return 0;
   1025         }
   1026         if (reslen < capacity) {
   1027             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
   1028         }
   1029         reslen += LANG_UND_LEN;
   1030     } else {
   1031         /* resolve deprecated */
   1032         for (i = 0; i < LENGTHOF(DEPRECATEDLANGS); i += 2) {
   1033             if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
   1034                 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
   1035                 len = (int32_t)uprv_strlen(buf);
   1036                 break;
   1037             }
   1038         }
   1039         if (reslen < capacity) {
   1040             uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
   1041         }
   1042         reslen += len;
   1043     }
   1044     u_terminateChars(appendAt, capacity, reslen, status);
   1045     return reslen;
   1046 }
   1047 
   1048 static int32_t
   1049 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
   1050     char buf[ULOC_SCRIPT_CAPACITY];
   1051     UErrorCode tmpStatus = U_ZERO_ERROR;
   1052     int32_t len;
   1053     int32_t reslen = 0;
   1054 
   1055     if (U_FAILURE(*status)) {
   1056         return 0;
   1057     }
   1058 
   1059     len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
   1060     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1061         if (strict) {
   1062             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1063         }
   1064         return 0;
   1065     }
   1066 
   1067     if (len > 0) {
   1068         if (!_isScriptSubtag(buf, len)) {
   1069             /* invalid script code */
   1070             if (strict) {
   1071                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1072             }
   1073             return 0;
   1074         } else {
   1075             if (reslen < capacity) {
   1076                 *(appendAt + reslen) = SEP;
   1077             }
   1078             reslen++;
   1079 
   1080             if (reslen < capacity) {
   1081                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
   1082             }
   1083             reslen += len;
   1084         }
   1085     }
   1086     u_terminateChars(appendAt, capacity, reslen, status);
   1087     return reslen;
   1088 }
   1089 
   1090 static int32_t
   1091 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
   1092     char buf[ULOC_COUNTRY_CAPACITY];
   1093     UErrorCode tmpStatus = U_ZERO_ERROR;
   1094     int32_t len;
   1095     int32_t reslen = 0;
   1096 
   1097     if (U_FAILURE(*status)) {
   1098         return 0;
   1099     }
   1100 
   1101     len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
   1102     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1103         if (strict) {
   1104             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1105         }
   1106         return 0;
   1107     }
   1108 
   1109     if (len > 0) {
   1110         if (!_isRegionSubtag(buf, len)) {
   1111             /* invalid region code */
   1112             if (strict) {
   1113                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1114             }
   1115             return 0;
   1116         } else {
   1117             if (reslen < capacity) {
   1118                 *(appendAt + reslen) = SEP;
   1119             }
   1120             reslen++;
   1121 
   1122             if (reslen < capacity) {
   1123                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
   1124             }
   1125             reslen += len;
   1126         }
   1127     }
   1128     u_terminateChars(appendAt, capacity, reslen, status);
   1129     return reslen;
   1130 }
   1131 
   1132 static int32_t
   1133 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
   1134     char buf[ULOC_FULLNAME_CAPACITY];
   1135     UErrorCode tmpStatus = U_ZERO_ERROR;
   1136     int32_t len, i;
   1137     int32_t reslen = 0;
   1138 
   1139     if (U_FAILURE(*status)) {
   1140         return 0;
   1141     }
   1142 
   1143     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
   1144     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1145         if (strict) {
   1146             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1147         }
   1148         return 0;
   1149     }
   1150 
   1151     if (len > 0) {
   1152         char *p, *pVar;
   1153         UBool bNext = TRUE;
   1154         VariantListEntry *var;
   1155         VariantListEntry *varFirst = NULL;
   1156 
   1157         pVar = NULL;
   1158         p = buf;
   1159         while (bNext) {
   1160             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
   1161                 if (*p == 0) {
   1162                     bNext = FALSE;
   1163                 } else {
   1164                     *p = 0; /* terminate */
   1165                 }
   1166                 if (pVar == NULL) {
   1167                     if (strict) {
   1168                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1169                         break;
   1170                     }
   1171                     /* ignore empty variant */
   1172                 } else {
   1173                     /* ICU uses upper case letters for variants, but
   1174                        the canonical format is lowercase in BCP47 */
   1175                     for (i = 0; *(pVar + i) != 0; i++) {
   1176                         *(pVar + i) = uprv_tolower(*(pVar + i));
   1177                     }
   1178 
   1179                     /* validate */
   1180                     if (_isVariantSubtag(pVar, -1)) {
   1181                         if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
   1182                             /* emit the variant to the list */
   1183                             var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
   1184                             if (var == NULL) {
   1185                                 *status = U_MEMORY_ALLOCATION_ERROR;
   1186                                 break;
   1187                             }
   1188                             var->variant = pVar;
   1189                             if (!_addVariantToList(&varFirst, var)) {
   1190                                 /* duplicated variant */
   1191                                 uprv_free(var);
   1192                                 if (strict) {
   1193                                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1194                                     break;
   1195                                 }
   1196                             }
   1197                         } else {
   1198                             /* Special handling for POSIX variant, need to remember that we had it and then */
   1199                             /* treat it like an extension later. */
   1200                             *hadPosix = TRUE;
   1201                         }
   1202                     } else if (strict) {
   1203                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1204                         break;
   1205                     } else if (_isPrivateuseValueSubtag(pVar, -1)) {
   1206                         /* Handle private use subtags separately */
   1207                         break;
   1208                     }
   1209                 }
   1210                 /* reset variant starting position */
   1211                 pVar = NULL;
   1212             } else if (pVar == NULL) {
   1213                 pVar = p;
   1214             }
   1215             p++;
   1216         }
   1217 
   1218         if (U_SUCCESS(*status)) {
   1219             if (varFirst != NULL) {
   1220                 int32_t varLen;
   1221 
   1222                 /* write out validated/normalized variants to the target */
   1223                 var = varFirst;
   1224                 while (var != NULL) {
   1225                     if (reslen < capacity) {
   1226                         *(appendAt + reslen) = SEP;
   1227                     }
   1228                     reslen++;
   1229                     varLen = (int32_t)uprv_strlen(var->variant);
   1230                     if (reslen < capacity) {
   1231                         uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
   1232                     }
   1233                     reslen += varLen;
   1234                     var = var->next;
   1235                 }
   1236             }
   1237         }
   1238 
   1239         /* clean up */
   1240         var = varFirst;
   1241         while (var != NULL) {
   1242             VariantListEntry *tmpVar = var->next;
   1243             uprv_free(var);
   1244             var = tmpVar;
   1245         }
   1246 
   1247         if (U_FAILURE(*status)) {
   1248             return 0;
   1249         }
   1250     }
   1251 
   1252     u_terminateChars(appendAt, capacity, reslen, status);
   1253     return reslen;
   1254 }
   1255 
   1256 static int32_t
   1257 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
   1258     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1259     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
   1260     int32_t attrBufLength = 0;
   1261     UBool isAttribute = FALSE;
   1262     UEnumeration *keywordEnum = NULL;
   1263     int32_t reslen = 0;
   1264 
   1265     keywordEnum = uloc_openKeywords(localeID, status);
   1266     if (U_FAILURE(*status) && !hadPosix) {
   1267         uenum_close(keywordEnum);
   1268         return 0;
   1269     }
   1270     if (keywordEnum != NULL || hadPosix) {
   1271         /* reorder extensions */
   1272         int32_t len;
   1273         const char *key;
   1274         ExtensionListEntry *firstExt = NULL;
   1275         ExtensionListEntry *ext;
   1276         AttributeListEntry *firstAttr = NULL;
   1277         AttributeListEntry *attr;
   1278         char *attrValue;
   1279         char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1280         char *pExtBuf = extBuf;
   1281         int32_t extBufCapacity = sizeof(extBuf);
   1282         const char *bcpKey, *bcpValue;
   1283         UErrorCode tmpStatus = U_ZERO_ERROR;
   1284         int32_t keylen;
   1285         UBool isLDMLKeyword;
   1286 
   1287         while (TRUE) {
   1288             isAttribute = FALSE;
   1289             key = uenum_next(keywordEnum, NULL, status);
   1290             if (key == NULL) {
   1291                 break;
   1292             }
   1293             len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
   1294             if (U_FAILURE(tmpStatus)) {
   1295                 if (strict) {
   1296                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1297                     break;
   1298                 }
   1299                 /* ignore this keyword */
   1300                 tmpStatus = U_ZERO_ERROR;
   1301                 continue;
   1302             }
   1303 
   1304             keylen = (int32_t)uprv_strlen(key);
   1305             isLDMLKeyword = (keylen > 1);
   1306 
   1307             /* special keyword used for representing Unicode locale attributes */
   1308             if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
   1309                 isAttribute = TRUE;
   1310                 if (len > 0) {
   1311                     int32_t i = 0;
   1312                     while (TRUE) {
   1313                         attrBufLength = 0;
   1314                         for (; i < len; i++) {
   1315                             if (buf[i] != '-') {
   1316                                 attrBuf[attrBufLength++] = buf[i];
   1317                             } else {
   1318                                 i++;
   1319                                 break;
   1320                             }
   1321                         }
   1322                         if (attrBufLength > 0) {
   1323                             attrBuf[attrBufLength] = 0;
   1324 
   1325                         } else if (i >= len){
   1326                             break;
   1327                         }
   1328 
   1329                         /* create AttributeListEntry */
   1330                         attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
   1331                         if (attr == NULL) {
   1332                             *status = U_MEMORY_ALLOCATION_ERROR;
   1333                             break;
   1334                         }
   1335                         attrValue = (char*)uprv_malloc(attrBufLength + 1);
   1336                         if (attrValue == NULL) {
   1337                             *status = U_MEMORY_ALLOCATION_ERROR;
   1338                             break;
   1339                         }
   1340                         uprv_strcpy(attrValue, attrBuf);
   1341                         attr->attribute = attrValue;
   1342 
   1343                         if (!_addAttributeToList(&firstAttr, attr)) {
   1344                             uprv_free(attr);
   1345                             uprv_free(attrValue);
   1346                             if (strict) {
   1347                                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1348                                 break;
   1349                             }
   1350                         }
   1351                     }
   1352                 }
   1353             } else if (isLDMLKeyword) {
   1354                 int32_t modKeyLen;
   1355 
   1356                 /* transform key and value to bcp47 style */
   1357                 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
   1358                 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1359                     if (strict) {
   1360                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1361                         break;
   1362                     }
   1363                     tmpStatus = U_ZERO_ERROR;
   1364                     continue;
   1365                 }
   1366 
   1367                 bcpKey = pExtBuf;
   1368                 pExtBuf += (modKeyLen + 1);
   1369                 extBufCapacity -= (modKeyLen + 1);
   1370 
   1371                 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
   1372                 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1373                     if (strict) {
   1374                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1375                         break;
   1376                     }
   1377                     tmpStatus = U_ZERO_ERROR;
   1378                     continue;
   1379                 }
   1380                 bcpValue = pExtBuf;
   1381                 pExtBuf += (len + 1);
   1382                 extBufCapacity -= (len + 1);
   1383             } else {
   1384                 if (*key == PRIVATEUSE) {
   1385                     if (!_isPrivateuseValueSubtags(buf, len)) {
   1386                         if (strict) {
   1387                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1388                             break;
   1389                         }
   1390                         continue;
   1391                     }
   1392                 } else {
   1393                     if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
   1394                         if (strict) {
   1395                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1396                             break;
   1397                         }
   1398                         continue;
   1399                     }
   1400                 }
   1401                 bcpKey = key;
   1402                 if ((len + 1) < extBufCapacity) {
   1403                     uprv_memcpy(pExtBuf, buf, len);
   1404                     bcpValue = pExtBuf;
   1405 
   1406                     pExtBuf += len;
   1407 
   1408                     *pExtBuf = 0;
   1409                     pExtBuf++;
   1410 
   1411                     extBufCapacity -= (len + 1);
   1412                 } else {
   1413                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1414                     break;
   1415                 }
   1416             }
   1417 
   1418             if (!isAttribute) {
   1419                 /* create ExtensionListEntry */
   1420                 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1421                 if (ext == NULL) {
   1422                     *status = U_MEMORY_ALLOCATION_ERROR;
   1423                     break;
   1424                 }
   1425                 ext->key = bcpKey;
   1426                 ext->value = bcpValue;
   1427 
   1428                 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
   1429                     uprv_free(ext);
   1430                     if (strict) {
   1431                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1432                         break;
   1433                     }
   1434                 }
   1435             }
   1436         }
   1437 
   1438         /* Special handling for POSIX variant - add the keywords for POSIX */
   1439         if (hadPosix) {
   1440             /* create ExtensionListEntry for POSIX */
   1441             ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1442             if (ext == NULL) {
   1443                 *status = U_MEMORY_ALLOCATION_ERROR;
   1444                 goto cleanup;
   1445             }
   1446             ext->key = POSIX_KEY;
   1447             ext->value = POSIX_VALUE;
   1448 
   1449             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
   1450                 uprv_free(ext);
   1451             }
   1452         }
   1453 
   1454         if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
   1455             UBool startLDMLExtension = FALSE;
   1456 
   1457             attr = firstAttr;
   1458             ext = firstExt;
   1459             do {
   1460                 if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) {
   1461                    /* write LDML singleton extension */
   1462                    if (reslen < capacity) {
   1463                        *(appendAt + reslen) = SEP;
   1464                    }
   1465                    reslen++;
   1466                    if (reslen < capacity) {
   1467                        *(appendAt + reslen) = LDMLEXT;
   1468                    }
   1469                    reslen++;
   1470 
   1471                    startLDMLExtension = TRUE;
   1472                 }
   1473 
   1474                 /* write out the sorted BCP47 attributes, extensions and private use */
   1475                 if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) {
   1476                     if (reslen < capacity) {
   1477                         *(appendAt + reslen) = SEP;
   1478                     }
   1479                     reslen++;
   1480                     len = (int32_t)uprv_strlen(ext->key);
   1481                     if (reslen < capacity) {
   1482                         uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
   1483                     }
   1484                     reslen += len;
   1485                     if (reslen < capacity) {
   1486                         *(appendAt + reslen) = SEP;
   1487                     }
   1488                     reslen++;
   1489                     len = (int32_t)uprv_strlen(ext->value);
   1490                     if (reslen < capacity) {
   1491                         uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
   1492                     }
   1493                     reslen += len;
   1494 
   1495                     ext = ext->next;
   1496                 } else if (attr) {
   1497                     /* write the value for the attributes */
   1498                     if (reslen < capacity) {
   1499                         *(appendAt + reslen) = SEP;
   1500                     }
   1501                     reslen++;
   1502                     len = (int32_t)uprv_strlen(attr->attribute);
   1503                     if (reslen < capacity) {
   1504                         uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
   1505                     }
   1506                     reslen += len;
   1507 
   1508                     attr = attr->next;
   1509                 }
   1510             } while (attr != NULL || ext != NULL);
   1511         }
   1512 cleanup:
   1513         /* clean up */
   1514         ext = firstExt;
   1515         while (ext != NULL) {
   1516             ExtensionListEntry *tmpExt = ext->next;
   1517             uprv_free(ext);
   1518             ext = tmpExt;
   1519         }
   1520 
   1521         attr = firstAttr;
   1522         while (attr != NULL) {
   1523             AttributeListEntry *tmpAttr = attr->next;
   1524             char *pValue = (char *)attr->attribute;
   1525             uprv_free(pValue);
   1526             uprv_free(attr);
   1527             attr = tmpAttr;
   1528         }
   1529 
   1530         uenum_close(keywordEnum);
   1531 
   1532         if (U_FAILURE(*status)) {
   1533             return 0;
   1534         }
   1535     }
   1536 
   1537     return u_terminateChars(appendAt, capacity, reslen, status);
   1538 }
   1539 
   1540 /**
   1541  * Append keywords parsed from LDML extension value
   1542  * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
   1543  * Note: char* buf is used for storing keywords
   1544  */
   1545 static void
   1546 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
   1547     const char *pTag;   /* beginning of current subtag */
   1548     const char *pKwds;  /* beginning of key-type pairs */
   1549     UBool variantExists = *posixVariant;
   1550 
   1551     ExtensionListEntry *kwdFirst = NULL;    /* first LDML keyword */
   1552     ExtensionListEntry *kwd, *nextKwd;
   1553 
   1554     AttributeListEntry *attrFirst = NULL;   /* first attribute */
   1555     AttributeListEntry *attr, *nextAttr;
   1556 
   1557     int32_t len;
   1558     int32_t bufIdx = 0;
   1559 
   1560     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1561     int32_t attrBufIdx = 0;
   1562 
   1563     /* Reset the posixVariant value */
   1564     *posixVariant = FALSE;
   1565 
   1566     pTag = ldmlext;
   1567     pKwds = NULL;
   1568 
   1569     /* Iterate through u extension attributes */
   1570     while (*pTag) {
   1571         /* locate next separator char */
   1572         for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
   1573 
   1574         if (_isLDMLKey(pTag, len)) {
   1575             pKwds = pTag;
   1576             break;
   1577         }
   1578 
   1579         /* add this attribute to the list */
   1580         attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
   1581         if (attr == NULL) {
   1582             *status = U_MEMORY_ALLOCATION_ERROR;
   1583             goto cleanup;
   1584         }
   1585 
   1586         if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
   1587             uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
   1588             attrBuf[attrBufIdx + len] = 0;
   1589             attr->attribute = &attrBuf[attrBufIdx];
   1590             attrBufIdx += (len + 1);
   1591         } else {
   1592             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1593             goto cleanup;
   1594         }
   1595 
   1596         if (!_addAttributeToList(&attrFirst, attr)) {
   1597             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1598             uprv_free(attr);
   1599             goto cleanup;
   1600         }
   1601 
   1602         /* next tag */
   1603         pTag += len;
   1604         if (*pTag) {
   1605             /* next to the separator */
   1606             pTag++;
   1607         }
   1608     }
   1609 
   1610     if (attrFirst) {
   1611         /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
   1612 
   1613         if (attrBufIdx > bufSize) {
   1614             /* attrBufIdx == <total length of attribute subtag> + 1 */
   1615             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1616             goto cleanup;
   1617         }
   1618 
   1619         kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1620         if (kwd == NULL) {
   1621             *status = U_MEMORY_ALLOCATION_ERROR;
   1622             goto cleanup;
   1623         }
   1624 
   1625         kwd->key = LOCALE_ATTRIBUTE_KEY;
   1626         kwd->value = buf;
   1627 
   1628         /* attribute subtags sorted in alphabetical order as type */
   1629         attr = attrFirst;
   1630         while (attr != NULL) {
   1631             nextAttr = attr->next;
   1632 
   1633             /* buffer size check is done above */
   1634             if (attr != attrFirst) {
   1635                 *(buf + bufIdx) = SEP;
   1636                 bufIdx++;
   1637             }
   1638 
   1639             len = uprv_strlen(attr->attribute);
   1640             uprv_memcpy(buf + bufIdx, attr->attribute, len);
   1641             bufIdx += len;
   1642 
   1643             attr = nextAttr;
   1644         }
   1645         *(buf + bufIdx) = 0;
   1646         bufIdx++;
   1647 
   1648         if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1649             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1650             uprv_free(kwd);
   1651             goto cleanup;
   1652         }
   1653 
   1654         /* once keyword entry is created, delete the attribute list */
   1655         attr = attrFirst;
   1656         while (attr != NULL) {
   1657             nextAttr = attr->next;
   1658             uprv_free(attr);
   1659             attr = nextAttr;
   1660         }
   1661         attrFirst = NULL;
   1662     }
   1663 
   1664     if (pKwds) {
   1665         const char *pBcpKey = NULL;     /* u extenstion key subtag */
   1666         const char *pBcpType = NULL;    /* beginning of u extension type subtag(s) */
   1667         int32_t bcpKeyLen = 0;
   1668         int32_t bcpTypeLen = 0;
   1669         UBool isDone = FALSE;
   1670 
   1671         pTag = pKwds;
   1672         /* BCP47 representation of LDML key/type pairs */
   1673         while (!isDone) {
   1674             const char *pNextBcpKey = NULL;
   1675             int32_t nextBcpKeyLen;
   1676             UBool emitKeyword = FALSE;
   1677 
   1678             if (*pTag) {
   1679                 /* locate next separator char */
   1680                 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
   1681 
   1682                 if (_isLDMLKey(pTag, len)) {
   1683                     if (pBcpKey) {
   1684                         emitKeyword = TRUE;
   1685                         pNextBcpKey = pTag;
   1686                         nextBcpKeyLen = len;
   1687                     } else {
   1688                         pBcpKey = pTag;
   1689                         bcpKeyLen = len;
   1690                     }
   1691                 } else {
   1692                     U_ASSERT(pBcpKey != NULL);
   1693                     /* within LDML type subtags */
   1694                     if (pBcpType) {
   1695                         bcpTypeLen += (len + 1);
   1696                     } else {
   1697                         pBcpType = pTag;
   1698                         bcpTypeLen = len;
   1699                     }
   1700                 }
   1701 
   1702                 /* next tag */
   1703                 pTag += len;
   1704                 if (*pTag) {
   1705                     /* next to the separator */
   1706                     pTag++;
   1707                 }
   1708             } else {
   1709                 /* processing last one */
   1710                 emitKeyword = TRUE;
   1711                 isDone = TRUE;
   1712             }
   1713 
   1714             if (emitKeyword) {
   1715                 const char *pKey = NULL;    /* LDML key */
   1716                 const char *pType = NULL;   /* LDML type */
   1717 
   1718                 U_ASSERT(pBcpKey != NULL);
   1719 
   1720                 /* u extension key to LDML key */
   1721                 len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
   1722                 if (U_FAILURE(*status)) {
   1723                     goto cleanup;
   1724                 }
   1725                 pKey = buf + bufIdx;
   1726                 bufIdx += len;
   1727                 *(buf + bufIdx) = 0;
   1728                 bufIdx++;
   1729 
   1730                 if (pBcpType) {
   1731                     /* BCP type to locale type */
   1732                     len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
   1733                     if (U_FAILURE(*status)) {
   1734                         goto cleanup;
   1735                     }
   1736                     pType = buf + bufIdx;
   1737                     bufIdx += len;
   1738                     *(buf + bufIdx) = 0;
   1739                     bufIdx++;
   1740                 } else {
   1741                     /* typeless - default type value is "yes" */
   1742                     pType = LOCALE_TYPE_YES;
   1743                 }
   1744 
   1745                 /* Special handling for u-va-posix, since we want to treat this as a variant,
   1746                    not as a keyword */
   1747                 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
   1748                     *posixVariant = TRUE;
   1749                 } else {
   1750                     /* create an ExtensionListEntry for this keyword */
   1751                     kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1752                     if (kwd == NULL) {
   1753                         *status = U_MEMORY_ALLOCATION_ERROR;
   1754                         goto cleanup;
   1755                     }
   1756 
   1757                     kwd->key = pKey;
   1758                     kwd->value = pType;
   1759 
   1760                     if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1761                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1762                         uprv_free(kwd);
   1763                         goto cleanup;
   1764                     }
   1765                 }
   1766 
   1767                 pBcpKey = pNextBcpKey;
   1768                 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
   1769                 pBcpType = NULL;
   1770                 bcpTypeLen = 0;
   1771             }
   1772         }
   1773     }
   1774 
   1775     kwd = kwdFirst;
   1776     while (kwd != NULL) {
   1777         nextKwd = kwd->next;
   1778         _addExtensionToList(appendTo, kwd, FALSE);
   1779         kwd = nextKwd;
   1780     }
   1781 
   1782     return;
   1783 
   1784 cleanup:
   1785     attr = attrFirst;
   1786     while (attr != NULL) {
   1787         nextAttr = attr->next;
   1788         uprv_free(attr);
   1789         attr = nextAttr;
   1790     }
   1791 
   1792     kwd = kwdFirst;
   1793     while (kwd != NULL) {
   1794         nextKwd = kwd->next;
   1795         uprv_free(kwd);
   1796         kwd = nextKwd;
   1797     }
   1798 }
   1799 
   1800 
   1801 static int32_t
   1802 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
   1803     int32_t reslen = 0;
   1804     int32_t i, n;
   1805     int32_t len;
   1806     ExtensionListEntry *kwdFirst = NULL;
   1807     ExtensionListEntry *kwd;
   1808     const char *key, *type;
   1809     char *kwdBuf = NULL;
   1810     int32_t kwdBufLength = capacity;
   1811     UBool posixVariant = FALSE;
   1812 
   1813     if (U_FAILURE(*status)) {
   1814         return 0;
   1815     }
   1816 
   1817     kwdBuf = (char*)uprv_malloc(kwdBufLength);
   1818     if (kwdBuf == NULL) {
   1819         *status = U_MEMORY_ALLOCATION_ERROR;
   1820         return 0;
   1821     }
   1822 
   1823     /* Determine if variants already exists */
   1824     if (ultag_getVariantsSize(langtag)) {
   1825         posixVariant = TRUE;
   1826     }
   1827 
   1828     n = ultag_getExtensionsSize(langtag);
   1829 
   1830     /* resolve locale keywords and reordering keys */
   1831     for (i = 0; i < n; i++) {
   1832         key = ultag_getExtensionKey(langtag, i);
   1833         type = ultag_getExtensionValue(langtag, i);
   1834         if (*key == LDMLEXT) {
   1835             _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
   1836             if (U_FAILURE(*status)) {
   1837                 break;
   1838             }
   1839         } else {
   1840             kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1841             if (kwd == NULL) {
   1842                 *status = U_MEMORY_ALLOCATION_ERROR;
   1843                 break;
   1844             }
   1845             kwd->key = key;
   1846             kwd->value = type;
   1847             if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1848                 uprv_free(kwd);
   1849                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1850                 break;
   1851             }
   1852         }
   1853     }
   1854 
   1855     if (U_SUCCESS(*status)) {
   1856         type = ultag_getPrivateUse(langtag);
   1857         if ((int32_t)uprv_strlen(type) > 0) {
   1858             /* add private use as a keyword */
   1859             kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   1860             if (kwd == NULL) {
   1861                 *status = U_MEMORY_ALLOCATION_ERROR;
   1862             } else {
   1863                 kwd->key = PRIVATEUSE_KEY;
   1864                 kwd->value = type;
   1865                 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1866                     uprv_free(kwd);
   1867                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1868                 }
   1869             }
   1870         }
   1871     }
   1872 
   1873     /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
   1874 
   1875     if (U_SUCCESS(*status) && posixVariant) {
   1876         len = (int32_t) uprv_strlen(_POSIX);
   1877         if (reslen < capacity) {
   1878             uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
   1879         }
   1880         reslen += len;
   1881     }
   1882 
   1883     if (U_SUCCESS(*status) && kwdFirst != NULL) {
   1884         /* write out the sorted keywords */
   1885         UBool firstValue = TRUE;
   1886         kwd = kwdFirst;
   1887         do {
   1888             if (reslen < capacity) {
   1889                 if (firstValue) {
   1890                     /* '@' */
   1891                     *(appendAt + reslen) = LOCALE_EXT_SEP;
   1892                     firstValue = FALSE;
   1893                 } else {
   1894                     /* ';' */
   1895                     *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
   1896                 }
   1897             }
   1898             reslen++;
   1899 
   1900             /* key */
   1901             len = (int32_t)uprv_strlen(kwd->key);
   1902             if (reslen < capacity) {
   1903                 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
   1904             }
   1905             reslen += len;
   1906 
   1907             /* '=' */
   1908             if (reslen < capacity) {
   1909                 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
   1910             }
   1911             reslen++;
   1912 
   1913             /* type */
   1914             len = (int32_t)uprv_strlen(kwd->value);
   1915             if (reslen < capacity) {
   1916                 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
   1917             }
   1918             reslen += len;
   1919 
   1920             kwd = kwd->next;
   1921         } while (kwd);
   1922     }
   1923 
   1924     /* clean up */
   1925     kwd = kwdFirst;
   1926     while (kwd != NULL) {
   1927         ExtensionListEntry *tmpKwd = kwd->next;
   1928         uprv_free(kwd);
   1929         kwd = tmpKwd;
   1930     }
   1931 
   1932     uprv_free(kwdBuf);
   1933 
   1934     if (U_FAILURE(*status)) {
   1935         return 0;
   1936     }
   1937 
   1938     return u_terminateChars(appendAt, capacity, reslen, status);
   1939 }
   1940 
   1941 static int32_t
   1942 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
   1943     char buf[ULOC_FULLNAME_CAPACITY];
   1944     char tmpAppend[ULOC_FULLNAME_CAPACITY];
   1945     UErrorCode tmpStatus = U_ZERO_ERROR;
   1946     int32_t len, i;
   1947     int32_t reslen = 0;
   1948 
   1949     if (U_FAILURE(*status)) {
   1950         return 0;
   1951     }
   1952 
   1953     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
   1954     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1955         if (strict) {
   1956             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1957         }
   1958         return 0;
   1959     }
   1960 
   1961     if (len > 0) {
   1962         char *p, *pPriv;
   1963         UBool bNext = TRUE;
   1964         UBool firstValue = TRUE;
   1965         UBool writeValue;
   1966 
   1967         pPriv = NULL;
   1968         p = buf;
   1969         while (bNext) {
   1970             writeValue = FALSE;
   1971             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
   1972                 if (*p == 0) {
   1973                     bNext = FALSE;
   1974                 } else {
   1975                     *p = 0; /* terminate */
   1976                 }
   1977                 if (pPriv != NULL) {
   1978                     /* Private use in the canonical format is lowercase in BCP47 */
   1979                     for (i = 0; *(pPriv + i) != 0; i++) {
   1980                         *(pPriv + i) = uprv_tolower(*(pPriv + i));
   1981                     }
   1982 
   1983                     /* validate */
   1984                     if (_isPrivateuseValueSubtag(pPriv, -1)) {
   1985                         if (firstValue) {
   1986                             if (!_isVariantSubtag(pPriv, -1)) {
   1987                                 writeValue = TRUE;
   1988                             }
   1989                         } else {
   1990                             writeValue = TRUE;
   1991                         }
   1992                     } else if (strict) {
   1993                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1994                         break;
   1995                     } else {
   1996                         break;
   1997                     }
   1998 
   1999                     if (writeValue) {
   2000                         if (reslen < capacity) {
   2001                             tmpAppend[reslen++] = SEP;
   2002                         }
   2003 
   2004                         if (firstValue) {
   2005                             if (reslen < capacity) {
   2006                                 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
   2007                             }
   2008 
   2009                             if (reslen < capacity) {
   2010                                 tmpAppend[reslen++] = SEP;
   2011                             }
   2012 
   2013                             len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
   2014                             if (reslen < capacity) {
   2015                                 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
   2016                             }
   2017                             reslen += len;
   2018 
   2019                             if (reslen < capacity) {
   2020                                 tmpAppend[reslen++] = SEP;
   2021                             }
   2022 
   2023                             firstValue = FALSE;
   2024                         }
   2025 
   2026                         len = (int32_t)uprv_strlen(pPriv);
   2027                         if (reslen < capacity) {
   2028                             uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
   2029                         }
   2030                         reslen += len;
   2031                     }
   2032                 }
   2033                 /* reset private use starting position */
   2034                 pPriv = NULL;
   2035             } else if (pPriv == NULL) {
   2036                 pPriv = p;
   2037             }
   2038             p++;
   2039         }
   2040 
   2041         if (U_FAILURE(*status)) {
   2042             return 0;
   2043         }
   2044     }
   2045 
   2046     if (U_SUCCESS(*status)) {
   2047         len = reslen;
   2048         if (reslen < capacity) {
   2049             uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
   2050         }
   2051     }
   2052 
   2053     u_terminateChars(appendAt, capacity, reslen, status);
   2054 
   2055     return reslen;
   2056 }
   2057 
   2058 /*
   2059 * -------------------------------------------------
   2060 *
   2061 * ultag_ functions
   2062 *
   2063 * -------------------------------------------------
   2064 */
   2065 
   2066 /* Bit flags used by the parser */
   2067 #define LANG 0x0001
   2068 #define EXTL 0x0002
   2069 #define SCRT 0x0004
   2070 #define REGN 0x0008
   2071 #define VART 0x0010
   2072 #define EXTS 0x0020
   2073 #define EXTV 0x0040
   2074 #define PRIV 0x0080
   2075 
   2076 static ULanguageTag*
   2077 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
   2078     ULanguageTag *t;
   2079     char *tagBuf;
   2080     int16_t next;
   2081     char *pSubtag, *pNext, *pLastGoodPosition;
   2082     int32_t subtagLen;
   2083     int32_t extlangIdx;
   2084     ExtensionListEntry *pExtension;
   2085     AttributeListEntry *pAttribute;
   2086     char *pExtValueSubtag, *pExtValueSubtagEnd;
   2087     int32_t i;
   2088     UBool isLDMLExtension, privateuseVar = FALSE;
   2089     int32_t grandfatheredLen = 0;
   2090 
   2091     if (parsedLen != NULL) {
   2092         *parsedLen = 0;
   2093     }
   2094 
   2095     if (U_FAILURE(*status)) {
   2096         return NULL;
   2097     }
   2098 
   2099     if (tagLen < 0) {
   2100         tagLen = (int32_t)uprv_strlen(tag);
   2101     }
   2102 
   2103     /* copy the entire string */
   2104     tagBuf = (char*)uprv_malloc(tagLen + 1);
   2105     if (tagBuf == NULL) {
   2106         *status = U_MEMORY_ALLOCATION_ERROR;
   2107         return NULL;
   2108     }
   2109     uprv_memcpy(tagBuf, tag, tagLen);
   2110     *(tagBuf + tagLen) = 0;
   2111 
   2112     /* create a ULanguageTag */
   2113     t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
   2114     if (t == NULL) {
   2115         uprv_free(tagBuf);
   2116         *status = U_MEMORY_ALLOCATION_ERROR;
   2117         return NULL;
   2118     }
   2119     _initializeULanguageTag(t);
   2120     t->buf = tagBuf;
   2121 
   2122     if (tagLen < MINLEN) {
   2123         /* the input tag is too short - return empty ULanguageTag */
   2124         return t;
   2125     }
   2126 
   2127     /* check if the tag is grandfathered */
   2128     for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
   2129         if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
   2130             int32_t newTagLength;
   2131 
   2132             grandfatheredLen = tagLen;  /* back up for output parsedLen */
   2133             newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
   2134             if (tagLen < newTagLength) {
   2135                 uprv_free(tagBuf);
   2136                 tagBuf = (char*)uprv_malloc(newTagLength + 1);
   2137                 if (tagBuf == NULL) {
   2138                     *status = U_MEMORY_ALLOCATION_ERROR;
   2139                     return NULL;
   2140                 }
   2141                 t->buf = tagBuf;
   2142                 tagLen = newTagLength;
   2143             }
   2144             uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
   2145             break;
   2146         }
   2147     }
   2148 
   2149     /*
   2150      * langtag      =   language
   2151      *                  ["-" script]
   2152      *                  ["-" region]
   2153      *                  *("-" variant)
   2154      *                  *("-" extension)
   2155      *                  ["-" privateuse]
   2156      */
   2157 
   2158     next = LANG | PRIV;
   2159     pNext = pLastGoodPosition = tagBuf;
   2160     extlangIdx = 0;
   2161     pExtension = NULL;
   2162     pExtValueSubtag = NULL;
   2163     pExtValueSubtagEnd = NULL;
   2164     pAttribute = NULL;
   2165     isLDMLExtension = FALSE;
   2166 
   2167     while (pNext) {
   2168         char *pSep;
   2169 
   2170         pSubtag = pNext;
   2171 
   2172         /* locate next separator char */
   2173         pSep = pSubtag;
   2174         while (*pSep) {
   2175             if (*pSep == SEP) {
   2176                 break;
   2177             }
   2178             pSep++;
   2179         }
   2180         if (*pSep == 0) {
   2181             /* last subtag */
   2182             pNext = NULL;
   2183         } else {
   2184             pNext = pSep + 1;
   2185         }
   2186         subtagLen = (int32_t)(pSep - pSubtag);
   2187 
   2188         if (next & LANG) {
   2189             if (_isLanguageSubtag(pSubtag, subtagLen)) {
   2190                 *pSep = 0;  /* terminate */
   2191                 t->language = T_CString_toLowerCase(pSubtag);
   2192 
   2193                 pLastGoodPosition = pSep;
   2194                 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
   2195                 continue;
   2196             }
   2197         }
   2198         if (next & EXTL) {
   2199             if (_isExtlangSubtag(pSubtag, subtagLen)) {
   2200                 *pSep = 0;
   2201                 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
   2202 
   2203                 pLastGoodPosition = pSep;
   2204                 if (extlangIdx < 3) {
   2205                     next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
   2206                 } else {
   2207                     next = SCRT | REGN | VART | EXTS | PRIV;
   2208                 }
   2209                 continue;
   2210             }
   2211         }
   2212         if (next & SCRT) {
   2213             if (_isScriptSubtag(pSubtag, subtagLen)) {
   2214                 char *p = pSubtag;
   2215 
   2216                 *pSep = 0;
   2217 
   2218                 /* to title case */
   2219                 *p = uprv_toupper(*p);
   2220                 p++;
   2221                 for (; *p; p++) {
   2222                     *p = uprv_tolower(*p);
   2223                 }
   2224 
   2225                 t->script = pSubtag;
   2226 
   2227                 pLastGoodPosition = pSep;
   2228                 next = REGN | VART | EXTS | PRIV;
   2229                 continue;
   2230             }
   2231         }
   2232         if (next & REGN) {
   2233             if (_isRegionSubtag(pSubtag, subtagLen)) {
   2234                 *pSep = 0;
   2235                 t->region = T_CString_toUpperCase(pSubtag);
   2236 
   2237                 pLastGoodPosition = pSep;
   2238                 next = VART | EXTS | PRIV;
   2239                 continue;
   2240             }
   2241         }
   2242         if (next & VART) {
   2243             if (_isVariantSubtag(pSubtag, subtagLen) ||
   2244                (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
   2245                 VariantListEntry *var;
   2246                 UBool isAdded;
   2247 
   2248                 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
   2249                 if (var == NULL) {
   2250                     *status = U_MEMORY_ALLOCATION_ERROR;
   2251                     goto error;
   2252                 }
   2253                 *pSep = 0;
   2254                 var->variant = T_CString_toUpperCase(pSubtag);
   2255                 isAdded = _addVariantToList(&(t->variants), var);
   2256                 if (!isAdded) {
   2257                     /* duplicated variant entry */
   2258                     uprv_free(var);
   2259                     break;
   2260                 }
   2261                 pLastGoodPosition = pSep;
   2262                 next = VART | EXTS | PRIV;
   2263                 continue;
   2264             }
   2265         }
   2266         if (next & EXTS) {
   2267             if (_isExtensionSingleton(pSubtag, subtagLen)) {
   2268                 if (pExtension != NULL) {
   2269                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   2270                         /* the previous extension is incomplete */
   2271                         uprv_free(pExtension);
   2272                         pExtension = NULL;
   2273                         break;
   2274                     }
   2275 
   2276                     /* terminate the previous extension value */
   2277                     *pExtValueSubtagEnd = 0;
   2278                     pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   2279 
   2280                     /* insert the extension to the list */
   2281                     if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   2282                         pLastGoodPosition = pExtValueSubtagEnd;
   2283                     } else {
   2284                         /* stop parsing here */
   2285                         uprv_free(pExtension);
   2286                         pExtension = NULL;
   2287                         break;
   2288                     }
   2289                 }
   2290 
   2291                 isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT);
   2292 
   2293                 /* create a new extension */
   2294                 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
   2295                 if (pExtension == NULL) {
   2296                     *status = U_MEMORY_ALLOCATION_ERROR;
   2297                     goto error;
   2298                 }
   2299                 *pSep = 0;
   2300                 pExtension->key = T_CString_toLowerCase(pSubtag);
   2301                 pExtension->value = NULL;   /* will be set later */
   2302 
   2303                 /*
   2304                  * reset the start and the end location of extension value
   2305                  * subtags for this extension
   2306                  */
   2307                 pExtValueSubtag = NULL;
   2308                 pExtValueSubtagEnd = NULL;
   2309 
   2310                 next = EXTV;
   2311                 continue;
   2312             }
   2313         }
   2314         if (next & EXTV) {
   2315             if (_isExtensionSubtag(pSubtag, subtagLen)) {
   2316                 if (pExtValueSubtag == NULL) {
   2317                     /* if the start postion of this extension's value is not yet,
   2318                         this one is the first value subtag */
   2319                     pExtValueSubtag = pSubtag;
   2320                 }
   2321 
   2322                 /* Mark the end of this subtag */
   2323                 pExtValueSubtagEnd = pSep;
   2324                 next = EXTS | EXTV | PRIV;
   2325 
   2326                 continue;
   2327             }
   2328         }
   2329         if (next & PRIV) {
   2330             if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
   2331                 char *pPrivuseVal;
   2332 
   2333                 if (pExtension != NULL) {
   2334                     /* Process the last extension */
   2335                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   2336                         /* the previous extension is incomplete */
   2337                         uprv_free(pExtension);
   2338                         pExtension = NULL;
   2339                         break;
   2340                     } else {
   2341                         /* terminate the previous extension value */
   2342                         *pExtValueSubtagEnd = 0;
   2343                         pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   2344 
   2345                         /* insert the extension to the list */
   2346                         if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   2347                             pLastGoodPosition = pExtValueSubtagEnd;
   2348                             pExtension = NULL;
   2349                         } else {
   2350                         /* stop parsing here */
   2351                             uprv_free(pExtension);
   2352                             pExtension = NULL;
   2353                             break;
   2354                         }
   2355                     }
   2356                 }
   2357 
   2358                 /* The rest of part will be private use value subtags */
   2359                 if (pNext == NULL) {
   2360                     /* empty private use subtag */
   2361                     break;
   2362                 }
   2363                 /* back up the private use value start position */
   2364                 pPrivuseVal = pNext;
   2365 
   2366                 /* validate private use value subtags */
   2367                 while (pNext) {
   2368                     pSubtag = pNext;
   2369                     pSep = pSubtag;
   2370                     while (*pSep) {
   2371                         if (*pSep == SEP) {
   2372                             break;
   2373                         }
   2374                         pSep++;
   2375                     }
   2376                     if (*pSep == 0) {
   2377                         /* last subtag */
   2378                         pNext = NULL;
   2379                     } else {
   2380                         pNext = pSep + 1;
   2381                     }
   2382                     subtagLen = (int32_t)(pSep - pSubtag);
   2383 
   2384                     if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
   2385                         *pSep = 0;
   2386                         next = VART;
   2387                         privateuseVar = TRUE;
   2388                         break;
   2389                     } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
   2390                         pLastGoodPosition = pSep;
   2391                     } else {
   2392                         break;
   2393                     }
   2394                 }
   2395 
   2396                 if (next == VART) {
   2397                     continue;
   2398                 }
   2399 
   2400                 if (pLastGoodPosition - pPrivuseVal > 0) {
   2401                     *pLastGoodPosition = 0;
   2402                     t->privateuse = T_CString_toLowerCase(pPrivuseVal);
   2403                 }
   2404                 /* No more subtags, exiting the parse loop */
   2405                 break;
   2406             }
   2407             break;
   2408         }
   2409 
   2410         /* If we fell through here, it means this subtag is illegal - quit parsing */
   2411         break;
   2412     }
   2413 
   2414     if (pExtension != NULL) {
   2415         /* Process the last extension */
   2416         if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   2417             /* the previous extension is incomplete */
   2418             uprv_free(pExtension);
   2419         } else {
   2420             /* terminate the previous extension value */
   2421             *pExtValueSubtagEnd = 0;
   2422             pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   2423             /* insert the extension to the list */
   2424             if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   2425                 pLastGoodPosition = pExtValueSubtagEnd;
   2426             } else {
   2427                 uprv_free(pExtension);
   2428             }
   2429         }
   2430     }
   2431 
   2432     if (parsedLen != NULL) {
   2433         *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
   2434     }
   2435 
   2436     return t;
   2437 
   2438 error:
   2439     uprv_free(t);
   2440     return NULL;
   2441 }
   2442 
   2443 static void
   2444 ultag_close(ULanguageTag* langtag) {
   2445 
   2446     if (langtag == NULL) {
   2447         return;
   2448     }
   2449 
   2450     uprv_free(langtag->buf);
   2451 
   2452     if (langtag->variants) {
   2453         VariantListEntry *curVar = langtag->variants;
   2454         while (curVar) {
   2455             VariantListEntry *nextVar = curVar->next;
   2456             uprv_free(curVar);
   2457             curVar = nextVar;
   2458         }
   2459     }
   2460 
   2461     if (langtag->extensions) {
   2462         ExtensionListEntry *curExt = langtag->extensions;
   2463         while (curExt) {
   2464             ExtensionListEntry *nextExt = curExt->next;
   2465             uprv_free(curExt);
   2466             curExt = nextExt;
   2467         }
   2468     }
   2469 
   2470     uprv_free(langtag);
   2471 }
   2472 
   2473 static const char*
   2474 ultag_getLanguage(const ULanguageTag* langtag) {
   2475     return langtag->language;
   2476 }
   2477 
   2478 #if 0
   2479 static const char*
   2480 ultag_getJDKLanguage(const ULanguageTag* langtag) {
   2481     int32_t i;
   2482     for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
   2483         if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
   2484             return DEPRECATEDLANGS[i + 1];
   2485         }
   2486     }
   2487     return langtag->language;
   2488 }
   2489 #endif
   2490 
   2491 static const char*
   2492 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
   2493     if (idx >= 0 && idx < MAXEXTLANG) {
   2494         return langtag->extlang[idx];
   2495     }
   2496     return NULL;
   2497 }
   2498 
   2499 static int32_t
   2500 ultag_getExtlangSize(const ULanguageTag* langtag) {
   2501     int32_t size = 0;
   2502     int32_t i;
   2503     for (i = 0; i < MAXEXTLANG; i++) {
   2504         if (langtag->extlang[i]) {
   2505             size++;
   2506         }
   2507     }
   2508     return size;
   2509 }
   2510 
   2511 static const char*
   2512 ultag_getScript(const ULanguageTag* langtag) {
   2513     return langtag->script;
   2514 }
   2515 
   2516 static const char*
   2517 ultag_getRegion(const ULanguageTag* langtag) {
   2518     return langtag->region;
   2519 }
   2520 
   2521 static const char*
   2522 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
   2523     const char *var = NULL;
   2524     VariantListEntry *cur = langtag->variants;
   2525     int32_t i = 0;
   2526     while (cur) {
   2527         if (i == idx) {
   2528             var = cur->variant;
   2529             break;
   2530         }
   2531         cur = cur->next;
   2532         i++;
   2533     }
   2534     return var;
   2535 }
   2536 
   2537 static int32_t
   2538 ultag_getVariantsSize(const ULanguageTag* langtag) {
   2539     int32_t size = 0;
   2540     VariantListEntry *cur = langtag->variants;
   2541     while (TRUE) {
   2542         if (cur == NULL) {
   2543             break;
   2544         }
   2545         size++;
   2546         cur = cur->next;
   2547     }
   2548     return size;
   2549 }
   2550 
   2551 static const char*
   2552 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
   2553     const char *key = NULL;
   2554     ExtensionListEntry *cur = langtag->extensions;
   2555     int32_t i = 0;
   2556     while (cur) {
   2557         if (i == idx) {
   2558             key = cur->key;
   2559             break;
   2560         }
   2561         cur = cur->next;
   2562         i++;
   2563     }
   2564     return key;
   2565 }
   2566 
   2567 static const char*
   2568 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
   2569     const char *val = NULL;
   2570     ExtensionListEntry *cur = langtag->extensions;
   2571     int32_t i = 0;
   2572     while (cur) {
   2573         if (i == idx) {
   2574             val = cur->value;
   2575             break;
   2576         }
   2577         cur = cur->next;
   2578         i++;
   2579     }
   2580     return val;
   2581 }
   2582 
   2583 static int32_t
   2584 ultag_getExtensionsSize(const ULanguageTag* langtag) {
   2585     int32_t size = 0;
   2586     ExtensionListEntry *cur = langtag->extensions;
   2587     while (TRUE) {
   2588         if (cur == NULL) {
   2589             break;
   2590         }
   2591         size++;
   2592         cur = cur->next;
   2593     }
   2594     return size;
   2595 }
   2596 
   2597 static const char*
   2598 ultag_getPrivateUse(const ULanguageTag* langtag) {
   2599     return langtag->privateuse;
   2600 }
   2601 
   2602 #if 0
   2603 static const char*
   2604 ultag_getGrandfathered(const ULanguageTag* langtag) {
   2605     return langtag->grandfathered;
   2606 }
   2607 #endif
   2608 
   2609 
   2610 /*
   2611 * -------------------------------------------------
   2612 *
   2613 * Locale/BCP47 conversion APIs, exposed as uloc_*
   2614 *
   2615 * -------------------------------------------------
   2616 */
   2617 U_CAPI int32_t U_EXPORT2
   2618 uloc_toLanguageTag(const char* localeID,
   2619                    char* langtag,
   2620                    int32_t langtagCapacity,
   2621                    UBool strict,
   2622                    UErrorCode* status) {
   2623     /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
   2624     char canonical[256];
   2625     int32_t reslen = 0;
   2626     UErrorCode tmpStatus = U_ZERO_ERROR;
   2627     UBool hadPosix = FALSE;
   2628     const char* pKeywordStart;
   2629 
   2630     /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
   2631     canonical[0] = 0;
   2632     if (uprv_strlen(localeID) > 0) {
   2633         uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
   2634         if (tmpStatus != U_ZERO_ERROR) {
   2635             *status = U_ILLEGAL_ARGUMENT_ERROR;
   2636             return 0;
   2637         }
   2638     }
   2639 
   2640     /* For handling special case - private use only tag */
   2641     pKeywordStart = locale_getKeywordsStart(canonical);
   2642     if (pKeywordStart == canonical) {
   2643         UEnumeration *kwdEnum;
   2644         int kwdCnt = 0;
   2645         UBool done = FALSE;
   2646 
   2647         kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
   2648         if (kwdEnum != NULL) {
   2649             kwdCnt = uenum_count(kwdEnum, &tmpStatus);
   2650             if (kwdCnt == 1) {
   2651                 const char *key;
   2652                 int32_t len = 0;
   2653 
   2654                 key = uenum_next(kwdEnum, &len, &tmpStatus);
   2655                 if (len == 1 && *key == PRIVATEUSE) {
   2656                     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   2657                     buf[0] = PRIVATEUSE;
   2658                     buf[1] = SEP;
   2659                     len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
   2660                     if (U_SUCCESS(tmpStatus)) {
   2661                         if (_isPrivateuseValueSubtags(&buf[2], len)) {
   2662                             /* return private use only tag */
   2663                             reslen = len + 2;
   2664                             uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
   2665                             u_terminateChars(langtag, langtagCapacity, reslen, status);
   2666                             done = TRUE;
   2667                         } else if (strict) {
   2668                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   2669                             done = TRUE;
   2670                         }
   2671                         /* if not strict mode, then "und" will be returned */
   2672                     } else {
   2673                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   2674                         done = TRUE;
   2675                     }
   2676                 }
   2677             }
   2678             uenum_close(kwdEnum);
   2679             if (done) {
   2680                 return reslen;
   2681             }
   2682         }
   2683     }
   2684 
   2685     reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
   2686     reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2687     reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2688     reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
   2689     reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
   2690     reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
   2691 
   2692     return reslen;
   2693 }
   2694 
   2695 
   2696 U_CAPI int32_t U_EXPORT2
   2697 uloc_forLanguageTag(const char* langtag,
   2698                     char* localeID,
   2699                     int32_t localeIDCapacity,
   2700                     int32_t* parsedLength,
   2701                     UErrorCode* status) {
   2702     ULanguageTag *lt;
   2703     int32_t reslen = 0;
   2704     const char *subtag, *p;
   2705     int32_t len;
   2706     int32_t i, n;
   2707     UBool noRegion = TRUE;
   2708 
   2709     lt = ultag_parse(langtag, -1, parsedLength, status);
   2710     if (U_FAILURE(*status)) {
   2711         return 0;
   2712     }
   2713 
   2714     /* language */
   2715     subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
   2716     if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
   2717         len = (int32_t)uprv_strlen(subtag);
   2718         if (len > 0) {
   2719             if (reslen < localeIDCapacity) {
   2720                 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
   2721             }
   2722             reslen += len;
   2723         }
   2724     }
   2725 
   2726     /* script */
   2727     subtag = ultag_getScript(lt);
   2728     len = (int32_t)uprv_strlen(subtag);
   2729     if (len > 0) {
   2730         if (reslen < localeIDCapacity) {
   2731             *(localeID + reslen) = LOCALE_SEP;
   2732         }
   2733         reslen++;
   2734 
   2735         /* write out the script in title case */
   2736         p = subtag;
   2737         while (*p) {
   2738             if (reslen < localeIDCapacity) {
   2739                 if (p == subtag) {
   2740                     *(localeID + reslen) = uprv_toupper(*p);
   2741                 } else {
   2742                     *(localeID + reslen) = *p;
   2743                 }
   2744             }
   2745             reslen++;
   2746             p++;
   2747         }
   2748     }
   2749 
   2750     /* region */
   2751     subtag = ultag_getRegion(lt);
   2752     len = (int32_t)uprv_strlen(subtag);
   2753     if (len > 0) {
   2754         if (reslen < localeIDCapacity) {
   2755             *(localeID + reslen) = LOCALE_SEP;
   2756         }
   2757         reslen++;
   2758         /* write out the retion in upper case */
   2759         p = subtag;
   2760         while (*p) {
   2761             if (reslen < localeIDCapacity) {
   2762                 *(localeID + reslen) = uprv_toupper(*p);
   2763             }
   2764             reslen++;
   2765             p++;
   2766         }
   2767         noRegion = FALSE;
   2768     }
   2769 
   2770     /* variants */
   2771     n = ultag_getVariantsSize(lt);
   2772     if (n > 0) {
   2773         if (noRegion) {
   2774             if (reslen < localeIDCapacity) {
   2775                 *(localeID + reslen) = LOCALE_SEP;
   2776             }
   2777             reslen++;
   2778         }
   2779 
   2780         for (i = 0; i < n; i++) {
   2781             subtag = ultag_getVariant(lt, i);
   2782             if (reslen < localeIDCapacity) {
   2783                 *(localeID + reslen) = LOCALE_SEP;
   2784             }
   2785             reslen++;
   2786             /* write out the variant in upper case */
   2787             p = subtag;
   2788             while (*p) {
   2789                 if (reslen < localeIDCapacity) {
   2790                     *(localeID + reslen) = uprv_toupper(*p);
   2791                 }
   2792                 reslen++;
   2793                 p++;
   2794             }
   2795         }
   2796     }
   2797 
   2798     /* keywords */
   2799     n = ultag_getExtensionsSize(lt);
   2800     subtag = ultag_getPrivateUse(lt);
   2801     if (n > 0 || uprv_strlen(subtag) > 0) {
   2802         if (reslen == 0 && n > 0) {
   2803             /* need a language */
   2804             if (reslen < localeIDCapacity) {
   2805                 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
   2806             }
   2807             reslen += LANG_UND_LEN;
   2808         }
   2809         len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
   2810         reslen += len;
   2811     }
   2812 
   2813     ultag_close(lt);
   2814     return u_terminateChars(localeID, localeIDCapacity, reslen, status);
   2815 }
   2816 
   2817 
   2818