Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2009-2011, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 */
      7 
      8 #include "unicode/utypes.h"
      9 #include "unicode/ures.h"
     10 #include "unicode/putil.h"
     11 #include "unicode/uloc.h"
     12 #include "ustr_imp.h"
     13 #include "cmemory.h"
     14 #include "cstring.h"
     15 #include "putilimp.h"
     16 #include "uinvchar.h"
     17 #include "ulocimp.h"
     18 
/*
 * Node of a singly linked list holding one variant subtag.
 * Nodes are linked by _addVariantToList(), which appends in arrival order
 * and refuses an entry whose `variant` compares equal to an existing one.
 */
typedef struct VariantListEntry {
    const char              *variant;   /* variant subtag text; NOTE(review): ownership not visible here */
    struct VariantListEntry *next;      /* next node, NULL at the tail */
} VariantListEntry;
     24 
/*
 * Node of a singly linked list holding one attribute value.
 * Nodes are linked by _addAttributeToList(), which keeps the list sorted
 * by `attribute` and refuses duplicates.
 */
typedef struct AttributeListEntry {
    const char              *attribute;   /* attribute text; NOTE(review): ownership not visible here */
    struct AttributeListEntry *next;      /* next node, NULL at the tail */
} AttributeListEntry;
     30 
/*
 * Node of a singly linked list holding one extension as a key/value pair.
 * Nodes are linked by _addExtensionToList(), which keeps the list sorted
 * by `key` and refuses an entry whose key is already present.
 */
typedef struct ExtensionListEntry {
    const char                  *key;     /* extension key used for ordering and duplicate detection */
    const char                  *value;   /* value associated with the key */
    struct ExtensionListEntry   *next;    /* next node, NULL at the tail */
} ExtensionListEntry;
     37 
/* Number of slots in ULanguageTag.extlang. */
#define MAXEXTLANG 3
/*
 * Parsed representation of a language tag.
 * _initializeULanguageTag() sets the string fields language, script, region,
 * privateuse and grandfathered to the shared empty string, and sets buf,
 * every extlang slot and the three list heads to NULL.
 */
typedef struct ULanguageTag {
    char                *buf;   /* holding parsed subtags */
    const char          *language;              /* language subtag, "" when unset */
    const char          *extlang[MAXEXTLANG];   /* extlang subtags, NULL for unused slots */
    const char          *script;                /* script subtag, "" when unset */
    const char          *region;                /* region subtag, "" when unset */
    VariantListEntry    *variants;              /* head of the variant list, NULL when empty */
    ExtensionListEntry  *extensions;            /* head of the extension list, NULL when empty */
    AttributeListEntry  *attributes;            /* head of the attribute list, NULL when empty */
    const char          *privateuse;            /* private use part, "" when unset */
    const char          *grandfathered;         /* grandfathered tag, "" when unset */
} ULanguageTag;
     51 
/* NOTE(review): MINLEN is not referenced in the visible part of this file; presumably a minimum tag length - confirm. */
#define MINLEN 2
/* Separator between subtags of a language tag. */
#define SEP '-'
/* Singleton reserved for private use; _isExtensionSingleton() rejects it. */
#define PRIVATEUSE 'x'
/* Singleton that _addExtensionToList() uses as the sort position of multi-character keys. */
#define LDMLEXT 'u'

/*
 * Separator characters on the locale ID side.
 * NOTE(review): not referenced in the visible part of this file.
 */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/*
 * Character class tests on the literal ranges 'A'-'Z', 'a'-'z' and '0'-'9'.
 * The argument is evaluated more than once, so it must be free of side effects.
 */
#define ISALPHA(c) (((c)>='A' && (c)<='Z') || ((c)>='a' && (c)<='z'))
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')
     64 
/* Shared empty string; _initializeULanguageTag() uses it for unset string fields. */
static const char* EMPTY = "";
/*
 * Fixed strings used by the conversion code.
 * NOTE(review): the remaining constants are not referenced in the visible
 * part of this file; their use should be confirmed against the callers.
 */
static const char* LANG_UND = "und";
static const char* PRIVATEUSE_KEY = "x";
static const char* _POSIX = "_POSIX";
static const char* POSIX_KEY = "va";
static const char* POSIX_VALUE = "posix";
static const char* LOCALE_ATTRIBUTE_KEY = "attribute";
static const char* PRIVUSE_VARIANT_PREFIX = "lvariant";

/* Length of LANG_UND ("und") without the terminator. */
#define LANG_UND_LEN 3
     75 
/*
 * Table of grandfathered tags and their preferred replacements.
 * Entries come in pairs: even index = grandfathered tag, odd index =
 * preferred value. The table ends with a NULL, NULL pair.
 */
static const char* GRANDFATHERED[] = {
/*  grandfathered   preferred */
    "art-lojban",   "jbo",
    "cel-gaulish",  "xtg-x-cel-gaulish",
    "en-GB-oed",    "en-GB-x-oed",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-default",    "en-x-i-default",
    "i-enochian",   "und-x-i-enochian",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-mingo",      "see-x-i-mingo",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min",       "nan-x-zh-min",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",
    NULL,           NULL
};
    106 
/*
 * Table of deprecated language codes and their replacements.
 * Entries come in pairs: even index = deprecated code, odd index = new code.
 * The table ends with a NULL, NULL pair.
 */
static const char* DEPRECATEDLANGS[] = {
/*  deprecated  new */
    "iw",       "he",
    "ji",       "yi",
    "in",       "id",
    NULL,       NULL
};
    114 
    115 /*
    116 * -------------------------------------------------
    117 *
    118 * These ultag_ functions may be exposed as APIs later
    119 *
    120 * -------------------------------------------------
    121 */
    122 
/*
 * Forward declarations of the file-local ultag_ functions.
 * The definitions are not part of the portion of the file shown here.
 * Declarations wrapped in "#if 0" are compiled out.
 */

/* NOTE(review): presumably writes the number of consumed characters to parsedLen and reports errors through status - confirm against the definition. */
static ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);

static void
ultag_close(ULanguageTag* langtag);

static const char*
ultag_getLanguage(const ULanguageTag* langtag);

#if 0
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag);
#endif

/* Indexed accessors are paired with a matching ...Size() function. */
static const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtlangSize(const ULanguageTag* langtag);

static const char*
ultag_getScript(const ULanguageTag* langtag);

static const char*
ultag_getRegion(const ULanguageTag* langtag);

static const char*
ultag_getVariant(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getVariantsSize(const ULanguageTag* langtag);

#if 0
/* Currently not being used. */
static const char*
ultag_getAttribute(const ULanguageTag* langtag, int32_t idx);
#endif

static int32_t
ultag_getAttributesSize(const ULanguageTag* langtag);

static const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);

static const char*
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtensionsSize(const ULanguageTag* langtag);

static const char*
ultag_getPrivateUse(const ULanguageTag* langtag);

#if 0
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag);
#endif
    180 
    181 /*
    182 * -------------------------------------------------
    183 *
    184 * Language subtag syntax validation functions
    185 *
    186 * -------------------------------------------------
    187 */
    188 
    189 static UBool
    190 _isAlphaString(const char* s, int32_t len) {
    191     int32_t i;
    192     for (i = 0; i < len; i++) {
    193         if (!ISALPHA(*(s + i))) {
    194             return FALSE;
    195         }
    196     }
    197     return TRUE;
    198 }
    199 
    200 static UBool
    201 _isNumericString(const char* s, int32_t len) {
    202     int32_t i;
    203     for (i = 0; i < len; i++) {
    204         if (!ISNUMERIC(*(s + i))) {
    205             return FALSE;
    206         }
    207     }
    208     return TRUE;
    209 }
    210 
    211 static UBool
    212 _isAlphaNumericString(const char* s, int32_t len) {
    213     int32_t i;
    214     for (i = 0; i < len; i++) {
    215         if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
    216             return FALSE;
    217         }
    218     }
    219     return TRUE;
    220 }
    221 
    222 static UBool
    223 _isLanguageSubtag(const char* s, int32_t len) {
    224     /*
    225      * language      = 2*3ALPHA            ; shortest ISO 639 code
    226      *                 ["-" extlang]       ; sometimes followed by
    227      *                                     ;   extended language subtags
    228      *               / 4ALPHA              ; or reserved for future use
    229      *               / 5*8ALPHA            ; or registered language subtag
    230      */
    231     if (len < 0) {
    232         len = (int32_t)uprv_strlen(s);
    233     }
    234     if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
    235         return TRUE;
    236     }
    237     return FALSE;
    238 }
    239 
    240 static UBool
    241 _isExtlangSubtag(const char* s, int32_t len) {
    242     /*
    243      * extlang       = 3ALPHA              ; selected ISO 639 codes
    244      *                 *2("-" 3ALPHA)      ; permanently reserved
    245      */
    246     if (len < 0) {
    247         len = (int32_t)uprv_strlen(s);
    248     }
    249     if (len == 3 && _isAlphaString(s, len)) {
    250         return TRUE;
    251     }
    252     return FALSE;
    253 }
    254 
    255 static UBool
    256 _isScriptSubtag(const char* s, int32_t len) {
    257     /*
    258      * script        = 4ALPHA              ; ISO 15924 code
    259      */
    260     if (len < 0) {
    261         len = (int32_t)uprv_strlen(s);
    262     }
    263     if (len == 4 && _isAlphaString(s, len)) {
    264         return TRUE;
    265     }
    266     return FALSE;
    267 }
    268 
    269 static UBool
    270 _isRegionSubtag(const char* s, int32_t len) {
    271     /*
    272      * region        = 2ALPHA              ; ISO 3166-1 code
    273      *               / 3DIGIT              ; UN M.49 code
    274      */
    275     if (len < 0) {
    276         len = (int32_t)uprv_strlen(s);
    277     }
    278     if (len == 2 && _isAlphaString(s, len)) {
    279         return TRUE;
    280     }
    281     if (len == 3 && _isNumericString(s, len)) {
    282         return TRUE;
    283     }
    284     return FALSE;
    285 }
    286 
    287 static UBool
    288 _isVariantSubtag(const char* s, int32_t len) {
    289     /*
    290      * variant       = 5*8alphanum         ; registered variants
    291      *               / (DIGIT 3alphanum)
    292      */
    293     if (len < 0) {
    294         len = (int32_t)uprv_strlen(s);
    295     }
    296     if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
    297         return TRUE;
    298     }
    299     if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
    300         return TRUE;
    301     }
    302     return FALSE;
    303 }
    304 
    305 static UBool
    306 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
    307     /*
    308      * variant       = 1*8alphanum         ; registered variants
    309      *               / (DIGIT 3alphanum)
    310      */
    311     if (len < 0) {
    312         len = (int32_t)uprv_strlen(s);
    313     }
    314     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
    315         return TRUE;
    316     }
    317     return FALSE;
    318 }
    319 
    320 static UBool
    321 _isAttributeSubtag(const char* s, int32_t len) {
    322     /*
    323      * attribute     = 3*8alphanum
    324      */
    325     if (len < 0) {
    326         len = (int32_t)uprv_strlen(s);
    327     }
    328     if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
    329         return TRUE;
    330     }
    331     return FALSE;
    332 }
    333 
    334 static UBool
    335 _isExtensionSingleton(const char* s, int32_t len) {
    336     /*
    337      * extension     = singleton 1*("-" (2*8alphanum))
    338      */
    339     if (len < 0) {
    340         len = (int32_t)uprv_strlen(s);
    341     }
    342     if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
    343         return TRUE;
    344     }
    345     return FALSE;
    346 }
    347 
    348 static UBool
    349 _isExtensionSubtag(const char* s, int32_t len) {
    350     /*
    351      * extension     = singleton 1*("-" (2*8alphanum))
    352      */
    353     if (len < 0) {
    354         len = (int32_t)uprv_strlen(s);
    355     }
    356     if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
    357         return TRUE;
    358     }
    359     return FALSE;
    360 }
    361 
    362 static UBool
    363 _isExtensionSubtags(const char* s, int32_t len) {
    364     const char *p = s;
    365     const char *pSubtag = NULL;
    366 
    367     if (len < 0) {
    368         len = (int32_t)uprv_strlen(s);
    369     }
    370 
    371     while ((p - s) < len) {
    372         if (*p == SEP) {
    373             if (pSubtag == NULL) {
    374                 return FALSE;
    375             }
    376             if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
    377                 return FALSE;
    378             }
    379             pSubtag = NULL;
    380         } else if (pSubtag == NULL) {
    381             pSubtag = p;
    382         }
    383         p++;
    384     }
    385     if (pSubtag == NULL) {
    386         return FALSE;
    387     }
    388     return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
    389 }
    390 
    391 static UBool
    392 _isPrivateuseValueSubtag(const char* s, int32_t len) {
    393     /*
    394      * privateuse    = "x" 1*("-" (1*8alphanum))
    395      */
    396     if (len < 0) {
    397         len = (int32_t)uprv_strlen(s);
    398     }
    399     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
    400         return TRUE;
    401     }
    402     return FALSE;
    403 }
    404 
    405 static UBool
    406 _isPrivateuseValueSubtags(const char* s, int32_t len) {
    407     const char *p = s;
    408     const char *pSubtag = NULL;
    409 
    410     if (len < 0) {
    411         len = (int32_t)uprv_strlen(s);
    412     }
    413 
    414     while ((p - s) < len) {
    415         if (*p == SEP) {
    416             if (pSubtag == NULL) {
    417                 return FALSE;
    418             }
    419             if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
    420                 return FALSE;
    421             }
    422             pSubtag = NULL;
    423         } else if (pSubtag == NULL) {
    424             pSubtag = p;
    425         }
    426         p++;
    427     }
    428     if (pSubtag == NULL) {
    429         return FALSE;
    430     }
    431     return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
    432 }
    433 
    434 static UBool
    435 _isLDMLKey(const char* s, int32_t len) {
    436     if (len < 0) {
    437         len = (int32_t)uprv_strlen(s);
    438     }
    439     if (len == 2 && _isAlphaNumericString(s, len)) {
    440         return TRUE;
    441     }
    442     return FALSE;
    443 }
    444 
    445 static UBool
    446 _isLDMLType(const char* s, int32_t len) {
    447     if (len < 0) {
    448         len = (int32_t)uprv_strlen(s);
    449     }
    450     if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
    451         return TRUE;
    452     }
    453     return FALSE;
    454 }
    455 
    456 /*
    457 * -------------------------------------------------
    458 *
    459 * Helper functions
    460 *
    461 * -------------------------------------------------
    462 */
    463 
    464 static UBool
    465 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
    466     UBool bAdded = TRUE;
    467 
    468     if (*first == NULL) {
    469         var->next = NULL;
    470         *first = var;
    471     } else {
    472         VariantListEntry *prev, *cur;
    473         int32_t cmp;
    474 
    475         /* variants order should be preserved */
    476         prev = NULL;
    477         cur = *first;
    478         while (TRUE) {
    479             if (cur == NULL) {
    480                 prev->next = var;
    481                 var->next = NULL;
    482                 break;
    483             }
    484 
    485             /* Checking for duplicate variant */
    486             cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
    487             if (cmp == 0) {
    488                 /* duplicated variant */
    489                 bAdded = FALSE;
    490                 break;
    491             }
    492             prev = cur;
    493             cur = cur->next;
    494         }
    495     }
    496 
    497     return bAdded;
    498 }
    499 
    500 static UBool
    501 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
    502     UBool bAdded = TRUE;
    503 
    504     if (*first == NULL) {
    505         attr->next = NULL;
    506         *first = attr;
    507     } else {
    508         AttributeListEntry *prev, *cur;
    509         int32_t cmp;
    510 
    511         /* reorder variants in alphabetical order */
    512         prev = NULL;
    513         cur = *first;
    514         while (TRUE) {
    515             if (cur == NULL) {
    516                 prev->next = attr;
    517                 attr->next = NULL;
    518                 break;
    519             }
    520             cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
    521             if (cmp < 0) {
    522                 if (prev == NULL) {
    523                     *first = attr;
    524                 } else {
    525                     prev->next = attr;
    526                 }
    527                 attr->next = cur;
    528                 break;
    529             }
    530             if (cmp == 0) {
    531                 /* duplicated variant */
    532                 bAdded = FALSE;
    533                 break;
    534             }
    535             prev = cur;
    536             cur = cur->next;
    537         }
    538     }
    539 
    540     return bAdded;
    541 }
    542 
    543 
    544 static UBool
    545 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
    546     UBool bAdded = TRUE;
    547 
    548     if (*first == NULL) {
    549         ext->next = NULL;
    550         *first = ext;
    551     } else {
    552         ExtensionListEntry *prev, *cur;
    553         int32_t cmp;
    554 
    555         /* reorder variants in alphabetical order */
    556         prev = NULL;
    557         cur = *first;
    558         while (TRUE) {
    559             if (cur == NULL) {
    560                 prev->next = ext;
    561                 ext->next = NULL;
    562                 break;
    563             }
    564             if (localeToBCP) {
    565                 /* special handling for locale to bcp conversion */
    566                 int32_t len, curlen;
    567 
    568                 len = (int32_t)uprv_strlen(ext->key);
    569                 curlen = (int32_t)uprv_strlen(cur->key);
    570 
    571                 if (len == 1 && curlen == 1) {
    572                     if (*(ext->key) == *(cur->key)) {
    573                         cmp = 0;
    574                     } else if (*(ext->key) == PRIVATEUSE) {
    575                         cmp = 1;
    576                     } else if (*(cur->key) == PRIVATEUSE) {
    577                         cmp = -1;
    578                     } else {
    579                         cmp = *(ext->key) - *(cur->key);
    580                     }
    581                 } else if (len == 1) {
    582                     cmp = *(ext->key) - LDMLEXT;
    583                 } else if (curlen == 1) {
    584                     cmp = LDMLEXT - *(cur->key);
    585                 } else {
    586                     cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
    587                 }
    588             } else {
    589                 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
    590             }
    591             if (cmp < 0) {
    592                 if (prev == NULL) {
    593                     *first = ext;
    594                 } else {
    595                     prev->next = ext;
    596                 }
    597                 ext->next = cur;
    598                 break;
    599             }
    600             if (cmp == 0) {
    601                 /* duplicated extension key */
    602                 bAdded = FALSE;
    603                 break;
    604             }
    605             prev = cur;
    606             cur = cur->next;
    607         }
    608     }
    609 
    610     return bAdded;
    611 }
    612 
    613 static void
    614 _initializeULanguageTag(ULanguageTag* langtag) {
    615     int32_t i;
    616 
    617     langtag->buf = NULL;
    618 
    619     langtag->language = EMPTY;
    620     for (i = 0; i < MAXEXTLANG; i++) {
    621         langtag->extlang[i] = NULL;
    622     }
    623 
    624     langtag->script = EMPTY;
    625     langtag->region = EMPTY;
    626 
    627     langtag->variants = NULL;
    628     langtag->extensions = NULL;
    629 
    630     langtag->attributes = NULL;
    631 
    632     langtag->grandfathered = EMPTY;
    633     langtag->privateuse = EMPTY;
    634 }
    635 
/* Name of the resource bundle opened with ures_openDirect() by the mapping functions below. */
#define KEYTYPEDATA     "keyTypeData"
/* Names of the tables looked up inside that bundle. */
#define KEYMAP          "keyMap"
#define TYPEMAP         "typeMap"
#define TYPEALIAS       "typeAlias"
/* Sizes of the stack buffers used by the mapping functions; each size includes room for the terminator. */
#define MAX_BCP47_SUBTAG_LEN    9   /* including null terminator */
#define MAX_LDML_KEY_LEN        22  /* the callers reject keys of 22 or more characters */
#define MAX_LDML_TYPE_LEN       32
    643 
    644 static int32_t
    645 _ldmlKeyToBCP47(const char* key, int32_t keyLen,
    646                 char* bcpKey, int32_t bcpKeyCapacity,
    647                 UErrorCode *status) {
    648     UResourceBundle *rb;
    649     char keyBuf[MAX_LDML_KEY_LEN];
    650     char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    651     int32_t resultLen = 0;
    652     int32_t i;
    653     UErrorCode tmpStatus = U_ZERO_ERROR;
    654     const UChar *uBcpKey;
    655     int32_t bcpKeyLen;
    656 
    657     if (keyLen < 0) {
    658         keyLen = (int32_t)uprv_strlen(key);
    659     }
    660 
    661     if (keyLen >= sizeof(keyBuf)) {
    662         /* no known valid LDML key exceeding 21 */
    663         *status = U_ILLEGAL_ARGUMENT_ERROR;
    664         return 0;
    665     }
    666 
    667     uprv_memcpy(keyBuf, key, keyLen);
    668     keyBuf[keyLen] = 0;
    669 
    670     /* to lower case */
    671     for (i = 0; i < keyLen; i++) {
    672         keyBuf[i] = uprv_tolower(keyBuf[i]);
    673     }
    674 
    675     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    676     ures_getByKey(rb, KEYMAP, rb, status);
    677 
    678     if (U_FAILURE(*status)) {
    679         ures_close(rb);
    680         return 0;
    681     }
    682 
    683     uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
    684     if (U_SUCCESS(tmpStatus)) {
    685         u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
    686         bcpKeyBuf[bcpKeyLen] = 0;
    687         resultLen = bcpKeyLen;
    688     } else {
    689         if (_isLDMLKey(key, keyLen)) {
    690             uprv_memcpy(bcpKeyBuf, key, keyLen);
    691             bcpKeyBuf[keyLen] = 0;
    692             resultLen = keyLen;
    693         } else {
    694             /* mapping not availabe */
    695             *status = U_ILLEGAL_ARGUMENT_ERROR;
    696         }
    697     }
    698     ures_close(rb);
    699 
    700     if (U_FAILURE(*status)) {
    701         return 0;
    702     }
    703 
    704     uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
    705     return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
    706 }
    707 
    708 static int32_t
    709 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
    710                 char* key, int32_t keyCapacity,
    711                 UErrorCode *status) {
    712     UResourceBundle *rb;
    713     char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    714     int32_t resultLen = 0;
    715     int32_t i;
    716     const char *resKey = NULL;
    717     UResourceBundle *mapData;
    718 
    719     if (bcpKeyLen < 0) {
    720         bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
    721     }
    722 
    723     if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
    724         *status = U_ILLEGAL_ARGUMENT_ERROR;
    725         return 0;
    726     }
    727 
    728     uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
    729     bcpKeyBuf[bcpKeyLen] = 0;
    730 
    731     /* to lower case */
    732     for (i = 0; i < bcpKeyLen; i++) {
    733         bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
    734     }
    735 
    736     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    737     ures_getByKey(rb, KEYMAP, rb, status);
    738     if (U_FAILURE(*status)) {
    739         ures_close(rb);
    740         return 0;
    741     }
    742 
    743     mapData = ures_getNextResource(rb, NULL, status);
    744     while (U_SUCCESS(*status)) {
    745         const UChar *uBcpKey;
    746         char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    747         int32_t tmpBcpKeyLen;
    748 
    749         uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
    750         if (U_FAILURE(*status)) {
    751             break;
    752         }
    753         u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
    754         tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
    755         if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) {
    756             /* found a matching BCP47 key */
    757             resKey = ures_getKey(mapData);
    758             resultLen = (int32_t)uprv_strlen(resKey);
    759             break;
    760         }
    761         if (!ures_hasNext(rb)) {
    762             break;
    763         }
    764         ures_getNextResource(rb, mapData, status);
    765     }
    766     ures_close(mapData);
    767     ures_close(rb);
    768 
    769     if (U_FAILURE(*status)) {
    770         return 0;
    771     }
    772 
    773     if (resKey == NULL) {
    774         resKey = bcpKeyBuf;
    775         resultLen = bcpKeyLen;
    776     }
    777 
    778     uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
    779     return u_terminateChars(key, keyCapacity, resultLen, status);
    780 }
    781 
    782 static int32_t
    783 _ldmlTypeToBCP47(const char* key, int32_t keyLen,
    784                  const char* type, int32_t typeLen,
    785                  char* bcpType, int32_t bcpTypeCapacity,
    786                  UErrorCode *status) {
    787     UResourceBundle *rb, *keyTypeData, *typeMapForKey;
    788     char keyBuf[MAX_LDML_KEY_LEN];
    789     char typeBuf[MAX_LDML_TYPE_LEN];
    790     char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
    791     int32_t resultLen = 0;
    792     int32_t i;
    793     UErrorCode tmpStatus = U_ZERO_ERROR;
    794     const UChar *uBcpType, *uCanonicalType;
    795     int32_t bcpTypeLen, canonicalTypeLen;
    796     UBool isTimezone = FALSE;
    797 
    798     if (keyLen < 0) {
    799         keyLen = (int32_t)uprv_strlen(key);
    800     }
    801     if (keyLen >= sizeof(keyBuf)) {
    802         /* no known valid LDML key exceeding 21 */
    803         *status = U_ILLEGAL_ARGUMENT_ERROR;
    804         return 0;
    805     }
    806     uprv_memcpy(keyBuf, key, keyLen);
    807     keyBuf[keyLen] = 0;
    808 
    809     /* to lower case */
    810     for (i = 0; i < keyLen; i++) {
    811         keyBuf[i] = uprv_tolower(keyBuf[i]);
    812     }
    813     if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
    814         isTimezone = TRUE;
    815     }
    816 
    817     if (typeLen < 0) {
    818         typeLen = (int32_t)uprv_strlen(type);
    819     }
    820     if (typeLen >= sizeof(typeBuf)) {
    821         *status = U_ILLEGAL_ARGUMENT_ERROR;
    822         return 0;
    823     }
    824 
    825     if (isTimezone) {
    826         /* replace '/' with ':' */
    827         for (i = 0; i < typeLen; i++) {
    828             if (*(type + i) == '/') {
    829                 typeBuf[i] = ':';
    830             } else {
    831                 typeBuf[i] = *(type + i);
    832             }
    833         }
    834         typeBuf[typeLen] = 0;
    835         type = &typeBuf[0];
    836     }
    837 
    838     keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
    839     rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
    840     if (U_FAILURE(*status)) {
    841         ures_close(rb);
    842         ures_close(keyTypeData);
    843         return 0;
    844     }
    845 
    846     typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
    847     uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus);
    848     if (U_SUCCESS(tmpStatus)) {
    849         u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
    850         resultLen = bcpTypeLen;
    851     } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
    852         /* is this type alias? */
    853         tmpStatus = U_ZERO_ERROR;
    854         ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
    855         ures_getByKey(rb, keyBuf, rb, &tmpStatus);
    856         uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus);
    857         if (U_SUCCESS(tmpStatus)) {
    858             u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
    859             if (isTimezone) {
    860                 /* replace '/' with ':' */
    861                 for (i = 0; i < canonicalTypeLen; i++) {
    862                     if (typeBuf[i] == '/') {
    863                         typeBuf[i] = ':';
    864                     }
    865                 }
    866             }
    867             typeBuf[canonicalTypeLen] = 0;
    868 
    869             /* look up the canonical type */
    870             uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
    871             if (U_SUCCESS(tmpStatus)) {
    872                 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
    873                 resultLen = bcpTypeLen;
    874             }
    875         }
    876         if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
    877             if (_isLDMLType(type, typeLen)) {
    878                 uprv_memcpy(bcpTypeBuf, type, typeLen);
    879                 resultLen = typeLen;
    880             } else {
    881                 /* mapping not availabe */
    882                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    883             }
    884         }
    885     } else {
    886         *status = tmpStatus;
    887     }
    888     ures_close(rb);
    889     ures_close(typeMapForKey);
    890     ures_close(keyTypeData);
    891 
    892     if (U_FAILURE(*status)) {
    893         return 0;
    894     }
    895 
    896     uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
    897     return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
    898 }
    899 
    900 static int32_t
    901 _bcp47ToLDMLType(const char* key, int32_t keyLen,
    902                  const char* bcpType, int32_t bcpTypeLen,
    903                  char* type, int32_t typeCapacity,
    904                  UErrorCode *status) {
    905     UResourceBundle *rb;
    906     char keyBuf[MAX_LDML_KEY_LEN];
    907     char bcpTypeBuf[ULOC_KEYWORDS_CAPACITY]; /* ensure buffter is large enough for multiple values (e.g. buddhist-greg) */
    908     int32_t resultLen = 0;
    909     int32_t i, typeSize;
    910     const char *resType = NULL;
    911     UResourceBundle *mapData;
    912     UErrorCode tmpStatus = U_ZERO_ERROR;
    913     int32_t copyLen;
    914 
    915     if (keyLen < 0) {
    916         keyLen = (int32_t)uprv_strlen(key);
    917     }
    918 
    919     if (keyLen >= sizeof(keyBuf)) {
    920         /* no known valid LDML key exceeding 21 */
    921         *status = U_ILLEGAL_ARGUMENT_ERROR;
    922         return 0;
    923     }
    924     uprv_memcpy(keyBuf, key, keyLen);
    925     keyBuf[keyLen] = 0;
    926 
    927     /* to lower case */
    928     for (i = 0; i < keyLen; i++) {
    929         keyBuf[i] = uprv_tolower(keyBuf[i]);
    930     }
    931 
    932 
    933     if (bcpTypeLen < 0) {
    934         bcpTypeLen = (int32_t)uprv_strlen(bcpType);
    935     }
    936 
    937     typeSize = 0;
    938     for (i = 0; i < bcpTypeLen; i++) {
    939         if (bcpType[i] == SEP) {
    940             if (typeSize >= MAX_BCP47_SUBTAG_LEN) {
    941                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    942                 return 0;
    943             }
    944             typeSize = 0;
    945         } else {
    946             typeSize++;
    947         }
    948     }
    949 
    950     uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
    951     bcpTypeBuf[bcpTypeLen] = 0;
    952 
    953     /* to lower case */
    954     for (i = 0; i < bcpTypeLen; i++) {
    955         bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
    956     }
    957 
    958     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    959     ures_getByKey(rb, TYPEMAP, rb, status);
    960     if (U_FAILURE(*status)) {
    961         ures_close(rb);
    962         return 0;
    963     }
    964 
    965     ures_getByKey(rb, keyBuf, rb, &tmpStatus);
    966     mapData = ures_getNextResource(rb, NULL, &tmpStatus);
    967     while (U_SUCCESS(tmpStatus)) {
    968         const UChar *uBcpType;
    969         char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
    970         int32_t tmpBcpTypeLen;
    971 
    972         uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
    973         if (U_FAILURE(tmpStatus)) {
    974             break;
    975         }
    976         u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
    977         tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
    978         if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) {
    979             /* found a matching BCP47 type */
    980             resType = ures_getKey(mapData);
    981             resultLen = (int32_t)uprv_strlen(resType);
    982             break;
    983         }
    984         if (!ures_hasNext(rb)) {
    985             break;
    986         }
    987         ures_getNextResource(rb, mapData, &tmpStatus);
    988     }
    989     ures_close(mapData);
    990     ures_close(rb);
    991 
    992     if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
    993         *status = tmpStatus;
    994         return 0;
    995     }
    996 
    997     if (resType == NULL) {
    998         resType = bcpTypeBuf;
    999         resultLen = bcpTypeLen;
   1000     }
   1001 
   1002     copyLen = uprv_min(resultLen, typeCapacity);
   1003     uprv_memcpy(type, resType, copyLen);
   1004 
   1005     if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
   1006         for (i = 0; i < copyLen; i++) {
   1007             if (*(type + i) == ':') {
   1008                 *(type + i) = '/';
   1009             }
   1010         }
   1011     }
   1012 
   1013     return u_terminateChars(type, typeCapacity, resultLen, status);
   1014 }
   1015 
   1016 static int32_t
   1017 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
   1018     char buf[ULOC_LANG_CAPACITY];
   1019     UErrorCode tmpStatus = U_ZERO_ERROR;
   1020     int32_t len, i;
   1021     int32_t reslen = 0;
   1022 
   1023     if (U_FAILURE(*status)) {
   1024         return 0;
   1025     }
   1026 
   1027     len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
   1028     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1029         if (strict) {
   1030             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1031             return 0;
   1032         }
   1033         len = 0;
   1034     }
   1035 
   1036     /* Note: returned language code is in lower case letters */
   1037 
   1038     if (len == 0) {
   1039         if (reslen < capacity) {
   1040             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
   1041         }
   1042         reslen += LANG_UND_LEN;
   1043     } else if (!_isLanguageSubtag(buf, len)) {
   1044             /* invalid language code */
   1045         if (strict) {
   1046             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1047             return 0;
   1048         }
   1049         if (reslen < capacity) {
   1050             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
   1051         }
   1052         reslen += LANG_UND_LEN;
   1053     } else {
   1054         /* resolve deprecated */
   1055         for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
   1056             if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
   1057                 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
   1058                 len = (int32_t)uprv_strlen(buf);
   1059                 break;
   1060             }
   1061         }
   1062         if (reslen < capacity) {
   1063             uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
   1064         }
   1065         reslen += len;
   1066     }
   1067     u_terminateChars(appendAt, capacity, reslen, status);
   1068     return reslen;
   1069 }
   1070 
   1071 static int32_t
   1072 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
   1073     char buf[ULOC_SCRIPT_CAPACITY];
   1074     UErrorCode tmpStatus = U_ZERO_ERROR;
   1075     int32_t len;
   1076     int32_t reslen = 0;
   1077 
   1078     if (U_FAILURE(*status)) {
   1079         return 0;
   1080     }
   1081 
   1082     len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
   1083     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1084         if (strict) {
   1085             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1086         }
   1087         return 0;
   1088     }
   1089 
   1090     if (len > 0) {
   1091         if (!_isScriptSubtag(buf, len)) {
   1092             /* invalid script code */
   1093             if (strict) {
   1094                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1095             }
   1096             return 0;
   1097         } else {
   1098             if (reslen < capacity) {
   1099                 *(appendAt + reslen) = SEP;
   1100             }
   1101             reslen++;
   1102 
   1103             if (reslen < capacity) {
   1104                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
   1105             }
   1106             reslen += len;
   1107         }
   1108     }
   1109     u_terminateChars(appendAt, capacity, reslen, status);
   1110     return reslen;
   1111 }
   1112 
   1113 static int32_t
   1114 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
   1115     char buf[ULOC_COUNTRY_CAPACITY];
   1116     UErrorCode tmpStatus = U_ZERO_ERROR;
   1117     int32_t len;
   1118     int32_t reslen = 0;
   1119 
   1120     if (U_FAILURE(*status)) {
   1121         return 0;
   1122     }
   1123 
   1124     len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
   1125     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1126         if (strict) {
   1127             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1128         }
   1129         return 0;
   1130     }
   1131 
   1132     if (len > 0) {
   1133         if (!_isRegionSubtag(buf, len)) {
   1134             /* invalid region code */
   1135             if (strict) {
   1136                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1137             }
   1138             return 0;
   1139         } else {
   1140             if (reslen < capacity) {
   1141                 *(appendAt + reslen) = SEP;
   1142             }
   1143             reslen++;
   1144 
   1145             if (reslen < capacity) {
   1146                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
   1147             }
   1148             reslen += len;
   1149         }
   1150     }
   1151     u_terminateChars(appendAt, capacity, reslen, status);
   1152     return reslen;
   1153 }
   1154 
   1155 static int32_t
   1156 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
   1157     char buf[ULOC_FULLNAME_CAPACITY];
   1158     UErrorCode tmpStatus = U_ZERO_ERROR;
   1159     int32_t len, i;
   1160     int32_t reslen = 0;
   1161 
   1162     if (U_FAILURE(*status)) {
   1163         return 0;
   1164     }
   1165 
   1166     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
   1167     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1168         if (strict) {
   1169             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1170         }
   1171         return 0;
   1172     }
   1173 
   1174     if (len > 0) {
   1175         char *p, *pVar;
   1176         UBool bNext = TRUE;
   1177         VariantListEntry *var;
   1178         VariantListEntry *varFirst = NULL;
   1179 
   1180         pVar = NULL;
   1181         p = buf;
   1182         while (bNext) {
   1183             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
   1184                 if (*p == 0) {
   1185                     bNext = FALSE;
   1186                 } else {
   1187                     *p = 0; /* terminate */
   1188                 }
   1189                 if (pVar == NULL) {
   1190                     if (strict) {
   1191                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1192                         break;
   1193                     }
   1194                     /* ignore empty variant */
   1195                 } else {
   1196                     /* ICU uses upper case letters for variants, but
   1197                        the canonical format is lowercase in BCP47 */
   1198                     for (i = 0; *(pVar + i) != 0; i++) {
   1199                         *(pVar + i) = uprv_tolower(*(pVar + i));
   1200                     }
   1201 
   1202                     /* validate */
   1203                     if (_isVariantSubtag(pVar, -1)) {
   1204                         if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
   1205                             /* emit the variant to the list */
   1206                             var = uprv_malloc(sizeof(VariantListEntry));
   1207                             if (var == NULL) {
   1208                                 *status = U_MEMORY_ALLOCATION_ERROR;
   1209                                 break;
   1210                             }
   1211                             var->variant = pVar;
   1212                             if (!_addVariantToList(&varFirst, var)) {
   1213                                 /* duplicated variant */
   1214                                 uprv_free(var);
   1215                                 if (strict) {
   1216                                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1217                                     break;
   1218                                 }
   1219                             }
   1220                         } else {
   1221                             /* Special handling for POSIX variant, need to remember that we had it and then */
   1222                             /* treat it like an extension later. */
   1223                             *hadPosix = TRUE;
   1224                         }
   1225                     } else if (strict) {
   1226                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1227                         break;
   1228                     } else if (_isPrivateuseValueSubtag(pVar, -1)) {
   1229                         /* Handle private use subtags separately */
   1230                         break;
   1231                     }
   1232                 }
   1233                 /* reset variant starting position */
   1234                 pVar = NULL;
   1235             } else if (pVar == NULL) {
   1236                 pVar = p;
   1237             }
   1238             p++;
   1239         }
   1240 
   1241         if (U_SUCCESS(*status)) {
   1242             if (varFirst != NULL) {
   1243                 int32_t varLen;
   1244 
   1245                 /* write out validated/normalized variants to the target */
   1246                 var = varFirst;
   1247                 while (var != NULL) {
   1248                     if (reslen < capacity) {
   1249                         *(appendAt + reslen) = SEP;
   1250                     }
   1251                     reslen++;
   1252                     varLen = (int32_t)uprv_strlen(var->variant);
   1253                     if (reslen < capacity) {
   1254                         uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
   1255                     }
   1256                     reslen += varLen;
   1257                     var = var->next;
   1258                 }
   1259             }
   1260         }
   1261 
   1262         /* clean up */
   1263         var = varFirst;
   1264         while (var != NULL) {
   1265             VariantListEntry *tmpVar = var->next;
   1266             uprv_free(var);
   1267             var = tmpVar;
   1268         }
   1269 
   1270         if (U_FAILURE(*status)) {
   1271             return 0;
   1272         }
   1273     }
   1274 
   1275     u_terminateChars(appendAt, capacity, reslen, status);
   1276     return reslen;
   1277 }
   1278 
   1279 static int32_t
   1280 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
   1281     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1282     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1283     int32_t attrBufLength = 0;
   1284     UBool isAttribute = FALSE;
   1285     UEnumeration *keywordEnum = NULL;
   1286     int32_t reslen = 0;
   1287 
   1288     keywordEnum = uloc_openKeywords(localeID, status);
   1289     if (U_FAILURE(*status) && !hadPosix) {
   1290         uenum_close(keywordEnum);
   1291         return 0;
   1292     }
   1293     if (keywordEnum != NULL || hadPosix) {
   1294         /* reorder extensions */
   1295         int32_t len;
   1296         const char *key;
   1297         ExtensionListEntry *firstExt = NULL;
   1298         ExtensionListEntry *ext;
   1299         AttributeListEntry *firstAttr = NULL;
   1300         AttributeListEntry *attr;
   1301         char *attrValue;
   1302         char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1303         char *pExtBuf = extBuf;
   1304         int32_t extBufCapacity = sizeof(extBuf);
   1305         const char *bcpKey, *bcpValue;
   1306         UErrorCode tmpStatus = U_ZERO_ERROR;
   1307         int32_t keylen;
   1308         UBool isLDMLKeyword;
   1309 
   1310         while (TRUE) {
   1311             isAttribute = FALSE;
   1312             key = uenum_next(keywordEnum, NULL, status);
   1313             if (key == NULL) {
   1314                 break;
   1315             }
   1316             len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
   1317             if (U_FAILURE(tmpStatus)) {
   1318                 if (strict) {
   1319                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1320                     break;
   1321                 }
   1322                 /* ignore this keyword */
   1323                 tmpStatus = U_ZERO_ERROR;
   1324                 continue;
   1325             }
   1326 
   1327             keylen = (int32_t)uprv_strlen(key);
   1328             isLDMLKeyword = (keylen > 1);
   1329 
   1330             /* special keyword used for representing Unicode locale attributes */
   1331             if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
   1332                 isAttribute = TRUE;
   1333                 if (len > 0) {
   1334                     int32_t i = 0;
   1335                     while (TRUE) {
   1336                         attrBufLength = 0;
   1337                         for (; i < len; i++) {
   1338                             if (buf[i] != '-') {
   1339                                 attrBuf[attrBufLength++] = buf[i];
   1340                             } else {
   1341                                 i++;
   1342                                 break;
   1343                             }
   1344                         }
   1345                         if (attrBufLength > 0) {
   1346                             attrBuf[attrBufLength] = 0;
   1347 
   1348                         } else if (i >= len){
   1349                             break;
   1350                         }
   1351 
   1352                         /* create AttributeListEntry */
   1353                         attr = uprv_malloc(sizeof(AttributeListEntry));
   1354                         if (attr == NULL) {
   1355                             *status = U_MEMORY_ALLOCATION_ERROR;
   1356                             break;
   1357                         }
   1358                         attrValue = uprv_malloc(attrBufLength + 1);
   1359                         if (attrValue == NULL) {
   1360                             *status = U_MEMORY_ALLOCATION_ERROR;
   1361                             break;
   1362                         }
   1363                         uprv_strcpy(attrValue, attrBuf);
   1364                         attr->attribute = attrValue;
   1365 
   1366                         if (!_addAttributeToList(&firstAttr, attr)) {
   1367                             uprv_free(attr);
   1368                             uprv_free(attrValue);
   1369                             if (strict) {
   1370                                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1371                                 break;
   1372                             }
   1373                         }
   1374                     }
   1375                 }
   1376             } else if (isLDMLKeyword) {
   1377                 int32_t modKeyLen;
   1378 
   1379                 /* transform key and value to bcp47 style */
   1380                 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
   1381                 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1382                     if (strict) {
   1383                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1384                         break;
   1385                     }
   1386                     tmpStatus = U_ZERO_ERROR;
   1387                     continue;
   1388                 }
   1389 
   1390                 bcpKey = pExtBuf;
   1391                 pExtBuf += (modKeyLen + 1);
   1392                 extBufCapacity -= (modKeyLen + 1);
   1393 
   1394                 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
   1395                 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1396                     if (strict) {
   1397                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1398                         break;
   1399                     }
   1400                     tmpStatus = U_ZERO_ERROR;
   1401                     continue;
   1402                 }
   1403                 bcpValue = pExtBuf;
   1404                 pExtBuf += (len + 1);
   1405                 extBufCapacity -= (len + 1);
   1406             } else {
   1407                 if (*key == PRIVATEUSE) {
   1408                     if (!_isPrivateuseValueSubtags(buf, len)) {
   1409                         if (strict) {
   1410                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1411                             break;
   1412                         }
   1413                         continue;
   1414                     }
   1415                 } else {
   1416                     if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
   1417                         if (strict) {
   1418                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1419                             break;
   1420                         }
   1421                         continue;
   1422                     }
   1423                 }
   1424                 bcpKey = key;
   1425                 if ((len + 1) < extBufCapacity) {
   1426                     uprv_memcpy(pExtBuf, buf, len);
   1427                     bcpValue = pExtBuf;
   1428 
   1429                     pExtBuf += len;
   1430 
   1431                     *pExtBuf = 0;
   1432                     pExtBuf++;
   1433 
   1434                     extBufCapacity -= (len + 1);
   1435                 } else {
   1436                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1437                     break;
   1438                 }
   1439             }
   1440 
   1441             if (!isAttribute) {
   1442                 /* create ExtensionListEntry */
   1443                 ext = uprv_malloc(sizeof(ExtensionListEntry));
   1444                 if (ext == NULL) {
   1445                     *status = U_MEMORY_ALLOCATION_ERROR;
   1446                     break;
   1447                 }
   1448                 ext->key = bcpKey;
   1449                 ext->value = bcpValue;
   1450 
   1451                 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
   1452                     uprv_free(ext);
   1453                     if (strict) {
   1454                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1455                         break;
   1456                     }
   1457                 }
   1458             }
   1459         }
   1460 
   1461         /* Special handling for POSIX variant - add the keywords for POSIX */
   1462         if (hadPosix) {
   1463             /* create ExtensionListEntry for POSIX */
   1464             ext = uprv_malloc(sizeof(ExtensionListEntry));
   1465             if (ext == NULL) {
   1466                 *status = U_MEMORY_ALLOCATION_ERROR;
   1467                 goto cleanup;
   1468             }
   1469             ext->key = POSIX_KEY;
   1470             ext->value = POSIX_VALUE;
   1471 
   1472             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
   1473                 uprv_free(ext);
   1474             }
   1475         }
   1476 
   1477         if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
   1478             UBool startLDMLExtension = FALSE;
   1479 
   1480             attr = firstAttr;
   1481             ext = firstExt;
   1482             do {
   1483                 if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) {
   1484                    /* write LDML singleton extension */
   1485                    if (reslen < capacity) {
   1486                        *(appendAt + reslen) = SEP;
   1487                    }
   1488                    reslen++;
   1489                    if (reslen < capacity) {
   1490                        *(appendAt + reslen) = LDMLEXT;
   1491                    }
   1492                    reslen++;
   1493 
   1494                    startLDMLExtension = TRUE;
   1495                 }
   1496 
   1497                 /* write out the sorted BCP47 attributes, extensions and private use */
   1498                 if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) {
   1499                     if (reslen < capacity) {
   1500                         *(appendAt + reslen) = SEP;
   1501                     }
   1502                     reslen++;
   1503                     len = (int32_t)uprv_strlen(ext->key);
   1504                     if (reslen < capacity) {
   1505                         uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
   1506                     }
   1507                     reslen += len;
   1508                     if (reslen < capacity) {
   1509                         *(appendAt + reslen) = SEP;
   1510                     }
   1511                     reslen++;
   1512                     len = (int32_t)uprv_strlen(ext->value);
   1513                     if (reslen < capacity) {
   1514                         uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
   1515                     }
   1516                     reslen += len;
   1517 
   1518                     ext = ext->next;
   1519                 } else if (attr) {
   1520                     /* write the value for the attributes */
   1521                     if (reslen < capacity) {
   1522                         *(appendAt + reslen) = SEP;
   1523                     }
   1524                     reslen++;
   1525                     len = (int32_t)uprv_strlen(attr->attribute);
   1526                     if (reslen < capacity) {
   1527                         uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
   1528                     }
   1529                     reslen += len;
   1530 
   1531                     attr = attr->next;
   1532                 }
   1533             } while (attr != NULL || ext != NULL);
   1534         }
   1535 cleanup:
   1536         /* clean up */
   1537         ext = firstExt;
   1538         while (ext != NULL) {
   1539             ExtensionListEntry *tmpExt = ext->next;
   1540             uprv_free(ext);
   1541             ext = tmpExt;
   1542         }
   1543 
   1544         attr = firstAttr;
   1545         while (attr != NULL) {
   1546             AttributeListEntry *tmpAttr = attr->next;
   1547             char *pValue = (char *)attr->attribute;
   1548             uprv_free(pValue);
   1549             uprv_free(attr);
   1550             attr = tmpAttr;
   1551         }
   1552 
   1553         uenum_close(keywordEnum);
   1554 
   1555         if (U_FAILURE(*status)) {
   1556             return 0;
   1557         }
   1558     }
   1559 
   1560     return u_terminateChars(appendAt, capacity, reslen, status);
   1561 }
   1562 
   1563 /**
   1564  * Append keywords parsed from LDML extension value
   1565  * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
   1566  * Note: char* buf is used for storing keywords
   1567  */
   1568 static void
   1569 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
   1570     const char *p, *pNext, *pSep, *pTmp, *pTmpStart;
   1571     const char *pBcpKey, *pBcpType;
   1572     const char *pKey, *pType;
   1573     int32_t bcpKeyLen = 0, bcpTypeLen;
   1574     ExtensionListEntry *kwd, *nextKwd;
   1575     ExtensionListEntry *kwdFirst = NULL;
   1576     int32_t bufIdx = 0;
   1577     int32_t  len;
   1578     UBool variantExists = *posixVariant;
   1579     UBool searchFurther;
   1580 
   1581     /* Reset the posixVariant value */
   1582     *posixVariant = FALSE;
   1583 
   1584     pNext = ldmlext;
   1585     pBcpKey = pBcpType = NULL;
   1586     while (pNext) {
   1587         p = pSep = pNext;
   1588 
   1589         /* locate next separator char */
   1590         while (*pSep) {
   1591             if (*pSep == SEP) {
   1592                 searchFurther = FALSE;
   1593                 if (pBcpKey != NULL) {
   1594                     pTmpStart = (pSep + 1);
   1595                     pTmp = pTmpStart;
   1596                     /* Look at the next subtag and see if it is part of the previous subtag or the start of new keyword */
   1597                     while (*pTmp) {
   1598                         if (*pTmp == SEP || *(pTmp + 1) == 0) {
   1599                             if (!_isLDMLKey(pTmpStart, (int32_t)(pTmp - pTmpStart))) {
   1600                                 searchFurther = TRUE;
   1601                             }
   1602                             break;
   1603                         }
   1604                         pTmp++;
   1605                     }
   1606                 }
   1607                 if (searchFurther) {
   1608                     pSep++;
   1609                     continue;
   1610                 } else {
   1611                     break;
   1612                 }
   1613             }
   1614             pSep++;
   1615         }
   1616         if (*pSep == 0) {
   1617             /* last subtag */
   1618             pNext = NULL;
   1619         } else {
   1620             pNext = pSep + 1;
   1621         }
   1622 
   1623         if (pBcpKey == NULL) {
   1624             pBcpKey = p;
   1625             bcpKeyLen = (int32_t)(pSep - p);
   1626         } else {
   1627             pBcpType = p;
   1628             bcpTypeLen = (int32_t)(pSep - p);
   1629 
   1630             /* BCP key to locale key */
   1631             len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
   1632             if (U_FAILURE(*status)) {
   1633                 goto cleanup;
   1634             }
   1635             pKey = buf + bufIdx;
   1636             bufIdx += len;
   1637             *(buf + bufIdx) = 0;
   1638             bufIdx++;
   1639 
   1640             /* BCP type to locale type */
   1641             len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
   1642             if (U_FAILURE(*status)) {
   1643                 goto cleanup;
   1644             }
   1645             pType = buf + bufIdx;
   1646             bufIdx += len;
   1647             *(buf + bufIdx) = 0;
   1648             bufIdx++;
   1649 
   1650             /* Special handling for u-va-posix, since we want to treat this as a variant, not */
   1651             /* as a keyword.                                                                  */
   1652 
   1653             if (!variantExists && !uprv_strcmp(pKey,POSIX_KEY) && !uprv_strcmp(pType,POSIX_VALUE) ) {
   1654                 *posixVariant = TRUE;
   1655             } else {
   1656                 /* create an ExtensionListEntry for this keyword */
   1657                 kwd = uprv_malloc(sizeof(ExtensionListEntry));
   1658                 if (kwd == NULL) {
   1659                     *status = U_MEMORY_ALLOCATION_ERROR;
   1660                     goto cleanup;
   1661                 }
   1662 
   1663                 kwd->key = pKey;
   1664                 kwd->value = pType;
   1665 
   1666                 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1667                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1668                     uprv_free(kwd);
   1669                     goto cleanup;
   1670                 }
   1671             }
   1672 
   1673             /* for next pair */
   1674             pBcpKey = NULL;
   1675             pBcpType = NULL;
   1676         }
   1677     }
   1678 
   1679     if (pBcpKey != NULL) {
   1680         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1681         goto cleanup;
   1682     }
   1683 
   1684     kwd = kwdFirst;
   1685     while (kwd != NULL) {
   1686         nextKwd = kwd->next;
   1687         _addExtensionToList(appendTo, kwd, FALSE);
   1688         kwd = nextKwd;
   1689     }
   1690 
   1691     return;
   1692 
   1693 cleanup:
   1694     kwd = kwdFirst;
   1695     while (kwd != NULL) {
   1696         nextKwd = kwd->next;
   1697         uprv_free(kwd);
   1698         kwd = nextKwd;
   1699     }
   1700 }
   1701 
   1702 
   1703 static int32_t
   1704 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
   1705     int32_t reslen = 0;
   1706     int32_t i, n;
   1707     int32_t len;
   1708     ExtensionListEntry *kwdFirst = NULL;
   1709     ExtensionListEntry *kwd;
   1710     AttributeListEntry *attrFirst = NULL;
   1711     AttributeListEntry *attr;
   1712     const char *key, *type;
   1713     char *kwdBuf = NULL;
   1714     int32_t kwdBufLength = capacity;
   1715     UBool posixVariant = FALSE;
   1716 
   1717     if (U_FAILURE(*status)) {
   1718         return 0;
   1719     }
   1720 
   1721     kwdBuf = (char *)uprv_malloc(kwdBufLength);
   1722     if (kwdBuf == NULL) {
   1723         *status = U_MEMORY_ALLOCATION_ERROR;
   1724         return 0;
   1725     }
   1726 
   1727     /* Determine if variants already exists */
   1728     if (ultag_getVariantsSize(langtag)) {
   1729         posixVariant = TRUE;
   1730     }
   1731 
   1732     n = ultag_getExtensionsSize(langtag);
   1733 
   1734     /* resolve locale keywords and reordering keys */
   1735     for (i = 0; i < n; i++) {
   1736         key = ultag_getExtensionKey(langtag, i);
   1737         type = ultag_getExtensionValue(langtag, i);
   1738         if (*key == LDMLEXT) {
   1739             _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
   1740             if (U_FAILURE(*status)) {
   1741                 break;
   1742             }
   1743         } else {
   1744             kwd = uprv_malloc(sizeof(ExtensionListEntry));
   1745             if (kwd == NULL) {
   1746                 *status = U_MEMORY_ALLOCATION_ERROR;
   1747                 break;
   1748             }
   1749             kwd->key = key;
   1750             kwd->value = type;
   1751             if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1752                 uprv_free(kwd);
   1753                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1754                 break;
   1755             }
   1756         }
   1757     }
   1758 
   1759     if (U_SUCCESS(*status)) {
   1760         type = ultag_getPrivateUse(langtag);
   1761         if ((int32_t)uprv_strlen(type) > 0) {
   1762             /* add private use as a keyword */
   1763             kwd = uprv_malloc(sizeof(ExtensionListEntry));
   1764             if (kwd == NULL) {
   1765                 *status = U_MEMORY_ALLOCATION_ERROR;
   1766             } else {
   1767                 kwd->key = PRIVATEUSE_KEY;
   1768                 kwd->value = type;
   1769                 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1770                     uprv_free(kwd);
   1771                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1772                 }
   1773             }
   1774         }
   1775     }
   1776 
   1777     /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
   1778 
   1779     if (U_SUCCESS(*status) && posixVariant) {
   1780         len = (int32_t) uprv_strlen(_POSIX);
   1781         if (reslen < capacity) {
   1782             uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
   1783         }
   1784         reslen += len;
   1785     }
   1786 
   1787     attrFirst = langtag->attributes;
   1788     if (U_SUCCESS(*status) && (kwdFirst != NULL || attrFirst != NULL)) {
   1789         /* write out the sorted keywords */
   1790         UBool firstValue = TRUE;
   1791         UBool firstAttr = TRUE;
   1792         kwd = kwdFirst;
   1793         attr = attrFirst;
   1794         do {
   1795             if (reslen < capacity) {
   1796                 if (firstValue) {
   1797                     /* '@' */
   1798                     *(appendAt + reslen) = LOCALE_EXT_SEP;
   1799                     firstValue = FALSE;
   1800                 } else if (attr) {
   1801                     /* '-' */
   1802                     *(appendAt + reslen) = SEP;
   1803                 }else {
   1804                     /* ';' */
   1805                     *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
   1806                 }
   1807             }
   1808             reslen++;
   1809 
   1810             if (attr) {
   1811                 if (firstAttr) {
   1812                     len = (int32_t)uprv_strlen(LOCALE_ATTRIBUTE_KEY);
   1813                     if (reslen < capacity) {
   1814                         uprv_memcpy(appendAt + reslen, LOCALE_ATTRIBUTE_KEY, uprv_min(len, capacity - reslen));
   1815                     }
   1816                     reslen += len;
   1817 
   1818                     /* '=' */
   1819                     if (reslen < capacity) {
   1820                         *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
   1821                     }
   1822 
   1823                     reslen++;
   1824 
   1825                     firstAttr = FALSE;
   1826                 }
   1827 
   1828                 len = (int32_t)uprv_strlen(attr->attribute);
   1829                 if (reslen < capacity) {
   1830                     uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
   1831                 }
   1832                 reslen += len;
   1833 
   1834                 attr = attr->next;
   1835             } else if (kwd) {
   1836                 /* key */
   1837                 len = (int32_t)uprv_strlen(kwd->key);
   1838                 if (reslen < capacity) {
   1839                     uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
   1840                 }
   1841                 reslen += len;
   1842 
   1843                 /* '=' */
   1844                 if (reslen < capacity) {
   1845                     *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
   1846                 }
   1847                 reslen++;
   1848 
   1849                 /* type */
   1850                 len = (int32_t)uprv_strlen(kwd->value);
   1851                 if (reslen < capacity) {
   1852                     uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
   1853                 }
   1854                 reslen += len;
   1855 
   1856                 kwd = kwd->next;
   1857             }
   1858         } while (kwd || attr);
   1859     }
   1860 
   1861     /* clean up */
   1862     kwd = kwdFirst;
   1863     while (kwd != NULL) {
   1864         ExtensionListEntry *tmpKwd = kwd->next;
   1865         uprv_free(kwd);
   1866         kwd = tmpKwd;
   1867     }
   1868 
   1869     uprv_free(kwdBuf);
   1870 
   1871     if (U_FAILURE(*status)) {
   1872         return 0;
   1873     }
   1874 
   1875     return u_terminateChars(appendAt, capacity, reslen, status);
   1876 }
   1877 
   1878 static int32_t
   1879 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
   1880     char buf[ULOC_FULLNAME_CAPACITY];
   1881     char tmpAppend[ULOC_FULLNAME_CAPACITY];
   1882     UErrorCode tmpStatus = U_ZERO_ERROR;
   1883     int32_t len, i;
   1884     int32_t reslen = 0;
   1885 
   1886     if (U_FAILURE(*status)) {
   1887         return 0;
   1888     }
   1889 
   1890     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
   1891     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1892         if (strict) {
   1893             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1894         }
   1895         return 0;
   1896     }
   1897 
   1898     if (len > 0) {
   1899         char *p, *pPriv;
   1900         UBool bNext = TRUE;
   1901         UBool firstValue = TRUE;
   1902         UBool writeValue;
   1903 
   1904         pPriv = NULL;
   1905         p = buf;
   1906         while (bNext) {
   1907             writeValue = FALSE;
   1908             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
   1909                 if (*p == 0) {
   1910                     bNext = FALSE;
   1911                 } else {
   1912                     *p = 0; /* terminate */
   1913                 }
   1914                 if (pPriv != NULL) {
   1915                     /* Private use in the canonical format is lowercase in BCP47 */
   1916                     for (i = 0; *(pPriv + i) != 0; i++) {
   1917                         *(pPriv + i) = uprv_tolower(*(pPriv + i));
   1918                     }
   1919 
   1920                     /* validate */
   1921                     if (_isPrivateuseValueSubtag(pPriv, -1)) {
   1922                         if (firstValue) {
   1923                             if (!_isVariantSubtag(pPriv, -1)) {
   1924                                 writeValue = TRUE;
   1925                             }
   1926                         } else {
   1927                             writeValue = TRUE;
   1928                         }
   1929                     } else if (strict) {
   1930                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1931                         break;
   1932                     } else {
   1933                         break;
   1934                     }
   1935 
   1936                     if (writeValue) {
   1937                         if (reslen < capacity) {
   1938                             tmpAppend[reslen++] = SEP;
   1939                         }
   1940 
   1941                         if (firstValue) {
   1942                             if (reslen < capacity) {
   1943                                 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
   1944                             }
   1945 
   1946                             if (reslen < capacity) {
   1947                                 tmpAppend[reslen++] = SEP;
   1948                             }
   1949 
   1950                             len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
   1951                             if (reslen < capacity) {
   1952                                 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
   1953                             }
   1954                             reslen += len;
   1955 
   1956                             if (reslen < capacity) {
   1957                                 tmpAppend[reslen++] = SEP;
   1958                             }
   1959 
   1960                             firstValue = FALSE;
   1961                         }
   1962 
   1963                         len = (int32_t)uprv_strlen(pPriv);
   1964                         if (reslen < capacity) {
   1965                             uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
   1966                         }
   1967                         reslen += len;
   1968                     }
   1969                 }
   1970                 /* reset private use starting position */
   1971                 pPriv = NULL;
   1972             } else if (pPriv == NULL) {
   1973                 pPriv = p;
   1974             }
   1975             p++;
   1976         }
   1977 
   1978         if (U_FAILURE(*status)) {
   1979             return 0;
   1980         }
   1981     }
   1982 
   1983     if (U_SUCCESS(*status)) {
   1984         len = reslen;
   1985         if (reslen < capacity) {
   1986             uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
   1987         }
   1988     }
   1989 
   1990     u_terminateChars(appendAt, capacity, reslen, status);
   1991 
   1992     return reslen;
   1993 }
   1994 
   1995 /*
   1996 * -------------------------------------------------
   1997 *
   1998 * ultag_ functions
   1999 *
   2000 * -------------------------------------------------
   2001 */
   2002 
   2003 /* Bit flags used by the parser */
   2004 #define LANG 0x0001
   2005 #define EXTL 0x0002
   2006 #define SCRT 0x0004
   2007 #define REGN 0x0008
   2008 #define VART 0x0010
   2009 #define EXTS 0x0020
   2010 #define EXTV 0x0040
   2011 #define PRIV 0x0080
   2012 #define ATTR 0x0100
   2013 
   2014 static ULanguageTag*
   2015 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
   2016     ULanguageTag *t;
   2017     char *tagBuf;
   2018     int16_t next;
   2019     char *pSubtag, *pNext, *pLastGoodPosition;
   2020     int32_t subtagLen;
   2021     int32_t extlangIdx;
   2022     ExtensionListEntry *pExtension;
   2023     AttributeListEntry *pAttribute;
   2024     char *pExtValueSubtag, *pExtValueSubtagEnd;
   2025     char *pAttrValue;
   2026     int32_t i;
   2027     UBool isLDMLExtension, reqLDMLType, privateuseVar = FALSE;
   2028 
   2029     if (parsedLen != NULL) {
   2030         *parsedLen = 0;
   2031     }
   2032 
   2033     if (U_FAILURE(*status)) {
   2034         return NULL;
   2035     }
   2036 
   2037     if (tagLen < 0) {
   2038         tagLen = (int32_t)uprv_strlen(tag);
   2039     }
   2040 
   2041     /* copy the entire string */
   2042     tagBuf = (char*)uprv_malloc(tagLen + 1);
   2043     if (tagBuf == NULL) {
   2044         *status = U_MEMORY_ALLOCATION_ERROR;
   2045         return NULL;
   2046     }
   2047     uprv_memcpy(tagBuf, tag, tagLen);
   2048     *(tagBuf + tagLen) = 0;
   2049 
   2050     /* create a ULanguageTag */
   2051     t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
   2052     if (t == NULL) {
   2053         uprv_free(tagBuf);
   2054         *status = U_MEMORY_ALLOCATION_ERROR;
   2055         return NULL;
   2056     }
   2057     _initializeULanguageTag(t);
   2058     t->buf = tagBuf;
   2059 
   2060     if (tagLen < MINLEN) {
   2061         /* the input tag is too short - return empty ULanguageTag */
   2062         return t;
   2063     }
   2064 
   2065     /* check if the tag is grandfathered */
   2066     for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
   2067         if (T_CString_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
   2068             /* a grandfathered tag is always longer than its preferred mapping */
   2069             int32_t newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
   2070             if (tagLen < newTagLength) {
   2071                 uprv_free(tagBuf);
   2072                 tagBuf = (char*)uprv_malloc(newTagLength + 1);
   2073                 if (tagBuf == NULL) {
   2074                     *status = U_MEMORY_ALLOCATION_ERROR;
   2075                     return NULL;
   2076                 }
   2077                 t->buf = tagBuf;
   2078                 tagLen = newTagLength;
   2079             }
   2080             uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
   2081 
   2082             break;
   2083         }
   2084     }
   2085 
   2086     /*
   2087      * langtag      =   language
   2088      *                  ["-" script]
   2089      *                  ["-" region]
   2090      *                  *("-" variant)
   2091      *                  *("-" extension)
   2092      *                  ["-" privateuse]
   2093      */
   2094 
   2095     next = LANG | PRIV;
   2096     pNext = pLastGoodPosition = tagBuf;
   2097     extlangIdx = 0;
   2098     pExtension = NULL;
   2099     pExtValueSubtag = NULL;
   2100     pExtValueSubtagEnd = NULL;
   2101     pAttribute = NULL;
   2102     pAttrValue = NULL;
   2103     isLDMLExtension = FALSE;
   2104     reqLDMLType = FALSE;
   2105 
   2106     while (pNext) {
   2107         char *pSep;
   2108 
   2109         pSubtag = pNext;
   2110 
   2111         /* locate next separator char */
   2112         pSep = pSubtag;
   2113         while (*pSep) {
   2114             if (*pSep == SEP) {
   2115                 break;
   2116             }
   2117             pSep++;
   2118         }
   2119         if (*pSep == 0) {
   2120             /* last subtag */
   2121             pNext = NULL;
   2122         } else {
   2123             pNext = pSep + 1;
   2124         }
   2125         subtagLen = (int32_t)(pSep - pSubtag);
   2126 
   2127         if (next & LANG) {
   2128             if (_isLanguageSubtag(pSubtag, subtagLen)) {
   2129                 *pSep = 0;  /* terminate */
   2130                 t->language = T_CString_toLowerCase(pSubtag);
   2131 
   2132                 pLastGoodPosition = pSep;
   2133                 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
   2134                 continue;
   2135             }
   2136         }
   2137         if (next & EXTL) {
   2138             if (_isExtlangSubtag(pSubtag, subtagLen)) {
   2139                 *pSep = 0;
   2140                 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
   2141 
   2142                 pLastGoodPosition = pSep;
   2143                 if (extlangIdx < 3) {
   2144                     next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
   2145                 } else {
   2146                     next = SCRT | REGN | VART | EXTS | PRIV;
   2147                 }
   2148                 continue;
   2149             }
   2150         }
   2151         if (next & SCRT) {
   2152             if (_isScriptSubtag(pSubtag, subtagLen)) {
   2153                 char *p = pSubtag;
   2154 
   2155                 *pSep = 0;
   2156 
   2157                 /* to title case */
   2158                 *p = uprv_toupper(*p);
   2159                 p++;
   2160                 for (; *p; p++) {
   2161                     *p = uprv_tolower(*p);
   2162                 }
   2163 
   2164                 t->script = pSubtag;
   2165 
   2166                 pLastGoodPosition = pSep;
   2167                 next = REGN | VART | EXTS | PRIV;
   2168                 continue;
   2169             }
   2170         }
   2171         if (next & REGN) {
   2172             if (_isRegionSubtag(pSubtag, subtagLen)) {
   2173                 *pSep = 0;
   2174                 t->region = T_CString_toUpperCase(pSubtag);
   2175 
   2176                 pLastGoodPosition = pSep;
   2177                 next = VART | EXTS | PRIV;
   2178                 continue;
   2179             }
   2180         }
   2181         if (next & VART) {
   2182             if (_isVariantSubtag(pSubtag, subtagLen) ||
   2183                (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
   2184                 VariantListEntry *var;
   2185                 UBool isAdded;
   2186 
   2187                 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
   2188                 if (var == NULL) {
   2189                     *status = U_MEMORY_ALLOCATION_ERROR;
   2190                     goto error;
   2191                 }
   2192                 *pSep = 0;
   2193                 var->variant = T_CString_toUpperCase(pSubtag);
   2194                 isAdded = _addVariantToList(&(t->variants), var);
   2195                 if (!isAdded) {
   2196                     /* duplicated variant entry */
   2197                     uprv_free(var);
   2198                     break;
   2199                 }
   2200                 pLastGoodPosition = pSep;
   2201                 next = VART | EXTS | PRIV;
   2202                 continue;
   2203             }
   2204         }
   2205         if (next & EXTS) {
   2206             if (_isExtensionSingleton(pSubtag, subtagLen)) {
   2207                 if (pExtension != NULL) {
   2208                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   2209                         /* the previous extension is incomplete */
   2210                         uprv_free(pExtension);
   2211                         pExtension = NULL;
   2212                         break;
   2213                     }
   2214 
   2215                     /* terminate the previous extension value */
   2216                     *pExtValueSubtagEnd = 0;
   2217                     pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   2218 
   2219                     /* insert the extension to the list */
   2220                     if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   2221                         pLastGoodPosition = pExtValueSubtagEnd;
   2222                     } else {
   2223                         /* stop parsing here */
   2224                         uprv_free(pExtension);
   2225                         pExtension = NULL;
   2226                         break;
   2227                     }
   2228 
   2229                     if (isLDMLExtension && reqLDMLType) {
   2230                         /* incomplete LDML extension key and type pair */
   2231                         pExtension = NULL;
   2232                         break;
   2233                     }
   2234                 }
   2235 
   2236                 isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT);
   2237 
   2238                 /* create a new extension */
   2239                 pExtension = uprv_malloc(sizeof(ExtensionListEntry));
   2240                 if (pExtension == NULL) {
   2241                     *status = U_MEMORY_ALLOCATION_ERROR;
   2242                     goto error;
   2243                 }
   2244                 *pSep = 0;
   2245                 pExtension->key = T_CString_toLowerCase(pSubtag);
   2246                 pExtension->value = NULL;   /* will be set later */
   2247 
   2248                 /*
   2249                  * reset the start and the end location of extension value
   2250                  * subtags for this extension
   2251                  */
   2252                 pExtValueSubtag = NULL;
   2253                 pExtValueSubtagEnd = NULL;
   2254 
   2255                 next = EXTV;
   2256                 continue;
   2257             }
   2258         }
   2259         if (next & EXTV) {
   2260             if (_isExtensionSubtag(pSubtag, subtagLen)) {
   2261                 if (isLDMLExtension) {
   2262                     if (reqLDMLType) {
   2263                         /* already saw an LDML key */
   2264                         if (!_isLDMLType(pSubtag, subtagLen)) {
   2265                             /* stop parsing here and let the valid LDML extension key/type
   2266                                pairs processed by the code out of this while loop */
   2267                             break;
   2268                         }
   2269                         pExtValueSubtagEnd = pSep;
   2270                         reqLDMLType = FALSE;
   2271                         next = EXTS | EXTV | PRIV;
   2272                     } else {
   2273                         /* LDML key */
   2274                         if (!_isLDMLKey(pSubtag, subtagLen)) {
   2275                             /* May be part of incomplete type */
   2276                             if (pExtValueSubtag != NULL) {
   2277                                 if (_isLDMLType(pSubtag, subtagLen)) {
   2278                                     pExtValueSubtagEnd = pSep;
   2279                                     reqLDMLType = FALSE;
   2280                                     next = EXTS | EXTV | PRIV;
   2281                                 }
   2282                             } else if (pExtValueSubtag == NULL && _isAttributeSubtag(pSubtag, subtagLen)) {
   2283                                 /* Get attribute */
   2284                                 next = ATTR;
   2285                             } else {
   2286                                 /* stop parsing here and let the valid LDML extension key/type
   2287                                    pairs processed by the code out of this while loop */
   2288                                 break;
   2289                             }
   2290                         } else {
   2291                             reqLDMLType = TRUE;
   2292                             next = EXTV;
   2293                         }
   2294                     }
   2295                 } else {
   2296                     /* Mark the end of this subtag */
   2297                     pExtValueSubtagEnd = pSep;
   2298                     next = EXTS | EXTV | PRIV;
   2299                 }
   2300 
   2301                 if (next != ATTR) {
   2302                     if (pExtValueSubtag == NULL) {
   2303                         /* if the start postion of this extension's value is not yet,
   2304                            this one is the first value subtag */
   2305                         pExtValueSubtag = pSubtag;
   2306                     }
   2307 
   2308                     continue;
   2309                 }
   2310             }
   2311         }
   2312         if (next & PRIV) {
   2313             if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
   2314                 char *pPrivuseVal;
   2315 
   2316                 if (pExtension != NULL) {
   2317                     /* Process the last extension */
   2318                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   2319                         /* the previous extension is incomplete */
   2320                         uprv_free(pExtension);
   2321                         pExtension = NULL;
   2322                         break;
   2323                     } else {
   2324                         /* terminate the previous extension value */
   2325                         *pExtValueSubtagEnd = 0;
   2326                         pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   2327 
   2328                         /* insert the extension to the list */
   2329                         if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   2330                             pLastGoodPosition = pExtValueSubtagEnd;
   2331                             pExtension = NULL;
   2332                         } else {
   2333                         /* stop parsing here */
   2334                             uprv_free(pExtension);
   2335                             pExtension = NULL;
   2336                             break;
   2337                         }
   2338                     }
   2339                 }
   2340 
   2341                 /* The rest of part will be private use value subtags */
   2342                 if (pNext == NULL) {
   2343                     /* empty private use subtag */
   2344                     break;
   2345                 }
   2346                 /* back up the private use value start position */
   2347                 pPrivuseVal = pNext;
   2348 
   2349                 /* validate private use value subtags */
   2350                 while (pNext) {
   2351                     pSubtag = pNext;
   2352                     pSep = pSubtag;
   2353                     while (*pSep) {
   2354                         if (*pSep == SEP) {
   2355                             break;
   2356                         }
   2357                         pSep++;
   2358                     }
   2359                     if (*pSep == 0) {
   2360                         /* last subtag */
   2361                         pNext = NULL;
   2362                     } else {
   2363                         pNext = pSep + 1;
   2364                     }
   2365                     subtagLen = (int32_t)(pSep - pSubtag);
   2366 
   2367                     if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
   2368                         *pSep = 0;
   2369                         next = VART;
   2370                         privateuseVar = TRUE;
   2371                         break;
   2372                     } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
   2373                         pLastGoodPosition = pSep;
   2374                     } else {
   2375                         break;
   2376                     }
   2377                 }
   2378 
   2379                 if (next == VART) {
   2380                     continue;
   2381                 }
   2382 
   2383                 if (pLastGoodPosition - pPrivuseVal > 0) {
   2384                     *pLastGoodPosition = 0;
   2385                     t->privateuse = T_CString_toLowerCase(pPrivuseVal);
   2386                 }
   2387                 /* No more subtags, exiting the parse loop */
   2388                 break;
   2389             }
   2390             break;
   2391         }
   2392 
   2393         if (next & ATTR) {
   2394             /* create a new attribute */
   2395             pAttribute = uprv_malloc(sizeof(AttributeListEntry));
   2396             if (pAttribute == NULL) {
   2397                 *status = U_MEMORY_ALLOCATION_ERROR;
   2398                 goto error;
   2399             }
   2400 
   2401             *pSep = 0;
   2402             pAttribute->attribute =T_CString_toLowerCase(pSubtag);
   2403 
   2404             if (!_addAttributeToList(&(t->attributes), pAttribute)) {
   2405                 uprv_free(pAttribute);
   2406             }
   2407 
   2408             next = EXTS | EXTV | PRIV;
   2409             continue;
   2410         }
   2411         /* If we fell through here, it means this subtag is illegal - quit parsing */
   2412         break;
   2413     }
   2414 
   2415     if (pExtension != NULL) {
   2416         /* Process the last extension */
   2417         if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   2418             /* the previous extension is incomplete */
   2419             uprv_free(pExtension);
   2420         } else {
   2421             /* terminate the previous extension value */
   2422             *pExtValueSubtagEnd = 0;
   2423             pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   2424             /* insert the extension to the list */
   2425             if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   2426                 pLastGoodPosition = pExtValueSubtagEnd;
   2427             } else {
   2428                 uprv_free(pExtension);
   2429             }
   2430         }
   2431     }
   2432 
   2433     if (parsedLen != NULL) {
   2434         *parsedLen = (int32_t)(pLastGoodPosition - t->buf);
   2435     }
   2436 
   2437     return t;
   2438 
   2439 error:
   2440     uprv_free(t);
   2441     return NULL;
   2442 }
   2443 
   2444 static void
   2445 ultag_close(ULanguageTag* langtag) {
   2446 
   2447     if (langtag == NULL) {
   2448         return;
   2449     }
   2450 
   2451     uprv_free(langtag->buf);
   2452 
   2453     if (langtag->variants) {
   2454         VariantListEntry *curVar = langtag->variants;
   2455         while (curVar) {
   2456             VariantListEntry *nextVar = curVar->next;
   2457             uprv_free(curVar);
   2458             curVar = nextVar;
   2459         }
   2460     }
   2461 
   2462     if (langtag->extensions) {
   2463         ExtensionListEntry *curExt = langtag->extensions;
   2464         while (curExt) {
   2465             ExtensionListEntry *nextExt = curExt->next;
   2466             uprv_free(curExt);
   2467             curExt = nextExt;
   2468         }
   2469     }
   2470 
   2471     if (langtag->attributes) {
   2472         AttributeListEntry *curAttr = langtag->attributes;
   2473         while (curAttr) {
   2474             AttributeListEntry *nextAttr = curAttr->next;
   2475             uprv_free(curAttr);
   2476             curAttr = nextAttr;
   2477         }
   2478     }
   2479 
   2480     uprv_free(langtag);
   2481 }
   2482 
   2483 static const char*
   2484 ultag_getLanguage(const ULanguageTag* langtag) {
   2485     return langtag->language;
   2486 }
   2487 
   2488 #if 0
   2489 static const char*
   2490 ultag_getJDKLanguage(const ULanguageTag* langtag) {
   2491     int32_t i;
   2492     for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
   2493         if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
   2494             return DEPRECATEDLANGS[i + 1];
   2495         }
   2496     }
   2497     return langtag->language;
   2498 }
   2499 #endif
   2500 
   2501 static const char*
   2502 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
   2503     if (idx >= 0 && idx < MAXEXTLANG) {
   2504         return langtag->extlang[idx];
   2505     }
   2506     return NULL;
   2507 }
   2508 
   2509 static int32_t
   2510 ultag_getExtlangSize(const ULanguageTag* langtag) {
   2511     int32_t size = 0;
   2512     int32_t i;
   2513     for (i = 0; i < MAXEXTLANG; i++) {
   2514         if (langtag->extlang[i]) {
   2515             size++;
   2516         }
   2517     }
   2518     return size;
   2519 }
   2520 
   2521 static const char*
   2522 ultag_getScript(const ULanguageTag* langtag) {
   2523     return langtag->script;
   2524 }
   2525 
   2526 static const char*
   2527 ultag_getRegion(const ULanguageTag* langtag) {
   2528     return langtag->region;
   2529 }
   2530 
   2531 static const char*
   2532 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
   2533     const char *var = NULL;
   2534     VariantListEntry *cur = langtag->variants;
   2535     int32_t i = 0;
   2536     while (cur) {
   2537         if (i == idx) {
   2538             var = cur->variant;
   2539             break;
   2540         }
   2541         cur = cur->next;
   2542         i++;
   2543     }
   2544     return var;
   2545 }
   2546 
   2547 static int32_t
   2548 ultag_getVariantsSize(const ULanguageTag* langtag) {
   2549     int32_t size = 0;
   2550     VariantListEntry *cur = langtag->variants;
   2551     while (TRUE) {
   2552         if (cur == NULL) {
   2553             break;
   2554         }
   2555         size++;
   2556         cur = cur->next;
   2557     }
   2558     return size;
   2559 }
   2560 
   2561 #if 0
   2562 /* Currently not being used. */
   2563 static const char*
   2564 ultag_getAttribute(const ULanguageTag* langtag, int32_t idx) {
   2565     const char *attr = NULL;
   2566     AttributeListEntry *cur = langtag->attributes;
   2567     int32_t i = 0;
   2568     while (cur) {
   2569         if (i == idx) {
   2570             attr = cur->attribute;
   2571             break;
   2572         }
   2573         cur = cur->next;
   2574         i++;
   2575     }
   2576     return attr;
   2577 }
   2578 #endif
   2579 
   2580 static int32_t
   2581 ultag_getAttributesSize(const ULanguageTag* langtag) {
   2582     int32_t size = 0;
   2583     AttributeListEntry *cur = langtag->attributes;
   2584     while (TRUE) {
   2585         if (cur == NULL) {
   2586             break;
   2587         }
   2588         size++;
   2589         cur = cur->next;
   2590     }
   2591     return size;
   2592 }
   2593 
   2594 static const char*
   2595 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
   2596     const char *key = NULL;
   2597     ExtensionListEntry *cur = langtag->extensions;
   2598     int32_t i = 0;
   2599     while (cur) {
   2600         if (i == idx) {
   2601             key = cur->key;
   2602             break;
   2603         }
   2604         cur = cur->next;
   2605         i++;
   2606     }
   2607     return key;
   2608 }
   2609 
   2610 static const char*
   2611 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
   2612     const char *val = NULL;
   2613     ExtensionListEntry *cur = langtag->extensions;
   2614     int32_t i = 0;
   2615     while (cur) {
   2616         if (i == idx) {
   2617             val = cur->value;
   2618             break;
   2619         }
   2620         cur = cur->next;
   2621         i++;
   2622     }
   2623     return val;
   2624 }
   2625 
   2626 static int32_t
   2627 ultag_getExtensionsSize(const ULanguageTag* langtag) {
   2628     int32_t size = 0;
   2629     ExtensionListEntry *cur = langtag->extensions;
   2630     while (TRUE) {
   2631         if (cur == NULL) {
   2632             break;
   2633         }
   2634         size++;
   2635         cur = cur->next;
   2636     }
   2637     return size;
   2638 }
   2639 
   2640 static const char*
   2641 ultag_getPrivateUse(const ULanguageTag* langtag) {
   2642     return langtag->privateuse;
   2643 }
   2644 
   2645 #if 0
   2646 static const char*
   2647 ultag_getGrandfathered(const ULanguageTag* langtag) {
   2648     return langtag->grandfathered;
   2649 }
   2650 #endif
   2651 
   2652 
   2653 /*
   2654 * -------------------------------------------------
   2655 *
   2656 * Locale/BCP47 conversion APIs, exposed as uloc_*
   2657 *
   2658 * -------------------------------------------------
   2659 */
   2660 U_DRAFT int32_t U_EXPORT2
   2661 uloc_toLanguageTag(const char* localeID,
   2662                    char* langtag,
   2663                    int32_t langtagCapacity,
   2664                    UBool strict,
   2665                    UErrorCode* status) {
   2666     /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
   2667     char canonical[256];
   2668     int32_t reslen = 0;
   2669     UErrorCode tmpStatus = U_ZERO_ERROR;
   2670     UBool hadPosix = FALSE;
   2671     const char* pKeywordStart;
   2672 
   2673     /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
   2674     canonical[0] = 0;
   2675     if (uprv_strlen(localeID) > 0) {
   2676         uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
   2677         if (tmpStatus != U_ZERO_ERROR) {
   2678             *status = U_ILLEGAL_ARGUMENT_ERROR;
   2679             return 0;
   2680         }
   2681     }
   2682 
   2683     /* For handling special case - private use only tag */
   2684     pKeywordStart = locale_getKeywordsStart(canonical);
   2685     if (pKeywordStart == canonical) {
   2686         UEnumeration *kwdEnum;
   2687         int kwdCnt = 0;
   2688         UBool done = FALSE;
   2689 
   2690         kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
   2691         if (kwdEnum != NULL) {
   2692             kwdCnt = uenum_count(kwdEnum, &tmpStatus);
   2693             if (kwdCnt == 1) {
   2694                 const char *key;
   2695                 int32_t len = 0;
   2696 
   2697                 key = uenum_next(kwdEnum, &len, &tmpStatus);
   2698                 if (len == 1 && *key == PRIVATEUSE) {
   2699                     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   2700                     buf[0] = PRIVATEUSE;
   2701                     buf[1] = SEP;
   2702                     len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
   2703                     if (U_SUCCESS(tmpStatus)) {
   2704                         if (_isPrivateuseValueSubtags(&buf[2], len)) {
   2705                             /* return private use only tag */
   2706                             reslen = len + 2;
   2707                             uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
   2708                             u_terminateChars(langtag, langtagCapacity, reslen, status);
   2709                             done = TRUE;
   2710                         } else if (strict) {
   2711                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   2712                             done = TRUE;
   2713                         }
   2714                         /* if not strict mode, then "und" will be returned */
   2715                     } else {
   2716                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   2717                         done = TRUE;
   2718                     }
   2719                 }
   2720             }
   2721             uenum_close(kwdEnum);
   2722             if (done) {
   2723                 return reslen;
   2724             }
   2725         }
   2726     }
   2727 
   2728     reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
   2729     reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2730     reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2731     reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
   2732     reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
   2733     reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
   2734 
   2735     return reslen;
   2736 }
   2737 
   2738 
   2739 U_DRAFT int32_t U_EXPORT2
   2740 uloc_forLanguageTag(const char* langtag,
   2741                     char* localeID,
   2742                     int32_t localeIDCapacity,
   2743                     int32_t* parsedLength,
   2744                     UErrorCode* status) {
   2745     ULanguageTag *lt;
   2746     int32_t reslen = 0;
   2747     const char *subtag, *p;
   2748     int32_t len;
   2749     int32_t i, n, m;
   2750     UBool noRegion = TRUE;
   2751 
   2752     lt = ultag_parse(langtag, -1, parsedLength, status);
   2753     if (U_FAILURE(*status)) {
   2754         return 0;
   2755     }
   2756 
   2757     /* language */
   2758     subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
   2759     if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
   2760         len = (int32_t)uprv_strlen(subtag);
   2761         if (len > 0) {
   2762             if (reslen < localeIDCapacity) {
   2763                 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
   2764             }
   2765             reslen += len;
   2766         }
   2767     }
   2768 
   2769     /* script */
   2770     subtag = ultag_getScript(lt);
   2771     len = (int32_t)uprv_strlen(subtag);
   2772     if (len > 0) {
   2773         if (reslen < localeIDCapacity) {
   2774             *(localeID + reslen) = LOCALE_SEP;
   2775         }
   2776         reslen++;
   2777 
   2778         /* write out the script in title case */
   2779         p = subtag;
   2780         while (*p) {
   2781             if (reslen < localeIDCapacity) {
   2782                 if (p == subtag) {
   2783                     *(localeID + reslen) = uprv_toupper(*p);
   2784                 } else {
   2785                     *(localeID + reslen) = *p;
   2786                 }
   2787             }
   2788             reslen++;
   2789             p++;
   2790         }
   2791     }
   2792 
   2793     /* region */
   2794     subtag = ultag_getRegion(lt);
   2795     len = (int32_t)uprv_strlen(subtag);
   2796     if (len > 0) {
   2797         if (reslen < localeIDCapacity) {
   2798             *(localeID + reslen) = LOCALE_SEP;
   2799         }
   2800         reslen++;
   2801         /* write out the retion in upper case */
   2802         p = subtag;
   2803         while (*p) {
   2804             if (reslen < localeIDCapacity) {
   2805                 *(localeID + reslen) = uprv_toupper(*p);
   2806             }
   2807             reslen++;
   2808             p++;
   2809         }
   2810         noRegion = FALSE;
   2811     }
   2812 
   2813     /* variants */
   2814     n = ultag_getVariantsSize(lt);
   2815     if (n > 0) {
   2816         if (noRegion) {
   2817             if (reslen < localeIDCapacity) {
   2818                 *(localeID + reslen) = LOCALE_SEP;
   2819             }
   2820             reslen++;
   2821         }
   2822 
   2823         for (i = 0; i < n; i++) {
   2824             subtag = ultag_getVariant(lt, i);
   2825             if (reslen < localeIDCapacity) {
   2826                 *(localeID + reslen) = LOCALE_SEP;
   2827             }
   2828             reslen++;
   2829             /* write out the variant in upper case */
   2830             p = subtag;
   2831             while (*p) {
   2832                 if (reslen < localeIDCapacity) {
   2833                     *(localeID + reslen) = uprv_toupper(*p);
   2834                 }
   2835                 reslen++;
   2836                 p++;
   2837             }
   2838         }
   2839     }
   2840 
   2841     /* keywords */
   2842     n = ultag_getExtensionsSize(lt);
   2843     m = ultag_getAttributesSize(lt);
   2844     subtag = ultag_getPrivateUse(lt);
   2845     if (n > 0 || m > 0 || uprv_strlen(subtag) > 0) {
   2846         if (reslen == 0 && (n > 0 || m > 0)) {
   2847             /* need a language */
   2848             if (reslen < localeIDCapacity) {
   2849                 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
   2850             }
   2851             reslen += LANG_UND_LEN;
   2852         }
   2853         len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
   2854         reslen += len;
   2855     }
   2856 
   2857     ultag_close(lt);
   2858     return u_terminateChars(localeID, localeIDCapacity, reslen, status);
   2859 }
   2860 
   2861 
   2862