Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2009, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 */
      7 
      8 #include "unicode/utypes.h"
      9 #include "unicode/ures.h"
     10 #include "unicode/putil.h"
     11 #include "unicode/uloc.h"
     12 #include "ustr_imp.h"
     13 #include "cmemory.h"
     14 #include "cstring.h"
     15 #include "putilimp.h"
     16 
     17 /* struct holding a single variant.
           Node of a singly linked list; _addVariantToList() inserts nodes in
           uprv_strcmp() order of their variant string and refuses a node whose
           variant is already in the list. */
     18 typedef struct VariantListEntry {
     19     const char              *variant;   /* NUL-terminated variant string (compared with uprv_strcmp) */
     20     struct VariantListEntry *next;      /* following node, NULL at the tail */
     21 } VariantListEntry;
     22 
     23 /* struct holding a single extension.
           Node of a singly linked list of key/value pairs; _addExtensionToList()
           inserts nodes in sorted key order and refuses a node whose key compares
           equal to one already in the list. */
     24 typedef struct ExtensionListEntry {
     25     const char                  *key;    /* NUL-terminated extension key */
     26     const char                  *value;  /* value paired with key */
     27     struct ExtensionListEntry   *next;   /* following node, NULL at the tail */
     28 } ExtensionListEntry;
     29 
     30 #define MAXEXTLANG 3   /* number of slots in ULanguageTag.extlang */
     31 typedef struct ULanguageTag {
     32     char                *buf;   /* holding parsed subtags */
     33     const char          *language;   /* set to EMPTY by _initializeULanguageTag() */
     34     const char          *extlang[MAXEXTLANG];   /* every slot set to NULL by _initializeULanguageTag() */
     35     const char          *script;   /* set to EMPTY by _initializeULanguageTag() */
     36     const char          *region;   /* set to EMPTY by _initializeULanguageTag() */
     37     VariantListEntry    *variants;   /* head of the variant list, NULL when there are none */
     38     ExtensionListEntry  *extensions;   /* head of the extension list, NULL when there are none */
     39     const char          *privateuse;   /* set to EMPTY by _initializeULanguageTag() */
     40     const char          *grandfathered;   /* set to EMPTY by _initializeULanguageTag() */
     41 } ULanguageTag;
     42 
     43 #define MINLEN 2   /* NOTE(review): not referenced in this part of the file; presumably a minimum tag length - confirm */
     44 #define SEP '-'   /* separator between subtags of a language tag */
     45 #define PRIVATEUSE 'x'   /* private use singleton; _isExtensionSingleton() rejects it */
     46 #define LDMLEXT 'u'   /* singleton under which _addExtensionToList() ranks multi-character keys */
     47 
     48 #define LOCALE_SEP '_'   /* NOTE(review): the LOCALE_* separators are not referenced in this part of the file; presumably the ICU locale ID delimiters - confirm */
     49 #define LOCALE_EXT_SEP '@'
     50 #define LOCALE_KEYWORD_SEP ';'
     51 #define LOCALE_KEY_TYPE_SEP '='
     52 
     53 #define ISALPHA(c) (((c)>='A' && (c)<='Z') || ((c)>='a' && (c)<='z'))   /* letter range test; evaluates c up to four times */
     54 #define ISNUMERIC(c) ((c)>='0' && (c)<='9')   /* digit range test; evaluates c up to twice */
     55 
     56 static const char* EMPTY = "";   /* value _initializeULanguageTag() gives to the string fields */
     57 static const char* LANG_UND = "und";   /* language written by _appendLanguageToLanguageTag() when no valid language is available */
     58 static const char* PRIVATEUSE_KEY = "x";   /* NOTE(review): not referenced in this part of the file */
     59 
     60 #define LANG_UND_LEN 3   /* length of LANG_UND without its terminator */
     61 
     62 static const char* GRANDFATHERED[] = {
     63 /*  grandfathered   preferred
           Flat list of string pairs: a grandfathered tag followed by its
           preferred replacement ("" where the table gives none).  A NULL, NULL
           pair ends the list. */
     64     "art-lojban",   "jbo",
     65     "cel-gaulish",  "",
     66     "en-GB-oed",    "",
     67     "i-ami",        "ami",
     68     "i-bnn",        "bnn",
     69     "i-default",    "",
     70     "i-enochian",   "",
     71     "i-hak",        "hak",
     72     "i-klingon",    "tlh",
     73     "i-lux",        "lb",
     74     "i-mingo",      "",
     75     "i-navajo",     "nv",
     76     "i-pwn",        "pwn",
     77     "i-tao",        "tao",
     78     "i-tay",        "tay",
     79     "i-tsu",        "tsu",
     80     "no-bok",       "nb",
     81     "no-nyn",       "nn",
     82     "sgn-be-fr",    "sfb",
     83     "sgn-be-nl",    "vgt",
     84     "sgn-ch-de",    "sgg",
     85     "zh-guoyu",     "cmn",
     86     "zh-hakka",     "hak",
     87     "zh-min",       "",
     88     "zh-min-nan",   "nan",
     89     "zh-xiang",     "hsn",
     90     NULL,           NULL
     91 };
     92 
     93 static const char* DEPRECATEDLANGS[] = {
     94 /*  deprecated  new
           Flat list of string pairs ended by a NULL, NULL pair.
           _appendLanguageToLanguageTag() walks it two entries at a time and
           replaces a language code equal to the first string of a pair with
           the second. */
     95     "iw",       "he",
     96     "ji",       "yi",
     97     "in",       "id",
     98     NULL,       NULL
     99 };
    100 
    101 /*
    102 * -------------------------------------------------
    103 *
    104 * These ultag_ functions may be exposed as APIs later
    105 *
        * They are forward-declared here as file-local (static) functions;
        * their definitions are not in this part of the file.  The two
        * prototypes wrapped in #if 0 are compiled out.
        *
        * NOTE(review): judging by the signatures only, ultag_parse() builds a
        * ULanguageTag from tagLen bytes of tag and reports how much it consumed
        * through parsedLen, ultag_close() releases it, and the getters read
        * individual parts (by index for the list-valued ones) - confirm
        * against the definitions.
        *
    106 * -------------------------------------------------
    107 */
    108 
    109 static ULanguageTag*
    110 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
    111 
    112 static void
    113 ultag_close(ULanguageTag* langtag);
    114 
    115 static const char*
    116 ultag_getLanguage(const ULanguageTag* langtag);
    117 
    118 #if 0
    119 static const char*
    120 ultag_getJDKLanguage(const ULanguageTag* langtag);
    121 #endif
    122 
    123 static const char*
    124 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
    125 
    126 static int32_t
    127 ultag_getExtlangSize(const ULanguageTag* langtag);
    128 
    129 static const char*
    130 ultag_getScript(const ULanguageTag* langtag);
    131 
    132 static const char*
    133 ultag_getRegion(const ULanguageTag* langtag);
    134 
    135 static const char*
    136 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
    137 
    138 static int32_t
    139 ultag_getVariantsSize(const ULanguageTag* langtag);
    140 
    141 static const char*
    142 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
    143 
    144 static const char*
    145 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
    146 
    147 static int32_t
    148 ultag_getExtensionsSize(const ULanguageTag* langtag);
    149 
    150 static const char*
    151 ultag_getPrivateUse(const ULanguageTag* langtag);
    152 
    153 #if 0
    154 static const char*
    155 ultag_getGrandfathered(const ULanguageTag* langtag);
    156 #endif
    157 
    158 /*
    159 * -------------------------------------------------
    160 *
    161 * Language subtag syntax validation functions
    162 *
    163 * -------------------------------------------------
    164 */
    165 
    166 static UBool
    167 _isAlphaString(const char* s, int32_t len) {
    168     int32_t i;
    169     for (i = 0; i < len; i++) {
    170         if (!ISALPHA(*(s + i))) {
    171             return FALSE;
    172         }
    173     }
    174     return TRUE;
    175 }
    176 
    177 static UBool
    178 _isNumericString(const char* s, int32_t len) {
    179     int32_t i;
    180     for (i = 0; i < len; i++) {
    181         if (!ISNUMERIC(*(s + i))) {
    182             return FALSE;
    183         }
    184     }
    185     return TRUE;
    186 }
    187 
    188 static UBool
    189 _isAlphaNumericString(const char* s, int32_t len) {
    190     int32_t i;
    191     for (i = 0; i < len; i++) {
    192         if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
    193             return FALSE;
    194         }
    195     }
    196     return TRUE;
    197 }
    198 
    199 static UBool
    200 _isLanguageSubtag(const char* s, int32_t len) {
    201     /*
    202      * language      = 2*3ALPHA            ; shortest ISO 639 code
    203      *                 ["-" extlang]       ; sometimes followed by
    204      *                                     ;   extended language subtags
    205      *               / 4ALPHA              ; or reserved for future use
    206      *               / 5*8ALPHA            ; or registered language subtag
    207      */
    208     if (len < 0) {
    209         len = uprv_strlen(s);
    210     }
    211     if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
    212         return TRUE;
    213     }
    214     return FALSE;
    215 }
    216 
    217 static UBool
    218 _isExtlangSubtag(const char* s, int32_t len) {
    219     /*
    220      * extlang       = 3ALPHA              ; selected ISO 639 codes
    221      *                 *2("-" 3ALPHA)      ; permanently reserved
    222      */
    223     if (len < 0) {
    224         len = uprv_strlen(s);
    225     }
    226     if (len == 3 && _isAlphaString(s, len)) {
    227         return TRUE;
    228     }
    229     return FALSE;
    230 }
    231 
    232 static UBool
    233 _isScriptSubtag(const char* s, int32_t len) {
    234     /*
    235      * script        = 4ALPHA              ; ISO 15924 code
    236      */
    237     if (len < 0) {
    238         len = uprv_strlen(s);
    239     }
    240     if (len == 4 && _isAlphaString(s, len)) {
    241         return TRUE;
    242     }
    243     return FALSE;
    244 }
    245 
    246 static UBool
    247 _isRegionSubtag(const char* s, int32_t len) {
    248     /*
    249      * region        = 2ALPHA              ; ISO 3166-1 code
    250      *               / 3DIGIT              ; UN M.49 code
    251      */
    252     if (len < 0) {
    253         len = uprv_strlen(s);
    254     }
    255     if (len == 2 && _isAlphaString(s, len)) {
    256         return TRUE;
    257     }
    258     if (len == 3 && _isNumericString(s, len)) {
    259         return TRUE;
    260     }
    261     return FALSE;
    262 }
    263 
    264 static UBool
    265 _isVariantSubtag(const char* s, int32_t len) {
    266     /*
    267      * variant       = 5*8alphanum         ; registered variants
    268      *               / (DIGIT 3alphanum)
    269      */
    270     if (len < 0) {
    271         len = uprv_strlen(s);
    272     }
    273     if (len >= 5 && len <= 8 && _isAlphaString(s, len)) {
    274         return TRUE;
    275     }
    276     if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
    277         return TRUE;
    278     }
    279     return FALSE;
    280 }
    281 
    282 static UBool
    283 _isExtensionSingleton(const char* s, int32_t len) {
    284     /*
    285      * extension     = singleton 1*("-" (2*8alphanum))
    286      */
    287     if (len < 0) {
    288         len = uprv_strlen(s);
    289     }
    290     if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
    291         return TRUE;
    292     }
    293     return FALSE;
    294 }
    295 
    296 static UBool
    297 _isExtensionSubtag(const char* s, int32_t len) {
    298     /*
    299      * extension     = singleton 1*("-" (2*8alphanum))
    300      */
    301     if (len < 0) {
    302         len = uprv_strlen(s);
    303     }
    304     if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
    305         return TRUE;
    306     }
    307     return FALSE;
    308 }
    309 
    310 static UBool
    311 _isExtensionSubtags(const char* s, int32_t len) {
    312     const char *p = s;
    313     const char *pSubtag = NULL;
    314 
    315     if (len < 0) {
    316         len = uprv_strlen(s);
    317     }
    318 
    319     while ((p - s) < len) {
    320         if (*p == SEP) {
    321             if (pSubtag == NULL) {
    322                 return FALSE;
    323             }
    324             if (!_isExtensionSubtag(pSubtag, p - pSubtag)) {
    325                 return FALSE;
    326             }
    327             pSubtag = NULL;
    328         } else if (pSubtag == NULL) {
    329             pSubtag = p;
    330         }
    331         p++;
    332     }
    333     if (pSubtag == NULL) {
    334         return FALSE;
    335     }
    336     return _isExtensionSubtag(pSubtag, p - pSubtag);
    337 }
    338 
    339 static UBool
    340 _isPrivateuseValueSubtag(const char* s, int32_t len) {
    341     /*
    342      * privateuse    = "x" 1*("-" (1*8alphanum))
    343      */
    344     if (len < 0) {
    345         len = uprv_strlen(s);
    346     }
    347     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
    348         return TRUE;
    349     }
    350     return FALSE;
    351 }
    352 
    353 static UBool
    354 _isPrivateuseValueSubtags(const char* s, int32_t len) {
    355     const char *p = s;
    356     const char *pSubtag = NULL;
    357 
    358     if (len < 0) {
    359         len = uprv_strlen(s);
    360     }
    361 
    362     while ((p - s) < len) {
    363         if (*p == SEP) {
    364             if (pSubtag == NULL) {
    365                 return FALSE;
    366             }
    367             if (!_isPrivateuseValueSubtag(pSubtag, p - pSubtag)) {
    368                 return FALSE;
    369             }
    370             pSubtag = NULL;
    371         } else if (pSubtag == NULL) {
    372             pSubtag = p;
    373         }
    374         p++;
    375     }
    376     if (pSubtag == NULL) {
    377         return FALSE;
    378     }
    379     return _isPrivateuseValueSubtag(pSubtag, p - pSubtag);
    380 }
    381 
    382 static UBool
    383 _isLDMLKey(const char* s, int32_t len) {
    384     if (len < 0) {
    385         len = uprv_strlen(s);
    386     }
    387     if (len == 2 && _isAlphaNumericString(s, len)) {
    388         return TRUE;
    389     }
    390     return FALSE;
    391 }
    392 
    393 static UBool
    394 _isLDMLType(const char* s, int32_t len) {
    395     if (len < 0) {
    396         len = uprv_strlen(s);
    397     }
    398     if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
    399         return TRUE;
    400     }
    401     return FALSE;
    402 }
    403 
    404 /*
    405 * -------------------------------------------------
    406 *
    407 * Helper functions
    408 *
    409 * -------------------------------------------------
    410 */
    411 
    412 static UBool
    413 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
    414     UBool bAdded = TRUE;
    415 
    416     if (*first == NULL) {
    417         var->next = NULL;
    418         *first = var;
    419     } else {
    420         VariantListEntry *prev, *cur;
    421         int32_t cmp;
    422 
    423         /* reorder variants in alphabetical order */
    424         prev = NULL;
    425         cur = *first;
    426         while (TRUE) {
    427             if (cur == NULL) {
    428                 prev->next = var;
    429                 var->next = NULL;
    430                 break;
    431             }
    432             cmp = uprv_strcmp(var->variant, cur->variant);
    433             if (cmp < 0) {
    434                 if (prev == NULL) {
    435                     *first = var;
    436                 } else {
    437                     prev->next = var;
    438                 }
    439                 var->next = cur;
    440                 break;
    441             }
    442             if (cmp == 0) {
    443                 /* duplicated variant */
    444                 bAdded = FALSE;
    445                 break;
    446             }
    447             prev = cur;
    448             cur = cur->next;
    449         }
    450     }
    451 
    452     return bAdded;
    453 }
    454 
    455 
    456 static UBool
    457 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
    458     UBool bAdded = TRUE;
    459 
    460     if (*first == NULL) {
    461         ext->next = NULL;
    462         *first = ext;
    463     } else {
    464         ExtensionListEntry *prev, *cur;
    465         int32_t cmp;
    466 
    467         /* reorder variants in alphabetical order */
    468         prev = NULL;
    469         cur = *first;
    470         while (TRUE) {
    471             if (cur == NULL) {
    472                 prev->next = ext;
    473                 ext->next = NULL;
    474                 break;
    475             }
    476             if (localeToBCP) {
    477                 /* special handling for locale to bcp conversion */
    478                 int32_t len, curlen;
    479 
    480                 len = uprv_strlen(ext->key);
    481                 curlen = uprv_strlen(cur->key);
    482 
    483                 if (len == 1 && curlen == 1) {
    484                     if (*(ext->key) == *(cur->key)) {
    485                         cmp = 0;
    486                     } else if (*(ext->key) == PRIVATEUSE) {
    487                         cmp = 1;
    488                     } else if (*(cur->key) == PRIVATEUSE) {
    489                         cmp = -1;
    490                     } else {
    491                         cmp = *(ext->key) - *(cur->key);
    492                     }
    493                 } else if (len == 1) {
    494                     cmp = *(ext->key) - LDMLEXT;
    495                 } else if (curlen == 1) {
    496                     cmp = LDMLEXT - *(cur->key);
    497                 } else {
    498                     cmp = uprv_strcmp(ext->key, cur->key);
    499                 }
    500             } else {
    501                 cmp = uprv_strcmp(ext->key, cur->key);
    502             }
    503             if (cmp < 0) {
    504                 if (prev == NULL) {
    505                     *first = ext;
    506                 } else {
    507                     prev->next = ext;
    508                 }
    509                 ext->next = cur;
    510                 break;
    511             }
    512             if (cmp == 0) {
    513                 /* duplicated extension key */
    514                 bAdded = FALSE;
    515                 break;
    516             }
    517             prev = cur;
    518             cur = cur->next;
    519         }
    520     }
    521 
    522     return bAdded;
    523 }
    524 
    525 static void
    526 _initializeULanguageTag(ULanguageTag* langtag) {
    527     int32_t i;
    528 
    529     langtag->buf = NULL;
    530 
    531     langtag->language = EMPTY;
    532     for (i = 0; i < MAXEXTLANG; i++) {
    533         langtag->extlang[i] = NULL;
    534     }
    535 
    536     langtag->script = EMPTY;
    537     langtag->region = EMPTY;
    538 
    539     langtag->variants = NULL;
    540     langtag->extensions = NULL;
    541 
    542     langtag->grandfathered = EMPTY;
    543     langtag->privateuse = EMPTY;
    544 }
    545 
    546 #define SUPPLEMENTAL    "supplementalData"   /* bundle name handed to ures_openDirect() by the mapping functions */
    547 #define BCP47MAPPINGS   "bcp47KeywordMappings"   /* table inside that bundle that holds the key and type mappings */
    548 #define MAX_BCP47_SUBTAG_LEN    9   /* including null terminator */
    549 #define MAX_LDML_KEY_LEN        22   /* size of the key scratch buffers; keys of 22 or more characters are rejected */
    550 #define MAX_LDML_TYPE_LEN       32   /* size of the type scratch buffer in _ldmlTypeToBCP47() */
    551 
    552 static int32_t
    553 _ldmlKeyToBCP47(const char* key, int32_t keyLen,
    554                 char* bcpKey, int32_t bcpKeyCapacity,
    555                 UErrorCode *status) {
    556     UResourceBundle *rb;
    557     char keyBuf[MAX_LDML_KEY_LEN];
    558     char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    559     int32_t resultLen = 0;
    560     int32_t i;
    561     UErrorCode tmpStatus = U_ZERO_ERROR;
    562     const UChar *uBcpKey;
    563     int32_t bcpKeyLen;
    564 
    565     if (keyLen < 0) {
    566         keyLen = uprv_strlen(key);
    567     }
    568 
    569     if (keyLen >= sizeof(keyBuf)) {
    570         /* no known valid LDML key exceeding 21 */
    571         *status = U_ILLEGAL_ARGUMENT_ERROR;
    572         return 0;
    573     }
    574 
    575     uprv_memcpy(keyBuf, key, keyLen);
    576     keyBuf[keyLen] = 0;
    577 
    578     /* to lower case */
    579     for (i = 0; i < keyLen; i++) {
    580         keyBuf[i] = uprv_tolower(keyBuf[i]);
    581     }
    582 
    583     rb = ures_openDirect(NULL, SUPPLEMENTAL, status);
    584     ures_getByKey(rb, BCP47MAPPINGS, rb, status);
    585     ures_getByKey(rb, "key", rb, status);
    586 
    587     if (U_FAILURE(*status)) {
    588         ures_close(rb);
    589         return 0;
    590     }
    591 
    592     uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
    593     if (U_SUCCESS(tmpStatus)) {
    594         u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
    595         bcpKeyBuf[bcpKeyLen] = 0;
    596         resultLen = bcpKeyLen;
    597     } else {
    598         if (_isLDMLKey(key, keyLen)) {
    599             uprv_memcpy(bcpKeyBuf, key, keyLen);
    600             bcpKeyBuf[keyLen] = 0;
    601             resultLen = keyLen;
    602         } else {
    603             /* mapping not availabe */
    604             *status = U_ILLEGAL_ARGUMENT_ERROR;
    605         }
    606     }
    607     ures_close(rb);
    608 
    609     if (U_FAILURE(*status)) {
    610         return 0;
    611     }
    612 
    613     uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
    614     return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
    615 }
    616 
    617 static int32_t
    618 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
    619                 char* key, int32_t keyCapacity,
    620                 UErrorCode *status) {
    621     UResourceBundle *rb;
    622     char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    623     int32_t resultLen = 0;
    624     int32_t i;
    625     const char *resKey = NULL;
    626     UResourceBundle *keyMap;
    627 
    628     if (bcpKeyLen < 0) {
    629         bcpKeyLen = uprv_strlen(bcpKey);
    630     }
    631 
    632     if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
    633         *status = U_ILLEGAL_ARGUMENT_ERROR;
    634         return 0;
    635     }
    636 
    637     uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
    638     bcpKeyBuf[bcpKeyLen] = 0;
    639 
    640     /* to lower case */
    641     for (i = 0; i < bcpKeyLen; i++) {
    642         bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
    643     }
    644 
    645     rb = ures_openDirect(NULL, SUPPLEMENTAL, status);
    646     ures_getByKey(rb, BCP47MAPPINGS, rb, status);
    647     ures_getByKey(rb, "key", rb, status);
    648     if (U_FAILURE(*status)) {
    649         ures_close(rb);
    650         return 0;
    651     }
    652 
    653     keyMap = ures_getNextResource(rb, NULL, status);
    654     while (U_SUCCESS(*status)) {
    655         const UChar *uBcpKey;
    656         char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    657         int32_t tmpBcpKeyLen;
    658 
    659         uBcpKey = ures_getString(keyMap, &tmpBcpKeyLen, status);
    660         if (U_FAILURE(*status)) {
    661             break;
    662         }
    663         u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
    664         tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
    665         if (uprv_strcmp(bcpKeyBuf, tmpBcpKeyBuf) == 0) {
    666             /* found a matching BCP47 key */
    667             resKey = ures_getKey(keyMap);
    668             resultLen = uprv_strlen(resKey);
    669             break;
    670         }
    671         if (!ures_hasNext(rb)) {
    672             break;
    673         }
    674         ures_getNextResource(rb, keyMap, status);
    675     }
    676     ures_close(keyMap);
    677     ures_close(rb);
    678 
    679     if (U_FAILURE(*status)) {
    680         return 0;
    681     }
    682 
    683     if (resKey == NULL) {
    684         resKey = bcpKeyBuf;
    685         resultLen = bcpKeyLen;
    686     }
    687 
    688     uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
    689     return u_terminateChars(key, keyCapacity, resultLen, status);
    690 }
    691 
    692 static int32_t
    693 _ldmlTypeToBCP47(const char* key, int32_t keyLen,
    694                  const char* type, int32_t typeLen,
    695                  char* bcpType, int32_t bcpTypeCapacity,
    696                  UErrorCode *status) {
    697 
    698     UResourceBundle *rb;
    699     char keyBuf[MAX_LDML_KEY_LEN];
    700     char typeBuf[MAX_LDML_TYPE_LEN];
    701     char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
    702     int32_t resultLen = 0;
    703     int32_t i;
    704     UErrorCode tmpStatus = U_ZERO_ERROR;
    705     const UChar *uBcpType;
    706     int32_t bcpTypeLen;
    707     UBool isTimezone = FALSE;
    708 
    709     if (keyLen < 0) {
    710         keyLen = uprv_strlen(key);
    711     }
    712     if (keyLen >= sizeof(keyBuf)) {
    713         /* no known valid LDML key exceeding 21 */
    714         *status = U_ILLEGAL_ARGUMENT_ERROR;
    715         return 0;
    716     }
    717     uprv_memcpy(keyBuf, key, keyLen);
    718     keyBuf[keyLen] = 0;
    719 
    720     /* to lower case */
    721     for (i = 0; i < keyLen; i++) {
    722         keyBuf[i] = uprv_tolower(keyBuf[i]);
    723     }
    724     if (uprv_strcmp(keyBuf, "timezone") == 0) {
    725         isTimezone = TRUE;
    726     }
    727 
    728     if (typeLen < 0) {
    729         typeLen = uprv_strlen(type);
    730     }
    731     if (typeLen >= sizeof(typeBuf)) {
    732         *status = U_ILLEGAL_ARGUMENT_ERROR;
    733         return 0;
    734     }
    735     uprv_memcpy(typeBuf, type, typeLen);
    736     typeBuf[typeLen] = 0;
    737 
    738     for (i = 0; i < typeLen; i++) {
    739         if (isTimezone && typeBuf[i] == '/') {
    740             typeBuf[i] = ':';
    741         } else {
    742             typeBuf[i] = uprv_tolower(typeBuf[i]);
    743         }
    744     }
    745 
    746     rb = ures_openDirect(NULL, SUPPLEMENTAL, status);
    747     ures_getByKey(rb, BCP47MAPPINGS, rb, status);
    748     if (U_FAILURE(*status)) {
    749         ures_close(rb);
    750         return 0;
    751     }
    752 
    753     ures_getByKey(rb, keyBuf, rb, &tmpStatus);
    754     uBcpType = ures_getStringByKey(rb, typeBuf, &bcpTypeLen, &tmpStatus);
    755     if (U_SUCCESS(tmpStatus)) {
    756         u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
    757         resultLen = bcpTypeLen;
    758     } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
    759         if (_isLDMLType(type, typeLen)) {
    760             uprv_memcpy(bcpTypeBuf, type, typeLen);
    761             resultLen = typeLen;
    762         } else {
    763             /* mapping not availabe */
    764             *status = U_ILLEGAL_ARGUMENT_ERROR;
    765         }
    766     } else {
    767         *status = tmpStatus;
    768     }
    769     ures_close(rb);
    770 
    771     if (U_FAILURE(*status)) {
    772         return 0;
    773     }
    774 
    775     uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
    776     return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
    777 }
    778 
    779 static int32_t
    780 _bcp47ToLDMLType(const char* key, int32_t keyLen,
    781                  const char* bcpType, int32_t bcpTypeLen,
    782                  char* type, int32_t typeCapacity,
    783                  UErrorCode *status) {
    784     UResourceBundle *rb;
    785     char keyBuf[MAX_LDML_KEY_LEN];
    786     char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
    787     int32_t resultLen = 0;
    788     int32_t i;
    789     const char *resType = NULL;
    790     UResourceBundle *typeMap;
    791     UErrorCode tmpStatus = U_ZERO_ERROR;
    792     int32_t copyLen;
    793 
    794     if (keyLen < 0) {
    795         keyLen = uprv_strlen(key);
    796     }
    797 
    798     if (keyLen >= sizeof(keyBuf)) {
    799         /* no known valid LDML key exceeding 21 */
    800         *status = U_ILLEGAL_ARGUMENT_ERROR;
    801         return 0;
    802     }
    803     uprv_memcpy(keyBuf, key, keyLen);
    804     keyBuf[keyLen] = 0;
    805 
    806     /* to lower case */
    807     for (i = 0; i < keyLen; i++) {
    808         keyBuf[i] = uprv_tolower(keyBuf[i]);
    809     }
    810 
    811 
    812     if (bcpTypeLen < 0) {
    813         bcpTypeLen = uprv_strlen(bcpType);
    814     }
    815 
    816     if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
    817         *status = U_ILLEGAL_ARGUMENT_ERROR;
    818         return 0;
    819     }
    820 
    821     uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
    822     bcpTypeBuf[bcpTypeLen] = 0;
    823 
    824     /* to lower case */
    825     for (i = 0; i < bcpTypeLen; i++) {
    826         bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
    827     }
    828 
    829     rb = ures_openDirect(NULL, SUPPLEMENTAL, status);
    830     ures_getByKey(rb, BCP47MAPPINGS, rb, status);
    831     if (U_FAILURE(*status)) {
    832         ures_close(rb);
    833         return 0;
    834     }
    835 
    836     ures_getByKey(rb, keyBuf, rb, &tmpStatus);
    837     typeMap = ures_getNextResource(rb, NULL, &tmpStatus);
    838     while (U_SUCCESS(tmpStatus)) {
    839         const UChar *uBcpType;
    840         char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
    841         int32_t tmpBcpTypeLen;
    842 
    843         uBcpType = ures_getString(typeMap, &tmpBcpTypeLen, &tmpStatus);
    844         if (U_FAILURE(tmpStatus)) {
    845             break;
    846         }
    847         u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
    848         tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
    849         if (uprv_strcmp(bcpTypeBuf, tmpBcpTypeBuf) == 0) {
    850             /* found a matching BCP47 type */
    851             resType = ures_getKey(typeMap);
    852             resultLen = uprv_strlen(resType);
    853             break;
    854         }
    855         if (!ures_hasNext(rb)) {
    856             break;
    857         }
    858         ures_getNextResource(rb, typeMap, &tmpStatus);
    859     }
    860     ures_close(typeMap);
    861     ures_close(rb);
    862 
    863     if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
    864         *status = tmpStatus;
    865         return 0;
    866     }
    867 
    868     if (resType == NULL) {
    869         resType = bcpTypeBuf;
    870         resultLen = bcpTypeLen;
    871     }
    872 
    873     copyLen = uprv_min(resultLen, typeCapacity);
    874     uprv_memcpy(type, resType, copyLen);
    875 
    876     if (uprv_strcmp(keyBuf, "timezone") == 0) {
    877         for (i = 0; i < copyLen; i++) {
    878             if (*(type + i) == ':') {
    879                 *(type + i) = '/';
    880             }
    881         }
    882     }
    883 
    884     return u_terminateChars(type, typeCapacity, resultLen, status);
    885 }
    886 
    887 static int32_t
    888 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
    889     char buf[ULOC_LANG_CAPACITY];
    890     UErrorCode tmpStatus = U_ZERO_ERROR;
    891     int32_t len, i;
    892     int32_t reslen = 0;
    893 
    894     if (U_FAILURE(*status)) {
    895         return 0;
    896     }
    897 
    898     len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
    899     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
    900         if (strict) {
    901             *status = U_ILLEGAL_ARGUMENT_ERROR;
    902             return 0;
    903         }
    904         len = 0;
    905     }
    906 
    907     /* Note: returned language code is in lower case letters */
    908 
    909     if (len == 0) {
    910         if (reslen < capacity) {
    911             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
    912         }
    913         reslen += LANG_UND_LEN;
    914     } else if (!_isLanguageSubtag(buf, len)) {
    915             /* invalid language code */
    916         if (strict) {
    917             *status = U_ILLEGAL_ARGUMENT_ERROR;
    918             return 0;
    919         }
    920         if (reslen < capacity) {
    921             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
    922         }
    923         reslen += LANG_UND_LEN;
    924     } else {
    925         /* resolve deprecated */
    926         for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
    927             if (uprv_strcmp(buf, DEPRECATEDLANGS[i]) == 0) {
    928                 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
    929                 len = uprv_strlen(buf);
    930                 break;
    931             }
    932         }
    933         if (reslen < capacity) {
    934             uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
    935         }
    936         reslen += len;
    937     }
    938     u_terminateChars(appendAt, capacity, reslen, status);
    939     return reslen;
    940 }
    941 
    942 static int32_t
    943 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
    944     char buf[ULOC_SCRIPT_CAPACITY];
    945     UErrorCode tmpStatus = U_ZERO_ERROR;
    946     int32_t len, i;
    947     int32_t reslen = 0;
    948 
    949     if (U_FAILURE(*status)) {
    950         return 0;
    951     }
    952 
    953     len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
    954     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
    955         if (strict) {
    956             *status = U_ILLEGAL_ARGUMENT_ERROR;
    957         }
    958         return 0;
    959     }
    960 
    961     if (len > 0) {
    962         if (!_isScriptSubtag(buf, len)) {
    963             /* invalid script code */
    964             if (strict) {
    965                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    966             }
    967             return 0;
    968         } else {
    969             /* to lowercase */
    970             for (i = 0; i < len; i++) {
    971                 buf[i] = uprv_tolower(buf[i]);
    972             }
    973             if (reslen < capacity) {
    974                 *(appendAt + reslen) = SEP;
    975             }
    976             reslen++;
    977 
    978             if (reslen < capacity) {
    979                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
    980             }
    981             reslen += len;
    982         }
    983     }
    984     u_terminateChars(appendAt, capacity, reslen, status);
    985     return reslen;
    986 }
    987 
    988 static int32_t
    989 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
    990     char buf[ULOC_COUNTRY_CAPACITY];
    991     UErrorCode tmpStatus = U_ZERO_ERROR;
    992     int32_t len, i;
    993     int32_t reslen = 0;
    994 
    995     if (U_FAILURE(*status)) {
    996         return 0;
    997     }
    998 
    999     len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
   1000     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1001         if (strict) {
   1002             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1003         }
   1004         return 0;
   1005     }
   1006 
   1007     if (len > 0) {
   1008         if (!_isRegionSubtag(buf, len)) {
   1009             /* invalid region code */
   1010             if (strict) {
   1011                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1012             }
   1013             return 0;
   1014         } else {
   1015             /* to lowercase */
   1016             for (i = 0; i < len; i++) {
   1017                 buf[i] = uprv_tolower(buf[i]);
   1018             }
   1019             if (reslen < capacity) {
   1020                 *(appendAt + reslen) = SEP;
   1021             }
   1022             reslen++;
   1023 
   1024             if (reslen < capacity) {
   1025                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
   1026             }
   1027             reslen += len;
   1028         }
   1029     }
   1030     u_terminateChars(appendAt, capacity, reslen, status);
   1031     return reslen;
   1032 }
   1033 
   1034 static int32_t
   1035 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
   1036     char buf[ULOC_FULLNAME_CAPACITY];
   1037     UErrorCode tmpStatus = U_ZERO_ERROR;
   1038     int32_t len, i;
   1039     int32_t reslen = 0;
   1040 
   1041     if (U_FAILURE(*status)) {
   1042         return 0;
   1043     }
   1044 
   1045     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
   1046     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1047         if (strict) {
   1048             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1049         }
   1050         return 0;
   1051     }
   1052 
   1053     if (len > 0) {
   1054         char *p, *pVar;
   1055         UBool bNext = TRUE;
   1056         VariantListEntry *var;
   1057         VariantListEntry *varFirst = NULL;
   1058 
   1059         pVar = NULL;
   1060         p = buf;
   1061         while (bNext) {
   1062             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
   1063                 if (*p == 0) {
   1064                     bNext = FALSE;
   1065                 } else {
   1066                     *p = 0; /* terminate */
   1067                 }
   1068                 if (pVar == NULL) {
   1069                     if (strict) {
   1070                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1071                         break;
   1072                     }
   1073                     /* ignore empty variant */
   1074                 } else {
   1075                     /* to lowercase */
   1076                     for (i = 0; *(pVar + i) != 0; i++) {
   1077                         *(pVar + i) = uprv_tolower(*(pVar + i));
   1078                     }
   1079 
   1080                     /* validate */
   1081                     if (_isVariantSubtag(pVar, -1)) {
   1082                         /* emit the variant to the list */
   1083                         var = uprv_malloc(sizeof(VariantListEntry));
   1084                         if (var == NULL) {
   1085                             *status = U_MEMORY_ALLOCATION_ERROR;
   1086                             break;
   1087                         }
   1088                         var->variant = pVar;
   1089                         if (!_addVariantToList(&varFirst, var)) {
   1090                             /* duplicated variant */
   1091                             uprv_free(var);
   1092                             if (strict) {
   1093                                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1094                                 break;
   1095                             }
   1096                         }
   1097                     } else if (strict) {
   1098                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1099                         break;
   1100                     }
   1101                 }
   1102                 /* reset variant starting position */
   1103                 pVar = NULL;
   1104             } else if (pVar == NULL) {
   1105                 pVar = p;
   1106             }
   1107             p++;
   1108         }
   1109 
   1110         if (U_SUCCESS(*status)) {
   1111             if (varFirst != NULL) {
   1112                 int32_t varLen;
   1113 
   1114                 /* write out sorted/validated/normalized variants to the target */
   1115                 var = varFirst;
   1116                 while (var != NULL) {
   1117                     if (reslen < capacity) {
   1118                         *(appendAt + reslen) = SEP;
   1119                     }
   1120                     reslen++;
   1121                     varLen = uprv_strlen(var->variant);
   1122                     if (reslen < capacity) {
   1123                         uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
   1124                     }
   1125                     reslen += varLen;
   1126                     var = var->next;
   1127                 }
   1128             }
   1129         }
   1130 
   1131         /* clean up */
   1132         var = varFirst;
   1133         while (var != NULL) {
   1134             VariantListEntry *tmpVar = var->next;
   1135             uprv_free(var);
   1136             var = tmpVar;
   1137         }
   1138 
   1139         if (U_FAILURE(*status)) {
   1140             return 0;
   1141         }
   1142     }
   1143 
   1144     u_terminateChars(appendAt, capacity, reslen, status);
   1145     return reslen;
   1146 }
   1147 
   1148 static int32_t
   1149 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
   1150     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1151     UEnumeration *keywordEnum = NULL;
   1152     int32_t reslen = 0;
   1153 
   1154     keywordEnum = uloc_openKeywords(localeID, status);
   1155     if (U_FAILURE(*status)) {
   1156         uenum_close(keywordEnum);
   1157         return 0;
   1158     }
   1159     if (keywordEnum != NULL) {
   1160         /* reorder extensions */
   1161         int32_t len;
   1162         const char *key;
   1163         ExtensionListEntry *firstExt = NULL;
   1164         ExtensionListEntry *ext;
   1165         char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1166         char *pExtBuf = extBuf;
   1167         int32_t extBufCapacity = sizeof(extBuf);
   1168         const char *bcpKey, *bcpValue;
   1169         UErrorCode tmpStatus = U_ZERO_ERROR;
   1170         int32_t keylen;
   1171         UBool isLDMLKeyword;
   1172 
   1173         while (TRUE) {
   1174             key = uenum_next(keywordEnum, NULL, status);
   1175             if (key == NULL) {
   1176                 break;
   1177             }
   1178             len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
   1179             if (U_FAILURE(tmpStatus)) {
   1180                 if (strict) {
   1181                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1182                     break;
   1183                 }
   1184                 /* ignore this keyword */
   1185                 tmpStatus = U_ZERO_ERROR;
   1186                 continue;
   1187             }
   1188 
   1189             keylen = uprv_strlen(key);
   1190             isLDMLKeyword = (keylen > 1);
   1191 
   1192             if (isLDMLKeyword) {
   1193                 int32_t modKeyLen;
   1194 
   1195                 /* transform key and value to bcp47 style */
   1196                 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
   1197                 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1198                     if (strict) {
   1199                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1200                         break;
   1201                     }
   1202                     tmpStatus = U_ZERO_ERROR;
   1203                     continue;
   1204                 }
   1205 
   1206                 bcpKey = pExtBuf;
   1207                 pExtBuf += (modKeyLen + 1);
   1208                 extBufCapacity -= (modKeyLen + 1);
   1209 
   1210                 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
   1211                 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1212                     if (strict) {
   1213                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1214                         break;
   1215                     }
   1216                     tmpStatus = U_ZERO_ERROR;
   1217                     continue;
   1218                 }
   1219                 bcpValue = pExtBuf;
   1220                 pExtBuf += (len + 1);
   1221                 extBufCapacity -= (len + 1);
   1222             } else {
   1223                 if (*key == PRIVATEUSE) {
   1224                     if (!_isPrivateuseValueSubtags(buf, len)) {
   1225                         if (strict) {
   1226                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1227                             break;
   1228                         }
   1229                         continue;
   1230                     }
   1231                 } else {
   1232                     if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
   1233                         if (strict) {
   1234                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1235                             break;
   1236                         }
   1237                         continue;
   1238                     }
   1239                 }
   1240                 bcpKey = key;
   1241                 if ((len + 1) < extBufCapacity) {
   1242                     uprv_memcpy(pExtBuf, buf, len);
   1243                     bcpValue = pExtBuf;
   1244 
   1245                     pExtBuf += len;
   1246 
   1247                     *pExtBuf = 0;
   1248                     pExtBuf++;
   1249 
   1250                     extBufCapacity -= (len + 1);
   1251                 } else {
   1252                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1253                     break;
   1254                 }
   1255             }
   1256 
   1257             /* create ExtensionListEntry */
   1258             ext = uprv_malloc(sizeof(ExtensionListEntry));
   1259             if (ext == NULL) {
   1260                 *status = U_MEMORY_ALLOCATION_ERROR;
   1261                 break;
   1262             }
   1263             ext->key = bcpKey;
   1264             ext->value = bcpValue;
   1265 
   1266             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
   1267                 uprv_free(ext);
   1268                 if (strict) {
   1269                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1270                     break;
   1271                 }
   1272             }
   1273         }
   1274         if (U_SUCCESS(*status) && (firstExt != NULL)) {
   1275             UBool startLDMLExtension = FALSE;
   1276 
   1277             /* write out the sorted BCP47 extensions and private use */
   1278             ext = firstExt;
   1279             while (ext != NULL) {
   1280                 if (uprv_strlen(ext->key) > 1 && !startLDMLExtension) {
   1281                     /* write LDML singleton extension */
   1282                     if (reslen < capacity) {
   1283                         *(appendAt + reslen) = SEP;
   1284                     }
   1285                     reslen++;
   1286                     if (reslen < capacity) {
   1287                         *(appendAt + reslen) = LDMLEXT;
   1288                     }
   1289                     reslen++;
   1290                     startLDMLExtension = TRUE;
   1291                 }
   1292 
   1293                 if (reslen < capacity) {
   1294                     *(appendAt + reslen) = SEP;
   1295                 }
   1296                 reslen++;
   1297                 len = uprv_strlen(ext->key);
   1298                 if (reslen < capacity) {
   1299                     uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
   1300                 }
   1301                 reslen += len;
   1302                 if (reslen < capacity) {
   1303                     *(appendAt + reslen) = SEP;
   1304                 }
   1305                 reslen++;
   1306                 len = uprv_strlen(ext->value);
   1307                 if (reslen < capacity) {
   1308                     uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
   1309                 }
   1310                 reslen += len;
   1311 
   1312                 ext = ext->next;
   1313             }
   1314         }
   1315         /* clean up */
   1316         ext = firstExt;
   1317         while (ext != NULL) {
   1318             ExtensionListEntry *tmpExt = ext->next;
   1319             uprv_free(ext);
   1320             ext = tmpExt;
   1321         }
   1322 
   1323         uenum_close(keywordEnum);
   1324 
   1325         if (U_FAILURE(*status)) {
   1326             return 0;
   1327         }
   1328     }
   1329 
   1330     return u_terminateChars(appendAt, capacity, reslen, status);
   1331 }
   1332 
   1333 /**
   1334  * Append keywords parsed from LDML extension value
   1335  * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
   1336  * Note: char* buf is used for storing keywords
   1337  */
   1338 static void
   1339 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UErrorCode *status) {
   1340     const char *p, *pNext, *pSep;
   1341     const char *pBcpKey, *pBcpType;
   1342     const char *pKey, *pType;
   1343     int32_t bcpKeyLen = 0, bcpTypeLen;
   1344     ExtensionListEntry *kwd, *nextKwd;
   1345     ExtensionListEntry *kwdFirst = NULL;
   1346     int32_t bufIdx = 0;
   1347     int32_t  len;
   1348 
   1349     pNext = ldmlext;
   1350     pBcpKey = pBcpType = NULL;
   1351     while (pNext) {
   1352         p = pSep = pNext;
   1353 
   1354         /* locate next separator char */
   1355         while (*pSep) {
   1356             if (*pSep == SEP) {
   1357                 break;
   1358             }
   1359             pSep++;
   1360         }
   1361         if (*pSep == 0) {
   1362             /* last subtag */
   1363             pNext = NULL;
   1364         } else {
   1365             pNext = pSep + 1;
   1366         }
   1367 
   1368         if (pBcpKey == NULL) {
   1369             pBcpKey = p;
   1370             bcpKeyLen = pSep - p;
   1371         } else {
   1372             pBcpType = p;
   1373             bcpTypeLen = pSep - p;
   1374 
   1375             /* BCP key to locale key */
   1376             len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
   1377             if (U_FAILURE(*status)) {
   1378                 goto cleanup;
   1379             }
   1380             pKey = buf + bufIdx;
   1381             bufIdx += len;
   1382             *(buf + bufIdx) = 0;
   1383             bufIdx++;
   1384 
   1385             /* BCP type to locale type */
   1386             len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
   1387             if (U_FAILURE(*status)) {
   1388                 goto cleanup;
   1389             }
   1390             pType = buf + bufIdx;
   1391             bufIdx += len;
   1392             *(buf + bufIdx) = 0;
   1393             bufIdx++;
   1394 
   1395             /* create an ExtensionListEntry for this keyword */
   1396             kwd = uprv_malloc(sizeof(ExtensionListEntry));
   1397             if (kwd == NULL) {
   1398                 *status = U_MEMORY_ALLOCATION_ERROR;
   1399                 goto cleanup;
   1400             }
   1401 
   1402             kwd->key = pKey;
   1403             kwd->value = pType;
   1404 
   1405             if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1406                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1407                 uprv_free(kwd);
   1408                 goto cleanup;
   1409             }
   1410 
   1411             /* for next pair */
   1412             pBcpKey = NULL;
   1413             pBcpType = NULL;
   1414         }
   1415     }
   1416 
   1417     if (pBcpKey != NULL) {
   1418         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1419         goto cleanup;
   1420     }
   1421 
   1422     kwd = kwdFirst;
   1423     while (kwd != NULL) {
   1424         nextKwd = kwd->next;
   1425         _addExtensionToList(appendTo, kwd, FALSE);
   1426         kwd = nextKwd;
   1427     }
   1428 
   1429     return;
   1430 
   1431 cleanup:
   1432     kwd = kwdFirst;
   1433     while (kwd != NULL) {
   1434         nextKwd = kwd->next;
   1435         uprv_free(kwd);
   1436         kwd = nextKwd;
   1437     }
   1438 }
   1439 
   1440 
   1441 static int32_t
   1442 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
   1443     int32_t reslen = 0;
   1444     int32_t i, n;
   1445     int32_t len;
   1446     ExtensionListEntry *kwdFirst = NULL;
   1447     ExtensionListEntry *kwd;
   1448     const char *key, *type;
   1449     char kwdBuf[ULOC_KEYWORDS_CAPACITY];
   1450 
   1451     if (U_FAILURE(*status)) {
   1452         return 0;
   1453     }
   1454 
   1455     n = ultag_getExtensionsSize(langtag);
   1456 
   1457     /* resolve locale keywords and reordering keys */
   1458     for (i = 0; i < n; i++) {
   1459         key = ultag_getExtensionKey(langtag, i);
   1460         type = ultag_getExtensionValue(langtag, i);
   1461         if (*key == LDMLEXT) {
   1462             _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, sizeof(kwdBuf), status);
   1463             if (U_FAILURE(*status)) {
   1464                 break;
   1465             }
   1466         } else {
   1467             kwd = uprv_malloc(sizeof(ExtensionListEntry));
   1468             if (kwd == NULL) {
   1469                 *status = U_MEMORY_ALLOCATION_ERROR;
   1470                 break;
   1471             }
   1472             kwd->key = key;
   1473             kwd->value = type;
   1474             if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1475                 uprv_free(kwd);
   1476                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1477                 break;
   1478             }
   1479         }
   1480     }
   1481 
   1482     if (U_SUCCESS(*status)) {
   1483         type = ultag_getPrivateUse(langtag);
   1484         if (uprv_strlen(type) > 0) {
   1485             /* add private use as a keyword */
   1486             kwd = uprv_malloc(sizeof(ExtensionListEntry));
   1487             if (kwd == NULL) {
   1488                 *status = U_MEMORY_ALLOCATION_ERROR;
   1489             } else {
   1490                 kwd->key = PRIVATEUSE_KEY;
   1491                 kwd->value = type;
   1492                 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1493                     uprv_free(kwd);
   1494                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1495                 }
   1496             }
   1497         }
   1498     }
   1499 
   1500     if (U_SUCCESS(*status) && kwdFirst != NULL) {
   1501         /* write out the sorted keywords */
   1502         kwd = kwdFirst;
   1503         while (kwd != NULL) {
   1504             if (reslen < capacity) {
   1505                 if (kwd == kwdFirst) {
   1506                     /* '@' */
   1507                     *(appendAt + reslen) = LOCALE_EXT_SEP;
   1508                 } else {
   1509                     /* ';' */
   1510                     *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
   1511                 }
   1512             }
   1513             reslen++;
   1514 
   1515             /* key */
   1516             len = uprv_strlen(kwd->key);
   1517             if (reslen < capacity) {
   1518                 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
   1519             }
   1520             reslen += len;
   1521 
   1522             /* '=' */
   1523             if (reslen < capacity) {
   1524                 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
   1525             }
   1526             reslen++;
   1527 
   1528             /* type */
   1529             len = uprv_strlen(kwd->value);
   1530             if (reslen < capacity) {
   1531                 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
   1532             }
   1533             reslen += len;
   1534 
   1535             kwd = kwd->next;
   1536         }
   1537     }
   1538 
   1539     /* clean up */
   1540     kwd = kwdFirst;
   1541     while (kwd != NULL) {
   1542         ExtensionListEntry *tmpKwd = kwd->next;
   1543         uprv_free(kwd);
   1544         kwd = tmpKwd;
   1545     }
   1546 
   1547     if (U_FAILURE(*status)) {
   1548         return 0;
   1549     }
   1550 
   1551     return u_terminateChars(appendAt, capacity, reslen, status);
   1552 }
   1553 
   1554 /*
   1555 * -------------------------------------------------
   1556 *
   1557 * ultag_ functions
   1558 *
   1559 * -------------------------------------------------
   1560 */
   1561 
   1562 /* Bit flags used by the parser */
   1563 #define LANG 0x0001
   1564 #define EXTL 0x0002
   1565 #define SCRT 0x0004
   1566 #define REGN 0x0008
   1567 #define VART 0x0010
   1568 #define EXTS 0x0020
   1569 #define EXTV 0x0040
   1570 #define PRIV 0x0080
   1571 
   1572 static ULanguageTag*
   1573 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
   1574     ULanguageTag *t;
   1575     char *tagBuf;
   1576     int16_t next;
   1577     char *pSubtag, *pNext, *pLastGoodPosition;
   1578     int32_t subtagLen;
   1579     int32_t extlangIdx;
   1580     ExtensionListEntry *pExtension;
   1581     char *pExtValueSubtagEnd;
   1582     int32_t i;
   1583     UBool isLDMLExtension, reqLDMLType;
   1584 
   1585     if (parsedLen != NULL) {
   1586         *parsedLen = 0;
   1587     }
   1588 
   1589     if (U_FAILURE(*status)) {
   1590         return NULL;
   1591     }
   1592 
   1593     if (tagLen < 0) {
   1594         tagLen = uprv_strlen(tag);
   1595     }
   1596 
   1597     /* copy the entire string */
   1598     tagBuf = (char*)uprv_malloc(tagLen + 1);
   1599     if (tagBuf == NULL) {
   1600         *status = U_MEMORY_ALLOCATION_ERROR;
   1601         return NULL;
   1602     }
   1603     uprv_memcpy(tagBuf, tag, tagLen);
   1604     *(tagBuf + tagLen) = 0;
   1605 
   1606     /* to lower case */
   1607     for (i = 0; i < tagLen; i++) {
   1608         tagBuf[i] = uprv_tolower(tagBuf[i]);
   1609     }
   1610 
   1611     /* create a ULanguageTag */
   1612     t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
   1613     _initializeULanguageTag(t);
   1614     t->buf = tagBuf;
   1615     if (t == NULL) {
   1616         uprv_free(tagBuf);
   1617         *status = U_MEMORY_ALLOCATION_ERROR;
   1618         return NULL;
   1619     }
   1620 
   1621     if (tagLen < MINLEN) {
   1622         /* the input tag is too short - return empty ULanguageTag */
   1623         return t;
   1624     }
   1625 
   1626     /* check if the tag is grandfathered */
   1627     for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
   1628         if (uprv_strcmp(GRANDFATHERED[i], tagBuf) == 0) {
   1629             /* a grandfathered tag is always longer than its preferred mapping */
   1630             uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
   1631             t->language = t->buf;
   1632             if (parsedLen != NULL) {
   1633                 *parsedLen = tagLen;
   1634             }
   1635             return t;
   1636         }
   1637     }
   1638 
   1639     /*
   1640      * langtag      =   language
   1641      *                  ["-" script]
   1642      *                  ["-" region]
   1643      *                  *("-" variant)
   1644      *                  *("-" extension)
   1645      *                  ["-" privateuse]
   1646      */
   1647 
   1648     next = LANG | PRIV;
   1649     pNext = pLastGoodPosition = tagBuf;
   1650     extlangIdx = 0;
   1651     pExtension = NULL;
   1652     pExtValueSubtagEnd = NULL;
   1653     isLDMLExtension = FALSE;
   1654     reqLDMLType = FALSE;
   1655 
   1656     while (pNext) {
   1657         char *pSep;
   1658 
   1659         pSubtag = pNext;
   1660 
   1661         /* locate next separator char */
   1662         pSep = pSubtag;
   1663         while (*pSep) {
   1664             if (*pSep == SEP) {
   1665                 break;
   1666             }
   1667             pSep++;
   1668         }
   1669         if (*pSep == 0) {
   1670             /* last subtag */
   1671             pNext = NULL;
   1672         } else {
   1673             pNext = pSep + 1;
   1674         }
   1675         subtagLen = pSep - pSubtag;
   1676 
   1677         if (next & LANG) {
   1678             if (_isLanguageSubtag(pSubtag, subtagLen)) {
   1679                 *pSep = 0;  /* terminate */
   1680                 t->language = pSubtag;
   1681 
   1682                 pLastGoodPosition = pSep;
   1683                 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
   1684                 continue;
   1685             }
   1686         }
   1687         if (next & EXTL) {
   1688             if (_isExtlangSubtag(pSubtag, subtagLen)) {
   1689                 *pSep = 0;
   1690                 t->extlang[extlangIdx++] = pSubtag;
   1691 
   1692                 pLastGoodPosition = pSep;
   1693                 if (extlangIdx < 3) {
   1694                     next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
   1695                 } else {
   1696                     next = SCRT | REGN | VART | EXTS | PRIV;
   1697                 }
   1698                 continue;
   1699             }
   1700         }
   1701         if (next & SCRT) {
   1702             if (_isScriptSubtag(pSubtag, subtagLen)) {
   1703                 *pSep = 0;
   1704                 t->script = pSubtag;
   1705 
   1706                 pLastGoodPosition = pSep;
   1707                 next = REGN | VART | EXTS | PRIV;
   1708                 continue;
   1709             }
   1710         }
   1711         if (next & REGN) {
   1712             if (_isRegionSubtag(pSubtag, subtagLen)) {
   1713                 *pSep = 0;
   1714                 t->region = pSubtag;
   1715 
   1716                 pLastGoodPosition = pSep;
   1717                 next = VART | EXTS | PRIV;
   1718                 continue;
   1719             }
   1720         }
   1721         if (next & VART) {
   1722             if (_isVariantSubtag(pSubtag, subtagLen)) {
   1723                 VariantListEntry *var;
   1724                 UBool isAdded;
   1725 
   1726                 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
   1727                 if (var == NULL) {
   1728                     *status = U_MEMORY_ALLOCATION_ERROR;
   1729                     goto error;
   1730                 }
   1731                 *pSep = 0;
   1732                 var->variant = pSubtag;
   1733                 isAdded = _addVariantToList(&(t->variants), var);
   1734                 if (!isAdded) {
   1735                     /* duplicated variant entry */
   1736                     uprv_free(var);
   1737                     break;
   1738                 }
   1739                 pLastGoodPosition = pSep;
   1740                 next = VART | EXTS | PRIV;
   1741                 continue;
   1742             }
   1743         }
   1744         if (next & EXTS) {
   1745             if (_isExtensionSingleton(pSubtag, subtagLen)) {
   1746                 if (pExtension != NULL) {
   1747                     if (pExtValueSubtagEnd == NULL) {
   1748                         /* the previous extension is incomplete */
   1749                         uprv_free(pExtension);
   1750                         break;
   1751                     }
   1752 
   1753                     /* terminate the previous extension value */
   1754                     *pExtValueSubtagEnd = 0;
   1755 
   1756                     /* insert the extension to the list */
   1757                     if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   1758                         pLastGoodPosition = pExtValueSubtagEnd;
   1759                     } else {
   1760                         /* stop parsing here */
   1761                         uprv_free(pExtension);
   1762                         pExtension = NULL;
   1763                         break;
   1764                     }
   1765 
   1766                     if (isLDMLExtension && reqLDMLType) {
   1767                         /* incomplete LDML extension key and type pair */
   1768                         pExtension = NULL;
   1769                         break;
   1770                     }
   1771                 }
   1772 
   1773                 isLDMLExtension = (*pSubtag == LDMLEXT);
   1774 
   1775                 /* create a new extension */
   1776                 pExtension = uprv_malloc(sizeof(ExtensionListEntry));
   1777                 if (pExtension == NULL) {
   1778                     *status = U_MEMORY_ALLOCATION_ERROR;
   1779                     goto error;
   1780                 }
   1781                 *pSep = 0;
   1782                 pExtension->key = pSubtag;
   1783                 pExtension->value = NULL;   /* will be set later */
   1784 
   1785                 /*
   1786                  * reset the end location of extension value
   1787                  * subtags for this extension
   1788                  */
   1789                 pExtValueSubtagEnd = NULL;
   1790 
   1791                 next = EXTV;
   1792                 continue;
   1793             }
   1794         }
   1795         if (next & EXTV) {
   1796             if (_isExtensionSubtag(pSubtag, subtagLen)) {
   1797                 if (isLDMLExtension) {
   1798                     if (reqLDMLType) {
   1799                         /* already saw an LDML key */
   1800                         if (!_isLDMLType(pSubtag, subtagLen)) {
   1801                             /* stop parsing here and let the valid LDML extension key/type
   1802                                pairs processed by the code out of this while loop */
   1803                             break;
   1804                         }
   1805                         pExtValueSubtagEnd = pSep;
   1806                         reqLDMLType = FALSE;
   1807                         next = EXTS | EXTV | PRIV;
   1808                     } else {
   1809                         /* LDML key */
   1810                         if (!_isLDMLKey(pSubtag, subtagLen)) {
   1811                             /* stop parsing here and let the valid LDML extension key/type
   1812                                pairs processed by the code out of this while loop */
   1813                             break;
   1814                         }
   1815                         reqLDMLType = TRUE;
   1816                         next = EXTV;
   1817                     }
   1818                 } else {
   1819                     /* Mark the end of this subtag */
   1820                     pExtValueSubtagEnd = pSep;
   1821                     next = EXTS | EXTV | PRIV;
   1822                 }
   1823 
   1824                 if (pExtension->value == NULL) {
   1825                     /* if the start postion of this extension's value is not yet,
   1826                        this one is the first value subtag */
   1827                     pExtension->value = pSubtag;
   1828                 }
   1829                 continue;
   1830             }
   1831         }
   1832         if (next & PRIV) {
   1833             if (*pSubtag == PRIVATEUSE) {
   1834                 char *pPrivuseVal;
   1835 
   1836                 if (pExtension != NULL) {
   1837                     /* Process the last extension */
   1838                     if (pExtValueSubtagEnd == NULL) {
   1839                         /* the previous extension is incomplete */
   1840                         uprv_free(pExtension);
   1841                         break;
   1842                     } else {
   1843                         /* terminate the previous extension value */
   1844                         *pExtValueSubtagEnd = 0;
   1845 
   1846                         /* insert the extension to the list */
   1847                         if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   1848                             pLastGoodPosition = pExtValueSubtagEnd;
   1849                             pExtension = NULL;
   1850                             pExtValueSubtagEnd = NULL;
   1851                         } else {
   1852                         /* stop parsing here */
   1853                             uprv_free(pExtension);
   1854                             pExtension = NULL;
   1855                             pExtValueSubtagEnd = NULL;
   1856                             break;
   1857                         }
   1858                     }
   1859                 }
   1860 
   1861                 /* The rest of part will be private use value subtags */
   1862                 if (pNext == NULL) {
   1863                     /* empty private use subtag */
   1864                     break;
   1865                 }
   1866                 /* back up the private use value start position */
   1867                 pPrivuseVal = pNext;
   1868 
   1869                 /* validate private use value subtags */
   1870                 while (pNext) {
   1871                     pSubtag = pNext;
   1872                     pSep = pSubtag;
   1873                     while (*pSep) {
   1874                         if (*pSep == SEP) {
   1875                             break;
   1876                         }
   1877                         pSep++;
   1878                     }
   1879                     if (*pSep == 0) {
   1880                         /* last subtag */
   1881                         pNext = NULL;
   1882                     } else {
   1883                         pNext = pSep + 1;
   1884                     }
   1885                     subtagLen = pSep - pSubtag;
   1886 
   1887                     if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
   1888                         pLastGoodPosition = pSep;
   1889                     } else {
   1890                         break;
   1891                     }
   1892                 }
   1893                 if (pLastGoodPosition - pPrivuseVal > 0) {
   1894                     *pLastGoodPosition = 0;
   1895                     t->privateuse = pPrivuseVal;
   1896                 }
   1897                 /* No more subtags, exiting the parse loop */
   1898                 break;
   1899             }
   1900             break;
   1901         }
   1902         /* If we fell through here, it means this subtag is illegal - quit parsing */
   1903         break;
   1904     }
   1905 
   1906     if (pExtension != NULL) {
   1907         /* Process the last extension */
   1908         if (pExtValueSubtagEnd == NULL) {
   1909             /* the previous extension is incomplete */
   1910             uprv_free(pExtension);
   1911         } else {
   1912             /* terminate the previous extension value */
   1913             *pExtValueSubtagEnd = 0;
   1914             /* insert the extension to the list */
   1915             if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   1916                 pLastGoodPosition = pExtValueSubtagEnd;
   1917             } else {
   1918                 uprv_free(pExtension);
   1919             }
   1920         }
   1921     }
   1922 
   1923     if (parsedLen != NULL) {
   1924         *parsedLen = pLastGoodPosition - t->buf;
   1925     }
   1926 
   1927     return t;
   1928 
   1929 error:
   1930     uprv_free(t);
   1931     return NULL;
   1932 }
   1933 
   1934 static void
   1935 ultag_close(ULanguageTag* langtag) {
   1936 
   1937     if (langtag == NULL) {
   1938         return;
   1939     }
   1940 
   1941     uprv_free(langtag->buf);
   1942 
   1943     if (langtag->variants) {
   1944         VariantListEntry *curVar = langtag->variants;
   1945         while (curVar) {
   1946             VariantListEntry *nextVar = curVar->next;
   1947             uprv_free(curVar);
   1948             curVar = nextVar;
   1949         }
   1950     }
   1951 
   1952     if (langtag->extensions) {
   1953         ExtensionListEntry *curExt = langtag->extensions;
   1954         while (curExt) {
   1955             ExtensionListEntry *nextExt = curExt->next;
   1956             uprv_free(curExt);
   1957             curExt = nextExt;
   1958         }
   1959     }
   1960 
   1961     uprv_free(langtag);
   1962 }
   1963 
   1964 static const char*
   1965 ultag_getLanguage(const ULanguageTag* langtag) {
   1966     return langtag->language;
   1967 }
   1968 
   1969 #if 0
   1970 static const char*
   1971 ultag_getJDKLanguage(const ULanguageTag* langtag) {
   1972     int32_t i;
   1973     for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
   1974         if (uprv_strcmp(DEPRECATEDLANGS[i], langtag->language) == 0) {
   1975             return DEPRECATEDLANGS[i + 1];
   1976         }
   1977     }
   1978     return langtag->language;
   1979 }
   1980 #endif
   1981 
   1982 static const char*
   1983 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
   1984     if (idx >= 0 && idx < MAXEXTLANG) {
   1985         return langtag->extlang[idx];
   1986     }
   1987     return NULL;
   1988 }
   1989 
   1990 static int32_t
   1991 ultag_getExtlangSize(const ULanguageTag* langtag) {
   1992     int32_t size = 0;
   1993     int32_t i;
   1994     for (i = 0; i < MAXEXTLANG; i++) {
   1995         if (langtag->extlang[i]) {
   1996             size++;
   1997         }
   1998     }
   1999     return size;
   2000 }
   2001 
   2002 static const char*
   2003 ultag_getScript(const ULanguageTag* langtag) {
   2004     return langtag->script;
   2005 }
   2006 
   2007 static const char*
   2008 ultag_getRegion(const ULanguageTag* langtag) {
   2009     return langtag->region;
   2010 }
   2011 
   2012 static const char*
   2013 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
   2014     const char *var = NULL;
   2015     VariantListEntry *cur = langtag->variants;
   2016     int32_t i = 0;
   2017     while (cur) {
   2018         if (i == idx) {
   2019             var = cur->variant;
   2020             break;
   2021         }
   2022         cur = cur->next;
   2023         i++;
   2024     }
   2025     return var;
   2026 }
   2027 
   2028 static int32_t
   2029 ultag_getVariantsSize(const ULanguageTag* langtag) {
   2030     int32_t size = 0;
   2031     VariantListEntry *cur = langtag->variants;
   2032     while (TRUE) {
   2033         if (cur == NULL) {
   2034             break;
   2035         }
   2036         size++;
   2037         cur = cur->next;
   2038     }
   2039     return size;
   2040 }
   2041 
   2042 static const char*
   2043 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
   2044     const char *key = NULL;
   2045     ExtensionListEntry *cur = langtag->extensions;
   2046     int32_t i = 0;
   2047     while (cur) {
   2048         if (i == idx) {
   2049             key = cur->key;
   2050             break;
   2051         }
   2052         cur = cur->next;
   2053         i++;
   2054     }
   2055     return key;
   2056 }
   2057 
   2058 static const char*
   2059 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
   2060     const char *val = NULL;
   2061     ExtensionListEntry *cur = langtag->extensions;
   2062     int32_t i = 0;
   2063     while (cur) {
   2064         if (i == idx) {
   2065             val = cur->value;
   2066             break;
   2067         }
   2068         cur = cur->next;
   2069         i++;
   2070     }
   2071     return val;
   2072 }
   2073 
   2074 static int32_t
   2075 ultag_getExtensionsSize(const ULanguageTag* langtag) {
   2076     int32_t size = 0;
   2077     ExtensionListEntry *cur = langtag->extensions;
   2078     while (TRUE) {
   2079         if (cur == NULL) {
   2080             break;
   2081         }
   2082         size++;
   2083         cur = cur->next;
   2084     }
   2085     return size;
   2086 }
   2087 
   2088 static const char*
   2089 ultag_getPrivateUse(const ULanguageTag* langtag) {
   2090     return langtag->privateuse;
   2091 }
   2092 
   2093 #if 0
   2094 static const char*
   2095 ultag_getGrandfathered(const ULanguageTag* langtag) {
   2096     return langtag->grandfathered;
   2097 }
   2098 #endif
   2099 
   2100 
   2101 /*
   2102 * -------------------------------------------------
   2103 *
   2104 * Locale/BCP47 conversion APIs, exposed as uloc_*
   2105 *
   2106 * -------------------------------------------------
   2107 */
   2108 U_DRAFT int32_t U_EXPORT2
   2109 uloc_toLanguageTag(const char* localeID,
   2110                    char* langtag,
   2111                    int32_t langtagCapacity,
   2112                    UBool strict,
   2113                    UErrorCode* status) {
   2114     /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
   2115     char canonical[256];
   2116     int32_t reslen = 0;
   2117     UErrorCode tmpStatus = U_ZERO_ERROR;
   2118 
   2119     /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
   2120     canonical[0] = 0;
   2121     if (uprv_strlen(localeID) > 0) {
   2122         uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
   2123         if (tmpStatus != U_ZERO_ERROR) {
   2124             *status = U_ILLEGAL_ARGUMENT_ERROR;
   2125             return 0;
   2126         }
   2127     }
   2128 
   2129     reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
   2130     reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2131     reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2132     reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2133     reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2134 
   2135     return reslen;
   2136 }
   2137 
   2138 
   2139 U_DRAFT int32_t U_EXPORT2
   2140 uloc_forLanguageTag(const char* langtag,
   2141                     char* localeID,
   2142                     int32_t localeIDCapacity,
   2143                     int32_t* parsedLength,
   2144                     UErrorCode* status) {
   2145     ULanguageTag *lt;
   2146     int32_t reslen = 0;
   2147     const char *subtag, *p;
   2148     int32_t len;
   2149     int32_t i, n;
   2150     UBool noRegion = TRUE;
   2151 
   2152     lt = ultag_parse(langtag, -1, parsedLength, status);
   2153     if (U_FAILURE(*status)) {
   2154         return 0;
   2155     }
   2156 
   2157     /* language */
   2158     subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
   2159     if (uprv_strcmp(subtag, LANG_UND) != 0) {
   2160         len = uprv_strlen(subtag);
   2161         if (len > 0) {
   2162             if (reslen < localeIDCapacity) {
   2163                 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
   2164             }
   2165             reslen += len;
   2166         }
   2167     }
   2168 
   2169     /* script */
   2170     subtag = ultag_getScript(lt);
   2171     len = uprv_strlen(subtag);
   2172     if (len > 0) {
   2173         if (reslen < localeIDCapacity) {
   2174             *(localeID + reslen) = LOCALE_SEP;
   2175         }
   2176         reslen++;
   2177 
   2178         /* write out the script in title case */
   2179         p = subtag;
   2180         while (*p) {
   2181             if (reslen < localeIDCapacity) {
   2182                 if (p == subtag) {
   2183                     *(localeID + reslen) = uprv_toupper(*p);
   2184                 } else {
   2185                     *(localeID + reslen) = *p;
   2186                 }
   2187             }
   2188             reslen++;
   2189             p++;
   2190         }
   2191     }
   2192 
   2193     /* region */
   2194     subtag = ultag_getRegion(lt);
   2195     len = uprv_strlen(subtag);
   2196     if (len > 0) {
   2197         if (reslen < localeIDCapacity) {
   2198             *(localeID + reslen) = LOCALE_SEP;
   2199         }
   2200         reslen++;
   2201         /* write out the retion in upper case */
   2202         p = subtag;
   2203         while (*p) {
   2204             if (reslen < localeIDCapacity) {
   2205                 *(localeID + reslen) = uprv_toupper(*p);
   2206             }
   2207             reslen++;
   2208             p++;
   2209         }
   2210         noRegion = FALSE;
   2211     }
   2212 
   2213     /* variants */
   2214     n = ultag_getVariantsSize(lt);
   2215     if (n > 0) {
   2216         if (noRegion) {
   2217             if (reslen < localeIDCapacity) {
   2218                 *(localeID + reslen) = LOCALE_SEP;
   2219             }
   2220             reslen++;
   2221         }
   2222 
   2223         for (i = 0; i < n; i++) {
   2224             subtag = ultag_getVariant(lt, i);
   2225             if (reslen < localeIDCapacity) {
   2226                 *(localeID + reslen) = LOCALE_SEP;
   2227             }
   2228             reslen++;
   2229             /* write out the variant in upper case */
   2230             p = subtag;
   2231             while (*p) {
   2232                 if (reslen < localeIDCapacity) {
   2233                     *(localeID + reslen) = uprv_toupper(*p);
   2234                 }
   2235                 reslen++;
   2236                 p++;
   2237             }
   2238         }
   2239     }
   2240 
   2241     /* keywords */
   2242     n = ultag_getExtensionsSize(lt);
   2243     subtag = ultag_getPrivateUse(lt);
   2244     if (n > 0 || uprv_strlen(subtag) > 0) {
   2245         if (reslen == 0) {
   2246             /* need a language */
   2247             if (reslen < localeIDCapacity) {
   2248                 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
   2249             }
   2250             reslen += LANG_UND_LEN;
   2251         }
   2252         len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
   2253         reslen += len;
   2254     }
   2255 
   2256     ultag_close(lt);
   2257     return u_terminateChars(localeID, localeIDCapacity, reslen, status);
   2258 }
   2259 
   2260 
   2261