Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2009-2010, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 */
      7 
      8 #include "unicode/utypes.h"
      9 #include "unicode/ures.h"
     10 #include "unicode/putil.h"
     11 #include "unicode/uloc.h"
     12 #include "ustr_imp.h"
     13 #include "cmemory.h"
     14 #include "cstring.h"
     15 #include "putilimp.h"
     16 #include "uinvchar.h"
     17 
/*
 * struct holding a single variant.
 * Node of the singly linked list that ULanguageTag.variants points to;
 * _addVariantToList keeps the list sorted by the variant text.
 */
typedef struct VariantListEntry {
    const char              *variant;   /* variant subtag text (the list's sort key) */
    struct VariantListEntry *next;      /* following node, NULL at the end of the list */
} VariantListEntry;
     23 
/*
 * struct holding a single extension.
 * Node of the singly linked list that ULanguageTag.extensions points to;
 * _addExtensionToList keeps the list ordered by key.
 */
typedef struct ExtensionListEntry {
    const char                  *key;    /* extension key (the list's sort key) */
    const char                  *value;  /* value stored with the key */
    struct ExtensionListEntry   *next;   /* following node, NULL at the end of the list */
} ExtensionListEntry;
     30 
/* number of extlang slots kept in a ULanguageTag */
#define MAXEXTLANG 3
/*
 * Parsed representation of a language tag.
 * _initializeULanguageTag sets the plain string fields to the shared EMPTY
 * string and buf, every extlang slot and both list heads to NULL.
 */
typedef struct ULanguageTag {
    char                *buf;   /* holding parsed subtags */
    const char          *language;              /* language subtag */
    const char          *extlang[MAXEXTLANG];   /* extlang subtags; unused slots are NULL */
    const char          *script;                /* script subtag */
    const char          *region;                /* region subtag */
    VariantListEntry    *variants;              /* head of the variant list, or NULL */
    ExtensionListEntry  *extensions;            /* head of the extension list, or NULL */
    const char          *privateuse;            /* private use part */
    const char          *grandfathered;         /* grandfathered tag */
} ULanguageTag;
     43 
#define MINLEN 2            /* NOTE(review): not referenced in the visible part of the file */
#define SEP '-'             /* separator between language tag subtags */
#define PRIVATEUSE 'x'      /* singleton reserved for private use; never a regular extension singleton */
#define LDMLEXT 'u'         /* singleton that multi-character extension keys are ordered under */

/* NOTE(review): presumably the separators of the ICU locale ID syntax; not used in the visible part of the file */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/*
 * Character class tests on the literal ranges 'A'-'Z', 'a'-'z' and '0'-'9'.
 * The argument is evaluated more than once, so it must be free of side effects.
 */
#define ISALPHA(c) (((c)>='A' && (c)<='Z') || ((c)>='a' && (c)<='z'))
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')

static const char* EMPTY = "";              /* shared default for unset ULanguageTag strings */
static const char* LANG_UND = "und";        /* language subtag emitted when no usable language is available */
static const char* PRIVATEUSE_KEY = "x";    /* PRIVATEUSE as a string */

/* length of LANG_UND, excluding the terminator */
#define LANG_UND_LEN 3
     62 
/*
 * Grandfathered tags, stored as consecutive (grandfathered, preferred)
 * string pairs and terminated by a NULL pair. The preferred entry is the
 * empty string where the table lists no replacement.
 */
static const char* GRANDFATHERED[] = {
/*  grandfathered   preferred */
    "art-lojban",   "jbo",
    "cel-gaulish",  "",
    "en-GB-oed",    "",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-default",    "",
    "i-enochian",   "",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-mingo",      "",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min",       "",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",
    NULL,           NULL
};
     93 
/*
 * Deprecated language codes, stored as consecutive (deprecated, new)
 * string pairs and terminated by a NULL pair.
 * _appendLanguageToLanguageTag walks the table two entries at a time and
 * substitutes the new code for a matching deprecated one.
 */
static const char* DEPRECATEDLANGS[] = {
/*  deprecated  new */
    "iw",       "he",
    "ji",       "yi",
    "in",       "id",
    NULL,       NULL
};
    101 
    102 /*
    103 * -------------------------------------------------
    104 *
    105 * These ultag_ functions may be exposed as APIs later
    106 *
    107 * -------------------------------------------------
    108 */
    109 
/*
 * Forward declarations only; the definitions are expected further down in
 * this file. Declarations wrapped in #if 0 are compiled out.
 */

/* parser entry point: takes a tag of tagLen chars; parsedLen and status are outputs */
static ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);

/* releases a ULanguageTag */
static void
ultag_close(ULanguageTag* langtag);

static const char*
ultag_getLanguage(const ULanguageTag* langtag);

#if 0
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag);
#endif

/* indexed accessors are paired with a ...Size function */
static const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtlangSize(const ULanguageTag* langtag);

static const char*
ultag_getScript(const ULanguageTag* langtag);

static const char*
ultag_getRegion(const ULanguageTag* langtag);

static const char*
ultag_getVariant(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getVariantsSize(const ULanguageTag* langtag);

static const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);

static const char*
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtensionsSize(const ULanguageTag* langtag);

static const char*
ultag_getPrivateUse(const ULanguageTag* langtag);

#if 0
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag);
#endif
    158 
    159 /*
    160 * -------------------------------------------------
    161 *
    162 * Language subtag syntax validation functions
    163 *
    164 * -------------------------------------------------
    165 */
    166 
    167 static UBool
    168 _isAlphaString(const char* s, int32_t len) {
    169     int32_t i;
    170     for (i = 0; i < len; i++) {
    171         if (!ISALPHA(*(s + i))) {
    172             return FALSE;
    173         }
    174     }
    175     return TRUE;
    176 }
    177 
    178 static UBool
    179 _isNumericString(const char* s, int32_t len) {
    180     int32_t i;
    181     for (i = 0; i < len; i++) {
    182         if (!ISNUMERIC(*(s + i))) {
    183             return FALSE;
    184         }
    185     }
    186     return TRUE;
    187 }
    188 
    189 static UBool
    190 _isAlphaNumericString(const char* s, int32_t len) {
    191     int32_t i;
    192     for (i = 0; i < len; i++) {
    193         if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
    194             return FALSE;
    195         }
    196     }
    197     return TRUE;
    198 }
    199 
    200 static UBool
    201 _isLanguageSubtag(const char* s, int32_t len) {
    202     /*
    203      * language      = 2*3ALPHA            ; shortest ISO 639 code
    204      *                 ["-" extlang]       ; sometimes followed by
    205      *                                     ;   extended language subtags
    206      *               / 4ALPHA              ; or reserved for future use
    207      *               / 5*8ALPHA            ; or registered language subtag
    208      */
    209     if (len < 0) {
    210         len = (int32_t)uprv_strlen(s);
    211     }
    212     if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
    213         return TRUE;
    214     }
    215     return FALSE;
    216 }
    217 
    218 static UBool
    219 _isExtlangSubtag(const char* s, int32_t len) {
    220     /*
    221      * extlang       = 3ALPHA              ; selected ISO 639 codes
    222      *                 *2("-" 3ALPHA)      ; permanently reserved
    223      */
    224     if (len < 0) {
    225         len = (int32_t)uprv_strlen(s);
    226     }
    227     if (len == 3 && _isAlphaString(s, len)) {
    228         return TRUE;
    229     }
    230     return FALSE;
    231 }
    232 
    233 static UBool
    234 _isScriptSubtag(const char* s, int32_t len) {
    235     /*
    236      * script        = 4ALPHA              ; ISO 15924 code
    237      */
    238     if (len < 0) {
    239         len = (int32_t)uprv_strlen(s);
    240     }
    241     if (len == 4 && _isAlphaString(s, len)) {
    242         return TRUE;
    243     }
    244     return FALSE;
    245 }
    246 
    247 static UBool
    248 _isRegionSubtag(const char* s, int32_t len) {
    249     /*
    250      * region        = 2ALPHA              ; ISO 3166-1 code
    251      *               / 3DIGIT              ; UN M.49 code
    252      */
    253     if (len < 0) {
    254         len = (int32_t)uprv_strlen(s);
    255     }
    256     if (len == 2 && _isAlphaString(s, len)) {
    257         return TRUE;
    258     }
    259     if (len == 3 && _isNumericString(s, len)) {
    260         return TRUE;
    261     }
    262     return FALSE;
    263 }
    264 
    265 static UBool
    266 _isVariantSubtag(const char* s, int32_t len) {
    267     /*
    268      * variant       = 5*8alphanum         ; registered variants
    269      *               / (DIGIT 3alphanum)
    270      */
    271     if (len < 0) {
    272         len = (int32_t)uprv_strlen(s);
    273     }
    274     if (len >= 5 && len <= 8 && _isAlphaString(s, len)) {
    275         return TRUE;
    276     }
    277     if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
    278         return TRUE;
    279     }
    280     return FALSE;
    281 }
    282 
    283 static UBool
    284 _isExtensionSingleton(const char* s, int32_t len) {
    285     /*
    286      * extension     = singleton 1*("-" (2*8alphanum))
    287      */
    288     if (len < 0) {
    289         len = (int32_t)uprv_strlen(s);
    290     }
    291     if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
    292         return TRUE;
    293     }
    294     return FALSE;
    295 }
    296 
    297 static UBool
    298 _isExtensionSubtag(const char* s, int32_t len) {
    299     /*
    300      * extension     = singleton 1*("-" (2*8alphanum))
    301      */
    302     if (len < 0) {
    303         len = (int32_t)uprv_strlen(s);
    304     }
    305     if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
    306         return TRUE;
    307     }
    308     return FALSE;
    309 }
    310 
    311 static UBool
    312 _isExtensionSubtags(const char* s, int32_t len) {
    313     const char *p = s;
    314     const char *pSubtag = NULL;
    315 
    316     if (len < 0) {
    317         len = (int32_t)uprv_strlen(s);
    318     }
    319 
    320     while ((p - s) < len) {
    321         if (*p == SEP) {
    322             if (pSubtag == NULL) {
    323                 return FALSE;
    324             }
    325             if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
    326                 return FALSE;
    327             }
    328             pSubtag = NULL;
    329         } else if (pSubtag == NULL) {
    330             pSubtag = p;
    331         }
    332         p++;
    333     }
    334     if (pSubtag == NULL) {
    335         return FALSE;
    336     }
    337     return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
    338 }
    339 
    340 static UBool
    341 _isPrivateuseValueSubtag(const char* s, int32_t len) {
    342     /*
    343      * privateuse    = "x" 1*("-" (1*8alphanum))
    344      */
    345     if (len < 0) {
    346         len = (int32_t)uprv_strlen(s);
    347     }
    348     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
    349         return TRUE;
    350     }
    351     return FALSE;
    352 }
    353 
    354 static UBool
    355 _isPrivateuseValueSubtags(const char* s, int32_t len) {
    356     const char *p = s;
    357     const char *pSubtag = NULL;
    358 
    359     if (len < 0) {
    360         len = (int32_t)uprv_strlen(s);
    361     }
    362 
    363     while ((p - s) < len) {
    364         if (*p == SEP) {
    365             if (pSubtag == NULL) {
    366                 return FALSE;
    367             }
    368             if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
    369                 return FALSE;
    370             }
    371             pSubtag = NULL;
    372         } else if (pSubtag == NULL) {
    373             pSubtag = p;
    374         }
    375         p++;
    376     }
    377     if (pSubtag == NULL) {
    378         return FALSE;
    379     }
    380     return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
    381 }
    382 
    383 static UBool
    384 _isLDMLKey(const char* s, int32_t len) {
    385     if (len < 0) {
    386         len = (int32_t)uprv_strlen(s);
    387     }
    388     if (len == 2 && _isAlphaNumericString(s, len)) {
    389         return TRUE;
    390     }
    391     return FALSE;
    392 }
    393 
    394 static UBool
    395 _isLDMLType(const char* s, int32_t len) {
    396     if (len < 0) {
    397         len = (int32_t)uprv_strlen(s);
    398     }
    399     if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
    400         return TRUE;
    401     }
    402     return FALSE;
    403 }
    404 
    405 /*
    406 * -------------------------------------------------
    407 *
    408 * Helper functions
    409 *
    410 * -------------------------------------------------
    411 */
    412 
    413 static UBool
    414 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
    415     UBool bAdded = TRUE;
    416 
    417     if (*first == NULL) {
    418         var->next = NULL;
    419         *first = var;
    420     } else {
    421         VariantListEntry *prev, *cur;
    422         int32_t cmp;
    423 
    424         /* reorder variants in alphabetical order */
    425         prev = NULL;
    426         cur = *first;
    427         while (TRUE) {
    428             if (cur == NULL) {
    429                 prev->next = var;
    430                 var->next = NULL;
    431                 break;
    432             }
    433             cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
    434             if (cmp < 0) {
    435                 if (prev == NULL) {
    436                     *first = var;
    437                 } else {
    438                     prev->next = var;
    439                 }
    440                 var->next = cur;
    441                 break;
    442             }
    443             if (cmp == 0) {
    444                 /* duplicated variant */
    445                 bAdded = FALSE;
    446                 break;
    447             }
    448             prev = cur;
    449             cur = cur->next;
    450         }
    451     }
    452 
    453     return bAdded;
    454 }
    455 
    456 
    457 static UBool
    458 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
    459     UBool bAdded = TRUE;
    460 
    461     if (*first == NULL) {
    462         ext->next = NULL;
    463         *first = ext;
    464     } else {
    465         ExtensionListEntry *prev, *cur;
    466         int32_t cmp;
    467 
    468         /* reorder variants in alphabetical order */
    469         prev = NULL;
    470         cur = *first;
    471         while (TRUE) {
    472             if (cur == NULL) {
    473                 prev->next = ext;
    474                 ext->next = NULL;
    475                 break;
    476             }
    477             if (localeToBCP) {
    478                 /* special handling for locale to bcp conversion */
    479                 int32_t len, curlen;
    480 
    481                 len = (int32_t)uprv_strlen(ext->key);
    482                 curlen = (int32_t)uprv_strlen(cur->key);
    483 
    484                 if (len == 1 && curlen == 1) {
    485                     if (*(ext->key) == *(cur->key)) {
    486                         cmp = 0;
    487                     } else if (*(ext->key) == PRIVATEUSE) {
    488                         cmp = 1;
    489                     } else if (*(cur->key) == PRIVATEUSE) {
    490                         cmp = -1;
    491                     } else {
    492                         cmp = *(ext->key) - *(cur->key);
    493                     }
    494                 } else if (len == 1) {
    495                     cmp = *(ext->key) - LDMLEXT;
    496                 } else if (curlen == 1) {
    497                     cmp = LDMLEXT - *(cur->key);
    498                 } else {
    499                     cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
    500                 }
    501             } else {
    502                 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
    503             }
    504             if (cmp < 0) {
    505                 if (prev == NULL) {
    506                     *first = ext;
    507                 } else {
    508                     prev->next = ext;
    509                 }
    510                 ext->next = cur;
    511                 break;
    512             }
    513             if (cmp == 0) {
    514                 /* duplicated extension key */
    515                 bAdded = FALSE;
    516                 break;
    517             }
    518             prev = cur;
    519             cur = cur->next;
    520         }
    521     }
    522 
    523     return bAdded;
    524 }
    525 
    526 static void
    527 _initializeULanguageTag(ULanguageTag* langtag) {
    528     int32_t i;
    529 
    530     langtag->buf = NULL;
    531 
    532     langtag->language = EMPTY;
    533     for (i = 0; i < MAXEXTLANG; i++) {
    534         langtag->extlang[i] = NULL;
    535     }
    536 
    537     langtag->script = EMPTY;
    538     langtag->region = EMPTY;
    539 
    540     langtag->variants = NULL;
    541     langtag->extensions = NULL;
    542 
    543     langtag->grandfathered = EMPTY;
    544     langtag->privateuse = EMPTY;
    545 }
    546 
/* name of the resource bundle opened with ures_openDirect by the key/type converters */
#define KEYTYPEDATA     "keyTypeData"
/* tables looked up inside that bundle */
#define KEYMAP          "keyMap"
#define TYPEMAP         "typeMap"
#define TYPEALIAS       "typeAlias"
/* sizes of the local work buffers; inputs whose length does not fit are rejected */
#define MAX_BCP47_SUBTAG_LEN    9   /* including null terminator */
#define MAX_LDML_KEY_LEN        22  /* including null terminator */
#define MAX_LDML_TYPE_LEN       32  /* including null terminator */
    554 
    555 static int32_t
    556 _ldmlKeyToBCP47(const char* key, int32_t keyLen,
    557                 char* bcpKey, int32_t bcpKeyCapacity,
    558                 UErrorCode *status) {
    559     UResourceBundle *rb;
    560     char keyBuf[MAX_LDML_KEY_LEN];
    561     char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    562     int32_t resultLen = 0;
    563     int32_t i;
    564     UErrorCode tmpStatus = U_ZERO_ERROR;
    565     const UChar *uBcpKey;
    566     int32_t bcpKeyLen;
    567 
    568     if (keyLen < 0) {
    569         keyLen = (int32_t)uprv_strlen(key);
    570     }
    571 
    572     if (keyLen >= sizeof(keyBuf)) {
    573         /* no known valid LDML key exceeding 21 */
    574         *status = U_ILLEGAL_ARGUMENT_ERROR;
    575         return 0;
    576     }
    577 
    578     uprv_memcpy(keyBuf, key, keyLen);
    579     keyBuf[keyLen] = 0;
    580 
    581     /* to lower case */
    582     for (i = 0; i < keyLen; i++) {
    583         keyBuf[i] = uprv_tolower(keyBuf[i]);
    584     }
    585 
    586     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    587     ures_getByKey(rb, KEYMAP, rb, status);
    588 
    589     if (U_FAILURE(*status)) {
    590         ures_close(rb);
    591         return 0;
    592     }
    593 
    594     uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
    595     if (U_SUCCESS(tmpStatus)) {
    596         u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
    597         bcpKeyBuf[bcpKeyLen] = 0;
    598         resultLen = bcpKeyLen;
    599     } else {
    600         if (_isLDMLKey(key, keyLen)) {
    601             uprv_memcpy(bcpKeyBuf, key, keyLen);
    602             bcpKeyBuf[keyLen] = 0;
    603             resultLen = keyLen;
    604         } else {
    605             /* mapping not availabe */
    606             *status = U_ILLEGAL_ARGUMENT_ERROR;
    607         }
    608     }
    609     ures_close(rb);
    610 
    611     if (U_FAILURE(*status)) {
    612         return 0;
    613     }
    614 
    615     uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
    616     return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
    617 }
    618 
    619 static int32_t
    620 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
    621                 char* key, int32_t keyCapacity,
    622                 UErrorCode *status) {
    623     UResourceBundle *rb;
    624     char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    625     int32_t resultLen = 0;
    626     int32_t i;
    627     const char *resKey = NULL;
    628     UResourceBundle *mapData;
    629 
    630     if (bcpKeyLen < 0) {
    631         bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
    632     }
    633 
    634     if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
    635         *status = U_ILLEGAL_ARGUMENT_ERROR;
    636         return 0;
    637     }
    638 
    639     uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
    640     bcpKeyBuf[bcpKeyLen] = 0;
    641 
    642     /* to lower case */
    643     for (i = 0; i < bcpKeyLen; i++) {
    644         bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
    645     }
    646 
    647     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    648     ures_getByKey(rb, KEYMAP, rb, status);
    649     if (U_FAILURE(*status)) {
    650         ures_close(rb);
    651         return 0;
    652     }
    653 
    654     mapData = ures_getNextResource(rb, NULL, status);
    655     while (U_SUCCESS(*status)) {
    656         const UChar *uBcpKey;
    657         char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    658         int32_t tmpBcpKeyLen;
    659 
    660         uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
    661         if (U_FAILURE(*status)) {
    662             break;
    663         }
    664         u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
    665         tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
    666         if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) {
    667             /* found a matching BCP47 key */
    668             resKey = ures_getKey(mapData);
    669             resultLen = (int32_t)uprv_strlen(resKey);
    670             break;
    671         }
    672         if (!ures_hasNext(rb)) {
    673             break;
    674         }
    675         ures_getNextResource(rb, mapData, status);
    676     }
    677     ures_close(mapData);
    678     ures_close(rb);
    679 
    680     if (U_FAILURE(*status)) {
    681         return 0;
    682     }
    683 
    684     if (resKey == NULL) {
    685         resKey = bcpKeyBuf;
    686         resultLen = bcpKeyLen;
    687     }
    688 
    689     uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
    690     return u_terminateChars(key, keyCapacity, resultLen, status);
    691 }
    692 
    693 static int32_t
    694 _ldmlTypeToBCP47(const char* key, int32_t keyLen,
    695                  const char* type, int32_t typeLen,
    696                  char* bcpType, int32_t bcpTypeCapacity,
    697                  UErrorCode *status) {
    698     UResourceBundle *rb, *keyTypeData, *typeMapForKey;
    699     char keyBuf[MAX_LDML_KEY_LEN];
    700     char typeBuf[MAX_LDML_TYPE_LEN];
    701     char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
    702     int32_t resultLen = 0;
    703     int32_t i;
    704     UErrorCode tmpStatus = U_ZERO_ERROR;
    705     const UChar *uBcpType, *uCanonicalType;
    706     int32_t bcpTypeLen, canonicalTypeLen;
    707     UBool isTimezone = FALSE;
    708 
    709     if (keyLen < 0) {
    710         keyLen = (int32_t)uprv_strlen(key);
    711     }
    712     if (keyLen >= sizeof(keyBuf)) {
    713         /* no known valid LDML key exceeding 21 */
    714         *status = U_ILLEGAL_ARGUMENT_ERROR;
    715         return 0;
    716     }
    717     uprv_memcpy(keyBuf, key, keyLen);
    718     keyBuf[keyLen] = 0;
    719 
    720     /* to lower case */
    721     for (i = 0; i < keyLen; i++) {
    722         keyBuf[i] = uprv_tolower(keyBuf[i]);
    723     }
    724     if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
    725         isTimezone = TRUE;
    726     }
    727 
    728     if (typeLen < 0) {
    729         typeLen = (int32_t)uprv_strlen(type);
    730     }
    731     if (typeLen >= sizeof(typeBuf)) {
    732         *status = U_ILLEGAL_ARGUMENT_ERROR;
    733         return 0;
    734     }
    735 
    736     if (isTimezone) {
    737         /* replace '/' with ':' */
    738         for (i = 0; i < typeLen; i++) {
    739             if (*(type + i) == '/') {
    740                 typeBuf[i] = ':';
    741             } else {
    742                 typeBuf[i] = *(type + i);
    743             }
    744         }
    745         typeBuf[typeLen] = 0;
    746         type = &typeBuf[0];
    747     }
    748 
    749     keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
    750     rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
    751     if (U_FAILURE(*status)) {
    752         ures_close(rb);
    753         ures_close(keyTypeData);
    754         return 0;
    755     }
    756 
    757     typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
    758     uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus);
    759     if (U_SUCCESS(tmpStatus)) {
    760         u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
    761         resultLen = bcpTypeLen;
    762     } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
    763         /* is this type alias? */
    764         tmpStatus = U_ZERO_ERROR;
    765         ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
    766         ures_getByKey(rb, keyBuf, rb, &tmpStatus);
    767         uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus);
    768         if (U_SUCCESS(tmpStatus)) {
    769             u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
    770             if (isTimezone) {
    771                 /* replace '/' with ':' */
    772                 for (i = 0; i < canonicalTypeLen; i++) {
    773                     if (typeBuf[i] == '/') {
    774                         typeBuf[i] = ':';
    775                     }
    776                 }
    777             }
    778             typeBuf[canonicalTypeLen] = 0;
    779 
    780             /* look up the canonical type */
    781             uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
    782             if (U_SUCCESS(tmpStatus)) {
    783                 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
    784                 resultLen = bcpTypeLen;
    785             }
    786         }
    787         if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
    788             if (_isLDMLType(type, typeLen)) {
    789                 uprv_memcpy(bcpTypeBuf, type, typeLen);
    790                 resultLen = typeLen;
    791             } else {
    792                 /* mapping not availabe */
    793                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    794             }
    795         }
    796     } else {
    797         *status = tmpStatus;
    798     }
    799     ures_close(rb);
    800     ures_close(typeMapForKey);
    801     ures_close(keyTypeData);
    802 
    803     if (U_FAILURE(*status)) {
    804         return 0;
    805     }
    806 
    807     uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
    808     return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
    809 }
    810 
    811 static int32_t
    812 _bcp47ToLDMLType(const char* key, int32_t keyLen,
    813                  const char* bcpType, int32_t bcpTypeLen,
    814                  char* type, int32_t typeCapacity,
    815                  UErrorCode *status) {
    816     UResourceBundle *rb;
    817     char keyBuf[MAX_LDML_KEY_LEN];
    818     char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
    819     int32_t resultLen = 0;
    820     int32_t i;
    821     const char *resType = NULL;
    822     UResourceBundle *mapData;
    823     UErrorCode tmpStatus = U_ZERO_ERROR;
    824     int32_t copyLen;
    825 
    826     if (keyLen < 0) {
    827         keyLen = (int32_t)uprv_strlen(key);
    828     }
    829 
    830     if (keyLen >= sizeof(keyBuf)) {
    831         /* no known valid LDML key exceeding 21 */
    832         *status = U_ILLEGAL_ARGUMENT_ERROR;
    833         return 0;
    834     }
    835     uprv_memcpy(keyBuf, key, keyLen);
    836     keyBuf[keyLen] = 0;
    837 
    838     /* to lower case */
    839     for (i = 0; i < keyLen; i++) {
    840         keyBuf[i] = uprv_tolower(keyBuf[i]);
    841     }
    842 
    843 
    844     if (bcpTypeLen < 0) {
    845         bcpTypeLen = (int32_t)uprv_strlen(bcpType);
    846     }
    847 
    848     if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
    849         *status = U_ILLEGAL_ARGUMENT_ERROR;
    850         return 0;
    851     }
    852 
    853     uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
    854     bcpTypeBuf[bcpTypeLen] = 0;
    855 
    856     /* to lower case */
    857     for (i = 0; i < bcpTypeLen; i++) {
    858         bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
    859     }
    860 
    861     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    862     ures_getByKey(rb, TYPEMAP, rb, status);
    863     if (U_FAILURE(*status)) {
    864         ures_close(rb);
    865         return 0;
    866     }
    867 
    868     ures_getByKey(rb, keyBuf, rb, &tmpStatus);
    869     mapData = ures_getNextResource(rb, NULL, &tmpStatus);
    870     while (U_SUCCESS(tmpStatus)) {
    871         const UChar *uBcpType;
    872         char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
    873         int32_t tmpBcpTypeLen;
    874 
    875         uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
    876         if (U_FAILURE(tmpStatus)) {
    877             break;
    878         }
    879         u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
    880         tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
    881         if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) {
    882             /* found a matching BCP47 type */
    883             resType = ures_getKey(mapData);
    884             resultLen = (int32_t)uprv_strlen(resType);
    885             break;
    886         }
    887         if (!ures_hasNext(rb)) {
    888             break;
    889         }
    890         ures_getNextResource(rb, mapData, &tmpStatus);
    891     }
    892     ures_close(mapData);
    893     ures_close(rb);
    894 
    895     if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
    896         *status = tmpStatus;
    897         return 0;
    898     }
    899 
    900     if (resType == NULL) {
    901         resType = bcpTypeBuf;
    902         resultLen = bcpTypeLen;
    903     }
    904 
    905     copyLen = uprv_min(resultLen, typeCapacity);
    906     uprv_memcpy(type, resType, copyLen);
    907 
    908     if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
    909         for (i = 0; i < copyLen; i++) {
    910             if (*(type + i) == ':') {
    911                 *(type + i) = '/';
    912             }
    913         }
    914     }
    915 
    916     return u_terminateChars(type, typeCapacity, resultLen, status);
    917 }
    918 
    919 static int32_t
    920 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
    921     char buf[ULOC_LANG_CAPACITY];
    922     UErrorCode tmpStatus = U_ZERO_ERROR;
    923     int32_t len, i;
    924     int32_t reslen = 0;
    925 
    926     if (U_FAILURE(*status)) {
    927         return 0;
    928     }
    929 
    930     len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
    931     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
    932         if (strict) {
    933             *status = U_ILLEGAL_ARGUMENT_ERROR;
    934             return 0;
    935         }
    936         len = 0;
    937     }
    938 
    939     /* Note: returned language code is in lower case letters */
    940 
    941     if (len == 0) {
    942         if (reslen < capacity) {
    943             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
    944         }
    945         reslen += LANG_UND_LEN;
    946     } else if (!_isLanguageSubtag(buf, len)) {
    947             /* invalid language code */
    948         if (strict) {
    949             *status = U_ILLEGAL_ARGUMENT_ERROR;
    950             return 0;
    951         }
    952         if (reslen < capacity) {
    953             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
    954         }
    955         reslen += LANG_UND_LEN;
    956     } else {
    957         /* resolve deprecated */
    958         for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
    959             if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
    960                 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
    961                 len = (int32_t)uprv_strlen(buf);
    962                 break;
    963             }
    964         }
    965         if (reslen < capacity) {
    966             uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
    967         }
    968         reslen += len;
    969     }
    970     u_terminateChars(appendAt, capacity, reslen, status);
    971     return reslen;
    972 }
    973 
    974 static int32_t
    975 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
    976     char buf[ULOC_SCRIPT_CAPACITY];
    977     UErrorCode tmpStatus = U_ZERO_ERROR;
    978     int32_t len;
    979     int32_t reslen = 0;
    980 
    981     if (U_FAILURE(*status)) {
    982         return 0;
    983     }
    984 
    985     len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
    986     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
    987         if (strict) {
    988             *status = U_ILLEGAL_ARGUMENT_ERROR;
    989         }
    990         return 0;
    991     }
    992 
    993     if (len > 0) {
    994         if (!_isScriptSubtag(buf, len)) {
    995             /* invalid script code */
    996             if (strict) {
    997                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    998             }
    999             return 0;
   1000         } else {
   1001             if (reslen < capacity) {
   1002                 *(appendAt + reslen) = SEP;
   1003             }
   1004             reslen++;
   1005 
   1006             if (reslen < capacity) {
   1007                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
   1008             }
   1009             reslen += len;
   1010         }
   1011     }
   1012     u_terminateChars(appendAt, capacity, reslen, status);
   1013     return reslen;
   1014 }
   1015 
   1016 static int32_t
   1017 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
   1018     char buf[ULOC_COUNTRY_CAPACITY];
   1019     UErrorCode tmpStatus = U_ZERO_ERROR;
   1020     int32_t len;
   1021     int32_t reslen = 0;
   1022 
   1023     if (U_FAILURE(*status)) {
   1024         return 0;
   1025     }
   1026 
   1027     len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
   1028     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1029         if (strict) {
   1030             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1031         }
   1032         return 0;
   1033     }
   1034 
   1035     if (len > 0) {
   1036         if (!_isRegionSubtag(buf, len)) {
   1037             /* invalid region code */
   1038             if (strict) {
   1039                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1040             }
   1041             return 0;
   1042         } else {
   1043             if (reslen < capacity) {
   1044                 *(appendAt + reslen) = SEP;
   1045             }
   1046             reslen++;
   1047 
   1048             if (reslen < capacity) {
   1049                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
   1050             }
   1051             reslen += len;
   1052         }
   1053     }
   1054     u_terminateChars(appendAt, capacity, reslen, status);
   1055     return reslen;
   1056 }
   1057 
   1058 static int32_t
   1059 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
   1060     char buf[ULOC_FULLNAME_CAPACITY];
   1061     UErrorCode tmpStatus = U_ZERO_ERROR;
   1062     int32_t len, i;
   1063     int32_t reslen = 0;
   1064 
   1065     if (U_FAILURE(*status)) {
   1066         return 0;
   1067     }
   1068 
   1069     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
   1070     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1071         if (strict) {
   1072             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1073         }
   1074         return 0;
   1075     }
   1076 
   1077     if (len > 0) {
   1078         char *p, *pVar;
   1079         UBool bNext = TRUE;
   1080         VariantListEntry *var;
   1081         VariantListEntry *varFirst = NULL;
   1082 
   1083         pVar = NULL;
   1084         p = buf;
   1085         while (bNext) {
   1086             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
   1087                 if (*p == 0) {
   1088                     bNext = FALSE;
   1089                 } else {
   1090                     *p = 0; /* terminate */
   1091                 }
   1092                 if (pVar == NULL) {
   1093                     if (strict) {
   1094                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1095                         break;
   1096                     }
   1097                     /* ignore empty variant */
   1098                 } else {
   1099                     /* ICU uses upper case letters for variants, but
   1100                        the canonical format is lowercase in BCP47 */
   1101                     for (i = 0; *(pVar + i) != 0; i++) {
   1102                         *(pVar + i) = uprv_tolower(*(pVar + i));
   1103                     }
   1104 
   1105                     /* validate */
   1106                     if (_isVariantSubtag(pVar, -1)) {
   1107                         /* emit the variant to the list */
   1108                         var = uprv_malloc(sizeof(VariantListEntry));
   1109                         if (var == NULL) {
   1110                             *status = U_MEMORY_ALLOCATION_ERROR;
   1111                             break;
   1112                         }
   1113                         var->variant = pVar;
   1114                         if (!_addVariantToList(&varFirst, var)) {
   1115                             /* duplicated variant */
   1116                             uprv_free(var);
   1117                             if (strict) {
   1118                                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1119                                 break;
   1120                             }
   1121                         }
   1122                     } else if (strict) {
   1123                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1124                         break;
   1125                     }
   1126                 }
   1127                 /* reset variant starting position */
   1128                 pVar = NULL;
   1129             } else if (pVar == NULL) {
   1130                 pVar = p;
   1131             }
   1132             p++;
   1133         }
   1134 
   1135         if (U_SUCCESS(*status)) {
   1136             if (varFirst != NULL) {
   1137                 int32_t varLen;
   1138 
   1139                 /* write out sorted/validated/normalized variants to the target */
   1140                 var = varFirst;
   1141                 while (var != NULL) {
   1142                     if (reslen < capacity) {
   1143                         *(appendAt + reslen) = SEP;
   1144                     }
   1145                     reslen++;
   1146                     varLen = (int32_t)uprv_strlen(var->variant);
   1147                     if (reslen < capacity) {
   1148                         uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
   1149                     }
   1150                     reslen += varLen;
   1151                     var = var->next;
   1152                 }
   1153             }
   1154         }
   1155 
   1156         /* clean up */
   1157         var = varFirst;
   1158         while (var != NULL) {
   1159             VariantListEntry *tmpVar = var->next;
   1160             uprv_free(var);
   1161             var = tmpVar;
   1162         }
   1163 
   1164         if (U_FAILURE(*status)) {
   1165             return 0;
   1166         }
   1167     }
   1168 
   1169     u_terminateChars(appendAt, capacity, reslen, status);
   1170     return reslen;
   1171 }
   1172 
   1173 static int32_t
   1174 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
   1175     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1176     UEnumeration *keywordEnum = NULL;
   1177     int32_t reslen = 0;
   1178 
   1179     keywordEnum = uloc_openKeywords(localeID, status);
   1180     if (U_FAILURE(*status)) {
   1181         uenum_close(keywordEnum);
   1182         return 0;
   1183     }
   1184     if (keywordEnum != NULL) {
   1185         /* reorder extensions */
   1186         int32_t len;
   1187         const char *key;
   1188         ExtensionListEntry *firstExt = NULL;
   1189         ExtensionListEntry *ext;
   1190         char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1191         char *pExtBuf = extBuf;
   1192         int32_t extBufCapacity = sizeof(extBuf);
   1193         const char *bcpKey, *bcpValue;
   1194         UErrorCode tmpStatus = U_ZERO_ERROR;
   1195         int32_t keylen;
   1196         UBool isLDMLKeyword;
   1197 
   1198         while (TRUE) {
   1199             key = uenum_next(keywordEnum, NULL, status);
   1200             if (key == NULL) {
   1201                 break;
   1202             }
   1203             len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
   1204             if (U_FAILURE(tmpStatus)) {
   1205                 if (strict) {
   1206                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1207                     break;
   1208                 }
   1209                 /* ignore this keyword */
   1210                 tmpStatus = U_ZERO_ERROR;
   1211                 continue;
   1212             }
   1213 
   1214             keylen = (int32_t)uprv_strlen(key);
   1215             isLDMLKeyword = (keylen > 1);
   1216 
   1217             if (isLDMLKeyword) {
   1218                 int32_t modKeyLen;
   1219 
   1220                 /* transform key and value to bcp47 style */
   1221                 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
   1222                 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1223                     if (strict) {
   1224                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1225                         break;
   1226                     }
   1227                     tmpStatus = U_ZERO_ERROR;
   1228                     continue;
   1229                 }
   1230 
   1231                 bcpKey = pExtBuf;
   1232                 pExtBuf += (modKeyLen + 1);
   1233                 extBufCapacity -= (modKeyLen + 1);
   1234 
   1235                 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
   1236                 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1237                     if (strict) {
   1238                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1239                         break;
   1240                     }
   1241                     tmpStatus = U_ZERO_ERROR;
   1242                     continue;
   1243                 }
   1244                 bcpValue = pExtBuf;
   1245                 pExtBuf += (len + 1);
   1246                 extBufCapacity -= (len + 1);
   1247             } else {
   1248                 if (*key == PRIVATEUSE) {
   1249                     if (!_isPrivateuseValueSubtags(buf, len)) {
   1250                         if (strict) {
   1251                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1252                             break;
   1253                         }
   1254                         continue;
   1255                     }
   1256                 } else {
   1257                     if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
   1258                         if (strict) {
   1259                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1260                             break;
   1261                         }
   1262                         continue;
   1263                     }
   1264                 }
   1265                 bcpKey = key;
   1266                 if ((len + 1) < extBufCapacity) {
   1267                     uprv_memcpy(pExtBuf, buf, len);
   1268                     bcpValue = pExtBuf;
   1269 
   1270                     pExtBuf += len;
   1271 
   1272                     *pExtBuf = 0;
   1273                     pExtBuf++;
   1274 
   1275                     extBufCapacity -= (len + 1);
   1276                 } else {
   1277                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1278                     break;
   1279                 }
   1280             }
   1281 
   1282             /* create ExtensionListEntry */
   1283             ext = uprv_malloc(sizeof(ExtensionListEntry));
   1284             if (ext == NULL) {
   1285                 *status = U_MEMORY_ALLOCATION_ERROR;
   1286                 break;
   1287             }
   1288             ext->key = bcpKey;
   1289             ext->value = bcpValue;
   1290 
   1291             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
   1292                 uprv_free(ext);
   1293                 if (strict) {
   1294                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1295                     break;
   1296                 }
   1297             }
   1298         }
   1299         if (U_SUCCESS(*status) && (firstExt != NULL)) {
   1300             UBool startLDMLExtension = FALSE;
   1301 
   1302             /* write out the sorted BCP47 extensions and private use */
   1303             ext = firstExt;
   1304             while (ext != NULL) {
   1305                 if ((int32_t)uprv_strlen(ext->key) > 1 && !startLDMLExtension) {
   1306                     /* write LDML singleton extension */
   1307                     if (reslen < capacity) {
   1308                         *(appendAt + reslen) = SEP;
   1309                     }
   1310                     reslen++;
   1311                     if (reslen < capacity) {
   1312                         *(appendAt + reslen) = LDMLEXT;
   1313                     }
   1314                     reslen++;
   1315                     startLDMLExtension = TRUE;
   1316                 }
   1317 
   1318                 if (reslen < capacity) {
   1319                     *(appendAt + reslen) = SEP;
   1320                 }
   1321                 reslen++;
   1322                 len = (int32_t)uprv_strlen(ext->key);
   1323                 if (reslen < capacity) {
   1324                     uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
   1325                 }
   1326                 reslen += len;
   1327                 if (reslen < capacity) {
   1328                     *(appendAt + reslen) = SEP;
   1329                 }
   1330                 reslen++;
   1331                 len = (int32_t)uprv_strlen(ext->value);
   1332                 if (reslen < capacity) {
   1333                     uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
   1334                 }
   1335                 reslen += len;
   1336 
   1337                 ext = ext->next;
   1338             }
   1339         }
   1340         /* clean up */
   1341         ext = firstExt;
   1342         while (ext != NULL) {
   1343             ExtensionListEntry *tmpExt = ext->next;
   1344             uprv_free(ext);
   1345             ext = tmpExt;
   1346         }
   1347 
   1348         uenum_close(keywordEnum);
   1349 
   1350         if (U_FAILURE(*status)) {
   1351             return 0;
   1352         }
   1353     }
   1354 
   1355     return u_terminateChars(appendAt, capacity, reslen, status);
   1356 }
   1357 
   1358 /**
   1359  * Append keywords parsed from LDML extension value
   1360  * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
   1361  * Note: char* buf is used for storing keywords
   1362  */
   1363 static void
   1364 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UErrorCode *status) {
   1365     const char *p, *pNext, *pSep;
   1366     const char *pBcpKey, *pBcpType;
   1367     const char *pKey, *pType;
   1368     int32_t bcpKeyLen = 0, bcpTypeLen;
   1369     ExtensionListEntry *kwd, *nextKwd;
   1370     ExtensionListEntry *kwdFirst = NULL;
   1371     int32_t bufIdx = 0;
   1372     int32_t  len;
   1373 
   1374     pNext = ldmlext;
   1375     pBcpKey = pBcpType = NULL;
   1376     while (pNext) {
   1377         p = pSep = pNext;
   1378 
   1379         /* locate next separator char */
   1380         while (*pSep) {
   1381             if (*pSep == SEP) {
   1382                 break;
   1383             }
   1384             pSep++;
   1385         }
   1386         if (*pSep == 0) {
   1387             /* last subtag */
   1388             pNext = NULL;
   1389         } else {
   1390             pNext = pSep + 1;
   1391         }
   1392 
   1393         if (pBcpKey == NULL) {
   1394             pBcpKey = p;
   1395             bcpKeyLen = (int32_t)(pSep - p);
   1396         } else {
   1397             pBcpType = p;
   1398             bcpTypeLen = (int32_t)(pSep - p);
   1399 
   1400             /* BCP key to locale key */
   1401             len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
   1402             if (U_FAILURE(*status)) {
   1403                 goto cleanup;
   1404             }
   1405             pKey = buf + bufIdx;
   1406             bufIdx += len;
   1407             *(buf + bufIdx) = 0;
   1408             bufIdx++;
   1409 
   1410             /* BCP type to locale type */
   1411             len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
   1412             if (U_FAILURE(*status)) {
   1413                 goto cleanup;
   1414             }
   1415             pType = buf + bufIdx;
   1416             bufIdx += len;
   1417             *(buf + bufIdx) = 0;
   1418             bufIdx++;
   1419 
   1420             /* create an ExtensionListEntry for this keyword */
   1421             kwd = uprv_malloc(sizeof(ExtensionListEntry));
   1422             if (kwd == NULL) {
   1423                 *status = U_MEMORY_ALLOCATION_ERROR;
   1424                 goto cleanup;
   1425             }
   1426 
   1427             kwd->key = pKey;
   1428             kwd->value = pType;
   1429 
   1430             if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1431                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1432                 uprv_free(kwd);
   1433                 goto cleanup;
   1434             }
   1435 
   1436             /* for next pair */
   1437             pBcpKey = NULL;
   1438             pBcpType = NULL;
   1439         }
   1440     }
   1441 
   1442     if (pBcpKey != NULL) {
   1443         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1444         goto cleanup;
   1445     }
   1446 
   1447     kwd = kwdFirst;
   1448     while (kwd != NULL) {
   1449         nextKwd = kwd->next;
   1450         _addExtensionToList(appendTo, kwd, FALSE);
   1451         kwd = nextKwd;
   1452     }
   1453 
   1454     return;
   1455 
   1456 cleanup:
   1457     kwd = kwdFirst;
   1458     while (kwd != NULL) {
   1459         nextKwd = kwd->next;
   1460         uprv_free(kwd);
   1461         kwd = nextKwd;
   1462     }
   1463 }
   1464 
   1465 
   1466 static int32_t
   1467 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
   1468     int32_t reslen = 0;
   1469     int32_t i, n;
   1470     int32_t len;
   1471     ExtensionListEntry *kwdFirst = NULL;
   1472     ExtensionListEntry *kwd;
   1473     const char *key, *type;
   1474     char kwdBuf[ULOC_KEYWORDS_CAPACITY];
   1475 
   1476     if (U_FAILURE(*status)) {
   1477         return 0;
   1478     }
   1479 
   1480     n = ultag_getExtensionsSize(langtag);
   1481 
   1482     /* resolve locale keywords and reordering keys */
   1483     for (i = 0; i < n; i++) {
   1484         key = ultag_getExtensionKey(langtag, i);
   1485         type = ultag_getExtensionValue(langtag, i);
   1486         if (*key == LDMLEXT) {
   1487             _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, sizeof(kwdBuf), status);
   1488             if (U_FAILURE(*status)) {
   1489                 break;
   1490             }
   1491         } else {
   1492             kwd = uprv_malloc(sizeof(ExtensionListEntry));
   1493             if (kwd == NULL) {
   1494                 *status = U_MEMORY_ALLOCATION_ERROR;
   1495                 break;
   1496             }
   1497             kwd->key = key;
   1498             kwd->value = type;
   1499             if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1500                 uprv_free(kwd);
   1501                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1502                 break;
   1503             }
   1504         }
   1505     }
   1506 
   1507     if (U_SUCCESS(*status)) {
   1508         type = ultag_getPrivateUse(langtag);
   1509         if ((int32_t)uprv_strlen(type) > 0) {
   1510             /* add private use as a keyword */
   1511             kwd = uprv_malloc(sizeof(ExtensionListEntry));
   1512             if (kwd == NULL) {
   1513                 *status = U_MEMORY_ALLOCATION_ERROR;
   1514             } else {
   1515                 kwd->key = PRIVATEUSE_KEY;
   1516                 kwd->value = type;
   1517                 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1518                     uprv_free(kwd);
   1519                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1520                 }
   1521             }
   1522         }
   1523     }
   1524 
   1525     if (U_SUCCESS(*status) && kwdFirst != NULL) {
   1526         /* write out the sorted keywords */
   1527         kwd = kwdFirst;
   1528         while (kwd != NULL) {
   1529             if (reslen < capacity) {
   1530                 if (kwd == kwdFirst) {
   1531                     /* '@' */
   1532                     *(appendAt + reslen) = LOCALE_EXT_SEP;
   1533                 } else {
   1534                     /* ';' */
   1535                     *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
   1536                 }
   1537             }
   1538             reslen++;
   1539 
   1540             /* key */
   1541             len = (int32_t)uprv_strlen(kwd->key);
   1542             if (reslen < capacity) {
   1543                 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
   1544             }
   1545             reslen += len;
   1546 
   1547             /* '=' */
   1548             if (reslen < capacity) {
   1549                 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
   1550             }
   1551             reslen++;
   1552 
   1553             /* type */
   1554             len = (int32_t)uprv_strlen(kwd->value);
   1555             if (reslen < capacity) {
   1556                 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
   1557             }
   1558             reslen += len;
   1559 
   1560             kwd = kwd->next;
   1561         }
   1562     }
   1563 
   1564     /* clean up */
   1565     kwd = kwdFirst;
   1566     while (kwd != NULL) {
   1567         ExtensionListEntry *tmpKwd = kwd->next;
   1568         uprv_free(kwd);
   1569         kwd = tmpKwd;
   1570     }
   1571 
   1572     if (U_FAILURE(*status)) {
   1573         return 0;
   1574     }
   1575 
   1576     return u_terminateChars(appendAt, capacity, reslen, status);
   1577 }
   1578 
   1579 /*
   1580 * -------------------------------------------------
   1581 *
   1582 * ultag_ functions
   1583 *
   1584 * -------------------------------------------------
   1585 */
   1586 
    1587 /* Bit flags used by the parser */
/* ultag_parse() keeps a mask of these flags in `next`; a set bit means that
   a subtag of that kind is accepted at the current position. */
    1588 #define LANG 0x0001  /* language subtag (_isLanguageSubtag) */
    1589 #define EXTL 0x0002  /* extended language subtag (_isExtlangSubtag) */
    1590 #define SCRT 0x0004  /* script subtag (_isScriptSubtag) */
    1591 #define REGN 0x0008  /* region subtag (_isRegionSubtag) */
    1592 #define VART 0x0010  /* variant subtag (_isVariantSubtag) */
    1593 #define EXTS 0x0020  /* extension singleton (_isExtensionSingleton) */
    1594 #define EXTV 0x0040  /* NOTE(review): presumably an extension value subtag - confirm in ultag_parse() */
    1595 #define PRIV 0x0080  /* NOTE(review): presumably the private use part - confirm in ultag_parse() */
   1596 
   1597 static ULanguageTag*
   1598 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
   1599     ULanguageTag *t;
   1600     char *tagBuf;
   1601     int16_t next;
   1602     char *pSubtag, *pNext, *pLastGoodPosition;
   1603     int32_t subtagLen;
   1604     int32_t extlangIdx;
   1605     ExtensionListEntry *pExtension;
   1606     char *pExtValueSubtag, *pExtValueSubtagEnd;
   1607     int32_t i;
   1608     UBool isLDMLExtension, reqLDMLType;
   1609 
   1610     if (parsedLen != NULL) {
   1611         *parsedLen = 0;
   1612     }
   1613 
   1614     if (U_FAILURE(*status)) {
   1615         return NULL;
   1616     }
   1617 
   1618     if (tagLen < 0) {
   1619         tagLen = (int32_t)uprv_strlen(tag);
   1620     }
   1621 
   1622     /* copy the entire string */
   1623     tagBuf = (char*)uprv_malloc(tagLen + 1);
   1624     if (tagBuf == NULL) {
   1625         *status = U_MEMORY_ALLOCATION_ERROR;
   1626         return NULL;
   1627     }
   1628     uprv_memcpy(tagBuf, tag, tagLen);
   1629     *(tagBuf + tagLen) = 0;
   1630 
   1631     /* create a ULanguageTag */
   1632     t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
   1633     _initializeULanguageTag(t);
   1634     t->buf = tagBuf;
   1635     if (t == NULL) {
   1636         uprv_free(tagBuf);
   1637         *status = U_MEMORY_ALLOCATION_ERROR;
   1638         return NULL;
   1639     }
   1640 
   1641     if (tagLen < MINLEN) {
   1642         /* the input tag is too short - return empty ULanguageTag */
   1643         return t;
   1644     }
   1645 
   1646     /* check if the tag is grandfathered */
   1647     for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
   1648         if (T_CString_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
   1649             /* a grandfathered tag is always longer than its preferred mapping */
   1650             uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
   1651             t->language = t->buf;
   1652             if (parsedLen != NULL) {
   1653                 *parsedLen = tagLen;
   1654             }
   1655             return t;
   1656         }
   1657     }
   1658 
   1659     /*
   1660      * langtag      =   language
   1661      *                  ["-" script]
   1662      *                  ["-" region]
   1663      *                  *("-" variant)
   1664      *                  *("-" extension)
   1665      *                  ["-" privateuse]
   1666      */
   1667 
   1668     next = LANG | PRIV;
   1669     pNext = pLastGoodPosition = tagBuf;
   1670     extlangIdx = 0;
   1671     pExtension = NULL;
   1672     pExtValueSubtag = NULL;
   1673     pExtValueSubtagEnd = NULL;
   1674     isLDMLExtension = FALSE;
   1675     reqLDMLType = FALSE;
   1676 
   1677     while (pNext) {
   1678         char *pSep;
   1679 
   1680         pSubtag = pNext;
   1681 
   1682         /* locate next separator char */
   1683         pSep = pSubtag;
   1684         while (*pSep) {
   1685             if (*pSep == SEP) {
   1686                 break;
   1687             }
   1688             pSep++;
   1689         }
   1690         if (*pSep == 0) {
   1691             /* last subtag */
   1692             pNext = NULL;
   1693         } else {
   1694             pNext = pSep + 1;
   1695         }
   1696         subtagLen = (int32_t)(pSep - pSubtag);
   1697 
   1698         if (next & LANG) {
   1699             if (_isLanguageSubtag(pSubtag, subtagLen)) {
   1700                 *pSep = 0;  /* terminate */
   1701                 t->language = T_CString_toLowerCase(pSubtag);
   1702 
   1703                 pLastGoodPosition = pSep;
   1704                 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
   1705                 continue;
   1706             }
   1707         }
   1708         if (next & EXTL) {
   1709             if (_isExtlangSubtag(pSubtag, subtagLen)) {
   1710                 *pSep = 0;
   1711                 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
   1712 
   1713                 pLastGoodPosition = pSep;
   1714                 if (extlangIdx < 3) {
   1715                     next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
   1716                 } else {
   1717                     next = SCRT | REGN | VART | EXTS | PRIV;
   1718                 }
   1719                 continue;
   1720             }
   1721         }
   1722         if (next & SCRT) {
   1723             if (_isScriptSubtag(pSubtag, subtagLen)) {
   1724                 char *p = pSubtag;
   1725 
   1726                 *pSep = 0;
   1727 
   1728                 /* to title case */
   1729                 *p = uprv_toupper(*p);
   1730                 p++;
   1731                 for (; *p; p++) {
   1732                     *p = uprv_tolower(*p);
   1733                 }
   1734 
   1735                 t->script = pSubtag;
   1736 
   1737                 pLastGoodPosition = pSep;
   1738                 next = REGN | VART | EXTS | PRIV;
   1739                 continue;
   1740             }
   1741         }
   1742         if (next & REGN) {
   1743             if (_isRegionSubtag(pSubtag, subtagLen)) {
   1744                 *pSep = 0;
   1745                 t->region = T_CString_toUpperCase(pSubtag);
   1746 
   1747                 pLastGoodPosition = pSep;
   1748                 next = VART | EXTS | PRIV;
   1749                 continue;
   1750             }
   1751         }
   1752         if (next & VART) {
   1753             if (_isVariantSubtag(pSubtag, subtagLen)) {
   1754                 VariantListEntry *var;
   1755                 UBool isAdded;
   1756 
   1757                 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
   1758                 if (var == NULL) {
   1759                     *status = U_MEMORY_ALLOCATION_ERROR;
   1760                     goto error;
   1761                 }
   1762                 *pSep = 0;
   1763                 var->variant = T_CString_toUpperCase(pSubtag);
   1764                 isAdded = _addVariantToList(&(t->variants), var);
   1765                 if (!isAdded) {
   1766                     /* duplicated variant entry */
   1767                     uprv_free(var);
   1768                     break;
   1769                 }
   1770                 pLastGoodPosition = pSep;
   1771                 next = VART | EXTS | PRIV;
   1772                 continue;
   1773             }
   1774         }
   1775         if (next & EXTS) {
   1776             if (_isExtensionSingleton(pSubtag, subtagLen)) {
   1777                 if (pExtension != NULL) {
   1778                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   1779                         /* the previous extension is incomplete */
   1780                         uprv_free(pExtension);
   1781                         pExtension = NULL;
   1782                         break;
   1783                     }
   1784 
   1785                     /* terminate the previous extension value */
   1786                     *pExtValueSubtagEnd = 0;
   1787                     pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   1788 
   1789                     /* insert the extension to the list */
   1790                     if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   1791                         pLastGoodPosition = pExtValueSubtagEnd;
   1792                     } else {
   1793                         /* stop parsing here */
   1794                         uprv_free(pExtension);
   1795                         pExtension = NULL;
   1796                         break;
   1797                     }
   1798 
   1799                     if (isLDMLExtension && reqLDMLType) {
   1800                         /* incomplete LDML extension key and type pair */
   1801                         pExtension = NULL;
   1802                         break;
   1803                     }
   1804                 }
   1805 
   1806                 isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT);
   1807 
   1808                 /* create a new extension */
   1809                 pExtension = uprv_malloc(sizeof(ExtensionListEntry));
   1810                 if (pExtension == NULL) {
   1811                     *status = U_MEMORY_ALLOCATION_ERROR;
   1812                     goto error;
   1813                 }
   1814                 *pSep = 0;
   1815                 pExtension->key = T_CString_toLowerCase(pSubtag);
   1816                 pExtension->value = NULL;   /* will be set later */
   1817 
   1818                 /*
   1819                  * reset the start and the end location of extension value
   1820                  * subtags for this extension
   1821                  */
   1822                 pExtValueSubtag = NULL;
   1823                 pExtValueSubtagEnd = NULL;
   1824 
   1825                 next = EXTV;
   1826                 continue;
   1827             }
   1828         }
   1829         if (next & EXTV) {
   1830             if (_isExtensionSubtag(pSubtag, subtagLen)) {
   1831                 if (isLDMLExtension) {
   1832                     if (reqLDMLType) {
   1833                         /* already saw an LDML key */
   1834                         if (!_isLDMLType(pSubtag, subtagLen)) {
   1835                             /* stop parsing here and let the valid LDML extension key/type
   1836                                pairs processed by the code out of this while loop */
   1837                             break;
   1838                         }
   1839                         pExtValueSubtagEnd = pSep;
   1840                         reqLDMLType = FALSE;
   1841                         next = EXTS | EXTV | PRIV;
   1842                     } else {
   1843                         /* LDML key */
   1844                         if (!_isLDMLKey(pSubtag, subtagLen)) {
   1845                             /* stop parsing here and let the valid LDML extension key/type
   1846                                pairs processed by the code out of this while loop */
   1847                             break;
   1848                         }
   1849                         reqLDMLType = TRUE;
   1850                         next = EXTV;
   1851                     }
   1852                 } else {
   1853                     /* Mark the end of this subtag */
   1854                     pExtValueSubtagEnd = pSep;
   1855                     next = EXTS | EXTV | PRIV;
   1856                 }
   1857 
   1858                 if (pExtValueSubtag == NULL) {
   1859                     /* if the start postion of this extension's value is not yet,
   1860                        this one is the first value subtag */
   1861                     pExtValueSubtag = pSubtag;
   1862                 }
   1863                 continue;
   1864             }
   1865         }
   1866         if (next & PRIV) {
   1867             if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
   1868                 char *pPrivuseVal;
   1869 
   1870                 if (pExtension != NULL) {
   1871                     /* Process the last extension */
   1872                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   1873                         /* the previous extension is incomplete */
   1874                         uprv_free(pExtension);
   1875                         pExtension = NULL;
   1876                         break;
   1877                     } else {
   1878                         /* terminate the previous extension value */
   1879                         *pExtValueSubtagEnd = 0;
   1880                         pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   1881 
   1882                         /* insert the extension to the list */
   1883                         if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   1884                             pLastGoodPosition = pExtValueSubtagEnd;
   1885                             pExtension = NULL;
   1886                         } else {
   1887                         /* stop parsing here */
   1888                             uprv_free(pExtension);
   1889                             pExtension = NULL;
   1890                             break;
   1891                         }
   1892                     }
   1893                 }
   1894 
   1895                 /* The rest of part will be private use value subtags */
   1896                 if (pNext == NULL) {
   1897                     /* empty private use subtag */
   1898                     break;
   1899                 }
   1900                 /* back up the private use value start position */
   1901                 pPrivuseVal = pNext;
   1902 
   1903                 /* validate private use value subtags */
   1904                 while (pNext) {
   1905                     pSubtag = pNext;
   1906                     pSep = pSubtag;
   1907                     while (*pSep) {
   1908                         if (*pSep == SEP) {
   1909                             break;
   1910                         }
   1911                         pSep++;
   1912                     }
   1913                     if (*pSep == 0) {
   1914                         /* last subtag */
   1915                         pNext = NULL;
   1916                     } else {
   1917                         pNext = pSep + 1;
   1918                     }
   1919                     subtagLen = (int32_t)(pSep - pSubtag);
   1920 
   1921                     if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
   1922                         pLastGoodPosition = pSep;
   1923                     } else {
   1924                         break;
   1925                     }
   1926                 }
   1927                 if (pLastGoodPosition - pPrivuseVal > 0) {
   1928                     *pLastGoodPosition = 0;
   1929                     t->privateuse = T_CString_toLowerCase(pPrivuseVal);
   1930                 }
   1931                 /* No more subtags, exiting the parse loop */
   1932                 break;
   1933             }
   1934             break;
   1935         }
   1936         /* If we fell through here, it means this subtag is illegal - quit parsing */
   1937         break;
   1938     }
   1939 
   1940     if (pExtension != NULL) {
   1941         /* Process the last extension */
   1942         if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   1943             /* the previous extension is incomplete */
   1944             uprv_free(pExtension);
   1945         } else {
   1946             /* terminate the previous extension value */
   1947             *pExtValueSubtagEnd = 0;
   1948             pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   1949             /* insert the extension to the list */
   1950             if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   1951                 pLastGoodPosition = pExtValueSubtagEnd;
   1952             } else {
   1953                 uprv_free(pExtension);
   1954             }
   1955         }
   1956     }
   1957 
   1958     if (parsedLen != NULL) {
   1959         *parsedLen = (int32_t)(pLastGoodPosition - t->buf);
   1960     }
   1961 
   1962     return t;
   1963 
   1964 error:
   1965     uprv_free(t);
   1966     return NULL;
   1967 }
   1968 
   1969 static void
   1970 ultag_close(ULanguageTag* langtag) {
   1971 
   1972     if (langtag == NULL) {
   1973         return;
   1974     }
   1975 
   1976     uprv_free(langtag->buf);
   1977 
   1978     if (langtag->variants) {
   1979         VariantListEntry *curVar = langtag->variants;
   1980         while (curVar) {
   1981             VariantListEntry *nextVar = curVar->next;
   1982             uprv_free(curVar);
   1983             curVar = nextVar;
   1984         }
   1985     }
   1986 
   1987     if (langtag->extensions) {
   1988         ExtensionListEntry *curExt = langtag->extensions;
   1989         while (curExt) {
   1990             ExtensionListEntry *nextExt = curExt->next;
   1991             uprv_free(curExt);
   1992             curExt = nextExt;
   1993         }
   1994     }
   1995 
   1996     uprv_free(langtag);
   1997 }
   1998 
   1999 static const char*
   2000 ultag_getLanguage(const ULanguageTag* langtag) {
   2001     return langtag->language;
   2002 }
   2003 
   2004 #if 0
   2005 static const char*
   2006 ultag_getJDKLanguage(const ULanguageTag* langtag) {
   2007     int32_t i;
   2008     for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
   2009         if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
   2010             return DEPRECATEDLANGS[i + 1];
   2011         }
   2012     }
   2013     return langtag->language;
   2014 }
   2015 #endif
   2016 
   2017 static const char*
   2018 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
   2019     if (idx >= 0 && idx < MAXEXTLANG) {
   2020         return langtag->extlang[idx];
   2021     }
   2022     return NULL;
   2023 }
   2024 
   2025 static int32_t
   2026 ultag_getExtlangSize(const ULanguageTag* langtag) {
   2027     int32_t size = 0;
   2028     int32_t i;
   2029     for (i = 0; i < MAXEXTLANG; i++) {
   2030         if (langtag->extlang[i]) {
   2031             size++;
   2032         }
   2033     }
   2034     return size;
   2035 }
   2036 
   2037 static const char*
   2038 ultag_getScript(const ULanguageTag* langtag) {
   2039     return langtag->script;
   2040 }
   2041 
   2042 static const char*
   2043 ultag_getRegion(const ULanguageTag* langtag) {
   2044     return langtag->region;
   2045 }
   2046 
   2047 static const char*
   2048 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
   2049     const char *var = NULL;
   2050     VariantListEntry *cur = langtag->variants;
   2051     int32_t i = 0;
   2052     while (cur) {
   2053         if (i == idx) {
   2054             var = cur->variant;
   2055             break;
   2056         }
   2057         cur = cur->next;
   2058         i++;
   2059     }
   2060     return var;
   2061 }
   2062 
   2063 static int32_t
   2064 ultag_getVariantsSize(const ULanguageTag* langtag) {
   2065     int32_t size = 0;
   2066     VariantListEntry *cur = langtag->variants;
   2067     while (TRUE) {
   2068         if (cur == NULL) {
   2069             break;
   2070         }
   2071         size++;
   2072         cur = cur->next;
   2073     }
   2074     return size;
   2075 }
   2076 
   2077 static const char*
   2078 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
   2079     const char *key = NULL;
   2080     ExtensionListEntry *cur = langtag->extensions;
   2081     int32_t i = 0;
   2082     while (cur) {
   2083         if (i == idx) {
   2084             key = cur->key;
   2085             break;
   2086         }
   2087         cur = cur->next;
   2088         i++;
   2089     }
   2090     return key;
   2091 }
   2092 
   2093 static const char*
   2094 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
   2095     const char *val = NULL;
   2096     ExtensionListEntry *cur = langtag->extensions;
   2097     int32_t i = 0;
   2098     while (cur) {
   2099         if (i == idx) {
   2100             val = cur->value;
   2101             break;
   2102         }
   2103         cur = cur->next;
   2104         i++;
   2105     }
   2106     return val;
   2107 }
   2108 
   2109 static int32_t
   2110 ultag_getExtensionsSize(const ULanguageTag* langtag) {
   2111     int32_t size = 0;
   2112     ExtensionListEntry *cur = langtag->extensions;
   2113     while (TRUE) {
   2114         if (cur == NULL) {
   2115             break;
   2116         }
   2117         size++;
   2118         cur = cur->next;
   2119     }
   2120     return size;
   2121 }
   2122 
   2123 static const char*
   2124 ultag_getPrivateUse(const ULanguageTag* langtag) {
   2125     return langtag->privateuse;
   2126 }
   2127 
   2128 #if 0
   2129 static const char*
   2130 ultag_getGrandfathered(const ULanguageTag* langtag) {
   2131     return langtag->grandfathered;
   2132 }
   2133 #endif
   2134 
   2135 
   2136 /*
   2137 * -------------------------------------------------
   2138 *
   2139 * Locale/BCP47 conversion APIs, exposed as uloc_*
   2140 *
   2141 * -------------------------------------------------
   2142 */
   2143 U_DRAFT int32_t U_EXPORT2
   2144 uloc_toLanguageTag(const char* localeID,
   2145                    char* langtag,
   2146                    int32_t langtagCapacity,
   2147                    UBool strict,
   2148                    UErrorCode* status) {
   2149     /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
   2150     char canonical[256];
   2151     int32_t reslen = 0;
   2152     UErrorCode tmpStatus = U_ZERO_ERROR;
   2153 
   2154     /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
   2155     canonical[0] = 0;
   2156     if (uprv_strlen(localeID) > 0) {
   2157         uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
   2158         if (tmpStatus != U_ZERO_ERROR) {
   2159             *status = U_ILLEGAL_ARGUMENT_ERROR;
   2160             return 0;
   2161         }
   2162     }
   2163 
   2164     reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
   2165     reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2166     reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2167     reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2168     reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2169 
   2170     return reslen;
   2171 }
   2172 
   2173 
   2174 U_DRAFT int32_t U_EXPORT2
   2175 uloc_forLanguageTag(const char* langtag,
   2176                     char* localeID,
   2177                     int32_t localeIDCapacity,
   2178                     int32_t* parsedLength,
   2179                     UErrorCode* status) {
   2180     ULanguageTag *lt;
   2181     int32_t reslen = 0;
   2182     const char *subtag, *p;
   2183     int32_t len;
   2184     int32_t i, n;
   2185     UBool noRegion = TRUE;
   2186 
   2187     lt = ultag_parse(langtag, -1, parsedLength, status);
   2188     if (U_FAILURE(*status)) {
   2189         return 0;
   2190     }
   2191 
   2192     /* language */
   2193     subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
   2194     if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
   2195         len = (int32_t)uprv_strlen(subtag);
   2196         if (len > 0) {
   2197             if (reslen < localeIDCapacity) {
   2198                 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
   2199             }
   2200             reslen += len;
   2201         }
   2202     }
   2203 
   2204     /* script */
   2205     subtag = ultag_getScript(lt);
   2206     len = (int32_t)uprv_strlen(subtag);
   2207     if (len > 0) {
   2208         if (reslen < localeIDCapacity) {
   2209             *(localeID + reslen) = LOCALE_SEP;
   2210         }
   2211         reslen++;
   2212 
   2213         /* write out the script in title case */
   2214         p = subtag;
   2215         while (*p) {
   2216             if (reslen < localeIDCapacity) {
   2217                 if (p == subtag) {
   2218                     *(localeID + reslen) = uprv_toupper(*p);
   2219                 } else {
   2220                     *(localeID + reslen) = *p;
   2221                 }
   2222             }
   2223             reslen++;
   2224             p++;
   2225         }
   2226     }
   2227 
   2228     /* region */
   2229     subtag = ultag_getRegion(lt);
   2230     len = (int32_t)uprv_strlen(subtag);
   2231     if (len > 0) {
   2232         if (reslen < localeIDCapacity) {
   2233             *(localeID + reslen) = LOCALE_SEP;
   2234         }
   2235         reslen++;
   2236         /* write out the retion in upper case */
   2237         p = subtag;
   2238         while (*p) {
   2239             if (reslen < localeIDCapacity) {
   2240                 *(localeID + reslen) = uprv_toupper(*p);
   2241             }
   2242             reslen++;
   2243             p++;
   2244         }
   2245         noRegion = FALSE;
   2246     }
   2247 
   2248     /* variants */
   2249     n = ultag_getVariantsSize(lt);
   2250     if (n > 0) {
   2251         if (noRegion) {
   2252             if (reslen < localeIDCapacity) {
   2253                 *(localeID + reslen) = LOCALE_SEP;
   2254             }
   2255             reslen++;
   2256         }
   2257 
   2258         for (i = 0; i < n; i++) {
   2259             subtag = ultag_getVariant(lt, i);
   2260             if (reslen < localeIDCapacity) {
   2261                 *(localeID + reslen) = LOCALE_SEP;
   2262             }
   2263             reslen++;
   2264             /* write out the variant in upper case */
   2265             p = subtag;
   2266             while (*p) {
   2267                 if (reslen < localeIDCapacity) {
   2268                     *(localeID + reslen) = uprv_toupper(*p);
   2269                 }
   2270                 reslen++;
   2271                 p++;
   2272             }
   2273         }
   2274     }
   2275 
   2276     /* keywords */
   2277     n = ultag_getExtensionsSize(lt);
   2278     subtag = ultag_getPrivateUse(lt);
   2279     if (n > 0 || uprv_strlen(subtag) > 0) {
   2280         if (reslen == 0) {
   2281             /* need a language */
   2282             if (reslen < localeIDCapacity) {
   2283                 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
   2284             }
   2285             reslen += LANG_UND_LEN;
   2286         }
   2287         len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
   2288         reslen += len;
   2289     }
   2290 
   2291     ultag_close(lt);
   2292     return u_terminateChars(localeID, localeIDCapacity, reslen, status);
   2293 }
   2294 
   2295 
   2296