Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2009-2010, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 */
      7 
      8 #include "unicode/utypes.h"
      9 #include "unicode/ures.h"
     10 #include "unicode/putil.h"
     11 #include "unicode/uloc.h"
     12 #include "ustr_imp.h"
     13 #include "cmemory.h"
     14 #include "cstring.h"
     15 #include "putilimp.h"
     16 #include "uinvchar.h"
     17 
     18 /* struct holding a single variant */
     19 typedef struct VariantListEntry {
     20     const char              *variant;
     21     struct VariantListEntry *next;
     22 } VariantListEntry;
     23 
     24 /* struct holding a single extension */
     25 typedef struct ExtensionListEntry {
     26     const char                  *key;
     27     const char                  *value;
     28     struct ExtensionListEntry   *next;
     29 } ExtensionListEntry;
     30 
     31 #define MAXEXTLANG 3
     32 typedef struct ULanguageTag {
     33     char                *buf;   /* holding parsed subtags */
     34     const char          *language;
     35     const char          *extlang[MAXEXTLANG];
     36     const char          *script;
     37     const char          *region;
     38     VariantListEntry    *variants;
     39     ExtensionListEntry  *extensions;
     40     const char          *privateuse;
     41     const char          *grandfathered;
     42 } ULanguageTag;
     43 
/* NOTE(review): MINLEN is not referenced in the visible part of this file;
 * presumably the minimum accepted tag length - confirm at its use site. */
#define MINLEN 2
/* Separator between subtags of a language tag. */
#define SEP '-'
/* Singleton reserved for private use; excluded by _isExtensionSingleton(). */
#define PRIVATEUSE 'x'
/* Singleton used by _addExtensionToList() to position LDML (multi-character) keys. */
#define LDMLEXT 'u'

/* Separators of the ICU locale ID syntax.
 * NOTE(review): their use sites are outside the visible part of this file. */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/* Character class tests. Both macros evaluate their argument more than once,
 * so the argument must not have side effects. */
#define ISALPHA(c) (((c)>='A' && (c)<='Z') || ((c)>='a' && (c)<='z'))
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')

/* Default value for string fields of ULanguageTag (see _initializeULanguageTag()). */
static const char* EMPTY = "";
/* Language subtag emitted when a locale has no usable language code. */
static const char* LANG_UND = "und";
/* NOTE(review): the remaining constants are used outside the visible part of
 * this file; presumably they support private use and POSIX variant handling. */
static const char* PRIVATEUSE_KEY = "x";
static const char* _POSIX = "_POSIX";
static const char* POSIX_KEY = "va";
static const char* POSIX_VALUE = "posix";

/* Length of LANG_UND, excluding the terminating NUL. */
#define LANG_UND_LEN 3
     65 
/*
 * Table of grandfathered tags, stored as consecutive pairs:
 * the grandfathered tag followed by its preferred replacement.
 * The table is terminated by a NULL/NULL pair.
 * NOTE(review): an empty replacement presumably means that no preferred
 * value exists for that tag - confirm against the code consuming the table.
 */
static const char* GRANDFATHERED[] = {
/*  grandfathered   preferred */
    "art-lojban",   "jbo",
    "cel-gaulish",  "",
    "en-GB-oed",    "",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-default",    "",
    "i-enochian",   "",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-mingo",      "",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min",       "",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",
    NULL,           NULL
};
     96 
/*
 * Table of deprecated language codes, stored as consecutive pairs:
 * the deprecated code followed by the code that replaces it.
 * The table is terminated by a NULL/NULL pair and is scanned in steps of
 * two by _appendLanguageToLanguageTag().
 */
static const char* DEPRECATEDLANGS[] = {
/*  deprecated  new */
    "iw",       "he",
    "ji",       "yi",
    "in",       "id",
    NULL,       NULL
};
    104 
    105 /*
    106 * -------------------------------------------------
    107 *
    108 * These ultag_ functions may be exposed as APIs later
    109 *
    110 * -------------------------------------------------
    111 */
    112 
    113 static ULanguageTag*
    114 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
    115 
    116 static void
    117 ultag_close(ULanguageTag* langtag);
    118 
    119 static const char*
    120 ultag_getLanguage(const ULanguageTag* langtag);
    121 
    122 #if 0
    123 static const char*
    124 ultag_getJDKLanguage(const ULanguageTag* langtag);
    125 #endif
    126 
    127 static const char*
    128 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
    129 
    130 static int32_t
    131 ultag_getExtlangSize(const ULanguageTag* langtag);
    132 
    133 static const char*
    134 ultag_getScript(const ULanguageTag* langtag);
    135 
    136 static const char*
    137 ultag_getRegion(const ULanguageTag* langtag);
    138 
    139 static const char*
    140 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
    141 
    142 static int32_t
    143 ultag_getVariantsSize(const ULanguageTag* langtag);
    144 
    145 static const char*
    146 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
    147 
    148 static const char*
    149 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
    150 
    151 static int32_t
    152 ultag_getExtensionsSize(const ULanguageTag* langtag);
    153 
    154 static const char*
    155 ultag_getPrivateUse(const ULanguageTag* langtag);
    156 
    157 #if 0
    158 static const char*
    159 ultag_getGrandfathered(const ULanguageTag* langtag);
    160 #endif
    161 
    162 /*
    163 * -------------------------------------------------
    164 *
    165 * Language subtag syntax validation functions
    166 *
    167 * -------------------------------------------------
    168 */
    169 
    170 static UBool
    171 _isAlphaString(const char* s, int32_t len) {
    172     int32_t i;
    173     for (i = 0; i < len; i++) {
    174         if (!ISALPHA(*(s + i))) {
    175             return FALSE;
    176         }
    177     }
    178     return TRUE;
    179 }
    180 
    181 static UBool
    182 _isNumericString(const char* s, int32_t len) {
    183     int32_t i;
    184     for (i = 0; i < len; i++) {
    185         if (!ISNUMERIC(*(s + i))) {
    186             return FALSE;
    187         }
    188     }
    189     return TRUE;
    190 }
    191 
    192 static UBool
    193 _isAlphaNumericString(const char* s, int32_t len) {
    194     int32_t i;
    195     for (i = 0; i < len; i++) {
    196         if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
    197             return FALSE;
    198         }
    199     }
    200     return TRUE;
    201 }
    202 
    203 static UBool
    204 _isLanguageSubtag(const char* s, int32_t len) {
    205     /*
    206      * language      = 2*3ALPHA            ; shortest ISO 639 code
    207      *                 ["-" extlang]       ; sometimes followed by
    208      *                                     ;   extended language subtags
    209      *               / 4ALPHA              ; or reserved for future use
    210      *               / 5*8ALPHA            ; or registered language subtag
    211      */
    212     if (len < 0) {
    213         len = (int32_t)uprv_strlen(s);
    214     }
    215     if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
    216         return TRUE;
    217     }
    218     return FALSE;
    219 }
    220 
    221 static UBool
    222 _isExtlangSubtag(const char* s, int32_t len) {
    223     /*
    224      * extlang       = 3ALPHA              ; selected ISO 639 codes
    225      *                 *2("-" 3ALPHA)      ; permanently reserved
    226      */
    227     if (len < 0) {
    228         len = (int32_t)uprv_strlen(s);
    229     }
    230     if (len == 3 && _isAlphaString(s, len)) {
    231         return TRUE;
    232     }
    233     return FALSE;
    234 }
    235 
    236 static UBool
    237 _isScriptSubtag(const char* s, int32_t len) {
    238     /*
    239      * script        = 4ALPHA              ; ISO 15924 code
    240      */
    241     if (len < 0) {
    242         len = (int32_t)uprv_strlen(s);
    243     }
    244     if (len == 4 && _isAlphaString(s, len)) {
    245         return TRUE;
    246     }
    247     return FALSE;
    248 }
    249 
    250 static UBool
    251 _isRegionSubtag(const char* s, int32_t len) {
    252     /*
    253      * region        = 2ALPHA              ; ISO 3166-1 code
    254      *               / 3DIGIT              ; UN M.49 code
    255      */
    256     if (len < 0) {
    257         len = (int32_t)uprv_strlen(s);
    258     }
    259     if (len == 2 && _isAlphaString(s, len)) {
    260         return TRUE;
    261     }
    262     if (len == 3 && _isNumericString(s, len)) {
    263         return TRUE;
    264     }
    265     return FALSE;
    266 }
    267 
    268 static UBool
    269 _isVariantSubtag(const char* s, int32_t len) {
    270     /*
    271      * variant       = 5*8alphanum         ; registered variants
    272      *               / (DIGIT 3alphanum)
    273      */
    274     if (len < 0) {
    275         len = (int32_t)uprv_strlen(s);
    276     }
    277     if (len >= 5 && len <= 8 && _isAlphaString(s, len)) {
    278         return TRUE;
    279     }
    280     if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
    281         return TRUE;
    282     }
    283     return FALSE;
    284 }
    285 
    286 static UBool
    287 _isExtensionSingleton(const char* s, int32_t len) {
    288     /*
    289      * extension     = singleton 1*("-" (2*8alphanum))
    290      */
    291     if (len < 0) {
    292         len = (int32_t)uprv_strlen(s);
    293     }
    294     if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
    295         return TRUE;
    296     }
    297     return FALSE;
    298 }
    299 
    300 static UBool
    301 _isExtensionSubtag(const char* s, int32_t len) {
    302     /*
    303      * extension     = singleton 1*("-" (2*8alphanum))
    304      */
    305     if (len < 0) {
    306         len = (int32_t)uprv_strlen(s);
    307     }
    308     if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
    309         return TRUE;
    310     }
    311     return FALSE;
    312 }
    313 
    314 static UBool
    315 _isExtensionSubtags(const char* s, int32_t len) {
    316     const char *p = s;
    317     const char *pSubtag = NULL;
    318 
    319     if (len < 0) {
    320         len = (int32_t)uprv_strlen(s);
    321     }
    322 
    323     while ((p - s) < len) {
    324         if (*p == SEP) {
    325             if (pSubtag == NULL) {
    326                 return FALSE;
    327             }
    328             if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
    329                 return FALSE;
    330             }
    331             pSubtag = NULL;
    332         } else if (pSubtag == NULL) {
    333             pSubtag = p;
    334         }
    335         p++;
    336     }
    337     if (pSubtag == NULL) {
    338         return FALSE;
    339     }
    340     return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
    341 }
    342 
    343 static UBool
    344 _isPrivateuseValueSubtag(const char* s, int32_t len) {
    345     /*
    346      * privateuse    = "x" 1*("-" (1*8alphanum))
    347      */
    348     if (len < 0) {
    349         len = (int32_t)uprv_strlen(s);
    350     }
    351     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
    352         return TRUE;
    353     }
    354     return FALSE;
    355 }
    356 
    357 static UBool
    358 _isPrivateuseValueSubtags(const char* s, int32_t len) {
    359     const char *p = s;
    360     const char *pSubtag = NULL;
    361 
    362     if (len < 0) {
    363         len = (int32_t)uprv_strlen(s);
    364     }
    365 
    366     while ((p - s) < len) {
    367         if (*p == SEP) {
    368             if (pSubtag == NULL) {
    369                 return FALSE;
    370             }
    371             if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
    372                 return FALSE;
    373             }
    374             pSubtag = NULL;
    375         } else if (pSubtag == NULL) {
    376             pSubtag = p;
    377         }
    378         p++;
    379     }
    380     if (pSubtag == NULL) {
    381         return FALSE;
    382     }
    383     return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
    384 }
    385 
    386 static UBool
    387 _isLDMLKey(const char* s, int32_t len) {
    388     if (len < 0) {
    389         len = (int32_t)uprv_strlen(s);
    390     }
    391     if (len == 2 && _isAlphaNumericString(s, len)) {
    392         return TRUE;
    393     }
    394     return FALSE;
    395 }
    396 
    397 static UBool
    398 _isLDMLType(const char* s, int32_t len) {
    399     if (len < 0) {
    400         len = (int32_t)uprv_strlen(s);
    401     }
    402     if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
    403         return TRUE;
    404     }
    405     return FALSE;
    406 }
    407 
    408 /*
    409 * -------------------------------------------------
    410 *
    411 * Helper functions
    412 *
    413 * -------------------------------------------------
    414 */
    415 
    416 static UBool
    417 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
    418     UBool bAdded = TRUE;
    419 
    420     if (*first == NULL) {
    421         var->next = NULL;
    422         *first = var;
    423     } else {
    424         VariantListEntry *prev, *cur;
    425         int32_t cmp;
    426 
    427         /* reorder variants in alphabetical order */
    428         prev = NULL;
    429         cur = *first;
    430         while (TRUE) {
    431             if (cur == NULL) {
    432                 prev->next = var;
    433                 var->next = NULL;
    434                 break;
    435             }
    436             cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
    437             if (cmp < 0) {
    438                 if (prev == NULL) {
    439                     *first = var;
    440                 } else {
    441                     prev->next = var;
    442                 }
    443                 var->next = cur;
    444                 break;
    445             }
    446             if (cmp == 0) {
    447                 /* duplicated variant */
    448                 bAdded = FALSE;
    449                 break;
    450             }
    451             prev = cur;
    452             cur = cur->next;
    453         }
    454     }
    455 
    456     return bAdded;
    457 }
    458 
    459 
    460 static UBool
    461 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
    462     UBool bAdded = TRUE;
    463 
    464     if (*first == NULL) {
    465         ext->next = NULL;
    466         *first = ext;
    467     } else {
    468         ExtensionListEntry *prev, *cur;
    469         int32_t cmp;
    470 
    471         /* reorder variants in alphabetical order */
    472         prev = NULL;
    473         cur = *first;
    474         while (TRUE) {
    475             if (cur == NULL) {
    476                 prev->next = ext;
    477                 ext->next = NULL;
    478                 break;
    479             }
    480             if (localeToBCP) {
    481                 /* special handling for locale to bcp conversion */
    482                 int32_t len, curlen;
    483 
    484                 len = (int32_t)uprv_strlen(ext->key);
    485                 curlen = (int32_t)uprv_strlen(cur->key);
    486 
    487                 if (len == 1 && curlen == 1) {
    488                     if (*(ext->key) == *(cur->key)) {
    489                         cmp = 0;
    490                     } else if (*(ext->key) == PRIVATEUSE) {
    491                         cmp = 1;
    492                     } else if (*(cur->key) == PRIVATEUSE) {
    493                         cmp = -1;
    494                     } else {
    495                         cmp = *(ext->key) - *(cur->key);
    496                     }
    497                 } else if (len == 1) {
    498                     cmp = *(ext->key) - LDMLEXT;
    499                 } else if (curlen == 1) {
    500                     cmp = LDMLEXT - *(cur->key);
    501                 } else {
    502                     cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
    503                 }
    504             } else {
    505                 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
    506             }
    507             if (cmp < 0) {
    508                 if (prev == NULL) {
    509                     *first = ext;
    510                 } else {
    511                     prev->next = ext;
    512                 }
    513                 ext->next = cur;
    514                 break;
    515             }
    516             if (cmp == 0) {
    517                 /* duplicated extension key */
    518                 bAdded = FALSE;
    519                 break;
    520             }
    521             prev = cur;
    522             cur = cur->next;
    523         }
    524     }
    525 
    526     return bAdded;
    527 }
    528 
    529 static void
    530 _initializeULanguageTag(ULanguageTag* langtag) {
    531     int32_t i;
    532 
    533     langtag->buf = NULL;
    534 
    535     langtag->language = EMPTY;
    536     for (i = 0; i < MAXEXTLANG; i++) {
    537         langtag->extlang[i] = NULL;
    538     }
    539 
    540     langtag->script = EMPTY;
    541     langtag->region = EMPTY;
    542 
    543     langtag->variants = NULL;
    544     langtag->extensions = NULL;
    545 
    546     langtag->grandfathered = EMPTY;
    547     langtag->privateuse = EMPTY;
    548 }
    549 
/* Name of the resource bundle opened with ures_openDirect() by the
 * key/type conversion functions below. */
#define KEYTYPEDATA     "keyTypeData"
/* Names of the tables looked up inside that bundle with ures_getByKey(). */
#define KEYMAP          "keyMap"
#define TYPEMAP         "typeMap"
#define TYPEALIAS       "typeAlias"
/* Sizes of the stack buffers used by the conversion functions; an input
 * whose length is not smaller than the buffer size is rejected. */
#define MAX_BCP47_SUBTAG_LEN    9   /* including null terminator */
#define MAX_LDML_KEY_LEN        22  /* no known valid LDML key exceeds 21 characters */
#define MAX_LDML_TYPE_LEN       32
    557 
    558 static int32_t
    559 _ldmlKeyToBCP47(const char* key, int32_t keyLen,
    560                 char* bcpKey, int32_t bcpKeyCapacity,
    561                 UErrorCode *status) {
    562     UResourceBundle *rb;
    563     char keyBuf[MAX_LDML_KEY_LEN];
    564     char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    565     int32_t resultLen = 0;
    566     int32_t i;
    567     UErrorCode tmpStatus = U_ZERO_ERROR;
    568     const UChar *uBcpKey;
    569     int32_t bcpKeyLen;
    570 
    571     if (keyLen < 0) {
    572         keyLen = (int32_t)uprv_strlen(key);
    573     }
    574 
    575     if (keyLen >= sizeof(keyBuf)) {
    576         /* no known valid LDML key exceeding 21 */
    577         *status = U_ILLEGAL_ARGUMENT_ERROR;
    578         return 0;
    579     }
    580 
    581     uprv_memcpy(keyBuf, key, keyLen);
    582     keyBuf[keyLen] = 0;
    583 
    584     /* to lower case */
    585     for (i = 0; i < keyLen; i++) {
    586         keyBuf[i] = uprv_tolower(keyBuf[i]);
    587     }
    588 
    589     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    590     ures_getByKey(rb, KEYMAP, rb, status);
    591 
    592     if (U_FAILURE(*status)) {
    593         ures_close(rb);
    594         return 0;
    595     }
    596 
    597     uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
    598     if (U_SUCCESS(tmpStatus)) {
    599         u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
    600         bcpKeyBuf[bcpKeyLen] = 0;
    601         resultLen = bcpKeyLen;
    602     } else {
    603         if (_isLDMLKey(key, keyLen)) {
    604             uprv_memcpy(bcpKeyBuf, key, keyLen);
    605             bcpKeyBuf[keyLen] = 0;
    606             resultLen = keyLen;
    607         } else {
    608             /* mapping not availabe */
    609             *status = U_ILLEGAL_ARGUMENT_ERROR;
    610         }
    611     }
    612     ures_close(rb);
    613 
    614     if (U_FAILURE(*status)) {
    615         return 0;
    616     }
    617 
    618     uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
    619     return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
    620 }
    621 
    622 static int32_t
    623 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
    624                 char* key, int32_t keyCapacity,
    625                 UErrorCode *status) {
    626     UResourceBundle *rb;
    627     char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    628     int32_t resultLen = 0;
    629     int32_t i;
    630     const char *resKey = NULL;
    631     UResourceBundle *mapData;
    632 
    633     if (bcpKeyLen < 0) {
    634         bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
    635     }
    636 
    637     if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
    638         *status = U_ILLEGAL_ARGUMENT_ERROR;
    639         return 0;
    640     }
    641 
    642     uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
    643     bcpKeyBuf[bcpKeyLen] = 0;
    644 
    645     /* to lower case */
    646     for (i = 0; i < bcpKeyLen; i++) {
    647         bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
    648     }
    649 
    650     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    651     ures_getByKey(rb, KEYMAP, rb, status);
    652     if (U_FAILURE(*status)) {
    653         ures_close(rb);
    654         return 0;
    655     }
    656 
    657     mapData = ures_getNextResource(rb, NULL, status);
    658     while (U_SUCCESS(*status)) {
    659         const UChar *uBcpKey;
    660         char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    661         int32_t tmpBcpKeyLen;
    662 
    663         uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
    664         if (U_FAILURE(*status)) {
    665             break;
    666         }
    667         u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
    668         tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
    669         if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) {
    670             /* found a matching BCP47 key */
    671             resKey = ures_getKey(mapData);
    672             resultLen = (int32_t)uprv_strlen(resKey);
    673             break;
    674         }
    675         if (!ures_hasNext(rb)) {
    676             break;
    677         }
    678         ures_getNextResource(rb, mapData, status);
    679     }
    680     ures_close(mapData);
    681     ures_close(rb);
    682 
    683     if (U_FAILURE(*status)) {
    684         return 0;
    685     }
    686 
    687     if (resKey == NULL) {
    688         resKey = bcpKeyBuf;
    689         resultLen = bcpKeyLen;
    690     }
    691 
    692     uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
    693     return u_terminateChars(key, keyCapacity, resultLen, status);
    694 }
    695 
    696 static int32_t
    697 _ldmlTypeToBCP47(const char* key, int32_t keyLen,
    698                  const char* type, int32_t typeLen,
    699                  char* bcpType, int32_t bcpTypeCapacity,
    700                  UErrorCode *status) {
    701     UResourceBundle *rb, *keyTypeData, *typeMapForKey;
    702     char keyBuf[MAX_LDML_KEY_LEN];
    703     char typeBuf[MAX_LDML_TYPE_LEN];
    704     char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
    705     int32_t resultLen = 0;
    706     int32_t i;
    707     UErrorCode tmpStatus = U_ZERO_ERROR;
    708     const UChar *uBcpType, *uCanonicalType;
    709     int32_t bcpTypeLen, canonicalTypeLen;
    710     UBool isTimezone = FALSE;
    711 
    712     if (keyLen < 0) {
    713         keyLen = (int32_t)uprv_strlen(key);
    714     }
    715     if (keyLen >= sizeof(keyBuf)) {
    716         /* no known valid LDML key exceeding 21 */
    717         *status = U_ILLEGAL_ARGUMENT_ERROR;
    718         return 0;
    719     }
    720     uprv_memcpy(keyBuf, key, keyLen);
    721     keyBuf[keyLen] = 0;
    722 
    723     /* to lower case */
    724     for (i = 0; i < keyLen; i++) {
    725         keyBuf[i] = uprv_tolower(keyBuf[i]);
    726     }
    727     if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
    728         isTimezone = TRUE;
    729     }
    730 
    731     if (typeLen < 0) {
    732         typeLen = (int32_t)uprv_strlen(type);
    733     }
    734     if (typeLen >= sizeof(typeBuf)) {
    735         *status = U_ILLEGAL_ARGUMENT_ERROR;
    736         return 0;
    737     }
    738 
    739     if (isTimezone) {
    740         /* replace '/' with ':' */
    741         for (i = 0; i < typeLen; i++) {
    742             if (*(type + i) == '/') {
    743                 typeBuf[i] = ':';
    744             } else {
    745                 typeBuf[i] = *(type + i);
    746             }
    747         }
    748         typeBuf[typeLen] = 0;
    749         type = &typeBuf[0];
    750     }
    751 
    752     keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
    753     rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
    754     if (U_FAILURE(*status)) {
    755         ures_close(rb);
    756         ures_close(keyTypeData);
    757         return 0;
    758     }
    759 
    760     typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
    761     uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus);
    762     if (U_SUCCESS(tmpStatus)) {
    763         u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
    764         resultLen = bcpTypeLen;
    765     } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
    766         /* is this type alias? */
    767         tmpStatus = U_ZERO_ERROR;
    768         ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
    769         ures_getByKey(rb, keyBuf, rb, &tmpStatus);
    770         uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus);
    771         if (U_SUCCESS(tmpStatus)) {
    772             u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
    773             if (isTimezone) {
    774                 /* replace '/' with ':' */
    775                 for (i = 0; i < canonicalTypeLen; i++) {
    776                     if (typeBuf[i] == '/') {
    777                         typeBuf[i] = ':';
    778                     }
    779                 }
    780             }
    781             typeBuf[canonicalTypeLen] = 0;
    782 
    783             /* look up the canonical type */
    784             uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
    785             if (U_SUCCESS(tmpStatus)) {
    786                 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
    787                 resultLen = bcpTypeLen;
    788             }
    789         }
    790         if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
    791             if (_isLDMLType(type, typeLen)) {
    792                 uprv_memcpy(bcpTypeBuf, type, typeLen);
    793                 resultLen = typeLen;
    794             } else {
    795                 /* mapping not availabe */
    796                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    797             }
    798         }
    799     } else {
    800         *status = tmpStatus;
    801     }
    802     ures_close(rb);
    803     ures_close(typeMapForKey);
    804     ures_close(keyTypeData);
    805 
    806     if (U_FAILURE(*status)) {
    807         return 0;
    808     }
    809 
    810     uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
    811     return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
    812 }
    813 
    814 static int32_t
    815 _bcp47ToLDMLType(const char* key, int32_t keyLen,
    816                  const char* bcpType, int32_t bcpTypeLen,
    817                  char* type, int32_t typeCapacity,
    818                  UErrorCode *status) {
    819     UResourceBundle *rb;
    820     char keyBuf[MAX_LDML_KEY_LEN];
    821     char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
    822     int32_t resultLen = 0;
    823     int32_t i;
    824     const char *resType = NULL;
    825     UResourceBundle *mapData;
    826     UErrorCode tmpStatus = U_ZERO_ERROR;
    827     int32_t copyLen;
    828 
    829     if (keyLen < 0) {
    830         keyLen = (int32_t)uprv_strlen(key);
    831     }
    832 
    833     if (keyLen >= sizeof(keyBuf)) {
    834         /* no known valid LDML key exceeding 21 */
    835         *status = U_ILLEGAL_ARGUMENT_ERROR;
    836         return 0;
    837     }
    838     uprv_memcpy(keyBuf, key, keyLen);
    839     keyBuf[keyLen] = 0;
    840 
    841     /* to lower case */
    842     for (i = 0; i < keyLen; i++) {
    843         keyBuf[i] = uprv_tolower(keyBuf[i]);
    844     }
    845 
    846 
    847     if (bcpTypeLen < 0) {
    848         bcpTypeLen = (int32_t)uprv_strlen(bcpType);
    849     }
    850 
    851     if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
    852         *status = U_ILLEGAL_ARGUMENT_ERROR;
    853         return 0;
    854     }
    855 
    856     uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
    857     bcpTypeBuf[bcpTypeLen] = 0;
    858 
    859     /* to lower case */
    860     for (i = 0; i < bcpTypeLen; i++) {
    861         bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
    862     }
    863 
    864     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    865     ures_getByKey(rb, TYPEMAP, rb, status);
    866     if (U_FAILURE(*status)) {
    867         ures_close(rb);
    868         return 0;
    869     }
    870 
    871     ures_getByKey(rb, keyBuf, rb, &tmpStatus);
    872     mapData = ures_getNextResource(rb, NULL, &tmpStatus);
    873     while (U_SUCCESS(tmpStatus)) {
    874         const UChar *uBcpType;
    875         char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
    876         int32_t tmpBcpTypeLen;
    877 
    878         uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
    879         if (U_FAILURE(tmpStatus)) {
    880             break;
    881         }
    882         u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
    883         tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
    884         if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) {
    885             /* found a matching BCP47 type */
    886             resType = ures_getKey(mapData);
    887             resultLen = (int32_t)uprv_strlen(resType);
    888             break;
    889         }
    890         if (!ures_hasNext(rb)) {
    891             break;
    892         }
    893         ures_getNextResource(rb, mapData, &tmpStatus);
    894     }
    895     ures_close(mapData);
    896     ures_close(rb);
    897 
    898     if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
    899         *status = tmpStatus;
    900         return 0;
    901     }
    902 
    903     if (resType == NULL) {
    904         resType = bcpTypeBuf;
    905         resultLen = bcpTypeLen;
    906     }
    907 
    908     copyLen = uprv_min(resultLen, typeCapacity);
    909     uprv_memcpy(type, resType, copyLen);
    910 
    911     if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
    912         for (i = 0; i < copyLen; i++) {
    913             if (*(type + i) == ':') {
    914                 *(type + i) = '/';
    915             }
    916         }
    917     }
    918 
    919     return u_terminateChars(type, typeCapacity, resultLen, status);
    920 }
    921 
    922 static int32_t
    923 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
    924     char buf[ULOC_LANG_CAPACITY];
    925     UErrorCode tmpStatus = U_ZERO_ERROR;
    926     int32_t len, i;
    927     int32_t reslen = 0;
    928 
    929     if (U_FAILURE(*status)) {
    930         return 0;
    931     }
    932 
    933     len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
    934     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
    935         if (strict) {
    936             *status = U_ILLEGAL_ARGUMENT_ERROR;
    937             return 0;
    938         }
    939         len = 0;
    940     }
    941 
    942     /* Note: returned language code is in lower case letters */
    943 
    944     if (len == 0) {
    945         if (reslen < capacity) {
    946             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
    947         }
    948         reslen += LANG_UND_LEN;
    949     } else if (!_isLanguageSubtag(buf, len)) {
    950             /* invalid language code */
    951         if (strict) {
    952             *status = U_ILLEGAL_ARGUMENT_ERROR;
    953             return 0;
    954         }
    955         if (reslen < capacity) {
    956             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
    957         }
    958         reslen += LANG_UND_LEN;
    959     } else {
    960         /* resolve deprecated */
    961         for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
    962             if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
    963                 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
    964                 len = (int32_t)uprv_strlen(buf);
    965                 break;
    966             }
    967         }
    968         if (reslen < capacity) {
    969             uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
    970         }
    971         reslen += len;
    972     }
    973     u_terminateChars(appendAt, capacity, reslen, status);
    974     return reslen;
    975 }
    976 
    977 static int32_t
    978 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
    979     char buf[ULOC_SCRIPT_CAPACITY];
    980     UErrorCode tmpStatus = U_ZERO_ERROR;
    981     int32_t len;
    982     int32_t reslen = 0;
    983 
    984     if (U_FAILURE(*status)) {
    985         return 0;
    986     }
    987 
    988     len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
    989     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
    990         if (strict) {
    991             *status = U_ILLEGAL_ARGUMENT_ERROR;
    992         }
    993         return 0;
    994     }
    995 
    996     if (len > 0) {
    997         if (!_isScriptSubtag(buf, len)) {
    998             /* invalid script code */
    999             if (strict) {
   1000                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1001             }
   1002             return 0;
   1003         } else {
   1004             if (reslen < capacity) {
   1005                 *(appendAt + reslen) = SEP;
   1006             }
   1007             reslen++;
   1008 
   1009             if (reslen < capacity) {
   1010                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
   1011             }
   1012             reslen += len;
   1013         }
   1014     }
   1015     u_terminateChars(appendAt, capacity, reslen, status);
   1016     return reslen;
   1017 }
   1018 
   1019 static int32_t
   1020 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
   1021     char buf[ULOC_COUNTRY_CAPACITY];
   1022     UErrorCode tmpStatus = U_ZERO_ERROR;
   1023     int32_t len;
   1024     int32_t reslen = 0;
   1025 
   1026     if (U_FAILURE(*status)) {
   1027         return 0;
   1028     }
   1029 
   1030     len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
   1031     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1032         if (strict) {
   1033             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1034         }
   1035         return 0;
   1036     }
   1037 
   1038     if (len > 0) {
   1039         if (!_isRegionSubtag(buf, len)) {
   1040             /* invalid region code */
   1041             if (strict) {
   1042                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1043             }
   1044             return 0;
   1045         } else {
   1046             if (reslen < capacity) {
   1047                 *(appendAt + reslen) = SEP;
   1048             }
   1049             reslen++;
   1050 
   1051             if (reslen < capacity) {
   1052                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
   1053             }
   1054             reslen += len;
   1055         }
   1056     }
   1057     u_terminateChars(appendAt, capacity, reslen, status);
   1058     return reslen;
   1059 }
   1060 
   1061 static int32_t
   1062 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
   1063     char buf[ULOC_FULLNAME_CAPACITY];
   1064     UErrorCode tmpStatus = U_ZERO_ERROR;
   1065     int32_t len, i;
   1066     int32_t reslen = 0;
   1067 
   1068     if (U_FAILURE(*status)) {
   1069         return 0;
   1070     }
   1071 
   1072     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
   1073     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1074         if (strict) {
   1075             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1076         }
   1077         return 0;
   1078     }
   1079 
   1080     if (len > 0) {
   1081         char *p, *pVar;
   1082         UBool bNext = TRUE;
   1083         VariantListEntry *var;
   1084         VariantListEntry *varFirst = NULL;
   1085 
   1086         pVar = NULL;
   1087         p = buf;
   1088         while (bNext) {
   1089             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
   1090                 if (*p == 0) {
   1091                     bNext = FALSE;
   1092                 } else {
   1093                     *p = 0; /* terminate */
   1094                 }
   1095                 if (pVar == NULL) {
   1096                     if (strict) {
   1097                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1098                         break;
   1099                     }
   1100                     /* ignore empty variant */
   1101                 } else {
   1102                     /* ICU uses upper case letters for variants, but
   1103                        the canonical format is lowercase in BCP47 */
   1104                     for (i = 0; *(pVar + i) != 0; i++) {
   1105                         *(pVar + i) = uprv_tolower(*(pVar + i));
   1106                     }
   1107 
   1108                     /* validate */
   1109                     if (_isVariantSubtag(pVar, -1)) {
   1110                         if (uprv_strcmp(pVar,POSIX_VALUE)) {
   1111                             /* emit the variant to the list */
   1112                             var = uprv_malloc(sizeof(VariantListEntry));
   1113                             if (var == NULL) {
   1114                                 *status = U_MEMORY_ALLOCATION_ERROR;
   1115                                 break;
   1116                             }
   1117                             var->variant = pVar;
   1118                             if (!_addVariantToList(&varFirst, var)) {
   1119                                 /* duplicated variant */
   1120                                 uprv_free(var);
   1121                                 if (strict) {
   1122                                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1123                                     break;
   1124                                 }
   1125                             }
   1126                         } else {
   1127                             /* Special handling for POSIX variant, need to remember that we had it and then */
   1128                             /* treat it like an extension later. */
   1129                             *hadPosix = TRUE;
   1130                         }
   1131                     } else if (strict) {
   1132                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1133                         break;
   1134                     }
   1135                 }
   1136                 /* reset variant starting position */
   1137                 pVar = NULL;
   1138             } else if (pVar == NULL) {
   1139                 pVar = p;
   1140             }
   1141             p++;
   1142         }
   1143 
   1144         if (U_SUCCESS(*status)) {
   1145             if (varFirst != NULL) {
   1146                 int32_t varLen;
   1147 
   1148                 /* write out sorted/validated/normalized variants to the target */
   1149                 var = varFirst;
   1150                 while (var != NULL) {
   1151                     if (reslen < capacity) {
   1152                         *(appendAt + reslen) = SEP;
   1153                     }
   1154                     reslen++;
   1155                     varLen = (int32_t)uprv_strlen(var->variant);
   1156                     if (reslen < capacity) {
   1157                         uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
   1158                     }
   1159                     reslen += varLen;
   1160                     var = var->next;
   1161                 }
   1162             }
   1163         }
   1164 
   1165         /* clean up */
   1166         var = varFirst;
   1167         while (var != NULL) {
   1168             VariantListEntry *tmpVar = var->next;
   1169             uprv_free(var);
   1170             var = tmpVar;
   1171         }
   1172 
   1173         if (U_FAILURE(*status)) {
   1174             return 0;
   1175         }
   1176     }
   1177 
   1178     u_terminateChars(appendAt, capacity, reslen, status);
   1179     return reslen;
   1180 }
   1181 
   1182 static int32_t
   1183 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
   1184     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1185     UEnumeration *keywordEnum = NULL;
   1186     int32_t reslen = 0;
   1187 
   1188     keywordEnum = uloc_openKeywords(localeID, status);
   1189     if (U_FAILURE(*status) && !hadPosix) {
   1190         uenum_close(keywordEnum);
   1191         return 0;
   1192     }
   1193     if (keywordEnum != NULL || hadPosix) {
   1194         /* reorder extensions */
   1195         int32_t len;
   1196         const char *key;
   1197         ExtensionListEntry *firstExt = NULL;
   1198         ExtensionListEntry *ext;
   1199         char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1200         char *pExtBuf = extBuf;
   1201         int32_t extBufCapacity = sizeof(extBuf);
   1202         const char *bcpKey, *bcpValue;
   1203         UErrorCode tmpStatus = U_ZERO_ERROR;
   1204         int32_t keylen;
   1205         UBool isLDMLKeyword;
   1206 
   1207         while (TRUE) {
   1208             key = uenum_next(keywordEnum, NULL, status);
   1209             if (key == NULL) {
   1210                 break;
   1211             }
   1212             len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
   1213             if (U_FAILURE(tmpStatus)) {
   1214                 if (strict) {
   1215                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1216                     break;
   1217                 }
   1218                 /* ignore this keyword */
   1219                 tmpStatus = U_ZERO_ERROR;
   1220                 continue;
   1221             }
   1222 
   1223             keylen = (int32_t)uprv_strlen(key);
   1224             isLDMLKeyword = (keylen > 1);
   1225 
   1226             if (isLDMLKeyword) {
   1227                 int32_t modKeyLen;
   1228 
   1229                 /* transform key and value to bcp47 style */
   1230                 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
   1231                 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1232                     if (strict) {
   1233                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1234                         break;
   1235                     }
   1236                     tmpStatus = U_ZERO_ERROR;
   1237                     continue;
   1238                 }
   1239 
   1240                 bcpKey = pExtBuf;
   1241                 pExtBuf += (modKeyLen + 1);
   1242                 extBufCapacity -= (modKeyLen + 1);
   1243 
   1244                 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
   1245                 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
   1246                     if (strict) {
   1247                         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1248                         break;
   1249                     }
   1250                     tmpStatus = U_ZERO_ERROR;
   1251                     continue;
   1252                 }
   1253                 bcpValue = pExtBuf;
   1254                 pExtBuf += (len + 1);
   1255                 extBufCapacity -= (len + 1);
   1256             } else {
   1257                 if (*key == PRIVATEUSE) {
   1258                     if (!_isPrivateuseValueSubtags(buf, len)) {
   1259                         if (strict) {
   1260                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1261                             break;
   1262                         }
   1263                         continue;
   1264                     }
   1265                 } else {
   1266                     if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
   1267                         if (strict) {
   1268                             *status = U_ILLEGAL_ARGUMENT_ERROR;
   1269                             break;
   1270                         }
   1271                         continue;
   1272                     }
   1273                 }
   1274                 bcpKey = key;
   1275                 if ((len + 1) < extBufCapacity) {
   1276                     uprv_memcpy(pExtBuf, buf, len);
   1277                     bcpValue = pExtBuf;
   1278 
   1279                     pExtBuf += len;
   1280 
   1281                     *pExtBuf = 0;
   1282                     pExtBuf++;
   1283 
   1284                     extBufCapacity -= (len + 1);
   1285                 } else {
   1286                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1287                     break;
   1288                 }
   1289             }
   1290 
   1291             /* create ExtensionListEntry */
   1292             ext = uprv_malloc(sizeof(ExtensionListEntry));
   1293             if (ext == NULL) {
   1294                 *status = U_MEMORY_ALLOCATION_ERROR;
   1295                 break;
   1296             }
   1297             ext->key = bcpKey;
   1298             ext->value = bcpValue;
   1299 
   1300             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
   1301                 uprv_free(ext);
   1302                 if (strict) {
   1303                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1304                     break;
   1305                 }
   1306             }
   1307         }
   1308 
   1309         /* Special handling for POSIX variant - add the keywords for POSIX */
   1310         if (hadPosix) {
   1311             /* create ExtensionListEntry for POSIX */
   1312             ext = uprv_malloc(sizeof(ExtensionListEntry));
   1313             if (ext == NULL) {
   1314                 *status = U_MEMORY_ALLOCATION_ERROR;
   1315             }
   1316             ext->key = POSIX_KEY;
   1317             ext->value = POSIX_VALUE;
   1318 
   1319             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
   1320                 uprv_free(ext);
   1321             }
   1322         }
   1323 
   1324         if (U_SUCCESS(*status) && (firstExt != NULL)) {
   1325             UBool startLDMLExtension = FALSE;
   1326 
   1327             /* write out the sorted BCP47 extensions and private use */
   1328             ext = firstExt;
   1329             while (ext != NULL) {
   1330                 if ((int32_t)uprv_strlen(ext->key) > 1 && !startLDMLExtension) {
   1331                     /* write LDML singleton extension */
   1332                     if (reslen < capacity) {
   1333                         *(appendAt + reslen) = SEP;
   1334                     }
   1335                     reslen++;
   1336                     if (reslen < capacity) {
   1337                         *(appendAt + reslen) = LDMLEXT;
   1338                     }
   1339                     reslen++;
   1340                     startLDMLExtension = TRUE;
   1341                 }
   1342 
   1343                 if (reslen < capacity) {
   1344                     *(appendAt + reslen) = SEP;
   1345                 }
   1346                 reslen++;
   1347                 len = (int32_t)uprv_strlen(ext->key);
   1348                 if (reslen < capacity) {
   1349                     uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
   1350                 }
   1351                 reslen += len;
   1352                 if (reslen < capacity) {
   1353                     *(appendAt + reslen) = SEP;
   1354                 }
   1355                 reslen++;
   1356                 len = (int32_t)uprv_strlen(ext->value);
   1357                 if (reslen < capacity) {
   1358                     uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
   1359                 }
   1360                 reslen += len;
   1361 
   1362                 ext = ext->next;
   1363             }
   1364         }
   1365         /* clean up */
   1366         ext = firstExt;
   1367         while (ext != NULL) {
   1368             ExtensionListEntry *tmpExt = ext->next;
   1369             uprv_free(ext);
   1370             ext = tmpExt;
   1371         }
   1372 
   1373         uenum_close(keywordEnum);
   1374 
   1375         if (U_FAILURE(*status)) {
   1376             return 0;
   1377         }
   1378     }
   1379 
   1380     return u_terminateChars(appendAt, capacity, reslen, status);
   1381 }
   1382 
   1383 /**
   1384  * Append keywords parsed from LDML extension value
   1385  * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
   1386  * Note: char* buf is used for storing keywords
   1387  */
   1388 static void
   1389 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
   1390     const char *p, *pNext, *pSep;
   1391     const char *pBcpKey, *pBcpType;
   1392     const char *pKey, *pType;
   1393     int32_t bcpKeyLen = 0, bcpTypeLen;
   1394     ExtensionListEntry *kwd, *nextKwd;
   1395     ExtensionListEntry *kwdFirst = NULL;
   1396     int32_t bufIdx = 0;
   1397     int32_t  len;
   1398 
   1399     pNext = ldmlext;
   1400     pBcpKey = pBcpType = NULL;
   1401     while (pNext) {
   1402         p = pSep = pNext;
   1403 
   1404         /* locate next separator char */
   1405         while (*pSep) {
   1406             if (*pSep == SEP) {
   1407                 break;
   1408             }
   1409             pSep++;
   1410         }
   1411         if (*pSep == 0) {
   1412             /* last subtag */
   1413             pNext = NULL;
   1414         } else {
   1415             pNext = pSep + 1;
   1416         }
   1417 
   1418         if (pBcpKey == NULL) {
   1419             pBcpKey = p;
   1420             bcpKeyLen = (int32_t)(pSep - p);
   1421         } else {
   1422             pBcpType = p;
   1423             bcpTypeLen = (int32_t)(pSep - p);
   1424 
   1425             /* BCP key to locale key */
   1426             len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
   1427             if (U_FAILURE(*status)) {
   1428                 goto cleanup;
   1429             }
   1430             pKey = buf + bufIdx;
   1431             bufIdx += len;
   1432             *(buf + bufIdx) = 0;
   1433             bufIdx++;
   1434 
   1435             /* BCP type to locale type */
   1436             len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
   1437             if (U_FAILURE(*status)) {
   1438                 goto cleanup;
   1439             }
   1440             pType = buf + bufIdx;
   1441             bufIdx += len;
   1442             *(buf + bufIdx) = 0;
   1443             bufIdx++;
   1444 
   1445             /* Special handling for u-va-posix, since we want to treat this as a variant, not */
   1446             /* as a keyword.                                                                  */
   1447 
   1448             if ( !uprv_strcmp(pKey,POSIX_KEY) && !uprv_strcmp(pType,POSIX_VALUE) ) {
   1449                 *posixVariant = TRUE;
   1450             } else {
   1451                 /* create an ExtensionListEntry for this keyword */
   1452                 kwd = uprv_malloc(sizeof(ExtensionListEntry));
   1453                 if (kwd == NULL) {
   1454                     *status = U_MEMORY_ALLOCATION_ERROR;
   1455                     goto cleanup;
   1456                 }
   1457 
   1458                 kwd->key = pKey;
   1459                 kwd->value = pType;
   1460 
   1461                 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1462                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1463                     uprv_free(kwd);
   1464                     goto cleanup;
   1465                 }
   1466             }
   1467 
   1468             /* for next pair */
   1469             pBcpKey = NULL;
   1470             pBcpType = NULL;
   1471         }
   1472     }
   1473 
   1474     if (pBcpKey != NULL) {
   1475         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1476         goto cleanup;
   1477     }
   1478 
   1479     kwd = kwdFirst;
   1480     while (kwd != NULL) {
   1481         nextKwd = kwd->next;
   1482         _addExtensionToList(appendTo, kwd, FALSE);
   1483         kwd = nextKwd;
   1484     }
   1485 
   1486     return;
   1487 
   1488 cleanup:
   1489     kwd = kwdFirst;
   1490     while (kwd != NULL) {
   1491         nextKwd = kwd->next;
   1492         uprv_free(kwd);
   1493         kwd = nextKwd;
   1494     }
   1495 }
   1496 
   1497 
   1498 static int32_t
   1499 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
   1500     int32_t reslen = 0;
   1501     int32_t i, n;
   1502     int32_t len;
   1503     ExtensionListEntry *kwdFirst = NULL;
   1504     ExtensionListEntry *kwd;
   1505     const char *key, *type;
   1506     char kwdBuf[ULOC_KEYWORDS_CAPACITY];
   1507     UBool posixVariant = FALSE;
   1508 
   1509     if (U_FAILURE(*status)) {
   1510         return 0;
   1511     }
   1512 
   1513     n = ultag_getExtensionsSize(langtag);
   1514 
   1515     /* resolve locale keywords and reordering keys */
   1516     for (i = 0; i < n; i++) {
   1517         key = ultag_getExtensionKey(langtag, i);
   1518         type = ultag_getExtensionValue(langtag, i);
   1519         if (*key == LDMLEXT) {
   1520             _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, sizeof(kwdBuf), &posixVariant, status);
   1521             if (U_FAILURE(*status)) {
   1522                 break;
   1523             }
   1524         } else {
   1525             kwd = uprv_malloc(sizeof(ExtensionListEntry));
   1526             if (kwd == NULL) {
   1527                 *status = U_MEMORY_ALLOCATION_ERROR;
   1528                 break;
   1529             }
   1530             kwd->key = key;
   1531             kwd->value = type;
   1532             if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1533                 uprv_free(kwd);
   1534                 *status = U_ILLEGAL_ARGUMENT_ERROR;
   1535                 break;
   1536             }
   1537         }
   1538     }
   1539 
   1540     if (U_SUCCESS(*status)) {
   1541         type = ultag_getPrivateUse(langtag);
   1542         if ((int32_t)uprv_strlen(type) > 0) {
   1543             /* add private use as a keyword */
   1544             kwd = uprv_malloc(sizeof(ExtensionListEntry));
   1545             if (kwd == NULL) {
   1546                 *status = U_MEMORY_ALLOCATION_ERROR;
   1547             } else {
   1548                 kwd->key = PRIVATEUSE_KEY;
   1549                 kwd->value = type;
   1550                 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
   1551                     uprv_free(kwd);
   1552                     *status = U_ILLEGAL_ARGUMENT_ERROR;
   1553                 }
   1554             }
   1555         }
   1556     }
   1557 
   1558     /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
   1559 
   1560     if (U_SUCCESS(*status) && posixVariant) {
   1561         len = (int32_t) uprv_strlen(_POSIX);
   1562         if (reslen < capacity) {
   1563             uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
   1564         }
   1565         reslen += len;
   1566     }
   1567 
   1568     if (U_SUCCESS(*status) && kwdFirst != NULL) {
   1569         /* write out the sorted keywords */
   1570         kwd = kwdFirst;
   1571         while (kwd != NULL) {
   1572             if (reslen < capacity) {
   1573                 if (kwd == kwdFirst) {
   1574                     /* '@' */
   1575                     *(appendAt + reslen) = LOCALE_EXT_SEP;
   1576                 } else {
   1577                     /* ';' */
   1578                     *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
   1579                 }
   1580             }
   1581             reslen++;
   1582 
   1583             /* key */
   1584             len = (int32_t)uprv_strlen(kwd->key);
   1585             if (reslen < capacity) {
   1586                 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
   1587             }
   1588             reslen += len;
   1589 
   1590             /* '=' */
   1591             if (reslen < capacity) {
   1592                 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
   1593             }
   1594             reslen++;
   1595 
   1596             /* type */
   1597             len = (int32_t)uprv_strlen(kwd->value);
   1598             if (reslen < capacity) {
   1599                 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
   1600             }
   1601             reslen += len;
   1602 
   1603             kwd = kwd->next;
   1604         }
   1605     }
   1606 
   1607     /* clean up */
   1608     kwd = kwdFirst;
   1609     while (kwd != NULL) {
   1610         ExtensionListEntry *tmpKwd = kwd->next;
   1611         uprv_free(kwd);
   1612         kwd = tmpKwd;
   1613     }
   1614 
   1615     if (U_FAILURE(*status)) {
   1616         return 0;
   1617     }
   1618 
   1619     return u_terminateChars(appendAt, capacity, reslen, status);
   1620 }
   1621 
   1622 /*
   1623 * -------------------------------------------------
   1624 *
   1625 * ultag_ functions
   1626 *
   1627 * -------------------------------------------------
   1628 */
   1629 
   1630 /* Bit flags used by the parser: ultag_parse() keeps a mask of these in `next`, naming the subtag kinds accepted at the current position */
   1631 #define LANG 0x0001 /* language subtag (tested with _isLanguageSubtag) */
   1632 #define EXTL 0x0002 /* extlang subtag (tested with _isExtlangSubtag) */
   1633 #define SCRT 0x0004 /* script subtag (tested with _isScriptSubtag) */
   1634 #define REGN 0x0008 /* region subtag (tested with _isRegionSubtag) */
   1635 #define VART 0x0010 /* variant subtag (tested with _isVariantSubtag) */
   1636 #define EXTS 0x0020 /* presumably extension singleton - TODO confirm against the rest of ultag_parse */
   1637 #define EXTV 0x0040 /* presumably extension value subtag - TODO confirm against the rest of ultag_parse */
   1638 #define PRIV 0x0080 /* presumably private use - TODO confirm against the rest of ultag_parse */
   1639 
   1640 static ULanguageTag*
   1641 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
   1642     ULanguageTag *t;
   1643     char *tagBuf;
   1644     int16_t next;
   1645     char *pSubtag, *pNext, *pLastGoodPosition;
   1646     int32_t subtagLen;
   1647     int32_t extlangIdx;
   1648     ExtensionListEntry *pExtension;
   1649     char *pExtValueSubtag, *pExtValueSubtagEnd;
   1650     int32_t i;
   1651     UBool isLDMLExtension, reqLDMLType;
   1652 
   1653     if (parsedLen != NULL) {
   1654         *parsedLen = 0;
   1655     }
   1656 
   1657     if (U_FAILURE(*status)) {
   1658         return NULL;
   1659     }
   1660 
   1661     if (tagLen < 0) {
   1662         tagLen = (int32_t)uprv_strlen(tag);
   1663     }
   1664 
   1665     /* copy the entire string */
   1666     tagBuf = (char*)uprv_malloc(tagLen + 1);
   1667     if (tagBuf == NULL) {
   1668         *status = U_MEMORY_ALLOCATION_ERROR;
   1669         return NULL;
   1670     }
   1671     uprv_memcpy(tagBuf, tag, tagLen);
   1672     *(tagBuf + tagLen) = 0;
   1673 
   1674     /* create a ULanguageTag */
   1675     t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
   1676     _initializeULanguageTag(t);
   1677     t->buf = tagBuf;
   1678     if (t == NULL) {
   1679         uprv_free(tagBuf);
   1680         *status = U_MEMORY_ALLOCATION_ERROR;
   1681         return NULL;
   1682     }
   1683 
   1684     if (tagLen < MINLEN) {
   1685         /* the input tag is too short - return empty ULanguageTag */
   1686         return t;
   1687     }
   1688 
   1689     /* check if the tag is grandfathered */
   1690     for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
   1691         if (T_CString_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
   1692             /* a grandfathered tag is always longer than its preferred mapping */
   1693             uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
   1694             t->language = t->buf;
   1695             if (parsedLen != NULL) {
   1696                 *parsedLen = tagLen;
   1697             }
   1698             return t;
   1699         }
   1700     }
   1701 
   1702     /*
   1703      * langtag      =   language
   1704      *                  ["-" script]
   1705      *                  ["-" region]
   1706      *                  *("-" variant)
   1707      *                  *("-" extension)
   1708      *                  ["-" privateuse]
   1709      */
   1710 
   1711     next = LANG | PRIV;
   1712     pNext = pLastGoodPosition = tagBuf;
   1713     extlangIdx = 0;
   1714     pExtension = NULL;
   1715     pExtValueSubtag = NULL;
   1716     pExtValueSubtagEnd = NULL;
   1717     isLDMLExtension = FALSE;
   1718     reqLDMLType = FALSE;
   1719 
   1720     while (pNext) {
   1721         char *pSep;
   1722 
   1723         pSubtag = pNext;
   1724 
   1725         /* locate next separator char */
   1726         pSep = pSubtag;
   1727         while (*pSep) {
   1728             if (*pSep == SEP) {
   1729                 break;
   1730             }
   1731             pSep++;
   1732         }
   1733         if (*pSep == 0) {
   1734             /* last subtag */
   1735             pNext = NULL;
   1736         } else {
   1737             pNext = pSep + 1;
   1738         }
   1739         subtagLen = (int32_t)(pSep - pSubtag);
   1740 
   1741         if (next & LANG) {
   1742             if (_isLanguageSubtag(pSubtag, subtagLen)) {
   1743                 *pSep = 0;  /* terminate */
   1744                 t->language = T_CString_toLowerCase(pSubtag);
   1745 
   1746                 pLastGoodPosition = pSep;
   1747                 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
   1748                 continue;
   1749             }
   1750         }
   1751         if (next & EXTL) {
   1752             if (_isExtlangSubtag(pSubtag, subtagLen)) {
   1753                 *pSep = 0;
   1754                 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
   1755 
   1756                 pLastGoodPosition = pSep;
   1757                 if (extlangIdx < 3) {
   1758                     next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
   1759                 } else {
   1760                     next = SCRT | REGN | VART | EXTS | PRIV;
   1761                 }
   1762                 continue;
   1763             }
   1764         }
   1765         if (next & SCRT) {
   1766             if (_isScriptSubtag(pSubtag, subtagLen)) {
   1767                 char *p = pSubtag;
   1768 
   1769                 *pSep = 0;
   1770 
   1771                 /* to title case */
   1772                 *p = uprv_toupper(*p);
   1773                 p++;
   1774                 for (; *p; p++) {
   1775                     *p = uprv_tolower(*p);
   1776                 }
   1777 
   1778                 t->script = pSubtag;
   1779 
   1780                 pLastGoodPosition = pSep;
   1781                 next = REGN | VART | EXTS | PRIV;
   1782                 continue;
   1783             }
   1784         }
   1785         if (next & REGN) {
   1786             if (_isRegionSubtag(pSubtag, subtagLen)) {
   1787                 *pSep = 0;
   1788                 t->region = T_CString_toUpperCase(pSubtag);
   1789 
   1790                 pLastGoodPosition = pSep;
   1791                 next = VART | EXTS | PRIV;
   1792                 continue;
   1793             }
   1794         }
   1795         if (next & VART) {
   1796             if (_isVariantSubtag(pSubtag, subtagLen)) {
   1797                 VariantListEntry *var;
   1798                 UBool isAdded;
   1799 
   1800                 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
   1801                 if (var == NULL) {
   1802                     *status = U_MEMORY_ALLOCATION_ERROR;
   1803                     goto error;
   1804                 }
   1805                 *pSep = 0;
   1806                 var->variant = T_CString_toUpperCase(pSubtag);
   1807                 isAdded = _addVariantToList(&(t->variants), var);
   1808                 if (!isAdded) {
   1809                     /* duplicated variant entry */
   1810                     uprv_free(var);
   1811                     break;
   1812                 }
   1813                 pLastGoodPosition = pSep;
   1814                 next = VART | EXTS | PRIV;
   1815                 continue;
   1816             }
   1817         }
   1818         if (next & EXTS) {
   1819             if (_isExtensionSingleton(pSubtag, subtagLen)) {
   1820                 if (pExtension != NULL) {
   1821                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   1822                         /* the previous extension is incomplete */
   1823                         uprv_free(pExtension);
   1824                         pExtension = NULL;
   1825                         break;
   1826                     }
   1827 
   1828                     /* terminate the previous extension value */
   1829                     *pExtValueSubtagEnd = 0;
   1830                     pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   1831 
   1832                     /* insert the extension to the list */
   1833                     if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   1834                         pLastGoodPosition = pExtValueSubtagEnd;
   1835                     } else {
   1836                         /* stop parsing here */
   1837                         uprv_free(pExtension);
   1838                         pExtension = NULL;
   1839                         break;
   1840                     }
   1841 
   1842                     if (isLDMLExtension && reqLDMLType) {
   1843                         /* incomplete LDML extension key and type pair */
   1844                         pExtension = NULL;
   1845                         break;
   1846                     }
   1847                 }
   1848 
   1849                 isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT);
   1850 
   1851                 /* create a new extension */
   1852                 pExtension = uprv_malloc(sizeof(ExtensionListEntry));
   1853                 if (pExtension == NULL) {
   1854                     *status = U_MEMORY_ALLOCATION_ERROR;
   1855                     goto error;
   1856                 }
   1857                 *pSep = 0;
   1858                 pExtension->key = T_CString_toLowerCase(pSubtag);
   1859                 pExtension->value = NULL;   /* will be set later */
   1860 
   1861                 /*
   1862                  * reset the start and the end location of extension value
   1863                  * subtags for this extension
   1864                  */
   1865                 pExtValueSubtag = NULL;
   1866                 pExtValueSubtagEnd = NULL;
   1867 
   1868                 next = EXTV;
   1869                 continue;
   1870             }
   1871         }
   1872         if (next & EXTV) {
   1873             if (_isExtensionSubtag(pSubtag, subtagLen)) {
   1874                 if (isLDMLExtension) {
   1875                     if (reqLDMLType) {
   1876                         /* already saw an LDML key */
   1877                         if (!_isLDMLType(pSubtag, subtagLen)) {
   1878                             /* stop parsing here and let the valid LDML extension key/type
   1879                                pairs processed by the code out of this while loop */
   1880                             break;
   1881                         }
   1882                         pExtValueSubtagEnd = pSep;
   1883                         reqLDMLType = FALSE;
   1884                         next = EXTS | EXTV | PRIV;
   1885                     } else {
   1886                         /* LDML key */
   1887                         if (!_isLDMLKey(pSubtag, subtagLen)) {
   1888                             /* stop parsing here and let the valid LDML extension key/type
   1889                                pairs processed by the code out of this while loop */
   1890                             break;
   1891                         }
   1892                         reqLDMLType = TRUE;
   1893                         next = EXTV;
   1894                     }
   1895                 } else {
   1896                     /* Mark the end of this subtag */
   1897                     pExtValueSubtagEnd = pSep;
   1898                     next = EXTS | EXTV | PRIV;
   1899                 }
   1900 
   1901                 if (pExtValueSubtag == NULL) {
   1902                     /* if the start postion of this extension's value is not yet,
   1903                        this one is the first value subtag */
   1904                     pExtValueSubtag = pSubtag;
   1905                 }
   1906                 continue;
   1907             }
   1908         }
   1909         if (next & PRIV) {
   1910             if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
   1911                 char *pPrivuseVal;
   1912 
   1913                 if (pExtension != NULL) {
   1914                     /* Process the last extension */
   1915                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   1916                         /* the previous extension is incomplete */
   1917                         uprv_free(pExtension);
   1918                         pExtension = NULL;
   1919                         break;
   1920                     } else {
   1921                         /* terminate the previous extension value */
   1922                         *pExtValueSubtagEnd = 0;
   1923                         pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   1924 
   1925                         /* insert the extension to the list */
   1926                         if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   1927                             pLastGoodPosition = pExtValueSubtagEnd;
   1928                             pExtension = NULL;
   1929                         } else {
   1930                         /* stop parsing here */
   1931                             uprv_free(pExtension);
   1932                             pExtension = NULL;
   1933                             break;
   1934                         }
   1935                     }
   1936                 }
   1937 
   1938                 /* The rest of part will be private use value subtags */
   1939                 if (pNext == NULL) {
   1940                     /* empty private use subtag */
   1941                     break;
   1942                 }
   1943                 /* back up the private use value start position */
   1944                 pPrivuseVal = pNext;
   1945 
   1946                 /* validate private use value subtags */
   1947                 while (pNext) {
   1948                     pSubtag = pNext;
   1949                     pSep = pSubtag;
   1950                     while (*pSep) {
   1951                         if (*pSep == SEP) {
   1952                             break;
   1953                         }
   1954                         pSep++;
   1955                     }
   1956                     if (*pSep == 0) {
   1957                         /* last subtag */
   1958                         pNext = NULL;
   1959                     } else {
   1960                         pNext = pSep + 1;
   1961                     }
   1962                     subtagLen = (int32_t)(pSep - pSubtag);
   1963 
   1964                     if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
   1965                         pLastGoodPosition = pSep;
   1966                     } else {
   1967                         break;
   1968                     }
   1969                 }
   1970                 if (pLastGoodPosition - pPrivuseVal > 0) {
   1971                     *pLastGoodPosition = 0;
   1972                     t->privateuse = T_CString_toLowerCase(pPrivuseVal);
   1973                 }
   1974                 /* No more subtags, exiting the parse loop */
   1975                 break;
   1976             }
   1977             break;
   1978         }
   1979         /* If we fell through here, it means this subtag is illegal - quit parsing */
   1980         break;
   1981     }
   1982 
   1983     if (pExtension != NULL) {
   1984         /* Process the last extension */
   1985         if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
   1986             /* the previous extension is incomplete */
   1987             uprv_free(pExtension);
   1988         } else {
   1989             /* terminate the previous extension value */
   1990             *pExtValueSubtagEnd = 0;
   1991             pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
   1992             /* insert the extension to the list */
   1993             if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
   1994                 pLastGoodPosition = pExtValueSubtagEnd;
   1995             } else {
   1996                 uprv_free(pExtension);
   1997             }
   1998         }
   1999     }
   2000 
   2001     if (parsedLen != NULL) {
   2002         *parsedLen = (int32_t)(pLastGoodPosition - t->buf);
   2003     }
   2004 
   2005     return t;
   2006 
   2007 error:
   2008     uprv_free(t);
   2009     return NULL;
   2010 }
   2011 
   2012 static void
   2013 ultag_close(ULanguageTag* langtag) {
   2014 
   2015     if (langtag == NULL) {
   2016         return;
   2017     }
   2018 
   2019     uprv_free(langtag->buf);
   2020 
   2021     if (langtag->variants) {
   2022         VariantListEntry *curVar = langtag->variants;
   2023         while (curVar) {
   2024             VariantListEntry *nextVar = curVar->next;
   2025             uprv_free(curVar);
   2026             curVar = nextVar;
   2027         }
   2028     }
   2029 
   2030     if (langtag->extensions) {
   2031         ExtensionListEntry *curExt = langtag->extensions;
   2032         while (curExt) {
   2033             ExtensionListEntry *nextExt = curExt->next;
   2034             uprv_free(curExt);
   2035             curExt = nextExt;
   2036         }
   2037     }
   2038 
   2039     uprv_free(langtag);
   2040 }
   2041 
   2042 static const char*
   2043 ultag_getLanguage(const ULanguageTag* langtag) {
   2044     return langtag->language;
   2045 }
   2046 
   2047 #if 0
   2048 static const char*
   2049 ultag_getJDKLanguage(const ULanguageTag* langtag) {
   2050     int32_t i;
   2051     for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
   2052         if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
   2053             return DEPRECATEDLANGS[i + 1];
   2054         }
   2055     }
   2056     return langtag->language;
   2057 }
   2058 #endif
   2059 
   2060 static const char*
   2061 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
   2062     if (idx >= 0 && idx < MAXEXTLANG) {
   2063         return langtag->extlang[idx];
   2064     }
   2065     return NULL;
   2066 }
   2067 
   2068 static int32_t
   2069 ultag_getExtlangSize(const ULanguageTag* langtag) {
   2070     int32_t size = 0;
   2071     int32_t i;
   2072     for (i = 0; i < MAXEXTLANG; i++) {
   2073         if (langtag->extlang[i]) {
   2074             size++;
   2075         }
   2076     }
   2077     return size;
   2078 }
   2079 
   2080 static const char*
   2081 ultag_getScript(const ULanguageTag* langtag) {
   2082     return langtag->script;
   2083 }
   2084 
   2085 static const char*
   2086 ultag_getRegion(const ULanguageTag* langtag) {
   2087     return langtag->region;
   2088 }
   2089 
   2090 static const char*
   2091 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
   2092     const char *var = NULL;
   2093     VariantListEntry *cur = langtag->variants;
   2094     int32_t i = 0;
   2095     while (cur) {
   2096         if (i == idx) {
   2097             var = cur->variant;
   2098             break;
   2099         }
   2100         cur = cur->next;
   2101         i++;
   2102     }
   2103     return var;
   2104 }
   2105 
   2106 static int32_t
   2107 ultag_getVariantsSize(const ULanguageTag* langtag) {
   2108     int32_t size = 0;
   2109     VariantListEntry *cur = langtag->variants;
   2110     while (TRUE) {
   2111         if (cur == NULL) {
   2112             break;
   2113         }
   2114         size++;
   2115         cur = cur->next;
   2116     }
   2117     return size;
   2118 }
   2119 
   2120 static const char*
   2121 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
   2122     const char *key = NULL;
   2123     ExtensionListEntry *cur = langtag->extensions;
   2124     int32_t i = 0;
   2125     while (cur) {
   2126         if (i == idx) {
   2127             key = cur->key;
   2128             break;
   2129         }
   2130         cur = cur->next;
   2131         i++;
   2132     }
   2133     return key;
   2134 }
   2135 
   2136 static const char*
   2137 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
   2138     const char *val = NULL;
   2139     ExtensionListEntry *cur = langtag->extensions;
   2140     int32_t i = 0;
   2141     while (cur) {
   2142         if (i == idx) {
   2143             val = cur->value;
   2144             break;
   2145         }
   2146         cur = cur->next;
   2147         i++;
   2148     }
   2149     return val;
   2150 }
   2151 
   2152 static int32_t
   2153 ultag_getExtensionsSize(const ULanguageTag* langtag) {
   2154     int32_t size = 0;
   2155     ExtensionListEntry *cur = langtag->extensions;
   2156     while (TRUE) {
   2157         if (cur == NULL) {
   2158             break;
   2159         }
   2160         size++;
   2161         cur = cur->next;
   2162     }
   2163     return size;
   2164 }
   2165 
   2166 static const char*
   2167 ultag_getPrivateUse(const ULanguageTag* langtag) {
   2168     return langtag->privateuse;
   2169 }
   2170 
   2171 #if 0
   2172 static const char*
   2173 ultag_getGrandfathered(const ULanguageTag* langtag) {
   2174     return langtag->grandfathered;
   2175 }
   2176 #endif
   2177 
   2178 
   2179 /*
   2180 * -------------------------------------------------
   2181 *
   2182 * Locale/BCP47 conversion APIs, exposed as uloc_*
   2183 *
   2184 * -------------------------------------------------
   2185 */
   2186 U_DRAFT int32_t U_EXPORT2
   2187 uloc_toLanguageTag(const char* localeID,
   2188                    char* langtag,
   2189                    int32_t langtagCapacity,
   2190                    UBool strict,
   2191                    UErrorCode* status) {
   2192     /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
   2193     char canonical[256];
   2194     int32_t reslen = 0;
   2195     UErrorCode tmpStatus = U_ZERO_ERROR;
   2196     UBool hadPosix = FALSE;
   2197 
   2198     /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
   2199     canonical[0] = 0;
   2200     if (uprv_strlen(localeID) > 0) {
   2201         uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
   2202         if (tmpStatus != U_ZERO_ERROR) {
   2203             *status = U_ILLEGAL_ARGUMENT_ERROR;
   2204             return 0;
   2205         }
   2206     }
   2207 
   2208     reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
   2209     reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2210     reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
   2211     reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
   2212     reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
   2213 
   2214     return reslen;
   2215 }
   2216 
   2217 
   2218 U_DRAFT int32_t U_EXPORT2
   2219 uloc_forLanguageTag(const char* langtag,
   2220                     char* localeID,
   2221                     int32_t localeIDCapacity,
   2222                     int32_t* parsedLength,
   2223                     UErrorCode* status) {
   2224     ULanguageTag *lt;
   2225     int32_t reslen = 0;
   2226     const char *subtag, *p;
   2227     int32_t len;
   2228     int32_t i, n;
   2229     UBool noRegion = TRUE;
   2230 
   2231     lt = ultag_parse(langtag, -1, parsedLength, status);
   2232     if (U_FAILURE(*status)) {
   2233         return 0;
   2234     }
   2235 
   2236     /* language */
   2237     subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
   2238     if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
   2239         len = (int32_t)uprv_strlen(subtag);
   2240         if (len > 0) {
   2241             if (reslen < localeIDCapacity) {
   2242                 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
   2243             }
   2244             reslen += len;
   2245         }
   2246     }
   2247 
   2248     /* script */
   2249     subtag = ultag_getScript(lt);
   2250     len = (int32_t)uprv_strlen(subtag);
   2251     if (len > 0) {
   2252         if (reslen < localeIDCapacity) {
   2253             *(localeID + reslen) = LOCALE_SEP;
   2254         }
   2255         reslen++;
   2256 
   2257         /* write out the script in title case */
   2258         p = subtag;
   2259         while (*p) {
   2260             if (reslen < localeIDCapacity) {
   2261                 if (p == subtag) {
   2262                     *(localeID + reslen) = uprv_toupper(*p);
   2263                 } else {
   2264                     *(localeID + reslen) = *p;
   2265                 }
   2266             }
   2267             reslen++;
   2268             p++;
   2269         }
   2270     }
   2271 
   2272     /* region */
   2273     subtag = ultag_getRegion(lt);
   2274     len = (int32_t)uprv_strlen(subtag);
   2275     if (len > 0) {
   2276         if (reslen < localeIDCapacity) {
   2277             *(localeID + reslen) = LOCALE_SEP;
   2278         }
   2279         reslen++;
   2280         /* write out the retion in upper case */
   2281         p = subtag;
   2282         while (*p) {
   2283             if (reslen < localeIDCapacity) {
   2284                 *(localeID + reslen) = uprv_toupper(*p);
   2285             }
   2286             reslen++;
   2287             p++;
   2288         }
   2289         noRegion = FALSE;
   2290     }
   2291 
   2292     /* variants */
   2293     n = ultag_getVariantsSize(lt);
   2294     if (n > 0) {
   2295         if (noRegion) {
   2296             if (reslen < localeIDCapacity) {
   2297                 *(localeID + reslen) = LOCALE_SEP;
   2298             }
   2299             reslen++;
   2300         }
   2301 
   2302         for (i = 0; i < n; i++) {
   2303             subtag = ultag_getVariant(lt, i);
   2304             if (reslen < localeIDCapacity) {
   2305                 *(localeID + reslen) = LOCALE_SEP;
   2306             }
   2307             reslen++;
   2308             /* write out the variant in upper case */
   2309             p = subtag;
   2310             while (*p) {
   2311                 if (reslen < localeIDCapacity) {
   2312                     *(localeID + reslen) = uprv_toupper(*p);
   2313                 }
   2314                 reslen++;
   2315                 p++;
   2316             }
   2317         }
   2318     }
   2319 
   2320     /* keywords */
   2321     n = ultag_getExtensionsSize(lt);
   2322     subtag = ultag_getPrivateUse(lt);
   2323     if (n > 0 || uprv_strlen(subtag) > 0) {
   2324         if (reslen == 0) {
   2325             /* need a language */
   2326             if (reslen < localeIDCapacity) {
   2327                 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
   2328             }
   2329             reslen += LANG_UND_LEN;
   2330         }
   2331         len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
   2332         reslen += len;
   2333     }
   2334 
   2335     ultag_close(lt);
   2336     return u_terminateChars(localeID, localeIDCapacity, reslen, status);
   2337 }
   2338 
   2339 
   2340