Home | History | Annotate | Download | only in cintltst
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /********************************************************************
      4  * Copyright (c) 1997-2016, International Business Machines
      5  * Corporation and others. All Rights Reserved.
      6  ********************************************************************/
      7 
      8 #include <string.h>
      9 #include "unicode/utypes.h"
     10 #include "unicode/uscript.h"
     11 #include "unicode/uchar.h"
     12 #include "cintltst.h"
     13 #include "cucdapi.h"
     14 #include "cmemory.h"
     15 
     16 static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
     17     int32_t i;
     18     if(length == 0) {
     19         strcpy(s, "(no scripts)");
     20         return;
     21     }
     22     s[0] = 0;
     23     for(i = 0; i < length; ++i) {
     24         if(i > 0) {
     25             strcat(s, " ");
     26         }
     27         strcat(s, uscript_getShortName(scripts[i]));
     28     }
     29 }
     30 
     31 static void assertEqualScripts(const char *msg,
     32                                const UScriptCode scripts1[], int32_t length1,
     33                                const UScriptCode scripts2[], int32_t length2,
     34                                UErrorCode errorCode) {
     35     char s1[80];
     36     char s2[80];
     37     if(U_FAILURE(errorCode)) {
     38         log_err("Failed: %s - %s\n", msg, u_errorName(errorCode));
     39         return;
     40     }
     41     scriptsToString(scripts1, length1, s1);
     42     scriptsToString(scripts2, length2, s2);
     43     if(0!=strcmp(s1, s2)) {
     44         log_data_err("Failed: %s: expected %s but got %s\n", msg, s1, s2);
     45     }
     46 }
     47 
     48 void TestUScriptCodeAPI(){
     49     int i =0;
     50     int numErrors =0;
     51     {
     52         const char* testNames[]={
     53         /* test locale */
     54         "en", "en_US", "sr", "ta" , "te_IN",
     55         "hi", "he", "ar",
     56         /* test abbr */
     57         "Hani", "Hang","Hebr","Hira",
     58         "Knda","Kana","Khmr","Lao",
     59         "Latn",/*"Latf","Latg",*/
     60         "Mlym", "Mong",
     61 
     62         /* test names */
     63         "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
     64         "GOTHIC",  "GREEK",  "GUJARATI", "COMMON", "INHERITED",
     65         /* test lower case names */
     66         "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
     67         "oriya",     "runic",     "sinhala", "syriac","tamil",
     68         "telugu",    "thaana",    "thai",    "tibetan",
     69         /* test the bounds*/
     70         "tagb", "arabic",
     71         /* test bogus */
     72         "asfdasd", "5464", "12235",
     73         /* test the last index */
     74         "zyyy", "YI",
     75         NULL
     76         };
     77         UScriptCode expected[] ={
     78             /* locales should return */
     79             USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
     80             USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
     81             /* abbr should return */
     82             USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
     83             USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
     84             USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
     85             USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
     86             /* names should return */
     87             USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
     88             USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
     89             /* lower case names should return */
     90             USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
     91             USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
     92             USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
     93             /* bounds */
     94             USCRIPT_TAGBANWA, USCRIPT_ARABIC,
     95             /* bogus names should return invalid code */
     96             USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
     97             USCRIPT_COMMON, USCRIPT_YI,
     98         };
     99 
    100         UErrorCode err = U_ZERO_ERROR;
    101 
    102         const int32_t capacity = 10;
    103 
    104         for( ; testNames[i]!=NULL; i++){
    105             UScriptCode script[10]={USCRIPT_INVALID_CODE};
    106             uscript_getCode(testNames[i],script,capacity, &err);
    107             if( script[0] != expected[i]){
    108                    log_data_err("Error getting script code Got: %i  Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
    109                        script[0],expected[i],testNames[i]);
    110                    numErrors++;
    111             }
    112         }
    113         if(numErrors >0 ){
    114             log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors);
    115         }
    116     }
    117 
    118     {
    119         UErrorCode err = U_ZERO_ERROR;
    120         int32_t capacity=0;
    121         int32_t j;
    122         UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
    123         UScriptCode script[10]={USCRIPT_INVALID_CODE};
    124         int32_t num = uscript_getCode("ja",script,capacity, &err);
    125         /* preflight */
    126         if(err==U_BUFFER_OVERFLOW_ERROR){
    127             err = U_ZERO_ERROR;
    128             capacity = 10;
    129             num = uscript_getCode("ja",script,capacity, &err);
    130             if(num!=UPRV_LENGTHOF(jaCode)){
    131                 log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
    132                         num, UPRV_LENGTHOF(jaCode));
    133             }
    134             for(j=0;j<UPRV_LENGTHOF(jaCode);j++) {
    135                 if(script[j]!=jaCode[j]) {
    136                     log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
    137                             script[j], uscript_getName(script[j]),
    138                             jaCode[j], uscript_getName(jaCode[j]));
    139 
    140                 }
    141             }
    142         }else{
    143             log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
    144                 "U_BUFFER_OVERFLOW_ERROR",
    145                  u_errorName(err));
    146         }
    147 
    148     }
    149     {
    150         static const UScriptCode LATIN[1] = { USCRIPT_LATIN };
    151         static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC };
    152         static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI };
    153         static const UScriptCode HAN[1] = { USCRIPT_HAN };
    154         static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
    155         static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
    156         static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
    157         UScriptCode scripts[5];
    158         UErrorCode err;
    159         int32_t num;
    160 
    161         // Should work regardless of whether we have locale data for the language.
    162         err = U_ZERO_ERROR;
    163         num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err);
    164         assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err);  // Tajik
    165         err = U_ZERO_ERROR;
    166         num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err);
    167         assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err);  // Sherpa
    168 
    169         // Multi-script languages.
    170         err = U_ZERO_ERROR;
    171         num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err);
    172         assertEqualScripts("ja scripts: Kana Hira Hani",
    173                            JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err);
    174         err = U_ZERO_ERROR;
    175         num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err);
    176         assertEqualScripts("ko scripts: Hang Hani",
    177                            KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err);
    178         err = U_ZERO_ERROR;
    179         num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err);
    180         assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err);
    181         err = U_ZERO_ERROR;
    182         num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err);
    183         assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
    184         err = U_ZERO_ERROR;
    185         num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err);
    186         assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
    187 
    188         // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
    189         err = U_ZERO_ERROR;
    190         num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err);
    191         assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err);
    192     }
    193 
    194     {
    195         UScriptCode testAbbr[]={
    196             /* names should return */
    197             USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
    198             USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
    199         };
    200 
    201         const char* expectedNames[]={
    202 
    203             /* test names */
    204             "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
    205             "Gothic",  "Greek",  "Gujarati",
    206              NULL
    207         };
    208         i=0;
    209         while(i<UPRV_LENGTHOF(testAbbr)){
    210             const char* name = uscript_getName(testAbbr[i]);
    211              if(name == NULL) {
    212                log_data_err("Couldn't get script name\n");
    213                return;
    214              }
    215             numErrors=0;
    216             if(strcmp(expectedNames[i],name)!=0){
    217                 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]);
    218                 numErrors++;
    219             }
    220             if(numErrors > 0){
    221                 if(numErrors >0 ){
    222                     log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
    223                 }
    224             }
    225             i++;
    226         }
    227 
    228     }
    229 
    230     {
    231         UScriptCode testAbbr[]={
    232             /* abbr should return */
    233             USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
    234             USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
    235             USCRIPT_LATIN,
    236             USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
    237         };
    238 
    239         const char* expectedAbbr[]={
    240               /* test abbr */
    241             "Hani", "Hang","Hebr","Hira",
    242             "Knda","Kana","Khmr","Laoo",
    243             "Latn",
    244             "Mlym", "Mong",
    245              NULL
    246         };
    247         i=0;
    248         while(i<UPRV_LENGTHOF(testAbbr)){
    249             const char* name = uscript_getShortName(testAbbr[i]);
    250             numErrors=0;
    251             if(strcmp(expectedAbbr[i],name)!=0){
    252                 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]);
    253                 numErrors++;
    254             }
    255             if(numErrors > 0){
    256                 if(numErrors >0 ){
    257                     log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
    258                 }
    259             }
    260             i++;
    261         }
    262 
    263     }
    264     /* now test uscript_getScript() API */
    265     {
    266         uint32_t codepoints[] = {
    267                 0x0000FF9D, /* USCRIPT_KATAKANA*/
    268                 0x0000FFBE, /* USCRIPT_HANGUL*/
    269                 0x0000FFC7, /* USCRIPT_HANGUL*/
    270                 0x0000FFCF, /* USCRIPT_HANGUL*/
    271                 0x0000FFD7, /* USCRIPT_HANGUL*/
    272                 0x0000FFDC, /* USCRIPT_HANGUL*/
    273                 0x00010300, /* USCRIPT_OLD_ITALIC*/
    274                 0x00010330, /* USCRIPT_GOTHIC*/
    275                 0x0001034A, /* USCRIPT_GOTHIC*/
    276                 0x00010400, /* USCRIPT_DESERET*/
    277                 0x00010428, /* USCRIPT_DESERET*/
    278                 0x0001D167, /* USCRIPT_INHERITED*/
    279                 0x0001D17B, /* USCRIPT_INHERITED*/
    280                 0x0001D185, /* USCRIPT_INHERITED*/
    281                 0x0001D1AA, /* USCRIPT_INHERITED*/
    282                 0x00020000, /* USCRIPT_HAN*/
    283                 0x00000D02, /* USCRIPT_MALAYALAM*/
    284                 0x00050005, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
    285                 0x00000000, /* USCRIPT_COMMON*/
    286                 0x0001D169, /* USCRIPT_INHERITED*/
    287                 0x0001D182, /* USCRIPT_INHERITED*/
    288                 0x0001D18B, /* USCRIPT_INHERITED*/
    289                 0x0001D1AD, /* USCRIPT_INHERITED*/
    290         };
    291 
    292         UScriptCode expected[] = {
    293                 USCRIPT_KATAKANA ,
    294                 USCRIPT_HANGUL ,
    295                 USCRIPT_HANGUL ,
    296                 USCRIPT_HANGUL ,
    297                 USCRIPT_HANGUL ,
    298                 USCRIPT_HANGUL ,
    299                 USCRIPT_OLD_ITALIC,
    300                 USCRIPT_GOTHIC ,
    301                 USCRIPT_GOTHIC ,
    302                 USCRIPT_DESERET ,
    303                 USCRIPT_DESERET ,
    304                 USCRIPT_INHERITED,
    305                 USCRIPT_INHERITED,
    306                 USCRIPT_INHERITED,
    307                 USCRIPT_INHERITED,
    308                 USCRIPT_HAN ,
    309                 USCRIPT_MALAYALAM,
    310                 USCRIPT_UNKNOWN,
    311                 USCRIPT_COMMON,
    312                 USCRIPT_INHERITED ,
    313                 USCRIPT_INHERITED ,
    314                 USCRIPT_INHERITED ,
    315                 USCRIPT_INHERITED ,
    316         };
    317         UScriptCode code = USCRIPT_INVALID_CODE;
    318         UErrorCode status = U_ZERO_ERROR;
    319         UBool passed = TRUE;
    320 
    321         for(i=0; i<UPRV_LENGTHOF(codepoints); ++i){
    322             code = uscript_getScript(codepoints[i],&status);
    323             if(U_SUCCESS(status)){
    324                 if( code != expected[i] ||
    325                     code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)
    326                 ) {
    327                     log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]);
    328                     passed = FALSE;
    329                 }
    330             }else{
    331                 log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
    332                          codepoints[i],u_errorName(status));
    333                 break;
    334             }
    335         }
    336 
    337         if(passed==FALSE){
    338            log_err("uscript_getScript failed.\n");
    339         }
    340     }
    341     {
    342         UScriptCode code= USCRIPT_INVALID_CODE;
    343         UErrorCode  status = U_ZERO_ERROR;
    344         code = uscript_getScript(0x001D169,&status);
    345         if(code != USCRIPT_INHERITED){
    346             log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
    347         }
    348     }
    349     {
    350         UScriptCode code= USCRIPT_INVALID_CODE;
    351         UErrorCode  status = U_ZERO_ERROR;
    352         int32_t err = 0;
    353 
    354         for(i = 0; i<=0x10ffff; i++){
    355             code =  uscript_getScript(i,&status);
    356             if(code == USCRIPT_INVALID_CODE){
    357                 err++;
    358                 log_err("uscript_getScript for codepoint \\U%08X failed.\n", i);
    359             }
    360         }
    361         if(err>0){
    362             log_err("uscript_getScript failed for %d codepoints\n", err);
    363         }
    364     }
    365     {
    366         for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
    367             const char* name = uscript_getName((UScriptCode)i);
    368             if(name==NULL || strcmp(name,"")==0){
    369                 log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
    370             }
    371         }
    372     }
    373 
    374     {
    375         /*
    376          * These script codes were originally added to ICU pre-3.6, so that ICU would
    377          * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
    378          * These script codes were added with only short names because we don't
    379          * want to invent long names ourselves.
    380          * Unicode 5 and later encode some of these scripts and give them long names.
    381          * Whenever this happens, the long script names here need to be updated.
    382          */
    383         static const char* expectedLong[] = {
    384             "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
    385             "Egyd", "Egyh", "Egyptian_Hieroglyphs",
    386             "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
    387             "Javanese", "Kayah_Li", "Latf", "Latg",
    388             "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
    389             "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
    390             "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
    391             "Zxxx", "Unknown",
    392             "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
    393             "Moon", "Meetei_Mayek",
    394             /* new in ICU 4.0 */
    395             "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
    396             "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
    397             "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
    398             "Zmth", "Zsym",
    399             /* new in ICU 4.4 */
    400             "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
    401             /* new in ICU 4.6 */
    402             "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
    403             "Loma", "Mende_Kikakui", "Meroitic_Cursive",
    404             "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
    405             /* new in ICU 4.8 */
    406             "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
    407             /* new in ICU 49 */
    408             "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
    409             /* new in ICU 52 */
    410             "Caucasian_Albanian", "Mahajani",
    411             /* new in ICU 54 */
    412             "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
    413             // new in ICU 58
    414             "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
    415             // new in ICU 60
    416             "Masaram_Gondi", "Soyombo", "Zanabazar_Square"
    417         };
    418         static const char* expectedShort[] = {
    419             "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
    420             "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
    421             "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
    422             "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
    423             "Zxxx", "Zzzz",
    424             "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
    425             "Moon", "Mtei",
    426             /* new in ICU 4.0 */
    427             "Armi", "Avst", "Cakm", "Kore",
    428             "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
    429             "Zmth", "Zsym",
    430             /* new in ICU 4.4 */
    431             "Bamu", "Lisu", "Nkgb", "Sarb",
    432             /* new in ICU 4.6 */
    433             "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
    434             "Narb", "Nbat", "Palm", "Sind", "Wara",
    435             /* new in ICU 4.8 */
    436             "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
    437             /* new in ICU 49 */
    438             "Hluw", "Khoj", "Tirh",
    439             /* new in ICU 52 */
    440             "Aghb", "Mahj",
    441             /* new in ICU 54 */
    442             "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
    443             // new in ICU 58
    444             "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
    445             // new in ICU 60
    446             "Gonm", "Soyo", "Zanb"
    447         };
    448         int32_t j = 0;
    449         if(UPRV_LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
    450             log_err("need to add new script codes in cucdapi.c!\n");
    451             return;
    452         }
    453         for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
    454             const char* name = uscript_getName((UScriptCode)i);
    455             if(name==NULL || strcmp(name,expectedLong[j])!=0){
    456                 log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]);
    457             }
    458             name = uscript_getShortName((UScriptCode)i);
    459             if(name==NULL || strcmp(name,expectedShort[j])!=0){
    460                 log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
    461             }
    462         }
    463         for(i=0; i<UPRV_LENGTHOF(expectedLong); i++){
    464             UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
    465             UErrorCode status = U_ZERO_ERROR;
    466             int32_t len = 0;
    467             len = uscript_getCode(expectedShort[i], fillIn, UPRV_LENGTHOF(fillIn), &status);
    468             if(U_FAILURE(status)){
    469                 log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
    470             }
    471             if(len>1){
    472                 log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
    473             }
    474             if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
    475                 log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
    476             }
    477         }
    478     }
    479 
    480     {
    481         /* test characters which have Script_Extensions */
    482         UErrorCode errorCode=U_ZERO_ERROR;
    483         if(!(
    484                 USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
    485                 USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
    486                 USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
    487             U_FAILURE(errorCode)
    488         ) {
    489             log_err("uscript_getScript(character with Script_Extensions) failed\n");
    490         }
    491     }
    492 }
    493 
    494 void TestHasScript() {
    495     if(!(
    496         !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
    497         uscript_hasScript(0x063f, USCRIPT_ARABIC) &&  /* main Script value */
    498         !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
    499         !uscript_hasScript(0x063f, USCRIPT_THAANA))
    500     ) {
    501         log_err("uscript_hasScript(U+063F, ...) is wrong\n");
    502     }
    503     if(!(
    504         !uscript_hasScript(0x0640, USCRIPT_COMMON) &&  /* main Script value */
    505         uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
    506         uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
    507         !uscript_hasScript(0x0640, USCRIPT_THAANA))
    508     ) {
    509         log_err("uscript_hasScript(U+0640, ...) is wrong\n");
    510     }
    511     if(!(
    512         !uscript_hasScript(0x0650, USCRIPT_INHERITED) &&  /* main Script value */
    513         uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
    514         uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
    515         !uscript_hasScript(0x0650, USCRIPT_THAANA))
    516     ) {
    517         log_err("uscript_hasScript(U+0650, ...) is wrong\n");
    518     }
    519     if(!(
    520         !uscript_hasScript(0x0660, USCRIPT_COMMON) &&  /* main Script value */
    521         uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
    522         !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
    523         uscript_hasScript(0x0660, USCRIPT_THAANA))
    524     ) {
    525         log_err("uscript_hasScript(U+0660, ...) is wrong\n");
    526     }
    527     if(!(
    528         !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
    529         uscript_hasScript(0xfdf2, USCRIPT_ARABIC) &&  /* main Script value */
    530         !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
    531         uscript_hasScript(0xfdf2, USCRIPT_THAANA))
    532     ) {
    533         log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
    534     }
    535     if(uscript_hasScript(0x0640, 0xaffe)) {
    536         /* An unguarded implementation might go into an infinite loop. */
    537         log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
    538     }
    539 }
    540 
    541 static UBool scriptsContain(UScriptCode scripts[], int32_t length, UScriptCode script) {
    542     UBool contain=FALSE;
    543     int32_t prev=-1, i;
    544     for(i=0; i<length; ++i) {
    545         int32_t s=scripts[i];
    546         if(s<=prev) {
    547             log_err("uscript_getScriptExtensions() not in sorted order: %d %d\n", (int)prev, (int)s);
    548         }
    549         if(s==script) { contain=TRUE; }
    550     }
    551     return contain;
    552 }
    553 
    554 void TestGetScriptExtensions() {
    555     UScriptCode scripts[20];
    556     int32_t length;
    557     UErrorCode errorCode;
    558 
    559     /* errors and overflows */
    560     errorCode=U_PARSE_ERROR;
    561     length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
    562     if(errorCode!=U_PARSE_ERROR) {
    563         log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
    564               u_errorName(errorCode));
    565     }
    566     errorCode=U_ZERO_ERROR;
    567     length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode);
    568     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
    569         log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
    570               u_errorName(errorCode));
    571     }
    572     errorCode=U_ZERO_ERROR;
    573     length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
    574     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
    575         log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
    576               u_errorName(errorCode));
    577     }
    578     errorCode=U_ZERO_ERROR;
    579     length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
    580     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
    581         log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n",
    582               (int)length, u_errorName(errorCode));
    583     }
    584     errorCode=U_ZERO_ERROR;
    585     length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
    586     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
    587         log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n",
    588               (int)length, u_errorName(errorCode));
    589     }
    590     /* U+063F has only a Script code, no Script_Extensions. */
    591     errorCode=U_ZERO_ERROR;
    592     length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
    593     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
    594         log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
    595               (int)length, u_errorName(errorCode));
    596     }
    597 
    598     /* invalid code points */
    599     errorCode=U_ZERO_ERROR;
    600     length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode);
    601     if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
    602         log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
    603               (int)length, u_errorName(errorCode));
    604     }
    605     errorCode=U_ZERO_ERROR;
    606     length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode);
    607     if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
    608         log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
    609               (int)length, u_errorName(errorCode));
    610     }
    611 
    612     /* normal usage */
    613     errorCode=U_ZERO_ERROR;
    614     length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
    615     if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
    616         log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
    617               (int)length, u_errorName(errorCode));
    618     }
    619     errorCode=U_ZERO_ERROR;
    620     length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
    621     if(U_FAILURE(errorCode) || length<3 ||
    622             !scriptsContain(scripts, length, USCRIPT_ARABIC) ||
    623             !scriptsContain(scripts, length, USCRIPT_SYRIAC) ||
    624             !scriptsContain(scripts, length, USCRIPT_MANDAIC)) {
    625         log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
    626               (int)length, u_errorName(errorCode));
    627     }
    628     errorCode=U_ZERO_ERROR;
    629     length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode);
    630     if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
    631         log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
    632               (int)length, u_errorName(errorCode));
    633     }
    634     errorCode=U_ZERO_ERROR;
    635     length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode);
    636     if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
    637         log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
    638               (int)length, u_errorName(errorCode));
    639     }
    640 }
    641 
    642 void TestScriptMetadataAPI() {
    643     /* API & code coverage. More testing in intltest/ucdtest.cpp. */
    644     UErrorCode errorCode=U_ZERO_ERROR;
    645     UChar sample[8];
    646 
    647     if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 ||
    648             U_FAILURE(errorCode) ||
    649             uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
    650             sample[1]!=0) {
    651         log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
    652     }
    653     sample[0]=0xfffe;
    654     if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
    655             errorCode!=U_BUFFER_OVERFLOW_ERROR ||
    656             sample[0]!=0xfffe) {
    657         log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
    658     }
    659     errorCode=U_ZERO_ERROR;
    660     if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 ||
    661             U_FAILURE(errorCode) ||
    662             sample[0]!=0) {
    663         log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
    664     }
    665     sample[0]=0xfffe;
    666     if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
    667             errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
    668             sample[0]!=0xfffe) {
    669         log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
    670     }
    671 
    672     if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
    673             // Unicode 10 gives up on "aspirational".
    674             uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_LIMITED_USE ||
    675             uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
    676             uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
    677             uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
    678             uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
    679             uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
    680         log_err("uscript_getUsage() failed\n");
    681     }
    682 
    683     if(uscript_isRightToLeft(USCRIPT_LATIN) ||
    684             uscript_isRightToLeft(USCRIPT_CIRTH) ||
    685             !uscript_isRightToLeft(USCRIPT_ARABIC) ||
    686             !uscript_isRightToLeft(USCRIPT_HEBREW)) {
    687         log_err("uscript_isRightToLeft() failed\n");
    688     }
    689 
    690     if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
    691             uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
    692             !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
    693             !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
    694         log_err("uscript_breaksBetweenLetters() failed\n");
    695     }
    696 
    697     if(uscript_isCased(USCRIPT_CIRTH) ||
    698             uscript_isCased(USCRIPT_HAN) ||
    699             !uscript_isCased(USCRIPT_LATIN) ||
    700             !uscript_isCased(USCRIPT_GREEK)) {
    701         log_err("uscript_isCased() failed\n");
    702     }
    703 }
    704 
    705 void TestBinaryValues() {
    706     /*
    707      * Unicode 5.1 explicitly defines binary property value aliases.
    708      * Verify that they are all recognized.
    709      */
    710     static const char *const falseValues[]={ "N", "No", "F", "False" };
    711     static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
    712     int32_t i;
    713     for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) {
    714         if(FALSE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
    715             log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=FALSE (Are you missing data?)\n", falseValues[i]);
    716         }
    717     }
    718     for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) {
    719         if(TRUE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
    720             log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=TRUE (Are you missing data?)\n", trueValues[i]);
    721         }
    722     }
    723 }
    724