Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2009, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /*******************************************************************************
      7 *
      8 * File CUCDTST.C
      9 *
     10 * Modification History:
     11 *        Name                     Description
     12 *     Madhu Katragadda            Ported for C API, added tests for string functions
     13 ********************************************************************************
     14 */
     15 
     16 #include <string.h>
     17 #include <math.h>
     18 #include <stdlib.h>
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/uchar.h"
     22 #include "unicode/putil.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/uloc.h"
     25 
     26 #include "cintltst.h"
     27 #include "putilimp.h"
     28 #include "uparse.h"
     29 #include "ucase.h"
     30 #include "ubidi_props.h"
     31 #include "uprops.h"
     32 #include "uset_imp.h"
     33 #include "usc_impl.h"
     34 #include "unormimp.h"
     35 #include "udatamem.h" /* for testing ucase_openBinary() */
     36 #include "cucdapi.h"
     37 
     38 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     39 
     40 /* prototypes --------------------------------------------------------------- */
     41 
     42 static void TestUpperLower(void);
     43 static void TestLetterNumber(void);
     44 static void TestMisc(void);
     45 static void TestPOSIX(void);
     46 static void TestControlPrint(void);
     47 static void TestIdentifier(void);
     48 static void TestUnicodeData(void);
     49 static void TestCodeUnit(void);
     50 static void TestCodePoint(void);
     51 static void TestCharLength(void);
     52 static void TestCharNames(void);
     53 static void TestMirroring(void);
     54 static void TestUScriptRunAPI(void);
     55 static void TestAdditionalProperties(void);
     56 static void TestNumericProperties(void);
     57 static void TestPropertyNames(void);
     58 static void TestPropertyValues(void);
     59 static void TestConsistency(void);
     60 static void TestUCase(void);
     61 static void TestUBiDiProps(void);
     62 static void TestCaseFolding(void);
     63 
     64 /* internal methods used */
     65 static int32_t MakeProp(char* str);
     66 static int32_t MakeDir(char* str);
     67 
     68 /* helpers ------------------------------------------------------------------ */
     69 
     70 static void
     71 parseUCDFile(const char *filename,
     72              char *fields[][2], int32_t fieldCount,
     73              UParseLineFn *lineFn, void *context,
     74              UErrorCode *pErrorCode) {
     75     char path[256];
     76     char backupPath[256];
     77 
     78     if(U_FAILURE(*pErrorCode)) {
     79         return;
     80     }
     81 
     82     /* Look inside ICU_DATA first */
     83     strcpy(path, u_getDataDirectory());
     84     strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
     85     strcat(path, filename);
     86 
     87     /* As a fallback, try to guess where the source data was located
     88      *    at the time ICU was built, and look there.
     89      */
     90     strcpy(backupPath, ctest_dataSrcDir());
     91     strcat(backupPath, U_FILE_SEP_STRING);
     92     strcat(backupPath, "unidata" U_FILE_SEP_STRING);
     93     strcat(backupPath, filename);
     94 
     95     u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
     96     if(*pErrorCode==U_FILE_ACCESS_ERROR) {
     97         *pErrorCode=U_ZERO_ERROR;
     98         u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
     99     }
    100     if(U_FAILURE(*pErrorCode)) {
    101         log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
    102     }
    103 }
    104 
    105 /* test data ---------------------------------------------------------------- */
    106 
    107 static const UChar  LAST_CHAR_CODE_IN_FILE = 0xFFFD;
    108 static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
    109 static const int32_t tagValues[] =
    110     {
    111     /* Mn */ U_NON_SPACING_MARK,
    112     /* Mc */ U_COMBINING_SPACING_MARK,
    113     /* Me */ U_ENCLOSING_MARK,
    114     /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    115     /* Nl */ U_LETTER_NUMBER,
    116     /* No */ U_OTHER_NUMBER,
    117     /* Zs */ U_SPACE_SEPARATOR,
    118     /* Zl */ U_LINE_SEPARATOR,
    119     /* Zp */ U_PARAGRAPH_SEPARATOR,
    120     /* Cc */ U_CONTROL_CHAR,
    121     /* Cf */ U_FORMAT_CHAR,
    122     /* Cs */ U_SURROGATE,
    123     /* Co */ U_PRIVATE_USE_CHAR,
    124     /* Cn */ U_UNASSIGNED,
    125     /* Lu */ U_UPPERCASE_LETTER,
    126     /* Ll */ U_LOWERCASE_LETTER,
    127     /* Lt */ U_TITLECASE_LETTER,
    128     /* Lm */ U_MODIFIER_LETTER,
    129     /* Lo */ U_OTHER_LETTER,
    130     /* Pc */ U_CONNECTOR_PUNCTUATION,
    131     /* Pd */ U_DASH_PUNCTUATION,
    132     /* Ps */ U_START_PUNCTUATION,
    133     /* Pe */ U_END_PUNCTUATION,
    134     /* Po */ U_OTHER_PUNCTUATION,
    135     /* Sm */ U_MATH_SYMBOL,
    136     /* Sc */ U_CURRENCY_SYMBOL,
    137     /* Sk */ U_MODIFIER_SYMBOL,
    138     /* So */ U_OTHER_SYMBOL,
    139     /* Pi */ U_INITIAL_PUNCTUATION,
    140     /* Pf */ U_FINAL_PUNCTUATION
    141     };
    142 
    143 static const char dirStrings[][5] = {
    144     "L",
    145     "R",
    146     "EN",
    147     "ES",
    148     "ET",
    149     "AN",
    150     "CS",
    151     "B",
    152     "S",
    153     "WS",
    154     "ON",
    155     "LRE",
    156     "LRO",
    157     "AL",
    158     "RLE",
    159     "RLO",
    160     "PDF",
    161     "NSM",
    162     "BN"
    163 };
    164 
    165 void addUnicodeTest(TestNode** root);
    166 
    167 void addUnicodeTest(TestNode** root)
    168 {
    169     addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
    170     addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
    171     addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
    172     addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
    173     addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
    174     addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
    175     addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
    176     addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
    177     addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
    178     addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
    179     addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
    180     addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
    181     addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
    182     addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
    183     addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
    184     addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
    185     addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
    186     addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
    187     addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
    188     addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
    189     addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
    190     addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
    191     addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
    192 }
    193 
    194 /*==================================================== */
    195 /* test u_toupper() and u_tolower()                    */
    196 /*==================================================== */
    197 static void TestUpperLower()
    198 {
    199     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
    200     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
    201     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
    202     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
    203     int32_t i;
    204 
    205     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
    206     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
    207 
    208 /*
    209 Checks LetterLike Symbols which were previously a source of confusion
    210 [Bertrand A. D. 02/04/98]
    211 */
    212     for (i=0x2100;i<0x2138;i++)
    213     {
    214         /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
    215         if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
    216         {
    217             if (i != (int)u_tolower(i)) /* itself */
    218                 log_err("Failed case conversion with itself: U+%04x\n", i);
    219             if (i != (int)u_toupper(i))
    220                 log_err("Failed case conversion with itself: U+%04x\n", i);
    221         }
    222     }
    223 
    224     for(i=0; i < u_strlen(upper); i++){
    225         if(u_tolower(upper[i]) != lower[i]){
    226             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
    227         }
    228     }
    229 
    230     log_verbose("testing upper lower\n");
    231     for (i = 0; i < 21; i++) {
    232 
    233         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
    234         {
    235             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
    236         }
    237         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
    238          {
    239             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
    240         }
    241         else if (upperTest[i] != u_tolower(lowerTest[i]))
    242         {
    243             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
    244         }
    245         else if (lowerTest[i] != u_toupper(upperTest[i]))
    246          {
    247             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
    248         }
    249         else if (upperTest[i] != u_tolower(upperTest[i]))
    250         {
    251             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
    252         }
    253         else if (lowerTest[i] != u_toupper(lowerTest[i]))
    254         {
    255             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
    256         }
    257     }
    258     log_verbose("done testing upper lower\n");
    259 
    260     log_verbose("testing u_istitle\n");
    261     {
    262         static const UChar expected[] = {
    263             0x1F88,
    264             0x1F89,
    265             0x1F8A,
    266             0x1F8B,
    267             0x1F8C,
    268             0x1F8D,
    269             0x1F8E,
    270             0x1F8F,
    271             0x1F88,
    272             0x1F89,
    273             0x1F8A,
    274             0x1F8B,
    275             0x1F8C,
    276             0x1F8D,
    277             0x1F8E,
    278             0x1F8F,
    279             0x1F98,
    280             0x1F99,
    281             0x1F9A,
    282             0x1F9B,
    283             0x1F9C,
    284             0x1F9D,
    285             0x1F9E,
    286             0x1F9F,
    287             0x1F98,
    288             0x1F99,
    289             0x1F9A,
    290             0x1F9B,
    291             0x1F9C,
    292             0x1F9D,
    293             0x1F9E,
    294             0x1F9F,
    295             0x1FA8,
    296             0x1FA9,
    297             0x1FAA,
    298             0x1FAB,
    299             0x1FAC,
    300             0x1FAD,
    301             0x1FAE,
    302             0x1FAF,
    303             0x1FA8,
    304             0x1FA9,
    305             0x1FAA,
    306             0x1FAB,
    307             0x1FAC,
    308             0x1FAD,
    309             0x1FAE,
    310             0x1FAF,
    311             0x1FBC,
    312             0x1FBC,
    313             0x1FCC,
    314             0x1FCC,
    315             0x1FFC,
    316             0x1FFC,
    317         };
    318         int32_t num = sizeof(expected)/sizeof(expected[0]);
    319         for(i=0; i<num; i++){
    320             if(!u_istitle(expected[i])){
    321                 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
    322             }
    323         }
    324 
    325     }
    326 }
    327 
    328 /* compare two sets and verify that their difference or intersection is empty */
    329 static UBool
    330 showADiffB(const USet *a, const USet *b,
    331            const char *a_name, const char *b_name,
    332            UBool expect, UBool diffIsError) {
    333     USet *aa;
    334     int32_t i, start, end, length;
    335     UErrorCode errorCode;
    336 
    337     /*
    338      * expect:
    339      * TRUE  -> a-b should be empty, that is, b should contain all of a
    340      * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
    341      */
    342     if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
    343         return TRUE;
    344     }
    345 
    346     /* clone a to aa because a is const */
    347     aa=uset_open(1, 0);
    348     if(aa==NULL) {
    349         /* unusual problem - out of memory? */
    350         return FALSE;
    351     }
    352     uset_addAll(aa, a);
    353 
    354     /* compute the set in question */
    355     if(expect) {
    356         /* a-b */
    357         uset_removeAll(aa, b);
    358     } else {
    359         /* a&b */
    360         uset_retainAll(aa, b);
    361     }
    362 
    363     /* aa is not empty because of the initial tests above; show its contents */
    364     errorCode=U_ZERO_ERROR;
    365     i=0;
    366     for(;;) {
    367         length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
    368         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
    369             break; /* done */
    370         }
    371         if(U_FAILURE(errorCode)) {
    372             log_err("error comparing %s with %s at difference item %d: %s\n",
    373                 a_name, b_name, i, u_errorName(errorCode));
    374             break;
    375         }
    376         if(length!=0) {
    377             break; /* done with code points, got a string or -1 */
    378         }
    379 
    380         if(diffIsError) {
    381             if(expect) {
    382                 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
    383             } else {
    384                 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
    385             }
    386         } else {
    387             if(expect) {
    388                 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
    389             } else {
    390                 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
    391             }
    392         }
    393 
    394         ++i;
    395     }
    396 
    397     uset_close(aa);
    398     return FALSE;
    399 }
    400 
    401 static UBool
    402 showAMinusB(const USet *a, const USet *b,
    403             const char *a_name, const char *b_name,
    404             UBool diffIsError) {
    405     return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
    406 }
    407 
    408 static UBool
    409 showAIntersectB(const USet *a, const USet *b,
    410                 const char *a_name, const char *b_name,
    411                 UBool diffIsError) {
    412     return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
    413 }
    414 
    415 static UBool
    416 compareUSets(const USet *a, const USet *b,
    417              const char *a_name, const char *b_name,
    418              UBool diffIsError) {
    419     /*
    420      * Use an arithmetic & not a logical && so that both branches
    421      * are always taken and all differences are shown.
    422      */
    423     return
    424         showAMinusB(a, b, a_name, b_name, diffIsError) &
    425         showAMinusB(b, a, b_name, a_name, diffIsError);
    426 }
    427 
    428 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
    429 static void TestLetterNumber()
    430 {
    431     UChar i = 0x0000;
    432 
    433     log_verbose("Testing for isalpha\n");
    434     for (i = 0x0041; i < 0x005B; i++) {
    435         if (!u_isalpha(i))
    436         {
    437             log_err("Failed isLetter test at  %.4X\n", i);
    438         }
    439     }
    440     for (i = 0x0660; i < 0x066A; i++) {
    441         if (u_isalpha(i))
    442         {
    443             log_err("Failed isLetter test with numbers at %.4X\n", i);
    444         }
    445     }
    446 
    447     log_verbose("Testing for isdigit\n");
    448     for (i = 0x0660; i < 0x066A; i++) {
    449         if (!u_isdigit(i))
    450         {
    451             log_verbose("Failed isNumber test at %.4X\n", i);
    452         }
    453     }
    454 
    455     log_verbose("Testing for isalnum\n");
    456     for (i = 0x0041; i < 0x005B; i++) {
    457         if (!u_isalnum(i))
    458         {
    459             log_err("Failed isAlNum test at  %.4X\n", i);
    460         }
    461     }
    462     for (i = 0x0660; i < 0x066A; i++) {
    463         if (!u_isalnum(i))
    464         {
    465             log_err("Failed isAlNum test at  %.4X\n", i);
    466         }
    467     }
    468 
    469     {
    470         /*
    471          * The following checks work only starting from Unicode 4.0.
    472          * Check the version number here.
    473          */
    474         static UVersionInfo u401={ 4, 0, 1, 0 };
    475         UVersionInfo version;
    476         u_getUnicodeVersion(version);
    477         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
    478             return;
    479         }
    480     }
    481 
    482     {
    483         /*
    484          * Sanity check:
    485          * Verify that exactly the digit characters have decimal digit values.
    486          * This assumption is used in the implementation of u_digit()
    487          * (which checks nt=de)
    488          * compared with the parallel java.lang.Character.digit()
    489          * (which checks Nd).
    490          *
    491          * This was not true in Unicode 3.2 and earlier.
    492          * Unicode 4.0 fixed discrepancies.
    493          * Unicode 4.0.1 re-introduced problems in this area due to an
    494          * unintentionally incomplete last-minute change.
    495          */
    496         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
    497         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
    498 
    499         USet *digits, *decimalValues;
    500         UErrorCode errorCode;
    501 
    502         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
    503         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
    504         errorCode=U_ZERO_ERROR;
    505         digits=uset_openPattern(digitsPattern, 6, &errorCode);
    506         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
    507 
    508         if(U_SUCCESS(errorCode)) {
    509             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
    510         }
    511 
    512         uset_close(digits);
    513         uset_close(decimalValues);
    514     }
    515 }
    516 
    517 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
    518 static void TestMisc()
    519 {
    520     static const UChar sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
    521     static const UChar sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
    522     static const UChar sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6b };
    523     static const UChar sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
    524     static const UChar sampleBase[] = {0x0061, 0x0031, 0x03d2};
    525     static const UChar sampleNonBase[] = {0x002B, 0x0020, 0x203B};
    526 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
    527     static const UChar sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
    528     static const UChar sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
    529     static const UChar sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
    530     static const UChar sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f};
    531 
    532 
    533     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
    534 
    535     uint32_t mask;
    536 
    537     int32_t i;
    538     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
    539     UVersionInfo realVersion;
    540 
    541     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
    542 
    543     log_verbose("Testing for isspace and nonspaces\n");
    544     for (i = 0; i < 5; i++) {
    545         if (!(u_isspace(sampleSpaces[i])) ||
    546                 (u_isspace(sampleNonSpaces[i])))
    547         {
    548             log_err("Space char test error : %d or %d \n", (int32_t)sampleSpaces[i], (int32_t)sampleNonSpaces[i]);
    549         }
    550         if (!(u_isJavaSpaceChar(sampleSpaces[i])) ||
    551                 (u_isJavaSpaceChar(sampleNonSpaces[i])))
    552         {
    553             log_err("u_isJavaSpaceChar() test error : %d or %d \n", (int32_t)sampleSpaces[i], (int32_t)sampleNonSpaces[i]);
    554         }
    555     }
    556 
    557     log_verbose("Testing for isspace and nonspaces\n");
    558     for (i = 0; i < 5; i++) {
    559         if (!(u_isWhitespace(sampleWhiteSpaces[i])) ||
    560                 (u_isWhitespace(sampleNonWhiteSpaces[i])))
    561         {
    562             log_err("White Space char test error : %lx or %lx \n", sampleWhiteSpaces[i], sampleNonWhiteSpaces[i]);
    563         }
    564     }
    565 
    566     log_verbose("Testing for isdefined\n");
    567     for (i = 0; i < 3; i++) {
    568         if ((u_isdefined(sampleUndefined[i])) ||
    569                 !(u_isdefined(sampleDefined[i])))
    570         {
    571             log_err("Undefined char test error : U+%04x or U+%04x\n", (int32_t)sampleUndefined[i], (int32_t)sampleDefined[i]);
    572         }
    573     }
    574 
    575     log_verbose("Testing for isbase\n");
    576     for (i = 0; i < 3; i++) {
    577         if ((u_isbase(sampleNonBase[i])) ||
    578                 !(u_isbase(sampleBase[i])))
    579         {
    580             log_err("Non-baseform char test error : U+%04x or U+%04x",(int32_t)sampleNonBase[i], (int32_t)sampleBase[i]);
    581         }
    582     }
    583 
    584     log_verbose("Testing for isdigit \n");
    585     for (i = 0; i < 4; i++) {
    586         if ((u_isdigit(sampleDigits[i]) &&
    587             (u_charDigitValue(sampleDigits[i])!= sampleDigitValues[i])) ||
    588             (u_isdigit(sampleNonDigits[i]))) {
    589             log_err("Digit char test error : %lx   or   %lx\n", sampleDigits[i], sampleNonDigits[i]);
    590         }
    591     }
    592 
    593     /* Tests the ICU version #*/
    594     u_getVersion(realVersion);
    595     u_versionToString(realVersion, icuVersion);
    596     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
    597     {
    598         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
    599     }
    600 #if defined(ICU_VERSION)
    601     /* test only happens where we have configure.in with VERSION - sanity check. */
    602     if(strcmp(U_ICU_VERSION, ICU_VERSION))
    603     {
    604         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
    605     }
    606 #endif
    607 
    608     /* test U_GC_... */
    609     if(
    610         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
    611         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
    612         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
    613         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
    614         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
    615         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
    616     ) {
    617         log_err("error: U_GET_GC_MASK does not work properly\n");
    618     }
    619 
    620     mask=0;
    621     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
    622 
    623     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
    624     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
    625     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
    626     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
    627     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
    628 
    629     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
    630     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
    631     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
    632 
    633     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
    634     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
    635     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
    636 
    637     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
    638     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
    639     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
    640 
    641     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
    642     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
    643     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
    644     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
    645 
    646     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
    647     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
    648     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
    649     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
    650     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
    651 
    652     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
    653     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
    654     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
    655     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
    656 
    657     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
    658     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
    659 
    660     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
    661         log_err("error: problems with U_GC_XX_MASK constants\n");
    662     }
    663 
    664     mask=0;
    665     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
    666     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
    667     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
    668     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
    669     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
    670     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
    671     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
    672 
    673     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
    674         log_err("error: problems with U_GC_Y_MASK constants\n");
    675     }
    676     {
    677         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
    678         for(i=0; i<10; i++){
    679             if(digit[i]!=u_forDigit(i,10)){
    680                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
    681             }
    682         }
    683     }
    684 
    685     /* test u_digit() */
    686     {
    687         static const struct {
    688             UChar32 c;
    689             int8_t radix, value;
    690         } data[]={
    691             /* base 16 */
    692             { 0x0031, 16, 1 },
    693             { 0x0038, 16, 8 },
    694             { 0x0043, 16, 12 },
    695             { 0x0066, 16, 15 },
    696             { 0x00e4, 16, -1 },
    697             { 0x0662, 16, 2 },
    698             { 0x06f5, 16, 5 },
    699             { 0xff13, 16, 3 },
    700             { 0xff41, 16, 10 },
    701 
    702             /* base 8 */
    703             { 0x0031, 8, 1 },
    704             { 0x0038, 8, -1 },
    705             { 0x0043, 8, -1 },
    706             { 0x0066, 8, -1 },
    707             { 0x00e4, 8, -1 },
    708             { 0x0662, 8, 2 },
    709             { 0x06f5, 8, 5 },
    710             { 0xff13, 8, 3 },
    711             { 0xff41, 8, -1 },
    712 
    713             /* base 36 */
    714             { 0x5a, 36, 35 },
    715             { 0x7a, 36, 35 },
    716             { 0xff3a, 36, 35 },
    717             { 0xff5a, 36, 35 },
    718 
    719             /* wrong radix values */
    720             { 0x0031, 1, -1 },
    721             { 0xff3a, 37, -1 }
    722         };
    723 
    724         for(i=0; i<LENGTHOF(data); ++i) {
    725             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
    726                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
    727                         data[i].c,
    728                         data[i].radix,
    729                         u_digit(data[i].c, data[i].radix),
    730                         data[i].value);
    731             }
    732         }
    733     }
    734 }
    735 
    736 /* test C/POSIX-style functions --------------------------------------------- */
    737 
    738 /* bit flags */
    739 #define ISAL     1
    740 #define ISLO     2
    741 #define ISUP     4
    742 
    743 #define ISDI     8
    744 #define ISXD  0x10
    745 
    746 #define ISAN  0x20
    747 
    748 #define ISPU  0x40
    749 #define ISGR  0x80
    750 #define ISPR 0x100
    751 
    752 #define ISSP 0x200
    753 #define ISBL 0x400
    754 #define ISCN 0x800
    755 
    756 /* C/POSIX-style functions, in the same order as the bit flags */
    757 typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);
    758 
    759 static const struct {
    760     IsPOSIXClass *fn;
    761     const char *name;
    762 } posixClasses[]={
    763     { u_isalpha, "isalpha" },
    764     { u_islower, "islower" },
    765     { u_isupper, "isupper" },
    766     { u_isdigit, "isdigit" },
    767     { u_isxdigit, "isxdigit" },
    768     { u_isalnum, "isalnum" },
    769     { u_ispunct, "ispunct" },
    770     { u_isgraph, "isgraph" },
    771     { u_isprint, "isprint" },
    772     { u_isspace, "isspace" },
    773     { u_isblank, "isblank" },
    774     { u_iscntrl, "iscntrl" }
    775 };
    776 
    777 static const struct {
    778     UChar32 c;
    779     uint32_t posixResults;
    780 } posixData[]={
    781     { 0x0008,                                                        ISCN },    /* backspace */
    782     { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    783     { 0x000a,                                              ISSP|     ISCN },    /* LF */
    784     { 0x000c,                                              ISSP|     ISCN },    /* FF */
    785     { 0x000d,                                              ISSP|     ISCN },    /* CR */
    786     { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    787     { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    788     { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    789     { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    790     { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    791     { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    792     { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    793     { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    794     { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    795     { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    796     { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    797     { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    798     { 0x0600,                                                        ISCN },    /* arabic number sign */
    799     { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    800     { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    801     { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    802     { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    803     { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    804     { 0x200b,                                                        ISCN },    /* ZWSP */
    805   /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    806     { 0x200e,                                                        ISCN },    /* LRM */
    807     { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    808     { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    809     { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    810     { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    811     { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    812     { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    813     { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    814     { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
    815 };
    816 
    817 static void
    818 TestPOSIX() {
    819     uint32_t mask;
    820     int32_t cl, i;
    821     UBool expect;
    822 
    823     mask=1;
    824     for(cl=0; cl<12; ++cl) {
    825         for(i=0; i<LENGTHOF(posixData); ++i) {
    826             expect=(UBool)((posixData[i].posixResults&mask)!=0);
    827             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
    828                 log_err("u_%s(U+%04x)=%s is wrong\n",
    829                     posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
    830             }
    831         }
    832         mask<<=1;
    833     }
    834 }
    835 
    836 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
    837 static void TestControlPrint()
    838 {
    839     const UChar sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
    840     const UChar sampleNonControl[] = {0x61, 0x0031, 0x00e2};
    841     const UChar samplePrintable[] = {0x0042, 0x005f, 0x2014};
    842     const UChar sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
    843     UChar32 c;
    844     int i;
    845 
    846     log_verbose("Testing for iscontrol\n");
    847     for (i = 0; i < LENGTHOF(sampleControl); i++) {
    848         if (!u_iscntrl(sampleControl[i]))
    849         {
    850             log_err("Control char test error : U+%04x should be control but is not\n", (int32_t)sampleControl[i]);
    851         }
    852     }
    853 
    854     log_verbose("Testing for !iscontrol\n");
    855     for (i = 0; i < LENGTHOF(sampleNonControl); i++) {
    856         if (u_iscntrl(sampleNonControl[i]))
    857         {
    858             log_err("Control char test error : U+%04x should not be control but is\n", (int32_t)sampleNonControl[i]);
    859         }
    860     }
    861 
    862     log_verbose("testing for isprintable\n");
    863     for (i = 0; i < 3; i++) {
    864         if (!u_isprint(samplePrintable[i]))
    865         {
    866             log_err("Printable char test error : U+%04x should be printable but is not\n", (int32_t)samplePrintable[i]);
    867         }
    868         if (u_isprint(sampleNonPrintable[i]))
    869         {
    870             log_err("Printable char test error : U+%04x should not be printable but is\n", (int32_t)sampleNonPrintable[i]);
    871         }
    872     }
    873 
    874     /* test all ISO 8 controls */
    875     for(c=0; c<=0x9f; ++c) {
    876         if(c==0x20) {
    877             /* skip ASCII graphic characters and continue with DEL */
    878             c=0x7f;
    879         }
    880         if(!u_iscntrl(c)) {
    881             log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
    882         }
    883         if(!u_isISOControl(c)) {
    884             log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
    885         }
    886         if(u_isprint(c)) {
    887             log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
    888         }
    889     }
    890 
    891     /* test all Latin-1 graphic characters */
    892     for(c=0x20; c<=0xff; ++c) {
    893         if(c==0x7f) {
    894             c=0xa0;
    895         } else if(c==0xad) {
    896             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
    897             ++c;
    898         }
    899         if(!u_isprint(c)) {
    900             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
    901         }
    902     }
    903 }
    904 
    905 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
    906 static void TestIdentifier()
    907 {
    908     const UChar sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
    909     const UChar sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
    910     const UChar sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
    911     const UChar sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
    912     const UChar sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
    913     const UChar sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
    914     const UChar sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
    915     const UChar sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
    916     const UChar sampleIDIgnore[] = {0x0006, 0x0010, 0x206b};
    917     const UChar sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
    918 
    919     int i;
    920 
    921     log_verbose("Testing sampleJavaID start \n");
    922     for (i = 0; i < 3; i++) {
    923         if (!(u_isJavaIDStart(sampleJavaIDStart[i])) ||
    924                 (u_isJavaIDStart(sampleNonJavaIDStart[i])))
    925             log_err("Java ID Start char test error : %lx or %lx\n",
    926             sampleJavaIDStart[i], sampleNonJavaIDStart[i]);
    927     }
    928 
    929     log_verbose("Testing sampleJavaID part \n");
    930     for (i = 0; i < 3; i++) {
    931         if (!(u_isJavaIDPart(sampleJavaIDPart[i])) ||
    932                 (u_isJavaIDPart(sampleNonJavaIDPart[i])))
    933             log_err("Java ID Part char test error : %lx or %lx\n",
    934              sampleJavaIDPart[i], sampleNonJavaIDPart[i]);
    935     }
    936 
    937     log_verbose("Testing sampleUnicodeID start \n");
    938     for (i = 0; i < 3; i++) {
    939         /* T_test_logln_ustr((int32_t)i); */
    940         if (!(u_isIDStart(sampleUnicodeIDStart[i])) ||
    941                 (u_isIDStart(sampleNonUnicodeIDStart[i])))
    942         {
    943             log_err("Unicode ID Start char test error : %lx  or  %lx\n", sampleUnicodeIDStart[i],
    944                                     sampleNonUnicodeIDStart[i]);
    945         }
    946     }
    947 
    948     log_verbose("Testing sample unicode ID part \n");
    949     for (i = 2; i < 3; i++) {   /* nos *** starts with 2 instead of 0, until clarified */
    950         /* T_test_logln_ustr((int32_t)i); */
    951         if (!(u_isIDPart(sampleUnicodeIDPart[i])) ||
    952                 (u_isIDPart(sampleNonUnicodeIDPart[i])))
    953            {
    954             log_err("Unicode ID Part char test error : %lx  or  %lx", sampleUnicodeIDPart[i], sampleNonUnicodeIDPart[i]);
    955             }
    956     }
    957 
    958     log_verbose("Testing  sampleId ignore\n");
    959     for (i = 0; i < 3; i++) {
    960         /*T_test_logln_ustr((int32_t)i); */
    961         if (!(u_isIDIgnorable(sampleIDIgnore[i])) ||
    962                 (u_isIDIgnorable(sampleNonIDIgnore[i])))
    963         {
    964             log_err("ID ignorable char test error : U+%04x  or  U+%04x\n", sampleIDIgnore[i], sampleNonIDIgnore[i]);
    965         }
    966     }
    967 }
    968 
    969 /* for each line of UnicodeData.txt, check some of the properties */
    970 /*
    971  * ### TODO
    972  * This test fails incorrectly if the First or Last code point of a repetitive area
    973  * is overridden, which is allowed and is encouraged for the PUAs.
    974  * Currently, this means that both area First/Last and override lines are
    975  * tested against the properties from the API,
    976  * and the area boundary will not match and cause an error.
    977  *
    978  * This function should detect area boundaries and skip them for the test of individual
    979  * code points' properties.
    980  * Then it should check that the areas contain all the same properties except where overridden.
    981  * For this, it would have had to set a flag for which code points were listed explicitly.
    982  */
    983 static void U_CALLCONV
    984 unicodeDataLineFn(void *context,
    985                   char *fields[][2], int32_t fieldCount,
    986                   UErrorCode *pErrorCode)
    987 {
    988     char buffer[100];
    989     char *end;
    990     uint32_t value;
    991     UChar32 c;
    992     int32_t i;
    993     int8_t type;
    994 
    995     /* get the character code, field 0 */
    996     c=strtoul(fields[0][0], &end, 16);
    997     if(end<=fields[0][0] || end!=fields[0][1]) {
    998         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
    999         return;
   1000     }
   1001     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
   1002         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
   1003         return;
   1004     }
   1005 
   1006     /* get general category, field 2 */
   1007     *fields[2][1]=0;
   1008     type = (int8_t)tagValues[MakeProp(fields[2][0])];
   1009     if(u_charType(c)!=type) {
   1010         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
   1011     }
   1012     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1013         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1014     }
   1015 
   1016     /* get canonical combining class, field 3 */
   1017     value=strtoul(fields[3][0], &end, 10);
   1018     if(end<=fields[3][0] || end!=fields[3][1]) {
   1019         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
   1020         return;
   1021     }
   1022     if(value>255) {
   1023         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
   1024         return;
   1025     }
   1026 #if !UCONFIG_NO_NORMALIZATION
   1027     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
   1028         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
   1029     }
   1030 #endif
   1031 
   1032     /* get BiDi category, field 4 */
   1033     *fields[4][1]=0;
   1034     i=MakeDir(fields[4][0]);
   1035     if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
   1036         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
   1037     }
   1038 
   1039     /* get ISO Comment, field 11 */
   1040     *fields[11][1]=0;
   1041     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
   1042     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
   1043         log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
   1044             c, u_errorName(*pErrorCode),
   1045             U_FAILURE(*pErrorCode) ? buffer : "[error]",
   1046             fields[11][0]);
   1047     }
   1048 
   1049     /* get uppercase mapping, field 12 */
   1050     if(fields[12][0]!=fields[12][1]) {
   1051         value=strtoul(fields[12][0], &end, 16);
   1052         if(end!=fields[12][1]) {
   1053             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
   1054             return;
   1055         }
   1056         if((UChar32)value!=u_toupper(c)) {
   1057             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
   1058         }
   1059     } else {
   1060         /* no case mapping: the API must map the code point to itself */
   1061         if(c!=u_toupper(c)) {
   1062             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
   1063         }
   1064     }
   1065 
   1066     /* get lowercase mapping, field 13 */
   1067     if(fields[13][0]!=fields[13][1]) {
   1068         value=strtoul(fields[13][0], &end, 16);
   1069         if(end!=fields[13][1]) {
   1070             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
   1071             return;
   1072         }
   1073         if((UChar32)value!=u_tolower(c)) {
   1074             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
   1075         }
   1076     } else {
   1077         /* no case mapping: the API must map the code point to itself */
   1078         if(c!=u_tolower(c)) {
   1079             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
   1080         }
   1081     }
   1082 
   1083     /* get titlecase mapping, field 14 */
   1084     if(fields[14][0]!=fields[14][1]) {
   1085         value=strtoul(fields[14][0], &end, 16);
   1086         if(end!=fields[14][1]) {
   1087             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
   1088             return;
   1089         }
   1090         if((UChar32)value!=u_totitle(c)) {
   1091             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
   1092         }
   1093     } else {
   1094         /* no case mapping: the API must map the code point to itself */
   1095         if(c!=u_totitle(c)) {
   1096             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
   1097         }
   1098     }
   1099 }
   1100 
   1101 static UBool U_CALLCONV
   1102 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
   1103     static const UChar32 test[][2]={
   1104         {0x41, U_UPPERCASE_LETTER},
   1105         {0x308, U_NON_SPACING_MARK},
   1106         {0xfffe, U_GENERAL_OTHER_TYPES},
   1107         {0xe0041, U_FORMAT_CHAR},
   1108         {0xeffff, U_UNASSIGNED}
   1109     };
   1110 
   1111     int32_t i, count;
   1112 
   1113     if(0!=strcmp((const char *)context, "a1")) {
   1114         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
   1115         return FALSE;
   1116     }
   1117 
   1118     count=LENGTHOF(test);
   1119     for(i=0; i<count; ++i) {
   1120         if(start<=test[i][0] && test[i][0]<limit) {
   1121             if(type!=(UCharCategory)test[i][1]) {
   1122                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
   1123                         start, limit, (long)type, test[i][0], test[i][1]);
   1124             }
   1125             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
   1126             return i==(count-1) ? FALSE : TRUE;
   1127         }
   1128     }
   1129 
   1130     if(start>test[count-1][0]) {
   1131         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
   1132                 start, limit, (long)type);
   1133         return FALSE;
   1134     }
   1135 
   1136     return TRUE;
   1137 }
   1138 
   1139 static UBool U_CALLCONV
   1140 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
   1141     /* default Bidi classes for unassigned code points */
   1142     static const int32_t defaultBidi[][2]={ /* { limit, class } */
   1143         { 0x0590, U_LEFT_TO_RIGHT },
   1144         { 0x0600, U_RIGHT_TO_LEFT },
   1145         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
   1146         { 0x0900, U_RIGHT_TO_LEFT },
   1147         { 0xFB1D, U_LEFT_TO_RIGHT },
   1148         { 0xFB50, U_RIGHT_TO_LEFT },
   1149         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
   1150         { 0xFE70, U_LEFT_TO_RIGHT },
   1151         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
   1152         { 0x10800, U_LEFT_TO_RIGHT },
   1153         { 0x11000, U_RIGHT_TO_LEFT },
   1154         { 0x110000, U_LEFT_TO_RIGHT }
   1155     };
   1156 
   1157     UChar32 c;
   1158     int32_t i;
   1159     UCharDirection shouldBeDir;
   1160 
   1161     /*
   1162      * LineBreak.txt specifies:
   1163      *   #  - Assigned characters that are not listed explicitly are given the value
   1164      *   #    "AL".
   1165      *   #  - Unassigned characters are given the value "XX".
   1166      *
   1167      * PUA characters are listed explicitly with "XX".
   1168      * Verify that no assigned character has "XX".
   1169      */
   1170     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
   1171         c=start;
   1172         while(c<limit) {
   1173             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
   1174                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
   1175             }
   1176             ++c;
   1177         }
   1178     }
   1179 
   1180     /*
   1181      * Verify default Bidi classes.
   1182      * For recent Unicode versions, see UCD.html.
   1183      *
   1184      * For older Unicode versions:
   1185      * See table 3-7 "Bidirectional Character Types" in UAX #9.
   1186      * http://www.unicode.org/reports/tr9/
   1187      *
   1188      * See also DerivedBidiClass.txt for Cn code points!
   1189      *
   1190      * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
   1191      * changed some default values.
   1192      * In particular, non-characters and unassigned Default Ignorable Code Points
   1193      * change from L to BN.
   1194      *
   1195      * UCD.html version 4.0.1 does not yet reflect these changes.
   1196      */
   1197     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
   1198         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
   1199         c=start;
   1200         for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
   1201             if((int32_t)c<defaultBidi[i][0]) {
   1202                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
   1203                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
   1204                         shouldBeDir=U_BOUNDARY_NEUTRAL;
   1205                     } else {
   1206                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
   1207                     }
   1208 
   1209                     if( u_charDirection(c)!=shouldBeDir ||
   1210                         u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
   1211                     ) {
   1212                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
   1213                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
   1214                     }
   1215                     ++c;
   1216                 }
   1217             }
   1218         }
   1219     }
   1220 
   1221     return TRUE;
   1222 }
   1223 
   1224 /* tests for several properties */
   1225 static void TestUnicodeData()
   1226 {
   1227     UVersionInfo expectVersionArray;
   1228     UVersionInfo versionArray;
   1229     char *fields[15][2];
   1230     UErrorCode errorCode;
   1231     UChar32 c;
   1232     int8_t type;
   1233 
   1234     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
   1235     u_getUnicodeVersion(versionArray);
   1236     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
   1237     {
   1238         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
   1239         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
   1240     }
   1241 
   1242 #if defined(ICU_UNICODE_VERSION)
   1243     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
   1244     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
   1245     {
   1246          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
   1247     }
   1248 #endif
   1249 
   1250     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
   1251         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
   1252     }
   1253 
   1254     errorCode=U_ZERO_ERROR;
   1255     parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, NULL, &errorCode);
   1256     if(U_FAILURE(errorCode)) {
   1257         return; /* if we couldn't parse UnicodeData.txt, we should return */
   1258     }
   1259 
   1260     /* sanity check on repeated properties */
   1261     for(c=0xfffe; c<=0x10ffff;) {
   1262         type=u_charType(c);
   1263         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1264             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1265         }
   1266         if(type!=U_UNASSIGNED) {
   1267             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
   1268         }
   1269         if((c&0xffff)==0xfffe) {
   1270             ++c;
   1271         } else {
   1272             c+=0xffff;
   1273         }
   1274     }
   1275 
   1276     /* test that PUA is not "unassigned" */
   1277     for(c=0xe000; c<=0x10fffd;) {
   1278         type=u_charType(c);
   1279         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1280             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1281         }
   1282         if(type==U_UNASSIGNED) {
   1283             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
   1284         } else if(type!=U_PRIVATE_USE_CHAR) {
   1285             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
   1286         }
   1287         if(c==0xf8ff) {
   1288             c=0xf0000;
   1289         } else if(c==0xffffd) {
   1290             c=0x100000;
   1291         } else {
   1292             ++c;
   1293         }
   1294     }
   1295 
   1296     /* test u_enumCharTypes() */
   1297     u_enumCharTypes(enumTypeRange, "a1");
   1298 
   1299     /* check default properties */
   1300     u_enumCharTypes(enumDefaultsRange, NULL);
   1301 }
   1302 
   1303 static void TestCodeUnit(){
   1304     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
   1305 
   1306     int32_t i;
   1307 
   1308     for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
   1309         UChar c=codeunit[i];
   1310         if(i<4){
   1311             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
   1312                 log_err("ERROR: U+%04x is a single", c);
   1313             }
   1314 
   1315         }
   1316         if(i >= 4 && i< 8){
   1317             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
   1318                 log_err("ERROR: U+%04x is a first surrogate", c);
   1319             }
   1320         }
   1321         if(i >= 8 && i< 12){
   1322             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
   1323                 log_err("ERROR: U+%04x is a second surrogate", c);
   1324             }
   1325         }
   1326     }
   1327 
   1328 }
   1329 
   1330 static void TestCodePoint(){
   1331     const UChar32 codePoint[]={
   1332         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
   1333         0xd800,
   1334         0xdbff,
   1335         0xdc00,
   1336         0xdfff,
   1337         0xdc04,
   1338         0xd821,
   1339         /*not a surrogate, valid, isUnicodeChar , not Error*/
   1340         0x20ac,
   1341         0xd7ff,
   1342         0xe000,
   1343         0xe123,
   1344         0x0061,
   1345         0xe065,
   1346         0x20402,
   1347         0x24506,
   1348         0x23456,
   1349         0x20402,
   1350         0x10402,
   1351         0x23456,
   1352         /*not a surrogate, not valid, isUnicodeChar, isError */
   1353         0x0015,
   1354         0x009f,
   1355         /*not a surrogate, not valid, not isUnicodeChar, isError */
   1356         0xffff,
   1357         0xfffe,
   1358     };
   1359     int32_t i;
   1360     for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
   1361         UChar32 c=codePoint[i];
   1362         if(i<6){
   1363             if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
   1364                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1365             }
   1366             if(UTF_IS_VALID(c)){
   1367                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1368             }
   1369             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
   1370                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1371             }
   1372             if(UTF_IS_ERROR(c)){
   1373                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1374             }
   1375         }else if(i >=6 && i<18){
   1376             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1377                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1378             }
   1379             if(!UTF_IS_VALID(c)){
   1380                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1381             }
   1382             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
   1383                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1384             }
   1385             if(UTF_IS_ERROR(c)){
   1386                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1387             }
   1388         }else if(i >=18 && i<20){
   1389             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1390                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1391             }
   1392             if(UTF_IS_VALID(c)){
   1393                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1394             }
   1395             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
   1396                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1397             }
   1398             if(!UTF_IS_ERROR(c)){
   1399                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1400             }
   1401         }
   1402         else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
   1403             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1404                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1405             }
   1406             if(UTF_IS_VALID(c)){
   1407                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1408             }
   1409             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
   1410                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1411             }
   1412             if(!UTF_IS_ERROR(c)){
   1413                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1414             }
   1415         }
   1416     }
   1417 
   1418     if(
   1419         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
   1420         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
   1421         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
   1422         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
   1423     ) {
   1424         log_err("error with U_IS_BMP()\n");
   1425     }
   1426 
   1427     if(
   1428         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
   1429         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
   1430         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
   1431         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
   1432     ) {
   1433         log_err("error with U_IS_SUPPLEMENTARY()\n");
   1434     }
   1435 }
   1436 
   1437 static void TestCharLength()
   1438 {
   1439     const int32_t codepoint[]={
   1440         1, 0x0061,
   1441         1, 0xe065,
   1442         1, 0x20ac,
   1443         2, 0x20402,
   1444         2, 0x23456,
   1445         2, 0x24506,
   1446         2, 0x20402,
   1447         2, 0x10402,
   1448         1, 0xd7ff,
   1449         1, 0xe000
   1450     };
   1451 
   1452     int32_t i;
   1453     UBool multiple;
   1454     for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
   1455         UChar32 c=codepoint[i+1];
   1456         if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
   1457             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], UTF_CHAR_LENGTH(c));
   1458         }
   1459         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
   1460         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
   1461             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
   1462         }
   1463     }
   1464 }
   1465 
   1466 /*internal functions ----*/
   1467 static int32_t MakeProp(char* str)
   1468 {
   1469     int32_t result = 0;
   1470     char* matchPosition =0;
   1471 
   1472     matchPosition = strstr(tagStrings, str);
   1473     if (matchPosition == 0)
   1474     {
   1475         log_err("unrecognized type letter ");
   1476         log_err(str);
   1477     }
   1478     else
   1479         result = (int32_t)((matchPosition - tagStrings) / 2);
   1480     return result;
   1481 }
   1482 
   1483 static int32_t MakeDir(char* str)
   1484 {
   1485     int32_t pos = 0;
   1486     for (pos = 0; pos < 19; pos++) {
   1487         if (strcmp(str, dirStrings[pos]) == 0) {
   1488             return pos;
   1489         }
   1490     }
   1491     return -1;
   1492 }
   1493 
   1494 /* test u_charName() -------------------------------------------------------- */
   1495 
   1496 static const struct {
   1497     uint32_t code;
   1498     const char *name, *oldName, *extName;
   1499 } names[]={
   1500     {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},
   1501     {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
   1502     {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" },
   1503     {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" },
   1504     {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" },
   1505     {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" },
   1506     {0xd800, "", "", "<lead surrogate-D800>" },
   1507     {0xdc00, "", "", "<trail surrogate-DC00>" },
   1508     {0xff08, "FULLWIDTH LEFT PARENTHESIS", "FULLWIDTH OPENING PARENTHESIS", "FULLWIDTH LEFT PARENTHESIS" },
   1509     {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },
   1510     {0xffff, "", "", "<noncharacter-FFFF>" },
   1511     {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" }
   1512 };
   1513 
   1514 static UBool
   1515 enumCharNamesFn(void *context,
   1516                 UChar32 code, UCharNameChoice nameChoice,
   1517                 const char *name, int32_t length) {
   1518     int32_t *pCount=(int32_t *)context;
   1519     int i;
   1520 
   1521     if(length<=0 || length!=(int32_t)strlen(name)) {
   1522         /* should not be called with an empty string or invalid length */
   1523         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
   1524         return TRUE;
   1525     }
   1526 
   1527     ++*pCount;
   1528     for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
   1529         if(code==(UChar32)names[i].code) {
   1530             switch (nameChoice) {
   1531                 case U_EXTENDED_CHAR_NAME:
   1532                     if(0!=strcmp(name, names[i].extName)) {
   1533                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
   1534                     }
   1535                     break;
   1536                 case U_UNICODE_CHAR_NAME:
   1537                     if(0!=strcmp(name, names[i].name)) {
   1538                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
   1539                     }
   1540                     break;
   1541                 case U_UNICODE_10_CHAR_NAME:
   1542                     if(names[i].oldName[0]==0 || 0!=strcmp(name, names[i].oldName)) {
   1543                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, names[i].oldName);
   1544                     }
   1545                     break;
   1546                 case U_CHAR_NAME_CHOICE_COUNT:
   1547                     break;
   1548             }
   1549             break;
   1550         }
   1551     }
   1552     return TRUE;
   1553 }
   1554 
   1555 struct enumExtCharNamesContext {
   1556     uint32_t length;
   1557     int32_t last;
   1558 };
   1559 
   1560 static UBool
   1561 enumExtCharNamesFn(void *context,
   1562                 UChar32 code, UCharNameChoice nameChoice,
   1563                 const char *name, int32_t length) {
   1564     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
   1565 
   1566     if (ecncp->last != (int32_t) code - 1) {
   1567         if (ecncp->last < 0) {
   1568             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
   1569         } else {
   1570             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
   1571         }
   1572     }
   1573     ecncp->last = (int32_t) code;
   1574 
   1575     if (!*name) {
   1576         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
   1577     }
   1578 
   1579     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
   1580 }
   1581 
   1582 /**
   1583  * This can be made more efficient by moving it into putil.c and having
   1584  * it directly access the ebcdic translation tables.
   1585  * TODO: If we get this method in putil.c, then delete it from here.
   1586  */
   1587 static UChar
   1588 u_charToUChar(char c) {
   1589     UChar uc;
   1590     u_charsToUChars(&c, &uc, 1);
   1591     return uc;
   1592 }
   1593 
   1594 static void
   1595 TestCharNames() {
   1596     static char name[80];
   1597     UErrorCode errorCode=U_ZERO_ERROR;
   1598     struct enumExtCharNamesContext extContext;
   1599     int32_t length;
   1600     UChar32 c;
   1601     int32_t i;
   1602 
   1603     log_verbose("Testing uprv_getMaxCharNameLength()\n");
   1604     length=uprv_getMaxCharNameLength();
   1605     if(length==0) {
   1606         /* no names data available */
   1607         return;
   1608     }
   1609     if(length<83) { /* Unicode 3.2 max char name length */
   1610         log_err("uprv_getMaxCharNameLength()=%d is too short");
   1611     }
   1612     /* ### TODO same tests for max ISO comment length as for max name length */
   1613 
   1614     log_verbose("Testing u_charName()\n");
   1615     for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
   1616         /* modern Unicode character name */
   1617         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
   1618         if(U_FAILURE(errorCode)) {
   1619             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
   1620             return;
   1621         }
   1622         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
   1623             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
   1624         }
   1625 
   1626         /* find the modern name */
   1627         if (*names[i].name) {
   1628             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
   1629             if(U_FAILURE(errorCode)) {
   1630                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
   1631                 return;
   1632             }
   1633             if(c!=(UChar32)names[i].code) {
   1634                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
   1635             }
   1636         }
   1637 
   1638         /* Unicode 1.0 character name */
   1639         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
   1640         if(U_FAILURE(errorCode)) {
   1641             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
   1642             return;
   1643         }
   1644         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
   1645             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
   1646         }
   1647 
   1648         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
   1649         if(names[i].oldName[0]!=0 /* && length>0 */) {
   1650             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
   1651             if(U_FAILURE(errorCode)) {
   1652                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
   1653                 return;
   1654             }
   1655             if(c!=(UChar32)names[i].code) {
   1656                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
   1657             }
   1658         }
   1659     }
   1660 
   1661     /* test u_enumCharNames() */
   1662     length=0;
   1663     errorCode=U_ZERO_ERROR;
   1664     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
   1665     if(U_FAILURE(errorCode) || length<94140) {
   1666         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
   1667     }
   1668 
   1669     extContext.length = 0;
   1670     extContext.last = -1;
   1671     errorCode=U_ZERO_ERROR;
   1672     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
   1673     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
   1674         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
   1675     }
   1676 
   1677     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
   1678     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
   1679         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
   1680     }
   1681 
   1682     /* Test getCharNameCharacters */
   1683     if(!QUICK) {
   1684         enum { BUFSIZE = 256 };
   1685         UErrorCode ec = U_ZERO_ERROR;
   1686         char buf[BUFSIZE];
   1687         int32_t maxLength;
   1688         UChar32 cp;
   1689         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
   1690         int32_t l1, l2;
   1691         UBool map[256];
   1692         UBool ok;
   1693 
   1694         USet* set = uset_open(1, 0); /* empty set */
   1695         USet* dumb = uset_open(1, 0); /* empty set */
   1696 
   1697         /*
   1698          * uprv_getCharNameCharacters() will likely return more lowercase
   1699          * letters than actual character names contain because
   1700          * it includes all the characters in lowercased names of
   1701          * general categories, for the full possible set of extended names.
   1702          */
   1703         {
   1704             USetAdder sa={
   1705                 NULL,
   1706                 uset_add,
   1707                 uset_addRange,
   1708                 uset_addString,
   1709                 NULL /* don't need remove() */
   1710             };
   1711             sa.set=set;
   1712             uprv_getCharNameCharacters(&sa);
   1713         }
   1714 
   1715         /* build set the dumb (but sure-fire) way */
   1716         for (i=0; i<256; ++i) {
   1717             map[i] = FALSE;
   1718         }
   1719 
   1720         maxLength=0;
   1721         for (cp=0; cp<0x110000; ++cp) {
   1722             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
   1723                                      buf, BUFSIZE, &ec);
   1724             if (U_FAILURE(ec)) {
   1725                 log_err("FAIL: u_charName failed when it shouldn't\n");
   1726                 uset_close(set);
   1727                 uset_close(dumb);
   1728                 return;
   1729             }
   1730             if(len>maxLength) {
   1731                 maxLength=len;
   1732             }
   1733 
   1734             for (i=0; i<len; ++i) {
   1735                 if (!map[(uint8_t) buf[i]]) {
   1736                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
   1737                     map[(uint8_t) buf[i]] = TRUE;
   1738                 }
   1739             }
   1740 
   1741             /* test for leading/trailing whitespace */
   1742             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
   1743                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
   1744             }
   1745         }
   1746 
   1747         if(map[(uint8_t)'\t']) {
   1748             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
   1749         }
   1750 
   1751         length=uprv_getMaxCharNameLength();
   1752         if(length!=maxLength) {
   1753             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
   1754                     length, maxLength);
   1755         }
   1756 
   1757         /* compare the sets.  Where is my uset_equals?!! */
   1758         ok=TRUE;
   1759         for(i=0; i<256; ++i) {
   1760             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
   1761                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
   1762                     /* ignore lowercase a-z that are in set but not in dumb */
   1763                     ok=TRUE;
   1764                 } else {
   1765                     ok=FALSE;
   1766                     break;
   1767                 }
   1768             }
   1769         }
   1770 
   1771         l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
   1772         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
   1773         if (U_FAILURE(ec)) {
   1774             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
   1775             uset_close(set);
   1776             uset_close(dumb);
   1777             return;
   1778         }
   1779 
   1780         if (l1 >= BUFSIZE) {
   1781             l1 = BUFSIZE-1;
   1782             pat[l1] = 0;
   1783         }
   1784         if (l2 >= BUFSIZE) {
   1785             l2 = BUFSIZE-1;
   1786             dumbPat[l2] = 0;
   1787         }
   1788 
   1789         if (!ok) {
   1790             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
   1791                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
   1792         } else if(VERBOSITY) {
   1793             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
   1794         }
   1795 
   1796         uset_close(set);
   1797         uset_close(dumb);
   1798     }
   1799 
   1800     /* ### TODO: test error cases and other interesting things */
   1801 }
   1802 
   1803 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
   1804 
   1805 static void
   1806 TestMirroring() {
   1807     USet *set;
   1808     UErrorCode errorCode;
   1809 
   1810     UChar32 start, end, c2, c3;
   1811     int32_t i;
   1812 
   1813     U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
   1814 
   1815     U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
   1816 
   1817     log_verbose("Testing u_isMirrored()\n");
   1818     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
   1819          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
   1820         )
   1821     ) {
   1822         log_err("u_isMirrored() does not work correctly\n");
   1823     }
   1824 
   1825     log_verbose("Testing u_charMirror()\n");
   1826     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
   1827          u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
   1828          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
   1829          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
   1830          u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
   1831          )
   1832     ) {
   1833         log_err("u_charMirror() does not work correctly\n");
   1834     }
   1835 
   1836     /* verify that Bidi_Mirroring_Glyph roundtrips */
   1837     errorCode=U_ZERO_ERROR;
   1838     set=uset_openPattern(mirroredPattern, 17, &errorCode);
   1839 
   1840     if (U_FAILURE(errorCode)) {
   1841         log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
   1842     } else {
   1843         for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
   1844             do {
   1845                 c2=u_charMirror(start);
   1846                 c3=u_charMirror(c2);
   1847                 if(c3!=start) {
   1848                     log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
   1849                 }
   1850             } while(++start<=end);
   1851         }
   1852     }
   1853 
   1854     uset_close(set);
   1855 }
   1856 
   1857 
   1858 struct RunTestData
   1859 {
   1860     const char *runText;
   1861     UScriptCode runCode;
   1862 };
   1863 
   1864 typedef struct RunTestData RunTestData;
   1865 
   1866 static void
   1867 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
   1868                 const char *prefix)
   1869 {
   1870     int32_t run, runStart, runLimit;
   1871     UScriptCode runCode;
   1872 
   1873     /* iterate over all the runs */
   1874     run = 0;
   1875     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
   1876         if (runStart != runStarts[run]) {
   1877             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
   1878                 prefix, run, runStarts[run], runStart);
   1879         }
   1880 
   1881         if (runLimit != runStarts[run + 1]) {
   1882             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
   1883                 prefix, run, runStarts[run + 1], runLimit);
   1884         }
   1885 
   1886         if (runCode != testData[run].runCode) {
   1887             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
   1888                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
   1889         }
   1890 
   1891         run += 1;
   1892 
   1893         /* stop when we've seen all the runs we expect to see */
   1894         if (run >= nRuns) {
   1895             break;
   1896         }
   1897     }
   1898 
   1899     /* Complain if we didn't see then number of runs we expected */
   1900     if (run != nRuns) {
   1901         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
   1902     }
   1903 }
   1904 
   1905 static void
   1906 TestUScriptRunAPI()
   1907 {
   1908     static const RunTestData testData1[] = {
   1909         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
   1910         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
   1911         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
   1912         {"English (", USCRIPT_LATIN},
   1913         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
   1914         {") ", USCRIPT_LATIN},
   1915         {"\\u6F22\\u5B75", USCRIPT_HAN},
   1916         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
   1917         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
   1918         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
   1919     };
   1920 
   1921     static const RunTestData testData2[] = {
   1922        {"((((((((((abc))))))))))", USCRIPT_LATIN}
   1923     };
   1924 
   1925     static const struct {
   1926       const RunTestData *testData;
   1927       int32_t nRuns;
   1928     } testDataEntries[] = {
   1929         {testData1, LENGTHOF(testData1)},
   1930         {testData2, LENGTHOF(testData2)}
   1931     };
   1932 
   1933     static const int32_t nTestEntries = LENGTHOF(testDataEntries);
   1934     int32_t testEntry;
   1935 
   1936     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
   1937         UChar testString[1024];
   1938         int32_t runStarts[256];
   1939         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
   1940         const RunTestData *testData = testDataEntries[testEntry].testData;
   1941 
   1942         int32_t run, stringLimit;
   1943         UScriptRun *scriptRun = NULL;
   1944         UErrorCode err;
   1945 
   1946         /*
   1947          * Fill in the test string and the runStarts array.
   1948          */
   1949         stringLimit = 0;
   1950         for (run = 0; run < nTestRuns; run += 1) {
   1951             runStarts[run] = stringLimit;
   1952             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
   1953             /*stringLimit -= 1;*/
   1954         }
   1955 
   1956         /* The limit of the last run */
   1957         runStarts[nTestRuns] = stringLimit;
   1958 
   1959         /*
   1960          * Make sure that calling uscript_OpenRun with a NULL text pointer
   1961          * and a non-zero text length returns the correct error.
   1962          */
   1963         err = U_ZERO_ERROR;
   1964         scriptRun = uscript_openRun(NULL, stringLimit, &err);
   1965 
   1966         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   1967             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   1968         }
   1969 
   1970         if (scriptRun != NULL) {
   1971             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
   1972             uscript_closeRun(scriptRun);
   1973         }
   1974 
   1975         /*
   1976          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
   1977          * and a zero text length returns the correct error.
   1978          */
   1979         err = U_ZERO_ERROR;
   1980         scriptRun = uscript_openRun(testString, 0, &err);
   1981 
   1982         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   1983             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   1984         }
   1985 
   1986         if (scriptRun != NULL) {
   1987             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
   1988             uscript_closeRun(scriptRun);
   1989         }
   1990 
   1991         /*
   1992          * Make sure that calling uscript_openRun with a NULL text pointer
   1993          * and a zero text length doesn't return an error.
   1994          */
   1995         err = U_ZERO_ERROR;
   1996         scriptRun = uscript_openRun(NULL, 0, &err);
   1997 
   1998         if (U_FAILURE(err)) {
   1999             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
   2000         }
   2001 
   2002         /* Make sure that the empty iterator doesn't find any runs */
   2003         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
   2004             log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
   2005         }
   2006 
   2007         /*
   2008          * Make sure that calling uscript_setRunText with a NULL text pointer
   2009          * and a non-zero text length returns the correct error.
   2010          */
   2011         err = U_ZERO_ERROR;
   2012         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
   2013 
   2014         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2015             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2016         }
   2017 
   2018         /*
   2019          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
   2020          * and a zero text length returns the correct error.
   2021          */
   2022         err = U_ZERO_ERROR;
   2023         uscript_setRunText(scriptRun, testString, 0, &err);
   2024 
   2025         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2026             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2027         }
   2028 
   2029         /*
   2030          * Now call uscript_setRunText on the empty iterator
   2031          * and make sure that it works.
   2032          */
   2033         err = U_ZERO_ERROR;
   2034         uscript_setRunText(scriptRun, testString, stringLimit, &err);
   2035 
   2036         if (U_FAILURE(err)) {
   2037             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
   2038         } else {
   2039             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
   2040         }
   2041 
   2042         uscript_closeRun(scriptRun);
   2043 
   2044         /*
   2045          * Now open an interator over the testString
   2046          * using uscript_openRun and make sure that it works
   2047          */
   2048         scriptRun = uscript_openRun(testString, stringLimit, &err);
   2049 
   2050         if (U_FAILURE(err)) {
   2051             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
   2052         } else {
   2053             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
   2054         }
   2055 
   2056         /* Now reset the iterator, and make sure
   2057          * that it still works.
   2058          */
   2059         uscript_resetRun(scriptRun);
   2060 
   2061         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
   2062 
   2063         /* Close the iterator */
   2064         uscript_closeRun(scriptRun);
   2065     }
   2066 }
   2067 
   2068 /* test additional, non-core properties */
   2069 static void
   2070 TestAdditionalProperties() {
   2071     /* test data for u_charAge() */
   2072     static const struct {
   2073         UChar32 c;
   2074         UVersionInfo version;
   2075     } charAges[]={
   2076         {0x41,    { 1, 1, 0, 0 }},
   2077         {0xffff,  { 1, 1, 0, 0 }},
   2078         {0x20ab,  { 2, 0, 0, 0 }},
   2079         {0x2fffe, { 2, 0, 0, 0 }},
   2080         {0x20ac,  { 2, 1, 0, 0 }},
   2081         {0xfb1d,  { 3, 0, 0, 0 }},
   2082         {0x3f4,   { 3, 1, 0, 0 }},
   2083         {0x10300, { 3, 1, 0, 0 }},
   2084         {0x220,   { 3, 2, 0, 0 }},
   2085         {0xff60,  { 3, 2, 0, 0 }}
   2086     };
   2087 
   2088     /* test data for u_hasBinaryProperty() */
   2089     static const int32_t
   2090     props[][3]={ /* code point, property, value */
   2091         { 0x0627, UCHAR_ALPHABETIC, TRUE },
   2092         { 0x1034a, UCHAR_ALPHABETIC, TRUE },
   2093         { 0x2028, UCHAR_ALPHABETIC, FALSE },
   2094 
   2095         { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
   2096         { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
   2097 
   2098         { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
   2099         { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
   2100 
   2101         { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
   2102         { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
   2103 
   2104         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
   2105         { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
   2106         { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
   2107         { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
   2108         { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
   2109 
   2110         { 0x058a, UCHAR_DASH, TRUE },
   2111         { 0x007e, UCHAR_DASH, FALSE },
   2112 
   2113         { 0x0c4d, UCHAR_DIACRITIC, TRUE },
   2114         { 0x3000, UCHAR_DIACRITIC, FALSE },
   2115 
   2116         { 0x0e46, UCHAR_EXTENDER, TRUE },
   2117         { 0x0020, UCHAR_EXTENDER, FALSE },
   2118 
   2119 #if !UCONFIG_NO_NORMALIZATION
   2120         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
   2121         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
   2122         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
   2123 
   2124         { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
   2125         { 0x0308, UCHAR_NFD_INERT, FALSE },
   2126 
   2127         { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
   2128         { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
   2129 
   2130         { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
   2131         { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
   2132         { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
   2133         { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
   2134         { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
   2135         { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
   2136 
   2137         { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
   2138         { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
   2139 
   2140         { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
   2141         { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
   2142         { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
   2143         { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
   2144         { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
   2145         { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
   2146 #endif
   2147 
   2148         { 0x0044, UCHAR_HEX_DIGIT, TRUE },
   2149         { 0xff46, UCHAR_HEX_DIGIT, TRUE },
   2150         { 0x0047, UCHAR_HEX_DIGIT, FALSE },
   2151 
   2152         { 0x30fb, UCHAR_HYPHEN, TRUE },
   2153         { 0xfe58, UCHAR_HYPHEN, FALSE },
   2154 
   2155         { 0x2172, UCHAR_ID_CONTINUE, TRUE },
   2156         { 0x0307, UCHAR_ID_CONTINUE, TRUE },
   2157         { 0x005c, UCHAR_ID_CONTINUE, FALSE },
   2158 
   2159         { 0x2172, UCHAR_ID_START, TRUE },
   2160         { 0x007a, UCHAR_ID_START, TRUE },
   2161         { 0x0039, UCHAR_ID_START, FALSE },
   2162 
   2163         { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
   2164         { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
   2165         { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
   2166 
   2167         { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
   2168         { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
   2169 
   2170         { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
   2171         { 0x0345, UCHAR_LOWERCASE, TRUE },
   2172         { 0x0030, UCHAR_LOWERCASE, FALSE },
   2173 
   2174         { 0x1d7a9, UCHAR_MATH, TRUE },
   2175         { 0x2135, UCHAR_MATH, TRUE },
   2176         { 0x0062, UCHAR_MATH, FALSE },
   2177 
   2178         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
   2179         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
   2180         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
   2181 
   2182         { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
   2183         { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
   2184         { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
   2185 
   2186         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
   2187         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
   2188 
   2189         { 0x1d44a, UCHAR_UPPERCASE, TRUE },
   2190         { 0x2162, UCHAR_UPPERCASE, TRUE },
   2191         { 0x0345, UCHAR_UPPERCASE, FALSE },
   2192 
   2193         { 0x0020, UCHAR_WHITE_SPACE, TRUE },
   2194         { 0x202f, UCHAR_WHITE_SPACE, TRUE },
   2195         { 0x3001, UCHAR_WHITE_SPACE, FALSE },
   2196 
   2197         { 0x0711, UCHAR_XID_CONTINUE, TRUE },
   2198         { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
   2199         { 0x007c, UCHAR_XID_CONTINUE, FALSE },
   2200 
   2201         { 0x16ee, UCHAR_XID_START, TRUE },
   2202         { 0x23456, UCHAR_XID_START, TRUE },
   2203         { 0x1d1aa, UCHAR_XID_START, FALSE },
   2204 
   2205         /*
   2206          * Version break:
   2207          * The following properties are only supported starting with the
   2208          * Unicode version indicated in the second field.
   2209          */
   2210         { -1, 0x320, 0 },
   2211 
   2212         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
   2213         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
   2214         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
   2215 
   2216         { 0x0341, UCHAR_DEPRECATED, TRUE },
   2217         { 0xe0041, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
   2218         { 0xe0100, UCHAR_DEPRECATED, FALSE },
   2219 
   2220         { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
   2221         { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
   2222         { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
   2223         { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
   2224 
   2225         { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
   2226         { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
   2227         { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
   2228         { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
   2229 
   2230         { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
   2231         { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
   2232 
   2233         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
   2234         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
   2235 
   2236         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
   2237         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
   2238 
   2239         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
   2240         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
   2241 
   2242         { 0x2e9b, UCHAR_RADICAL, TRUE },
   2243         { 0x4e00, UCHAR_RADICAL, FALSE },
   2244 
   2245         { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
   2246         { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
   2247 
   2248         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
   2249         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
   2250 
   2251         { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
   2252 
   2253         { 0x002e, UCHAR_S_TERM, TRUE },
   2254         { 0x0061, UCHAR_S_TERM, FALSE },
   2255 
   2256         { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
   2257         { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
   2258         { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
   2259         { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
   2260 
   2261         /* enum/integer type properties */
   2262 
   2263         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
   2264         /* test default Bidi classes for unassigned code points */
   2265         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2266         { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2267         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2268         { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
   2269         { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
   2270         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2271         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2272         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2273         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2274         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2275         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2276 
   2277         { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2278         { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2279         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2280         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2281         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2282         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2283         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2284         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2285 
   2286         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
   2287         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
   2288         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
   2289         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
   2290         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
   2291         { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2292         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
   2293         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
   2294         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2295         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2296         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
   2297 
   2298         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
   2299         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
   2300 
   2301         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
   2302         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
   2303         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
   2304         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2305         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2306         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2307         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
   2308         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
   2309         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
   2310 
   2311         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2312         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
   2313         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2314         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
   2315         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2316         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
   2317         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2318         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2319         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2320         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2321         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2322         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2323         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2324         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
   2325         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2326         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2327         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2328 
   2329         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
   2330         { 0xd7d7, UCHAR_GENERAL_CATEGORY, 0 },
   2331 
   2332         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
   2333         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
   2334         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
   2335         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
   2336         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
   2337         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_HAMZA_ON_HEH_GOAL },
   2338 
   2339         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
   2340         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
   2341         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
   2342         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
   2343         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
   2344         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2345         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2346         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2347 
   2348         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
   2349         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
   2350         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
   2351         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
   2352         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
   2353         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2354         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2355         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2356         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2357         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2358         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2359         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2360         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
   2361         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
   2362         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
   2363         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
   2364 
   2365         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
   2366 
   2367         /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
   2368 
   2369         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2370         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2371         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2372         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2373 
   2374         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2375         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2376         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2377         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2378 
   2379         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2380         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2381         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2382         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2383 
   2384         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2385         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2386         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2387         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2388         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2389         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2390 
   2391         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2392         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2393         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2394         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2395 
   2396         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2397         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2398         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2399         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2400         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2401 
   2402         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2403 
   2404         { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
   2405 
   2406         { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
   2407         { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
   2408         { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
   2409 
   2410         { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2411         { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2412         { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2413         { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
   2414         { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
   2415 
   2416         { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
   2417         { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
   2418         { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
   2419 
   2420         { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
   2421         { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
   2422         { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
   2423         { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
   2424 
   2425         { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
   2426         { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
   2427         { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
   2428         { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
   2429         { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
   2430         { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
   2431 
   2432         { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
   2433         { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
   2434         { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
   2435         { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
   2436 
   2437         { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
   2438         { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
   2439         { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
   2440         { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
   2441 
   2442         { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
   2443         { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
   2444         { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
   2445         { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
   2446 
   2447         /* undefined UProperty values */
   2448         { 0x61, 0x4a7, 0 },
   2449         { 0x234bc, 0x15ed, 0 }
   2450     };
   2451 
   2452     UVersionInfo version;
   2453     UChar32 c;
   2454     int32_t i, result, uVersion;
   2455     UProperty which;
   2456 
   2457     /* what is our Unicode version? */
   2458     u_getUnicodeVersion(version);
   2459     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
   2460 
   2461     u_charAge(0x20, version);
   2462     if(version[0]==0) {
   2463         /* no additional properties available */
   2464         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
   2465         return;
   2466     }
   2467 
   2468     /* test u_charAge() */
   2469     for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
   2470         u_charAge(charAges[i].c, version);
   2471         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
   2472             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
   2473                 charAges[i].c,
   2474                 version[0], version[1], version[2], version[3],
   2475                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
   2476         }
   2477     }
   2478 
   2479     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
   2480         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
   2481         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
   2482         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
   2483         u_getIntPropertyMinValue(0x2345)!=0
   2484     ) {
   2485         log_err("error: u_getIntPropertyMinValue() wrong\n");
   2486     }
   2487     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
   2488         log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
   2489     }
   2490     if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
   2491         log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
   2492     }
   2493     if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
   2494         log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
   2495     }
   2496     if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
   2497         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
   2498     }
   2499     if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
   2500         log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
   2501     }
   2502     if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
   2503         log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
   2504     }
   2505     if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
   2506         log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
   2507     }
   2508     if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
   2509         log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
   2510     }
   2511     if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
   2512         log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
   2513     }
   2514     if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
   2515         log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
   2516     }
   2517     if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
   2518         log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
   2519     }
   2520     if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
   2521         log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
   2522     }
   2523     if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
   2524         log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
   2525     }
   2526     /*JB#2410*/
   2527     if( u_getIntPropertyMaxValue(0x2345)!=-1) {
   2528         log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
   2529     }
   2530     if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
   2531         log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
   2532     }
   2533     if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
   2534         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
   2535     }
   2536     if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
   2537         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
   2538     }
   2539     if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
   2540         log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
   2541     }
   2542 
   2543     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
   2544     for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
   2545         if(props[i][0]<0) {
   2546             /* Unicode version break */
   2547             if(uVersion<props[i][1]) {
   2548                 break; /* do not test properties that are not yet supported */
   2549             } else {
   2550                 continue; /* skip this row */
   2551             }
   2552         }
   2553 
   2554         c=(UChar32)props[i][0];
   2555         which=(UProperty)props[i][1];
   2556 
   2557         if(which<UCHAR_INT_START) {
   2558             result=u_hasBinaryProperty(c, which);
   2559             if(result!=props[i][2]) {
   2560                 log_err("error: u_hasBinaryProperty(U+%04lx, %d)=%d is wrong (props[%d])\n",
   2561                         c, which, result, i);
   2562             }
   2563         }
   2564 
   2565         result=u_getIntPropertyValue(c, which);
   2566         if(result!=props[i][2]) {
   2567             log_err("error: u_getIntPropertyValue(U+%04lx, 0x1000+%d)=%d is wrong, should be %d (props[%d])\n",
   2568                     c, (int32_t)which-0x1000, result, props[i][2], i);
   2569         }
   2570 
   2571         /* test separate functions, too */
   2572         switch((UProperty)props[i][1]) {
   2573         case UCHAR_ALPHABETIC:
   2574             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2575                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
   2576                         props[i][0], result, i);
   2577             }
   2578             break;
   2579         case UCHAR_LOWERCASE:
   2580             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2581                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
   2582                         props[i][0], result, i);
   2583             }
   2584             break;
   2585         case UCHAR_UPPERCASE:
   2586             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2587                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
   2588                         props[i][0], result, i);
   2589             }
   2590             break;
   2591         case UCHAR_WHITE_SPACE:
   2592             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2593                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
   2594                         props[i][0], result, i);
   2595             }
   2596             break;
   2597         default:
   2598             break;
   2599         }
   2600     }
   2601 }
   2602 
   2603 static void
   2604 TestNumericProperties(void) {
   2605     /* see UnicodeData.txt, DerivedNumericValues.txt */
   2606     static const struct {
   2607         UChar32 c;
   2608         int32_t type;
   2609         double numValue;
   2610     } values[]={
   2611         { 0x0F33, U_NT_NUMERIC, -1./2. },
   2612         { 0x0C66, U_NT_DECIMAL, 0 },
   2613         { 0x96f6, U_NT_NUMERIC, 0 },
   2614         { 0x2159, U_NT_NUMERIC, 1./6. },
   2615         { 0x00BD, U_NT_NUMERIC, 1./2. },
   2616         { 0x0031, U_NT_DECIMAL, 1. },
   2617         { 0x4e00, U_NT_NUMERIC, 1. },
   2618         { 0x58f1, U_NT_NUMERIC, 1. },
   2619         { 0x10320, U_NT_NUMERIC, 1. },
   2620         { 0x0F2B, U_NT_NUMERIC, 3./2. },
   2621         { 0x00B2, U_NT_DIGIT, 2. },
   2622         { 0x5f10, U_NT_NUMERIC, 2. },
   2623         { 0x1813, U_NT_DECIMAL, 3. },
   2624         { 0x5f0e, U_NT_NUMERIC, 3. },
   2625         { 0x2173, U_NT_NUMERIC, 4. },
   2626         { 0x8086, U_NT_NUMERIC, 4. },
   2627         { 0x278E, U_NT_DIGIT, 5. },
   2628         { 0x1D7F2, U_NT_DECIMAL, 6. },
   2629         { 0x247A, U_NT_DIGIT, 7. },
   2630         { 0x7396, U_NT_NUMERIC, 9. },
   2631         { 0x1372, U_NT_NUMERIC, 10. },
   2632         { 0x216B, U_NT_NUMERIC, 12. },
   2633         { 0x16EE, U_NT_NUMERIC, 17. },
   2634         { 0x249A, U_NT_NUMERIC, 19. },
   2635         { 0x303A, U_NT_NUMERIC, 30. },
   2636         { 0x5345, U_NT_NUMERIC, 30. },
   2637         { 0x32B2, U_NT_NUMERIC, 37. },
   2638         { 0x1375, U_NT_NUMERIC, 40. },
   2639         { 0x10323, U_NT_NUMERIC, 50. },
   2640         { 0x0BF1, U_NT_NUMERIC, 100. },
   2641         { 0x964c, U_NT_NUMERIC, 100. },
   2642         { 0x217E, U_NT_NUMERIC, 500. },
   2643         { 0x2180, U_NT_NUMERIC, 1000. },
   2644         { 0x4edf, U_NT_NUMERIC, 1000. },
   2645         { 0x2181, U_NT_NUMERIC, 5000. },
   2646         { 0x137C, U_NT_NUMERIC, 10000. },
   2647         { 0x4e07, U_NT_NUMERIC, 10000. },
   2648         { 0x4ebf, U_NT_NUMERIC, 100000000. },
   2649         { 0x5146, U_NT_NUMERIC, 1000000000000. },
   2650         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2651         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2652         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2653         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2654         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2655         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE }
   2656     };
   2657 
   2658     double nv;
   2659     UChar32 c;
   2660     int32_t i, type;
   2661 
   2662     for(i=0; i<LENGTHOF(values); ++i) {
   2663         c=values[i].c;
   2664         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
   2665         nv=u_getNumericValue(c);
   2666 
   2667         if(type!=values[i].type) {
   2668             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
   2669         }
   2670         if(0.000001 <= fabs(nv - values[i].numValue)) {
   2671             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
   2672         }
   2673     }
   2674 }
   2675 
   2676 /**
   2677  * Test the property names and property value names API.
   2678  */
   2679 static void
   2680 TestPropertyNames(void) {
   2681     int32_t p, v, choice=0, rev;
   2682     UBool atLeastSomething = FALSE;
   2683 
   2684     for (p=0; ; ++p) {
   2685         UProperty propEnum = (UProperty)p;
   2686         UBool sawProp = FALSE;
   2687         if(p > 10 && !atLeastSomething) {
   2688           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
   2689           return;
   2690         }
   2691 
   2692         for (choice=0; ; ++choice) {
   2693             const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
   2694             if (name) {
   2695                 if (!sawProp)
   2696                     log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
   2697                 log_verbose("%d=\"%s\"", choice, name);
   2698                 sawProp = TRUE;
   2699                 atLeastSomething = TRUE;
   2700 
   2701                 /* test reverse mapping */
   2702                 rev = u_getPropertyEnum(name);
   2703                 if (rev != p) {
   2704                     log_err("Property round-trip failure: %d -> %s -> %d\n",
   2705                             p, name, rev);
   2706                 }
   2707             }
   2708             if (!name && choice>0) break;
   2709         }
   2710         if (sawProp) {
   2711             /* looks like a valid property; check the values */
   2712             const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
   2713             int32_t max = 0;
   2714             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
   2715                 max = 255;
   2716             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
   2717                 /* it's far too slow to iterate all the way up to
   2718                    the real max, U_GC_P_MASK */
   2719                 max = U_GC_NL_MASK;
   2720             } else if (p == UCHAR_BLOCK) {
   2721                 /* UBlockCodes, unlike other values, start at 1 */
   2722                 max = 1;
   2723             }
   2724             log_verbose("\n");
   2725             for (v=-1; ; ++v) {
   2726                 UBool sawValue = FALSE;
   2727                 for (choice=0; ; ++choice) {
   2728                     const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
   2729                     if (vname) {
   2730                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
   2731                         log_verbose("%d=\"%s\"", choice, vname);
   2732                         sawValue = TRUE;
   2733 
   2734                         /* test reverse mapping */
   2735                         rev = u_getPropertyValueEnum(propEnum, vname);
   2736                         if (rev != v) {
   2737                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
   2738                                     pname, v, vname, rev);
   2739                         }
   2740                     }
   2741                     if (!vname && choice>0) break;
   2742                 }
   2743                 if (sawValue) {
   2744                     log_verbose("\n");
   2745                 }
   2746                 if (!sawValue && v>=max) break;
   2747             }
   2748         }
   2749         if (!sawProp) {
   2750             if (p>=UCHAR_STRING_LIMIT) {
   2751                 break;
   2752             } else if (p>=UCHAR_DOUBLE_LIMIT) {
   2753                 p = UCHAR_STRING_START - 1;
   2754             } else if (p>=UCHAR_MASK_LIMIT) {
   2755                 p = UCHAR_DOUBLE_START - 1;
   2756             } else if (p>=UCHAR_INT_LIMIT) {
   2757                 p = UCHAR_MASK_START - 1;
   2758             } else if (p>=UCHAR_BINARY_LIMIT) {
   2759                 p = UCHAR_INT_START - 1;
   2760             }
   2761         }
   2762     }
   2763 }
   2764 
   2765 /**
   2766  * Test the property values API.  See JB#2410.
   2767  */
   2768 static void
   2769 TestPropertyValues(void) {
   2770     int32_t i, p, min, max;
   2771     UErrorCode ec;
   2772 
   2773     /* Min should be 0 for everything. */
   2774     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
   2775     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
   2776         UProperty propEnum = (UProperty)p;
   2777         min = u_getIntPropertyMinValue(propEnum);
   2778         if (min != 0) {
   2779             if (p == UCHAR_BLOCK) {
   2780                 /* This is okay...for now.  See JB#2487.
   2781                    TODO Update this for JB#2487. */
   2782             } else {
   2783                 const char* name;
   2784                 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
   2785                 if (name == NULL)
   2786                     name = "<ERROR>";
   2787                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
   2788                         name, min);
   2789             }
   2790         }
   2791     }
   2792 
   2793     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
   2794         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
   2795         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
   2796     }
   2797 
   2798     /* Max should be -1 for invalid properties. */
   2799     max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
   2800     if (max != -1) {
   2801         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
   2802                 max);
   2803     }
   2804 
   2805     /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
   2806     for (i=0; i<2; ++i) {
   2807         int32_t script;
   2808         const char* desc;
   2809         ec = U_ZERO_ERROR;
   2810         switch (i) {
   2811         case 0:
   2812             script = uscript_getScript(-1, &ec);
   2813             desc = "uscript_getScript(-1)";
   2814             break;
   2815         case 1:
   2816             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
   2817             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
   2818             break;
   2819         default:
   2820             log_err("Internal test error. Too many scripts\n");
   2821             return;
   2822         }
   2823         /* We don't explicitly test ec.  It should be U_FAILURE but it
   2824            isn't documented as such. */
   2825         if (script != (int32_t)USCRIPT_INVALID_CODE) {
   2826             log_err("FAIL: %s = %d, exp. 0\n",
   2827                     desc, script);
   2828         }
   2829     }
   2830 }
   2831 
   2832 /* add characters from a serialized set to a normal one */
   2833 static void
   2834 _setAddSerialized(USet *set, const USerializedSet *sset) {
   2835     UChar32 start, end;
   2836     int32_t i, count;
   2837 
   2838     count=uset_getSerializedRangeCount(sset);
   2839     for(i=0; i<count; ++i) {
   2840         uset_getSerializedRange(sset, i, &start, &end);
   2841         uset_addRange(set, start, end);
   2842     }
   2843 }
   2844 
   2845 /* various tests for consistency of UCD data and API behavior */
   2846 static void
   2847 TestConsistency() {
   2848 #if !UCONFIG_NO_NORMALIZATION
   2849     UChar buffer16[300];
   2850 #endif
   2851     char buffer[300];
   2852     USet *set1, *set2, *set3, *set4;
   2853     UErrorCode errorCode;
   2854 
   2855 #if !UCONFIG_NO_NORMALIZATION
   2856     USerializedSet sset;
   2857 #endif
   2858     UChar32 start, end;
   2859     int32_t i, length;
   2860 
   2861     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
   2862     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
   2863     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
   2864     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
   2865     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
   2866 
   2867     U_STRING_DECL(mathBlocksPattern,
   2868         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
   2869         1+32+46+46+45+43+1+1); /* +1 for NUL */
   2870     U_STRING_DECL(mathPattern, "[:Math:]", 8);
   2871     U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
   2872     U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
   2873     U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
   2874 
   2875     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
   2876     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
   2877     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
   2878     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
   2879     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
   2880 
   2881     U_STRING_INIT(mathBlocksPattern,
   2882         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
   2883         1+32+46+46+45+43+1+1); /* +1 for NUL */
   2884     U_STRING_INIT(mathPattern, "[:Math:]", 8);
   2885     U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
   2886     U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
   2887     U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
   2888 
   2889     /*
   2890      * It used to be that UCD.html and its precursors said
   2891      * "Those dashes used to mark connections between pieces of words,
   2892      *  plus the Katakana middle dot."
   2893      *
   2894      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
   2895      * but not from Hyphen.
   2896      * UTC 94 (2003mar) decided to leave it that way and to changed UCD.html.
   2897      * Therefore, do not show errors when testing the Hyphen property.
   2898      */
   2899     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
   2900                 "known to the UTC and not considered errors.\n");
   2901 
   2902     errorCode=U_ZERO_ERROR;
   2903     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
   2904     set2=uset_openPattern(dashPattern, 8, &errorCode);
   2905     if(U_SUCCESS(errorCode)) {
   2906         /* remove the Katakana middle dot(s) from set1 */
   2907         uset_remove(set1, 0x30fb);
   2908         uset_remove(set1, 0xff65); /* halfwidth variant */
   2909         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
   2910     } else {
   2911         log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   2912     }
   2913 
   2914     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
   2915     set3=uset_openPattern(formatPattern, 6, &errorCode);
   2916     set4=uset_openPattern(alphaPattern, 14, &errorCode);
   2917     if(U_SUCCESS(errorCode)) {
   2918         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
   2919         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
   2920         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
   2921     } else {
   2922         log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   2923     }
   2924 
   2925     uset_close(set1);
   2926     uset_close(set2);
   2927     uset_close(set3);
   2928     uset_close(set4);
   2929 
   2930     /*
   2931      * Check that each lowercase character has "small" in its name
   2932      * and not "capital".
   2933      * There are some such characters, some of which seem odd.
   2934      * Use the verbose flag to see these notices.
   2935      */
   2936     errorCode=U_ZERO_ERROR;
   2937     set1=uset_openPattern(lowerPattern, 13, &errorCode);
   2938     if(U_SUCCESS(errorCode)) {
   2939         for(i=0;; ++i) {
   2940             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
   2941             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
   2942                 break; /* done */
   2943             }
   2944             if(U_FAILURE(errorCode)) {
   2945                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
   2946                         i, u_errorName(errorCode));
   2947                 break;
   2948             }
   2949             if(length!=0) {
   2950                 break; /* done with code points, got a string or -1 */
   2951             }
   2952 
   2953             while(start<=end) {
   2954                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
   2955                 if(U_FAILURE(errorCode)) {
   2956                     log_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
   2957                     errorCode=U_ZERO_ERROR;
   2958                     continue;
   2959                 }
   2960                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
   2961                     strstr(buffer, "SMALL CAPITAL")==NULL
   2962                 ) {
   2963                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
   2964                 }
   2965                 ++start;
   2966             }
   2967         }
   2968     } else {
   2969         log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   2970     }
   2971     uset_close(set1);
   2972 
   2973 #if !UCONFIG_NO_NORMALIZATION
   2974 
   2975     /*
   2976      * Test for an example that unorm_getCanonStartSet() delivers
   2977      * all characters that compose from the input one,
   2978      * even in multiple steps.
   2979      * For example, the set for "I" (0049) should contain both
   2980      * I-diaeresis (00CF) and I-diaeresis-acute (1E2E).
   2981      * In general, the set for the middle such character should be a subset
   2982      * of the set for the first.
   2983      */
   2984     set1=uset_open(1, 0);
   2985     set2=uset_open(1, 0);
   2986 
   2987     if (unorm_getCanonStartSet(0x49, &sset)) {
   2988         _setAddSerialized(set1, &sset);
   2989 
   2990         /* enumerate all characters that are plausible to be latin letters */
   2991         for(start=0xa0; start<0x2000; ++start) {
   2992             if(unorm_getDecomposition(start, FALSE, buffer16, LENGTHOF(buffer16))>1 && buffer16[0]==0x49) {
   2993                 uset_add(set2, start);
   2994             }
   2995         }
   2996 
   2997         compareUSets(set1, set2,
   2998                      "[canon start set of 0049]", "[all c with canon decomp with 0049]",
   2999                      TRUE);
   3000     } else {
   3001       log_err("error calling unorm_getCanonStartSet()\n");
   3002     }
   3003 
   3004     uset_close(set1);
   3005     uset_close(set2);
   3006 
   3007 #endif
   3008 
   3009     /* verify that all assigned characters in Math blocks are exactly Math characters */
   3010     errorCode=U_ZERO_ERROR;
   3011     set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
   3012     set2=uset_openPattern(mathPattern, 8, &errorCode);
   3013     set3=uset_openPattern(unassignedPattern, 6, &errorCode);
   3014     if(U_SUCCESS(errorCode)) {
   3015         uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
   3016         uset_complement(set3);      /* assigned characters */
   3017         uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
   3018         compareUSets(set1, set2,
   3019                      "[assigned Math block chars]", "[math blocks]&[:Math:]",
   3020                      TRUE);
   3021     } else {
   3022         log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3023     }
   3024     uset_close(set1);
   3025     uset_close(set2);
   3026     uset_close(set3);
   3027 
   3028     /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
   3029     errorCode=U_ZERO_ERROR;
   3030     set1=uset_openPattern(unknownPattern, 14, &errorCode);
   3031     set2=uset_openPattern(reservedPattern, 20, &errorCode);
   3032     if(U_SUCCESS(errorCode)) {
   3033         compareUSets(set1, set2,
   3034                      "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
   3035                      TRUE);
   3036     } else {
   3037         log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3038     }
   3039     uset_close(set1);
   3040     uset_close(set2);
   3041 }
   3042 
   3043 /*
   3044  * Starting with ICU4C 3.4, the core Unicode properties files
   3045  * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
   3046  * are hardcoded in the common DLL and therefore not included
   3047  * in the data package any more.
   3048  * Tests requiring these files are disabled so that
   3049  * we need not jump through hoops (like adding snapshots of these files
   3050  * to testdata).
   3051  * See Jitterbug 4497.
   3052  */
   3053 #define HARDCODED_DATA_4497 1
   3054 
   3055 /* API coverage for ucase.c */
   3056 static void TestUCase() {
   3057 #if !HARDCODED_DATA_4497
   3058     UDataMemory *pData;
   3059     UCaseProps *csp;
   3060     const UCaseProps *ccsp;
   3061     UErrorCode errorCode;
   3062 
   3063     /* coverage for ucase_openBinary() */
   3064     errorCode=U_ZERO_ERROR;
   3065     pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
   3066     if(U_FAILURE(errorCode)) {
   3067         log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
   3068                     u_errorName(errorCode));
   3069         return;
   3070     }
   3071 
   3072     csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
   3073     if(U_FAILURE(errorCode)) {
   3074         log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
   3075                 u_errorName(errorCode));
   3076         udata_close(pData);
   3077         return;
   3078     }
   3079 
   3080     if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
   3081         log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
   3082     }
   3083 
   3084     ucase_close(csp);
   3085     udata_close(pData);
   3086 
   3087     /* coverage for ucase_getDummy() */
   3088     errorCode=U_ZERO_ERROR;
   3089     ccsp=ucase_getDummy(&errorCode);
   3090     if(ucase_tolower(ccsp, 0x41)!=0x41) {
   3091         log_err("ucase_tolower(dummy, A)!=A\n");
   3092     }
   3093 #endif
   3094 }
   3095 
   3096 /* API coverage for ubidi_props.c */
   3097 static void TestUBiDiProps() {
   3098 #if !HARDCODED_DATA_4497
   3099     UDataMemory *pData;
   3100     UBiDiProps *bdp;
   3101     const UBiDiProps *cbdp;
   3102     UErrorCode errorCode;
   3103 
   3104     /* coverage for ubidi_openBinary() */
   3105     errorCode=U_ZERO_ERROR;
   3106     pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
   3107     if(U_FAILURE(errorCode)) {
   3108         log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
   3109                     u_errorName(errorCode));
   3110         return;
   3111     }
   3112 
   3113     bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
   3114     if(U_FAILURE(errorCode)) {
   3115         log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
   3116                 u_errorName(errorCode));
   3117         udata_close(pData);
   3118         return;
   3119     }
   3120 
   3121     if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
   3122         log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
   3123     }
   3124 
   3125     ubidi_closeProps(bdp);
   3126     udata_close(pData);
   3127 
   3128     /* coverage for ubidi_getDummy() */
   3129     errorCode=U_ZERO_ERROR;
   3130     cbdp=ubidi_getDummy(&errorCode);
   3131     if(ubidi_getClass(cbdp, 0x20)!=0) {
   3132         log_err("ubidi_getClass(dummy, space)!=0\n");
   3133     }
   3134 #endif
   3135 }
   3136 
   3137 /* test case folding, compare return values with CaseFolding.txt ------------ */
   3138 
   3139 /* bit set for which case foldings for a character have been tested already */
   3140 enum {
   3141     CF_SIMPLE=1,
   3142     CF_FULL=2,
   3143     CF_TURKIC=4,
   3144     CF_ALL=7
   3145 };
   3146 
   3147 static void
   3148 testFold(UChar32 c, int which,
   3149          UChar32 simple, UChar32 turkic,
   3150          const UChar *full, int32_t fullLength,
   3151          const UChar *turkicFull, int32_t turkicFullLength) {
   3152     UChar s[2], t[32];
   3153     UChar32 c2;
   3154     int32_t length, length2;
   3155 
   3156     UErrorCode errorCode=U_ZERO_ERROR;
   3157 
   3158     length=0;
   3159     U16_APPEND_UNSAFE(s, length, c);
   3160 
   3161     if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
   3162         log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
   3163     }
   3164     if((which&CF_FULL)!=0) {
   3165         length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);
   3166         if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
   3167             log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
   3168         }
   3169     }
   3170     if((which&CF_TURKIC)!=0) {
   3171         if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
   3172             log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
   3173         }
   3174 
   3175         length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
   3176         if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
   3177             log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
   3178         }
   3179     }
   3180 }
   3181 
   3182 /* test that c case-folds to itself */
   3183 static void
   3184 testFoldToSelf(UChar32 c, int which) {
   3185     UChar s[2];
   3186     int32_t length;
   3187 
   3188     length=0;
   3189     U16_APPEND_UNSAFE(s, length, c);
   3190     testFold(c, which, c, c, s, length, s, length);
   3191 }
   3192 
   3193 struct CaseFoldingData {
   3194     USet *notSeen;
   3195     UChar32 prev, prevSimple;
   3196     UChar prevFull[32];
   3197     int32_t prevFullLength;
   3198     int which;
   3199 };
   3200 typedef struct CaseFoldingData CaseFoldingData;
   3201 
   3202 static void U_CALLCONV
   3203 caseFoldingLineFn(void *context,
   3204                   char *fields[][2], int32_t fieldCount,
   3205                   UErrorCode *pErrorCode) {
   3206     CaseFoldingData *pData=(CaseFoldingData *)context;
   3207     char *end;
   3208     UChar full[32];
   3209     UChar32 c, prev, simple;
   3210     int32_t count;
   3211     int which;
   3212     char status;
   3213 
   3214     /* get code point */
   3215     c=(UChar32)strtoul(u_skipWhitespace(fields[0][0]), &end, 16);
   3216     end=(char *)u_skipWhitespace(end);
   3217     if(end<=fields[0][0] || end!=fields[0][1]) {
   3218         log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
   3219         *pErrorCode=U_PARSE_ERROR;
   3220         return;
   3221     }
   3222 
   3223     /* get the status of this mapping */
   3224     status=*u_skipWhitespace(fields[1][0]);
   3225     if(status!='C' && status!='S' && status!='F' && status!='T') {
   3226         log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
   3227         *pErrorCode=U_PARSE_ERROR;
   3228         return;
   3229     }
   3230 
   3231     /* get the mapping */
   3232     count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
   3233     if(U_FAILURE(*pErrorCode)) {
   3234         log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
   3235         return;
   3236     }
   3237 
   3238     /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
   3239     if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
   3240         simple=c;
   3241     }
   3242 
   3243     if(c!=(prev=pData->prev)) {
   3244         /*
   3245          * Test remaining mappings for the previous code point.
   3246          * If a turkic folding was not mentioned, then it should fold the same
   3247          * as the regular simple case folding.
   3248          */
   3249         UChar s[2];
   3250         int32_t length;
   3251 
   3252         length=0;
   3253         U16_APPEND_UNSAFE(s, length, prev);
   3254         testFold(prev, (~pData->which)&CF_ALL,
   3255                  prev, pData->prevSimple,
   3256                  s, length,
   3257                  pData->prevFull, pData->prevFullLength);
   3258         pData->prev=pData->prevSimple=c;
   3259         length=0;
   3260         U16_APPEND_UNSAFE(pData->prevFull, length, c);
   3261         pData->prevFullLength=length;
   3262         pData->which=0;
   3263     }
   3264 
   3265     /*
   3266      * Turn the status into a bit set of case foldings to test.
   3267      * Remember non-Turkic case foldings as defaults for Turkic mode.
   3268      */
   3269     switch(status) {
   3270     case 'C':
   3271         which=CF_SIMPLE|CF_FULL;
   3272         pData->prevSimple=simple;
   3273         u_memcpy(pData->prevFull, full, count);
   3274         pData->prevFullLength=count;
   3275         break;
   3276     case 'S':
   3277         which=CF_SIMPLE;
   3278         pData->prevSimple=simple;
   3279         break;
   3280     case 'F':
   3281         which=CF_FULL;
   3282         u_memcpy(pData->prevFull, full, count);
   3283         pData->prevFullLength=count;
   3284         break;
   3285     case 'T':
   3286         which=CF_TURKIC;
   3287         break;
   3288     default:
   3289         which=0;
   3290         break; /* won't happen because of test above */
   3291     }
   3292 
   3293     testFold(c, which, simple, simple, full, count, full, count);
   3294 
   3295     /* remember which case foldings of c have been tested */
   3296     pData->which|=which;
   3297 
   3298     /* remove c from the set of ones not mentioned in CaseFolding.txt */
   3299     uset_remove(pData->notSeen, c);
   3300 }
   3301 
   3302 static void
   3303 TestCaseFolding() {
   3304     CaseFoldingData data={ NULL };
   3305     char *fields[3][2];
   3306     UErrorCode errorCode;
   3307 
   3308     static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
   3309 
   3310     errorCode=U_ZERO_ERROR;
   3311     /* test BMP & plane 1 - nothing interesting above */
   3312     data.notSeen=uset_open(0, 0x1ffff);
   3313     data.prevFullLength=1; /* length of full case folding of U+0000 */
   3314 
   3315     parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
   3316     if(U_SUCCESS(errorCode)) {
   3317         int32_t i, start, end;
   3318 
   3319         /* add a pseudo-last line to finish testing of the actual last one */
   3320         fields[0][0]=lastLine;
   3321         fields[0][1]=lastLine+6;
   3322         fields[1][0]=lastLine+7;
   3323         fields[1][1]=lastLine+9;
   3324         fields[2][0]=lastLine+10;
   3325         fields[2][1]=lastLine+17;
   3326         caseFoldingLineFn(&data, fields, 3, &errorCode);
   3327 
   3328         /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
   3329         for(i=0;
   3330             0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
   3331                 U_SUCCESS(errorCode);
   3332             ++i
   3333         ) {
   3334             do {
   3335                 testFoldToSelf(start, CF_ALL);
   3336             } while(++start<=end);
   3337         }
   3338     }
   3339 
   3340     uset_close(data.notSeen);
   3341 }
   3342