Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2010, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /*******************************************************************************
      7 *
      8 * File CUCDTST.C
      9 *
     10 * Modification History:
     11 *        Name                     Description
     12 *     Madhu Katragadda            Ported for C API, added tests for string functions
     13 ********************************************************************************
     14 */
     15 
     16 #include <string.h>
     17 #include <math.h>
     18 #include <stdlib.h>
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/uchar.h"
     22 #include "unicode/putil.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/uloc.h"
     25 #include "unicode/unorm2.h"
     26 
     27 #include "cintltst.h"
     28 #include "putilimp.h"
     29 #include "uparse.h"
     30 #include "ucase.h"
     31 #include "ubidi_props.h"
     32 #include "uprops.h"
     33 #include "uset_imp.h"
     34 #include "usc_impl.h"
     35 #include "udatamem.h" /* for testing ucase_openBinary() */
     36 #include "cucdapi.h"
     37 
     38 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     39 
     40 /* prototypes --------------------------------------------------------------- */
     41 
/*
 * Forward declarations of the file-local test functions that
 * addUnicodeTest() registers with the test framework.
 * addUnicodeTest() also registers TestBinaryValues, TestUScriptCodeAPI,
 * TestHasScript and TestGetScriptExtensions, which are not declared here;
 * presumably they come from "cucdapi.h" -- TODO confirm.
 */
static void TestUpperLower(void);
static void TestLetterNumber(void);
static void TestMisc(void);
static void TestPOSIX(void);
static void TestControlPrint(void);
static void TestIdentifier(void);
static void TestUnicodeData(void);
static void TestCodeUnit(void);
static void TestCodePoint(void);
static void TestCharLength(void);
static void TestCharNames(void);
static void TestMirroring(void);
static void TestUScriptRunAPI(void);
static void TestAdditionalProperties(void);
static void TestNumericProperties(void);
static void TestPropertyNames(void);
static void TestPropertyValues(void);
static void TestConsistency(void);
static void TestUCase(void);
static void TestUBiDiProps(void);
static void TestCaseFolding(void);

/* internal methods used */
/*
 * NOTE(review): both helpers are defined outside this section of the file.
 * Judging by their names they presumably translate a general-category tag
 * and a bidi-class name into numeric values -- confirm against the definitions.
 */
static int32_t MakeProp(char* str);
static int32_t MakeDir(char* str);
     67 
     68 /* helpers ------------------------------------------------------------------ */
     69 
     70 static void
     71 parseUCDFile(const char *filename,
     72              char *fields[][2], int32_t fieldCount,
     73              UParseLineFn *lineFn, void *context,
     74              UErrorCode *pErrorCode) {
     75     char path[256];
     76     char backupPath[256];
     77 
     78     if(U_FAILURE(*pErrorCode)) {
     79         return;
     80     }
     81 
     82     /* Look inside ICU_DATA first */
     83     strcpy(path, u_getDataDirectory());
     84     strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
     85     strcat(path, filename);
     86 
     87     /* As a fallback, try to guess where the source data was located
     88      *    at the time ICU was built, and look there.
     89      */
     90     strcpy(backupPath, ctest_dataSrcDir());
     91     strcat(backupPath, U_FILE_SEP_STRING);
     92     strcat(backupPath, "unidata" U_FILE_SEP_STRING);
     93     strcat(backupPath, filename);
     94 
     95     u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
     96     if(*pErrorCode==U_FILE_ACCESS_ERROR) {
     97         *pErrorCode=U_ZERO_ERROR;
     98         u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
     99     }
    100     if(U_FAILURE(*pErrorCode)) {
    101         log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
    102     }
    103 }
    104 
    105 /* test data ---------------------------------------------------------------- */
    106 
/*
 * NOTE(review): not referenced in the visible part of the file; presumably the
 * last code point covered by the parsed data file -- confirm against its users.
 */
static const UChar  LAST_CHAR_CODE_IN_FILE = 0xFFFD;
/*
 * Two-letter general category abbreviations, concatenated without separators.
 * The abbreviation at character offset 2*k corresponds to tagValues[k];
 * both tables list the same 30 categories in the same order.
 */
static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
/* General category constants, parallel to the abbreviations in tagStrings. */
static const int32_t tagValues[] =
    {
    /* Mn */ U_NON_SPACING_MARK,
    /* Mc */ U_COMBINING_SPACING_MARK,
    /* Me */ U_ENCLOSING_MARK,
    /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    /* Nl */ U_LETTER_NUMBER,
    /* No */ U_OTHER_NUMBER,
    /* Zs */ U_SPACE_SEPARATOR,
    /* Zl */ U_LINE_SEPARATOR,
    /* Zp */ U_PARAGRAPH_SEPARATOR,
    /* Cc */ U_CONTROL_CHAR,
    /* Cf */ U_FORMAT_CHAR,
    /* Cs */ U_SURROGATE,
    /* Co */ U_PRIVATE_USE_CHAR,
    /* Cn */ U_UNASSIGNED,
    /* Lu */ U_UPPERCASE_LETTER,
    /* Ll */ U_LOWERCASE_LETTER,
    /* Lt */ U_TITLECASE_LETTER,
    /* Lm */ U_MODIFIER_LETTER,
    /* Lo */ U_OTHER_LETTER,
    /* Pc */ U_CONNECTOR_PUNCTUATION,
    /* Pd */ U_DASH_PUNCTUATION,
    /* Ps */ U_START_PUNCTUATION,
    /* Pe */ U_END_PUNCTUATION,
    /* Po */ U_OTHER_PUNCTUATION,
    /* Sm */ U_MATH_SYMBOL,
    /* Sc */ U_CURRENCY_SYMBOL,
    /* Sk */ U_MODIFIER_SYMBOL,
    /* So */ U_OTHER_SYMBOL,
    /* Pi */ U_INITIAL_PUNCTUATION,
    /* Pf */ U_FINAL_PUNCTUATION
    };
    142 
/*
 * Short names of 19 bidirectional classes.
 * Each row holds up to four characters plus the terminating NUL; the longest
 * names here have three characters.
 * NOTE(review): the row index presumably equals the corresponding
 * UCharDirection value and is looked up by MakeDir() -- confirm.
 */
static const char dirStrings[][5] = {
    "L",
    "R",
    "EN",
    "ES",
    "ET",
    "AN",
    "CS",
    "B",
    "S",
    "WS",
    "ON",
    "LRE",
    "LRO",
    "AL",
    "RLE",
    "RLO",
    "PDF",
    "NSM",
    "BN"
};
    164 
    165 void addUnicodeTest(TestNode** root);
    166 
    167 void addUnicodeTest(TestNode** root)
    168 {
    169     addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
    170     addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
    171     addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
    172     addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
    173     addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
    174     addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
    175     addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
    176     addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
    177     addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
    178     addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
    179     addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
    180     addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
    181     addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
    182     addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
    183     addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
    184     addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
    185     addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
    186     addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
    187     addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
    188     addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
    189     addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
    190     addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
    191     addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
    192     addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
    193     addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
    194 }
    195 
    196 /*==================================================== */
    197 /* test u_toupper() and u_tolower()                    */
    198 /*==================================================== */
    199 static void TestUpperLower()
    200 {
    201     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
    202     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
    203     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
    204     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
    205     int32_t i;
    206 
    207     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
    208     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
    209 
    210 /*
    211 Checks LetterLike Symbols which were previously a source of confusion
    212 [Bertrand A. D. 02/04/98]
    213 */
    214     for (i=0x2100;i<0x2138;i++)
    215     {
    216         /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
    217         if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
    218         {
    219             if (i != (int)u_tolower(i)) /* itself */
    220                 log_err("Failed case conversion with itself: U+%04x\n", i);
    221             if (i != (int)u_toupper(i))
    222                 log_err("Failed case conversion with itself: U+%04x\n", i);
    223         }
    224     }
    225 
    226     for(i=0; i < u_strlen(upper); i++){
    227         if(u_tolower(upper[i]) != lower[i]){
    228             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
    229         }
    230     }
    231 
    232     log_verbose("testing upper lower\n");
    233     for (i = 0; i < 21; i++) {
    234 
    235         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
    236         {
    237             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
    238         }
    239         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
    240          {
    241             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
    242         }
    243         else if (upperTest[i] != u_tolower(lowerTest[i]))
    244         {
    245             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
    246         }
    247         else if (lowerTest[i] != u_toupper(upperTest[i]))
    248          {
    249             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
    250         }
    251         else if (upperTest[i] != u_tolower(upperTest[i]))
    252         {
    253             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
    254         }
    255         else if (lowerTest[i] != u_toupper(lowerTest[i]))
    256         {
    257             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
    258         }
    259     }
    260     log_verbose("done testing upper lower\n");
    261 
    262     log_verbose("testing u_istitle\n");
    263     {
    264         static const UChar expected[] = {
    265             0x1F88,
    266             0x1F89,
    267             0x1F8A,
    268             0x1F8B,
    269             0x1F8C,
    270             0x1F8D,
    271             0x1F8E,
    272             0x1F8F,
    273             0x1F88,
    274             0x1F89,
    275             0x1F8A,
    276             0x1F8B,
    277             0x1F8C,
    278             0x1F8D,
    279             0x1F8E,
    280             0x1F8F,
    281             0x1F98,
    282             0x1F99,
    283             0x1F9A,
    284             0x1F9B,
    285             0x1F9C,
    286             0x1F9D,
    287             0x1F9E,
    288             0x1F9F,
    289             0x1F98,
    290             0x1F99,
    291             0x1F9A,
    292             0x1F9B,
    293             0x1F9C,
    294             0x1F9D,
    295             0x1F9E,
    296             0x1F9F,
    297             0x1FA8,
    298             0x1FA9,
    299             0x1FAA,
    300             0x1FAB,
    301             0x1FAC,
    302             0x1FAD,
    303             0x1FAE,
    304             0x1FAF,
    305             0x1FA8,
    306             0x1FA9,
    307             0x1FAA,
    308             0x1FAB,
    309             0x1FAC,
    310             0x1FAD,
    311             0x1FAE,
    312             0x1FAF,
    313             0x1FBC,
    314             0x1FBC,
    315             0x1FCC,
    316             0x1FCC,
    317             0x1FFC,
    318             0x1FFC,
    319         };
    320         int32_t num = sizeof(expected)/sizeof(expected[0]);
    321         for(i=0; i<num; i++){
    322             if(!u_istitle(expected[i])){
    323                 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
    324             }
    325         }
    326 
    327     }
    328 }
    329 
    330 /* compare two sets and verify that their difference or intersection is empty */
    331 static UBool
    332 showADiffB(const USet *a, const USet *b,
    333            const char *a_name, const char *b_name,
    334            UBool expect, UBool diffIsError) {
    335     USet *aa;
    336     int32_t i, start, end, length;
    337     UErrorCode errorCode;
    338 
    339     /*
    340      * expect:
    341      * TRUE  -> a-b should be empty, that is, b should contain all of a
    342      * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
    343      */
    344     if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
    345         return TRUE;
    346     }
    347 
    348     /* clone a to aa because a is const */
    349     aa=uset_open(1, 0);
    350     if(aa==NULL) {
    351         /* unusual problem - out of memory? */
    352         return FALSE;
    353     }
    354     uset_addAll(aa, a);
    355 
    356     /* compute the set in question */
    357     if(expect) {
    358         /* a-b */
    359         uset_removeAll(aa, b);
    360     } else {
    361         /* a&b */
    362         uset_retainAll(aa, b);
    363     }
    364 
    365     /* aa is not empty because of the initial tests above; show its contents */
    366     errorCode=U_ZERO_ERROR;
    367     i=0;
    368     for(;;) {
    369         length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
    370         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
    371             break; /* done */
    372         }
    373         if(U_FAILURE(errorCode)) {
    374             log_err("error comparing %s with %s at difference item %d: %s\n",
    375                 a_name, b_name, i, u_errorName(errorCode));
    376             break;
    377         }
    378         if(length!=0) {
    379             break; /* done with code points, got a string or -1 */
    380         }
    381 
    382         if(diffIsError) {
    383             if(expect) {
    384                 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
    385             } else {
    386                 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
    387             }
    388         } else {
    389             if(expect) {
    390                 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
    391             } else {
    392                 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
    393             }
    394         }
    395 
    396         ++i;
    397     }
    398 
    399     uset_close(aa);
    400     return FALSE;
    401 }
    402 
    403 static UBool
    404 showAMinusB(const USet *a, const USet *b,
    405             const char *a_name, const char *b_name,
    406             UBool diffIsError) {
    407     return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
    408 }
    409 
    410 static UBool
    411 showAIntersectB(const USet *a, const USet *b,
    412                 const char *a_name, const char *b_name,
    413                 UBool diffIsError) {
    414     return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
    415 }
    416 
    417 static UBool
    418 compareUSets(const USet *a, const USet *b,
    419              const char *a_name, const char *b_name,
    420              UBool diffIsError) {
    421     /*
    422      * Use an arithmetic & not a logical && so that both branches
    423      * are always taken and all differences are shown.
    424      */
    425     return
    426         showAMinusB(a, b, a_name, b_name, diffIsError) &
    427         showAMinusB(b, a, b_name, a_name, diffIsError);
    428 }
    429 
    430 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
    431 static void TestLetterNumber()
    432 {
    433     UChar i = 0x0000;
    434 
    435     log_verbose("Testing for isalpha\n");
    436     for (i = 0x0041; i < 0x005B; i++) {
    437         if (!u_isalpha(i))
    438         {
    439             log_err("Failed isLetter test at  %.4X\n", i);
    440         }
    441     }
    442     for (i = 0x0660; i < 0x066A; i++) {
    443         if (u_isalpha(i))
    444         {
    445             log_err("Failed isLetter test with numbers at %.4X\n", i);
    446         }
    447     }
    448 
    449     log_verbose("Testing for isdigit\n");
    450     for (i = 0x0660; i < 0x066A; i++) {
    451         if (!u_isdigit(i))
    452         {
    453             log_verbose("Failed isNumber test at %.4X\n", i);
    454         }
    455     }
    456 
    457     log_verbose("Testing for isalnum\n");
    458     for (i = 0x0041; i < 0x005B; i++) {
    459         if (!u_isalnum(i))
    460         {
    461             log_err("Failed isAlNum test at  %.4X\n", i);
    462         }
    463     }
    464     for (i = 0x0660; i < 0x066A; i++) {
    465         if (!u_isalnum(i))
    466         {
    467             log_err("Failed isAlNum test at  %.4X\n", i);
    468         }
    469     }
    470 
    471     {
    472         /*
    473          * The following checks work only starting from Unicode 4.0.
    474          * Check the version number here.
    475          */
    476         static UVersionInfo u401={ 4, 0, 1, 0 };
    477         UVersionInfo version;
    478         u_getUnicodeVersion(version);
    479         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
    480             return;
    481         }
    482     }
    483 
    484     {
    485         /*
    486          * Sanity check:
    487          * Verify that exactly the digit characters have decimal digit values.
    488          * This assumption is used in the implementation of u_digit()
    489          * (which checks nt=de)
    490          * compared with the parallel java.lang.Character.digit()
    491          * (which checks Nd).
    492          *
    493          * This was not true in Unicode 3.2 and earlier.
    494          * Unicode 4.0 fixed discrepancies.
    495          * Unicode 4.0.1 re-introduced problems in this area due to an
    496          * unintentionally incomplete last-minute change.
    497          */
    498         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
    499         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
    500 
    501         USet *digits, *decimalValues;
    502         UErrorCode errorCode;
    503 
    504         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
    505         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
    506         errorCode=U_ZERO_ERROR;
    507         digits=uset_openPattern(digitsPattern, 6, &errorCode);
    508         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
    509 
    510         if(U_SUCCESS(errorCode)) {
    511             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
    512         }
    513 
    514         uset_close(digits);
    515         uset_close(decimalValues);
    516     }
    517 }
    518 
    519 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
    520                                 const UChar32 *sampleChars, int32_t sampleCharsLength,
    521                                 UBool expected) {
    522     int32_t i;
    523     for (i = 0; i < sampleCharsLength; ++i) {
    524         UBool result = propFn(sampleChars[i]);
    525         if (result != expected) {
    526             log_err("error: character property function %s(U+%04x)=%d is wrong\n",
    527                     propName, sampleChars[i], result);
    528         }
    529     }
    530 }
    531 
    532 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
    533 static void TestMisc()
    534 {
    535     static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
    536     static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
    537     static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
    538     static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
    539     static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
    540     static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
    541 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
    542     static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
    543     static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
    544     static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
    545     static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
    546 
    547     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
    548 
    549     uint32_t mask;
    550 
    551     int32_t i;
    552     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
    553     UVersionInfo realVersion;
    554 
    555     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
    556 
    557     testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
    558     testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
    559 
    560     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
    561                         sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
    562     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
    563                         sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
    564 
    565     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
    566                         sampleWhiteSpaces, LENGTHOF(sampleWhiteSpaces), TRUE);
    567     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
    568                         sampleNonWhiteSpaces, LENGTHOF(sampleNonWhiteSpaces), FALSE);
    569 
    570     testSampleCharProps(u_isdefined, "u_isdefined",
    571                         sampleDefined, LENGTHOF(sampleDefined), TRUE);
    572     testSampleCharProps(u_isdefined, "u_isdefined",
    573                         sampleUndefined, LENGTHOF(sampleUndefined), FALSE);
    574 
    575     testSampleCharProps(u_isbase, "u_isbase", sampleBase, LENGTHOF(sampleBase), TRUE);
    576     testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, LENGTHOF(sampleNonBase), FALSE);
    577 
    578     testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, LENGTHOF(sampleDigits), TRUE);
    579     testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, LENGTHOF(sampleNonDigits), FALSE);
    580 
    581     for (i = 0; i < LENGTHOF(sampleDigits); i++) {
    582         if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
    583             log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
    584                     sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
    585         }
    586     }
    587 
    588     /* Tests the ICU version #*/
    589     u_getVersion(realVersion);
    590     u_versionToString(realVersion, icuVersion);
    591     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
    592     {
    593         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
    594     }
    595 #if defined(ICU_VERSION)
    596     /* test only happens where we have configure.in with VERSION - sanity check. */
    597     if(strcmp(U_ICU_VERSION, ICU_VERSION))
    598     {
    599         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
    600     }
    601 #endif
    602 
    603     /* test U_GC_... */
    604     if(
    605         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
    606         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
    607         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
    608         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
    609         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
    610         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
    611     ) {
    612         log_err("error: U_GET_GC_MASK does not work properly\n");
    613     }
    614 
    615     mask=0;
    616     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
    617 
    618     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
    619     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
    620     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
    621     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
    622     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
    623 
    624     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
    625     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
    626     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
    627 
    628     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
    629     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
    630     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
    631 
    632     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
    633     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
    634     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
    635 
    636     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
    637     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
    638     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
    639     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
    640 
    641     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
    642     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
    643     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
    644     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
    645     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
    646 
    647     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
    648     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
    649     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
    650     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
    651 
    652     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
    653     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
    654 
    655     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
    656         log_err("error: problems with U_GC_XX_MASK constants\n");
    657     }
    658 
    659     mask=0;
    660     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
    661     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
    662     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
    663     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
    664     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
    665     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
    666     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
    667 
    668     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
    669         log_err("error: problems with U_GC_Y_MASK constants\n");
    670     }
    671     {
    672         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
    673         for(i=0; i<10; i++){
    674             if(digit[i]!=u_forDigit(i,10)){
    675                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
    676             }
    677         }
    678     }
    679 
    680     /* test u_digit() */
    681     {
    682         static const struct {
    683             UChar32 c;
    684             int8_t radix, value;
    685         } data[]={
    686             /* base 16 */
    687             { 0x0031, 16, 1 },
    688             { 0x0038, 16, 8 },
    689             { 0x0043, 16, 12 },
    690             { 0x0066, 16, 15 },
    691             { 0x00e4, 16, -1 },
    692             { 0x0662, 16, 2 },
    693             { 0x06f5, 16, 5 },
    694             { 0xff13, 16, 3 },
    695             { 0xff41, 16, 10 },
    696 
    697             /* base 8 */
    698             { 0x0031, 8, 1 },
    699             { 0x0038, 8, -1 },
    700             { 0x0043, 8, -1 },
    701             { 0x0066, 8, -1 },
    702             { 0x00e4, 8, -1 },
    703             { 0x0662, 8, 2 },
    704             { 0x06f5, 8, 5 },
    705             { 0xff13, 8, 3 },
    706             { 0xff41, 8, -1 },
    707 
    708             /* base 36 */
    709             { 0x5a, 36, 35 },
    710             { 0x7a, 36, 35 },
    711             { 0xff3a, 36, 35 },
    712             { 0xff5a, 36, 35 },
    713 
    714             /* wrong radix values */
    715             { 0x0031, 1, -1 },
    716             { 0xff3a, 37, -1 }
    717         };
    718 
    719         for(i=0; i<LENGTHOF(data); ++i) {
    720             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
    721                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
    722                         data[i].c,
    723                         data[i].radix,
    724                         u_digit(data[i].c, data[i].radix),
    725                         data[i].value);
    726             }
    727         }
    728     }
    729 }
    730 
    731 /* test C/POSIX-style functions --------------------------------------------- */
    732 
    733 /* bit flags */
    734 #define ISAL     1
    735 #define ISLO     2
    736 #define ISUP     4
    737 
    738 #define ISDI     8
    739 #define ISXD  0x10
    740 
    741 #define ISAN  0x20
    742 
    743 #define ISPU  0x40
    744 #define ISGR  0x80
    745 #define ISPR 0x100
    746 
    747 #define ISSP 0x200
    748 #define ISBL 0x400
    749 #define ISCN 0x800
    750 
    751 /* C/POSIX-style functions, in the same order as the bit flags */
/* signature shared by the character classification functions listed in posixClasses */
typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);

/*
 * The classification functions together with the names used for them,
 * in the same order as the bit flags: entry k belongs to the flag with
 * the value 1<<k (isalpha -> ISAL, ..., iscntrl -> ISCN).
 */
static const struct {
    IsPOSIXClass *fn;
    const char *name;
} posixClasses[]={
    { u_isalpha, "isalpha" },
    { u_islower, "islower" },
    { u_isupper, "isupper" },
    { u_isdigit, "isdigit" },
    { u_isxdigit, "isxdigit" },
    { u_isalnum, "isalnum" },
    { u_ispunct, "ispunct" },
    { u_isgraph, "isgraph" },
    { u_isprint, "isprint" },
    { u_isspace, "isspace" },
    { u_isblank, "isblank" },
    { u_iscntrl, "iscntrl" }
};
    771 
    772 static const struct {
    773     UChar32 c;
    774     uint32_t posixResults;
    775 } posixData[]={
    776     { 0x0008,                                                        ISCN },    /* backspace */
    777     { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    778     { 0x000a,                                              ISSP|     ISCN },    /* LF */
    779     { 0x000c,                                              ISSP|     ISCN },    /* FF */
    780     { 0x000d,                                              ISSP|     ISCN },    /* CR */
    781     { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    782     { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    783     { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    784     { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    785     { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    786     { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    787     { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    788     { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    789     { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    790     { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    791     { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    792     { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    793     { 0x0600,                                                        ISCN },    /* arabic number sign */
    794     { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    795     { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    796     { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    797     { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    798     { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    799     { 0x200b,                                                        ISCN },    /* ZWSP */
    800   /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    801     { 0x200e,                                                        ISCN },    /* LRM */
    802     { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    803     { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    804     { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    805     { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    806     { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    807     { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    808     { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    809     { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
    810 };
    811 
    812 static void
    813 TestPOSIX() {
    814     uint32_t mask;
    815     int32_t cl, i;
    816     UBool expect;
    817 
    818     mask=1;
    819     for(cl=0; cl<12; ++cl) {
    820         for(i=0; i<LENGTHOF(posixData); ++i) {
    821             expect=(UBool)((posixData[i].posixResults&mask)!=0);
    822             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
    823                 log_err("u_%s(U+%04x)=%s is wrong\n",
    824                     posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
    825             }
    826         }
    827         mask<<=1;
    828     }
    829 }
    830 
    831 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
    832 static void TestControlPrint()
    833 {
    834     const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
    835     const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
    836     const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
    837     const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
    838     UChar32 c;
    839 
    840     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, LENGTHOF(sampleControl), TRUE);
    841     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, LENGTHOF(sampleNonControl), FALSE);
    842 
    843     testSampleCharProps(u_isprint, "u_isprint",
    844                         samplePrintable, LENGTHOF(samplePrintable), TRUE);
    845     testSampleCharProps(u_isprint, "u_isprint",
    846                         sampleNonPrintable, LENGTHOF(sampleNonPrintable), FALSE);
    847 
    848     /* test all ISO 8 controls */
    849     for(c=0; c<=0x9f; ++c) {
    850         if(c==0x20) {
    851             /* skip ASCII graphic characters and continue with DEL */
    852             c=0x7f;
    853         }
    854         if(!u_iscntrl(c)) {
    855             log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
    856         }
    857         if(!u_isISOControl(c)) {
    858             log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
    859         }
    860         if(u_isprint(c)) {
    861             log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
    862         }
    863     }
    864 
    865     /* test all Latin-1 graphic characters */
    866     for(c=0x20; c<=0xff; ++c) {
    867         if(c==0x7f) {
    868             c=0xa0;
    869         } else if(c==0xad) {
    870             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
    871             ++c;
    872         }
    873         if(!u_isprint(c)) {
    874             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
    875         }
    876     }
    877 }
    878 
    879 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
    880 static void TestIdentifier()
    881 {
    882     const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
    883     const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
    884     const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
    885     const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
    886     const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
    887     const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
    888     const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
    889     const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
    890     const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
    891     const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
    892 
    893     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
    894                         sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
    895     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
    896                         sampleNonJavaIDStart, LENGTHOF(sampleNonJavaIDStart), FALSE);
    897 
    898     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    899                         sampleJavaIDPart, LENGTHOF(sampleJavaIDPart), TRUE);
    900     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    901                         sampleNonJavaIDPart, LENGTHOF(sampleNonJavaIDPart), FALSE);
    902 
    903     /* IDPart should imply IDStart */
    904     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    905                         sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
    906 
    907     testSampleCharProps(u_isIDStart, "u_isIDStart",
    908                         sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
    909     testSampleCharProps(u_isIDStart, "u_isIDStart",
    910                         sampleNonUnicodeIDStart, LENGTHOF(sampleNonUnicodeIDStart), FALSE);
    911 
    912     testSampleCharProps(u_isIDPart, "u_isIDPart",
    913                         sampleUnicodeIDPart, LENGTHOF(sampleUnicodeIDPart), TRUE);
    914     testSampleCharProps(u_isIDPart, "u_isIDPart",
    915                         sampleNonUnicodeIDPart, LENGTHOF(sampleNonUnicodeIDPart), FALSE);
    916 
    917     /* IDPart should imply IDStart */
    918     testSampleCharProps(u_isIDPart, "u_isIDPart",
    919                         sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
    920 
    921     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
    922                         sampleIDIgnore, LENGTHOF(sampleIDIgnore), TRUE);
    923     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
    924                         sampleNonIDIgnore, LENGTHOF(sampleNonIDIgnore), FALSE);
    925 }
    926 
    927 /* for each line of UnicodeData.txt, check some of the properties */
    928 /*
    929  * ### TODO
    930  * This test fails incorrectly if the First or Last code point of a repetitive area
    931  * is overridden, which is allowed and is encouraged for the PUAs.
    932  * Currently, this means that both area First/Last and override lines are
    933  * tested against the properties from the API,
    934  * and the area boundary will not match and cause an error.
    935  *
    936  * This function should detect area boundaries and skip them for the test of individual
    937  * code points' properties.
    938  * Then it should check that the areas contain all the same properties except where overridden.
    939  * For this, it would have had to set a flag for which code points were listed explicitly.
    940  */
    941 static void U_CALLCONV
    942 unicodeDataLineFn(void *context,
    943                   char *fields[][2], int32_t fieldCount,
    944                   UErrorCode *pErrorCode)
    945 {
    946     char buffer[100];
    947     char *end;
    948     uint32_t value;
    949     UChar32 c;
    950     int32_t i;
    951     int8_t type;
    952 
    953     /* get the character code, field 0 */
    954     c=strtoul(fields[0][0], &end, 16);
    955     if(end<=fields[0][0] || end!=fields[0][1]) {
    956         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
    957         return;
    958     }
    959     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
    960         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
    961         return;
    962     }
    963 
    964     /* get general category, field 2 */
    965     *fields[2][1]=0;
    966     type = (int8_t)tagValues[MakeProp(fields[2][0])];
    967     if(u_charType(c)!=type) {
    968         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
    969     }
    970     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
    971         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
    972     }
    973 
    974     /* get canonical combining class, field 3 */
    975     value=strtoul(fields[3][0], &end, 10);
    976     if(end<=fields[3][0] || end!=fields[3][1]) {
    977         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
    978         return;
    979     }
    980     if(value>255) {
    981         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
    982         return;
    983     }
    984 #if !UCONFIG_NO_NORMALIZATION
    985     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
    986         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
    987     }
    988 #endif
    989 
    990     /* get BiDi category, field 4 */
    991     *fields[4][1]=0;
    992     i=MakeDir(fields[4][0]);
    993     if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
    994         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
    995     }
    996 
    997     /* get ISO Comment, field 11 */
    998     *fields[11][1]=0;
    999     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
   1000     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
   1001         log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
   1002             c, u_errorName(*pErrorCode),
   1003             U_FAILURE(*pErrorCode) ? buffer : "[error]",
   1004             fields[11][0]);
   1005     }
   1006 
   1007     /* get uppercase mapping, field 12 */
   1008     if(fields[12][0]!=fields[12][1]) {
   1009         value=strtoul(fields[12][0], &end, 16);
   1010         if(end!=fields[12][1]) {
   1011             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
   1012             return;
   1013         }
   1014         if((UChar32)value!=u_toupper(c)) {
   1015             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
   1016         }
   1017     } else {
   1018         /* no case mapping: the API must map the code point to itself */
   1019         if(c!=u_toupper(c)) {
   1020             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
   1021         }
   1022     }
   1023 
   1024     /* get lowercase mapping, field 13 */
   1025     if(fields[13][0]!=fields[13][1]) {
   1026         value=strtoul(fields[13][0], &end, 16);
   1027         if(end!=fields[13][1]) {
   1028             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
   1029             return;
   1030         }
   1031         if((UChar32)value!=u_tolower(c)) {
   1032             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
   1033         }
   1034     } else {
   1035         /* no case mapping: the API must map the code point to itself */
   1036         if(c!=u_tolower(c)) {
   1037             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
   1038         }
   1039     }
   1040 
   1041     /* get titlecase mapping, field 14 */
   1042     if(fields[14][0]!=fields[14][1]) {
   1043         value=strtoul(fields[14][0], &end, 16);
   1044         if(end!=fields[14][1]) {
   1045             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
   1046             return;
   1047         }
   1048         if((UChar32)value!=u_totitle(c)) {
   1049             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
   1050         }
   1051     } else {
   1052         /* no case mapping: the API must map the code point to itself */
   1053         if(c!=u_totitle(c)) {
   1054             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
   1055         }
   1056     }
   1057 }
   1058 
   1059 static UBool U_CALLCONV
   1060 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
   1061     static const UChar32 test[][2]={
   1062         {0x41, U_UPPERCASE_LETTER},
   1063         {0x308, U_NON_SPACING_MARK},
   1064         {0xfffe, U_GENERAL_OTHER_TYPES},
   1065         {0xe0041, U_FORMAT_CHAR},
   1066         {0xeffff, U_UNASSIGNED}
   1067     };
   1068 
   1069     int32_t i, count;
   1070 
   1071     if(0!=strcmp((const char *)context, "a1")) {
   1072         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
   1073         return FALSE;
   1074     }
   1075 
   1076     count=LENGTHOF(test);
   1077     for(i=0; i<count; ++i) {
   1078         if(start<=test[i][0] && test[i][0]<limit) {
   1079             if(type!=(UCharCategory)test[i][1]) {
   1080                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
   1081                         start, limit, (long)type, test[i][0], test[i][1]);
   1082             }
   1083             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
   1084             return i==(count-1) ? FALSE : TRUE;
   1085         }
   1086     }
   1087 
   1088     if(start>test[count-1][0]) {
   1089         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
   1090                 start, limit, (long)type);
   1091         return FALSE;
   1092     }
   1093 
   1094     return TRUE;
   1095 }
   1096 
   1097 static UBool U_CALLCONV
   1098 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
   1099     /* default Bidi classes for unassigned code points */
   1100     static const int32_t defaultBidi[][2]={ /* { limit, class } */
   1101         { 0x0590, U_LEFT_TO_RIGHT },
   1102         { 0x0600, U_RIGHT_TO_LEFT },
   1103         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
   1104         { 0x0900, U_RIGHT_TO_LEFT },
   1105         { 0xFB1D, U_LEFT_TO_RIGHT },
   1106         { 0xFB50, U_RIGHT_TO_LEFT },
   1107         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
   1108         { 0xFE70, U_LEFT_TO_RIGHT },
   1109         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
   1110         { 0x10800, U_LEFT_TO_RIGHT },
   1111         { 0x11000, U_RIGHT_TO_LEFT },
   1112         { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
   1113         { 0x1F000, U_RIGHT_TO_LEFT },
   1114         { 0x110000, U_LEFT_TO_RIGHT }
   1115     };
   1116 
   1117     UChar32 c;
   1118     int32_t i;
   1119     UCharDirection shouldBeDir;
   1120 
   1121     /*
   1122      * LineBreak.txt specifies:
   1123      *   #  - Assigned characters that are not listed explicitly are given the value
   1124      *   #    "AL".
   1125      *   #  - Unassigned characters are given the value "XX".
   1126      *
   1127      * PUA characters are listed explicitly with "XX".
   1128      * Verify that no assigned character has "XX".
   1129      */
   1130     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
   1131         c=start;
   1132         while(c<limit) {
   1133             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
   1134                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
   1135             }
   1136             ++c;
   1137         }
   1138     }
   1139 
   1140     /*
   1141      * Verify default Bidi classes.
   1142      * For recent Unicode versions, see UCD.html.
   1143      *
   1144      * For older Unicode versions:
   1145      * See table 3-7 "Bidirectional Character Types" in UAX #9.
   1146      * http://www.unicode.org/reports/tr9/
   1147      *
   1148      * See also DerivedBidiClass.txt for Cn code points!
   1149      *
   1150      * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
   1151      * changed some default values.
   1152      * In particular, non-characters and unassigned Default Ignorable Code Points
   1153      * change from L to BN.
   1154      *
   1155      * UCD.html version 4.0.1 does not yet reflect these changes.
   1156      */
   1157     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
   1158         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
   1159         c=start;
   1160         for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
   1161             if((int32_t)c<defaultBidi[i][0]) {
   1162                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
   1163                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
   1164                         shouldBeDir=U_BOUNDARY_NEUTRAL;
   1165                     } else {
   1166                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
   1167                     }
   1168 
   1169                     if( u_charDirection(c)!=shouldBeDir ||
   1170                         u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
   1171                     ) {
   1172                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
   1173                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
   1174                     }
   1175                     ++c;
   1176                 }
   1177             }
   1178         }
   1179     }
   1180 
   1181     return TRUE;
   1182 }
   1183 
   1184 /* tests for several properties */
   1185 static void TestUnicodeData()
   1186 {
   1187     UVersionInfo expectVersionArray;
   1188     UVersionInfo versionArray;
   1189     char *fields[15][2];
   1190     UErrorCode errorCode;
   1191     UChar32 c;
   1192     int8_t type;
   1193 
   1194     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
   1195     u_getUnicodeVersion(versionArray);
   1196     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
   1197     {
   1198         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
   1199         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
   1200     }
   1201 
   1202 #if defined(ICU_UNICODE_VERSION)
   1203     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
   1204     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
   1205     {
   1206          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
   1207     }
   1208 #endif
   1209 
   1210     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
   1211         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
   1212     }
   1213 
   1214     errorCode=U_ZERO_ERROR;
   1215     parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, NULL, &errorCode);
   1216     if(U_FAILURE(errorCode)) {
   1217         return; /* if we couldn't parse UnicodeData.txt, we should return */
   1218     }
   1219 
   1220     /* sanity check on repeated properties */
   1221     for(c=0xfffe; c<=0x10ffff;) {
   1222         type=u_charType(c);
   1223         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1224             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1225         }
   1226         if(type!=U_UNASSIGNED) {
   1227             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
   1228         }
   1229         if((c&0xffff)==0xfffe) {
   1230             ++c;
   1231         } else {
   1232             c+=0xffff;
   1233         }
   1234     }
   1235 
   1236     /* test that PUA is not "unassigned" */
   1237     for(c=0xe000; c<=0x10fffd;) {
   1238         type=u_charType(c);
   1239         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1240             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1241         }
   1242         if(type==U_UNASSIGNED) {
   1243             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
   1244         } else if(type!=U_PRIVATE_USE_CHAR) {
   1245             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
   1246         }
   1247         if(c==0xf8ff) {
   1248             c=0xf0000;
   1249         } else if(c==0xffffd) {
   1250             c=0x100000;
   1251         } else {
   1252             ++c;
   1253         }
   1254     }
   1255 
   1256     /* test u_enumCharTypes() */
   1257     u_enumCharTypes(enumTypeRange, "a1");
   1258 
   1259     /* check default properties */
   1260     u_enumCharTypes(enumDefaultsRange, NULL);
   1261 }
   1262 
   1263 static void TestCodeUnit(){
   1264     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
   1265 
   1266     int32_t i;
   1267 
   1268     for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
   1269         UChar c=codeunit[i];
   1270         if(i<4){
   1271             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
   1272                 log_err("ERROR: U+%04x is a single", c);
   1273             }
   1274 
   1275         }
   1276         if(i >= 4 && i< 8){
   1277             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
   1278                 log_err("ERROR: U+%04x is a first surrogate", c);
   1279             }
   1280         }
   1281         if(i >= 8 && i< 12){
   1282             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
   1283                 log_err("ERROR: U+%04x is a second surrogate", c);
   1284             }
   1285         }
   1286     }
   1287 
   1288 }
   1289 
   1290 static void TestCodePoint(){
   1291     const UChar32 codePoint[]={
   1292         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
   1293         0xd800,
   1294         0xdbff,
   1295         0xdc00,
   1296         0xdfff,
   1297         0xdc04,
   1298         0xd821,
   1299         /*not a surrogate, valid, isUnicodeChar , not Error*/
   1300         0x20ac,
   1301         0xd7ff,
   1302         0xe000,
   1303         0xe123,
   1304         0x0061,
   1305         0xe065,
   1306         0x20402,
   1307         0x24506,
   1308         0x23456,
   1309         0x20402,
   1310         0x10402,
   1311         0x23456,
   1312         /*not a surrogate, not valid, isUnicodeChar, isError */
   1313         0x0015,
   1314         0x009f,
   1315         /*not a surrogate, not valid, not isUnicodeChar, isError */
   1316         0xffff,
   1317         0xfffe,
   1318     };
   1319     int32_t i;
   1320     for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
   1321         UChar32 c=codePoint[i];
   1322         if(i<6){
   1323             if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
   1324                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1325             }
   1326             if(UTF_IS_VALID(c)){
   1327                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1328             }
   1329             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
   1330                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1331             }
   1332             if(UTF_IS_ERROR(c)){
   1333                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1334             }
   1335         }else if(i >=6 && i<18){
   1336             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1337                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1338             }
   1339             if(!UTF_IS_VALID(c)){
   1340                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1341             }
   1342             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
   1343                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1344             }
   1345             if(UTF_IS_ERROR(c)){
   1346                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1347             }
   1348         }else if(i >=18 && i<20){
   1349             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1350                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1351             }
   1352             if(UTF_IS_VALID(c)){
   1353                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1354             }
   1355             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
   1356                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1357             }
   1358             if(!UTF_IS_ERROR(c)){
   1359                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1360             }
   1361         }
   1362         else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
   1363             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1364                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1365             }
   1366             if(UTF_IS_VALID(c)){
   1367                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1368             }
   1369             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
   1370                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1371             }
   1372             if(!UTF_IS_ERROR(c)){
   1373                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1374             }
   1375         }
   1376     }
   1377 
   1378     if(
   1379         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
   1380         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
   1381         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
   1382         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
   1383     ) {
   1384         log_err("error with U_IS_BMP()\n");
   1385     }
   1386 
   1387     if(
   1388         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
   1389         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
   1390         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
   1391         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
   1392     ) {
   1393         log_err("error with U_IS_SUPPLEMENTARY()\n");
   1394     }
   1395 }
   1396 
   1397 static void TestCharLength()
   1398 {
   1399     const int32_t codepoint[]={
   1400         1, 0x0061,
   1401         1, 0xe065,
   1402         1, 0x20ac,
   1403         2, 0x20402,
   1404         2, 0x23456,
   1405         2, 0x24506,
   1406         2, 0x20402,
   1407         2, 0x10402,
   1408         1, 0xd7ff,
   1409         1, 0xe000
   1410     };
   1411 
   1412     int32_t i;
   1413     UBool multiple;
   1414     for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
   1415         UChar32 c=codepoint[i+1];
   1416         if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
   1417             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], UTF_CHAR_LENGTH(c));
   1418         }
   1419         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
   1420         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
   1421             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
   1422         }
   1423     }
   1424 }
   1425 
   1426 /*internal functions ----*/
   1427 static int32_t MakeProp(char* str)
   1428 {
   1429     int32_t result = 0;
   1430     char* matchPosition =0;
   1431 
   1432     matchPosition = strstr(tagStrings, str);
   1433     if (matchPosition == 0)
   1434     {
   1435         log_err("unrecognized type letter ");
   1436         log_err(str);
   1437     }
   1438     else
   1439         result = (int32_t)((matchPosition - tagStrings) / 2);
   1440     return result;
   1441 }
   1442 
   1443 static int32_t MakeDir(char* str)
   1444 {
   1445     int32_t pos = 0;
   1446     for (pos = 0; pos < 19; pos++) {
   1447         if (strcmp(str, dirStrings[pos]) == 0) {
   1448             return pos;
   1449         }
   1450     }
   1451     return -1;
   1452 }
   1453 
   1454 /* test u_charName() -------------------------------------------------------- */
   1455 
   1456 static const struct {
   1457     uint32_t code;
   1458     const char *name, *oldName, *extName, *alias;
   1459 } names[]={
   1460     {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},
   1461     {0x01a2, "LATIN CAPITAL LETTER OI",
   1462              "LATIN CAPITAL LETTER O I",
   1463              "LATIN CAPITAL LETTER OI",
   1464              "LATIN CAPITAL LETTER GHA"},
   1465     {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
   1466              "LATIN SMALL LETTER DOTLESS J BAR HOOK",
   1467              "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
   1468     {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
   1469              "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
   1470              "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
   1471     {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" },
   1472     {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" },
   1473     {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" },
   1474     {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" },
   1475     {0xd800, "", "", "<lead surrogate-D800>" },
   1476     {0xdc00, "", "", "<trail surrogate-DC00>" },
   1477     {0xff08, "FULLWIDTH LEFT PARENTHESIS", "FULLWIDTH OPENING PARENTHESIS", "FULLWIDTH LEFT PARENTHESIS" },
   1478     {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },
   1479     {0xffff, "", "", "<noncharacter-FFFF>" },
   1480     {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
   1481               "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
   1482               "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
   1483     {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" }
   1484 };
   1485 
   1486 static UBool
   1487 enumCharNamesFn(void *context,
   1488                 UChar32 code, UCharNameChoice nameChoice,
   1489                 const char *name, int32_t length) {
   1490     int32_t *pCount=(int32_t *)context;
   1491     const char *expected;
   1492     int i;
   1493 
   1494     if(length<=0 || length!=(int32_t)strlen(name)) {
   1495         /* should not be called with an empty string or invalid length */
   1496         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
   1497         return TRUE;
   1498     }
   1499 
   1500     ++*pCount;
   1501     for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
   1502         if(code==(UChar32)names[i].code) {
   1503             switch (nameChoice) {
   1504                 case U_EXTENDED_CHAR_NAME:
   1505                     if(0!=strcmp(name, names[i].extName)) {
   1506                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
   1507                     }
   1508                     break;
   1509                 case U_UNICODE_CHAR_NAME:
   1510                     if(0!=strcmp(name, names[i].name)) {
   1511                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
   1512                     }
   1513                     break;
   1514                 case U_UNICODE_10_CHAR_NAME:
   1515                     expected=names[i].oldName;
   1516                     if(expected[0]==0 || 0!=strcmp(name, expected)) {
   1517                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
   1518                     }
   1519                     break;
   1520                 case U_CHAR_NAME_ALIAS:
   1521                     expected=names[i].alias;
   1522                     if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
   1523                         log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
   1524                     }
   1525                     break;
   1526                 case U_CHAR_NAME_CHOICE_COUNT:
   1527                     break;
   1528             }
   1529             break;
   1530         }
   1531     }
   1532     return TRUE;
   1533 }
   1534 
/* State carried through enumExtCharNamesFn() across one enumeration. */
struct enumExtCharNamesContext {
    uint32_t length; /* counter handed on to enumCharNamesFn() as its context */
    int32_t last;    /* code point seen by the previous callback; callers start it at -1 */
};
   1539 
   1540 static UBool
   1541 enumExtCharNamesFn(void *context,
   1542                 UChar32 code, UCharNameChoice nameChoice,
   1543                 const char *name, int32_t length) {
   1544     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
   1545 
   1546     if (ecncp->last != (int32_t) code - 1) {
   1547         if (ecncp->last < 0) {
   1548             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
   1549         } else {
   1550             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
   1551         }
   1552     }
   1553     ecncp->last = (int32_t) code;
   1554 
   1555     if (!*name) {
   1556         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
   1557     }
   1558 
   1559     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
   1560 }
   1561 
   1562 /**
   1563  * This can be made more efficient by moving it into putil.c and having
   1564  * it directly access the ebcdic translation tables.
   1565  * TODO: If we get this method in putil.c, then delete it from here.
   1566  */
   1567 static UChar
   1568 u_charToUChar(char c) {
   1569     UChar uc;
   1570     u_charsToUChars(&c, &uc, 1);
   1571     return uc;
   1572 }
   1573 
   1574 static void
   1575 TestCharNames() {
   1576     static char name[80];
   1577     UErrorCode errorCode=U_ZERO_ERROR;
   1578     struct enumExtCharNamesContext extContext;
   1579     const char *expected;
   1580     int32_t length;
   1581     UChar32 c;
   1582     int32_t i;
   1583 
   1584     log_verbose("Testing uprv_getMaxCharNameLength()\n");
   1585     length=uprv_getMaxCharNameLength();
   1586     if(length==0) {
   1587         /* no names data available */
   1588         return;
   1589     }
   1590     if(length<83) { /* Unicode 3.2 max char name length */
   1591         log_err("uprv_getMaxCharNameLength()=%d is too short");
   1592     }
   1593     /* ### TODO same tests for max ISO comment length as for max name length */
   1594 
   1595     log_verbose("Testing u_charName()\n");
   1596     for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
   1597         /* modern Unicode character name */
   1598         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
   1599         if(U_FAILURE(errorCode)) {
   1600             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
   1601             return;
   1602         }
   1603         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
   1604             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
   1605         }
   1606 
   1607         /* find the modern name */
   1608         if (*names[i].name) {
   1609             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
   1610             if(U_FAILURE(errorCode)) {
   1611                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
   1612                 return;
   1613             }
   1614             if(c!=(UChar32)names[i].code) {
   1615                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
   1616             }
   1617         }
   1618 
   1619         /* Unicode 1.0 character name */
   1620         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
   1621         if(U_FAILURE(errorCode)) {
   1622             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
   1623             return;
   1624         }
   1625         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
   1626             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
   1627         }
   1628 
   1629         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
   1630         if(names[i].oldName[0]!=0 /* && length>0 */) {
   1631             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
   1632             if(U_FAILURE(errorCode)) {
   1633                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
   1634                 return;
   1635             }
   1636             if(c!=(UChar32)names[i].code) {
   1637                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
   1638             }
   1639         }
   1640 
   1641         /* Unicode character name alias */
   1642         length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
   1643         if(U_FAILURE(errorCode)) {
   1644             log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
   1645             return;
   1646         }
   1647         expected=names[i].alias;
   1648         if(expected==NULL) {
   1649             expected="";
   1650         }
   1651         if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
   1652             log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
   1653                     names[i].code, name, length, expected);
   1654         }
   1655 
   1656         /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
   1657         if(expected[0]!=0 /* && length>0 */) {
   1658             c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
   1659             if(U_FAILURE(errorCode)) {
   1660                 log_err("u_charFromName(%s - alias) error %s\n",
   1661                         expected, u_errorName(errorCode));
   1662                 return;
   1663             }
   1664             if(c!=(UChar32)names[i].code) {
   1665                 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
   1666                         expected, c, names[i].code);
   1667             }
   1668         }
   1669     }
   1670 
   1671     /* test u_enumCharNames() */
   1672     length=0;
   1673     errorCode=U_ZERO_ERROR;
   1674     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
   1675     if(U_FAILURE(errorCode) || length<94140) {
   1676         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
   1677     }
   1678 
   1679     extContext.length = 0;
   1680     extContext.last = -1;
   1681     errorCode=U_ZERO_ERROR;
   1682     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
   1683     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
   1684         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
   1685     }
   1686 
   1687     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
   1688     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
   1689         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
   1690     }
   1691 
   1692     /* Test getCharNameCharacters */
   1693     if(!getTestOption(QUICK_OPTION)) {
   1694         enum { BUFSIZE = 256 };
   1695         UErrorCode ec = U_ZERO_ERROR;
   1696         char buf[BUFSIZE];
   1697         int32_t maxLength;
   1698         UChar32 cp;
   1699         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
   1700         int32_t l1, l2;
   1701         UBool map[256];
   1702         UBool ok;
   1703 
   1704         USet* set = uset_open(1, 0); /* empty set */
   1705         USet* dumb = uset_open(1, 0); /* empty set */
   1706 
   1707         /*
   1708          * uprv_getCharNameCharacters() will likely return more lowercase
   1709          * letters than actual character names contain because
   1710          * it includes all the characters in lowercased names of
   1711          * general categories, for the full possible set of extended names.
   1712          */
   1713         {
   1714             USetAdder sa={
   1715                 NULL,
   1716                 uset_add,
   1717                 uset_addRange,
   1718                 uset_addString,
   1719                 NULL /* don't need remove() */
   1720             };
   1721             sa.set=set;
   1722             uprv_getCharNameCharacters(&sa);
   1723         }
   1724 
   1725         /* build set the dumb (but sure-fire) way */
   1726         for (i=0; i<256; ++i) {
   1727             map[i] = FALSE;
   1728         }
   1729 
   1730         maxLength=0;
   1731         for (cp=0; cp<0x110000; ++cp) {
   1732             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
   1733                                      buf, BUFSIZE, &ec);
   1734             if (U_FAILURE(ec)) {
   1735                 log_err("FAIL: u_charName failed when it shouldn't\n");
   1736                 uset_close(set);
   1737                 uset_close(dumb);
   1738                 return;
   1739             }
   1740             if(len>maxLength) {
   1741                 maxLength=len;
   1742             }
   1743 
   1744             for (i=0; i<len; ++i) {
   1745                 if (!map[(uint8_t) buf[i]]) {
   1746                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
   1747                     map[(uint8_t) buf[i]] = TRUE;
   1748                 }
   1749             }
   1750 
   1751             /* test for leading/trailing whitespace */
   1752             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
   1753                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
   1754             }
   1755         }
   1756 
   1757         if(map[(uint8_t)'\t']) {
   1758             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
   1759         }
   1760 
   1761         length=uprv_getMaxCharNameLength();
   1762         if(length!=maxLength) {
   1763             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
   1764                     length, maxLength);
   1765         }
   1766 
   1767         /* compare the sets.  Where is my uset_equals?!! */
   1768         ok=TRUE;
   1769         for(i=0; i<256; ++i) {
   1770             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
   1771                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
   1772                     /* ignore lowercase a-z that are in set but not in dumb */
   1773                     ok=TRUE;
   1774                 } else {
   1775                     ok=FALSE;
   1776                     break;
   1777                 }
   1778             }
   1779         }
   1780 
   1781         l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
   1782         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
   1783         if (U_FAILURE(ec)) {
   1784             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
   1785             uset_close(set);
   1786             uset_close(dumb);
   1787             return;
   1788         }
   1789 
   1790         if (l1 >= BUFSIZE) {
   1791             l1 = BUFSIZE-1;
   1792             pat[l1] = 0;
   1793         }
   1794         if (l2 >= BUFSIZE) {
   1795             l2 = BUFSIZE-1;
   1796             dumbPat[l2] = 0;
   1797         }
   1798 
   1799         if (!ok) {
   1800             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
   1801                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
   1802         } else if(getTestOption(VERBOSITY_OPTION)) {
   1803             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
   1804         }
   1805 
   1806         uset_close(set);
   1807         uset_close(dumb);
   1808     }
   1809 
   1810     /* ### TODO: test error cases and other interesting things */
   1811 }
   1812 
   1813 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
   1814 
   1815 static void
   1816 TestMirroring() {
   1817     USet *set;
   1818     UErrorCode errorCode;
   1819 
   1820     UChar32 start, end, c2, c3;
   1821     int32_t i;
   1822 
   1823     U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
   1824 
   1825     U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
   1826 
   1827     log_verbose("Testing u_isMirrored()\n");
   1828     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
   1829          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
   1830         )
   1831     ) {
   1832         log_err("u_isMirrored() does not work correctly\n");
   1833     }
   1834 
   1835     log_verbose("Testing u_charMirror()\n");
   1836     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
   1837          u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
   1838          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
   1839          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
   1840          u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
   1841          )
   1842     ) {
   1843         log_err("u_charMirror() does not work correctly\n");
   1844     }
   1845 
   1846     /* verify that Bidi_Mirroring_Glyph roundtrips */
   1847     errorCode=U_ZERO_ERROR;
   1848     set=uset_openPattern(mirroredPattern, 17, &errorCode);
   1849 
   1850     if (U_FAILURE(errorCode)) {
   1851         log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
   1852     } else {
   1853         for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
   1854             do {
   1855                 c2=u_charMirror(start);
   1856                 c3=u_charMirror(c2);
   1857                 if(c3!=start) {
   1858                     log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
   1859                 }
   1860             } while(++start<=end);
   1861         }
   1862     }
   1863 
   1864     uset_close(set);
   1865 }
   1866 
   1867 
/* One expected script run: its text and the script code the run should report. */
struct RunTestData
{
    const char *runText;  /* text of the run; TestUScriptRunAPI() passes it through u_unescape() */
    UScriptCode runCode;  /* script code CheckScriptRuns() expects for this run */
};

typedef struct RunTestData RunTestData;
   1875 
   1876 static void
   1877 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
   1878                 const char *prefix)
   1879 {
   1880     int32_t run, runStart, runLimit;
   1881     UScriptCode runCode;
   1882 
   1883     /* iterate over all the runs */
   1884     run = 0;
   1885     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
   1886         if (runStart != runStarts[run]) {
   1887             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
   1888                 prefix, run, runStarts[run], runStart);
   1889         }
   1890 
   1891         if (runLimit != runStarts[run + 1]) {
   1892             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
   1893                 prefix, run, runStarts[run + 1], runLimit);
   1894         }
   1895 
   1896         if (runCode != testData[run].runCode) {
   1897             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
   1898                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
   1899         }
   1900 
   1901         run += 1;
   1902 
   1903         /* stop when we've seen all the runs we expect to see */
   1904         if (run >= nRuns) {
   1905             break;
   1906         }
   1907     }
   1908 
   1909     /* Complain if we didn't see then number of runs we expected */
   1910     if (run != nRuns) {
   1911         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
   1912     }
   1913 }
   1914 
   1915 static void
   1916 TestUScriptRunAPI()
   1917 {
   1918     static const RunTestData testData1[] = {
   1919         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
   1920         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
   1921         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
   1922         {"English (", USCRIPT_LATIN},
   1923         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
   1924         {") ", USCRIPT_LATIN},
   1925         {"\\u6F22\\u5B75", USCRIPT_HAN},
   1926         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
   1927         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
   1928         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
   1929     };
   1930 
   1931     static const RunTestData testData2[] = {
   1932        {"((((((((((abc))))))))))", USCRIPT_LATIN}
   1933     };
   1934 
   1935     static const struct {
   1936       const RunTestData *testData;
   1937       int32_t nRuns;
   1938     } testDataEntries[] = {
   1939         {testData1, LENGTHOF(testData1)},
   1940         {testData2, LENGTHOF(testData2)}
   1941     };
   1942 
   1943     static const int32_t nTestEntries = LENGTHOF(testDataEntries);
   1944     int32_t testEntry;
   1945 
   1946     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
   1947         UChar testString[1024];
   1948         int32_t runStarts[256];
   1949         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
   1950         const RunTestData *testData = testDataEntries[testEntry].testData;
   1951 
   1952         int32_t run, stringLimit;
   1953         UScriptRun *scriptRun = NULL;
   1954         UErrorCode err;
   1955 
   1956         /*
   1957          * Fill in the test string and the runStarts array.
   1958          */
   1959         stringLimit = 0;
   1960         for (run = 0; run < nTestRuns; run += 1) {
   1961             runStarts[run] = stringLimit;
   1962             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
   1963             /*stringLimit -= 1;*/
   1964         }
   1965 
   1966         /* The limit of the last run */
   1967         runStarts[nTestRuns] = stringLimit;
   1968 
   1969         /*
   1970          * Make sure that calling uscript_OpenRun with a NULL text pointer
   1971          * and a non-zero text length returns the correct error.
   1972          */
   1973         err = U_ZERO_ERROR;
   1974         scriptRun = uscript_openRun(NULL, stringLimit, &err);
   1975 
   1976         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   1977             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   1978         }
   1979 
   1980         if (scriptRun != NULL) {
   1981             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
   1982             uscript_closeRun(scriptRun);
   1983         }
   1984 
   1985         /*
   1986          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
   1987          * and a zero text length returns the correct error.
   1988          */
   1989         err = U_ZERO_ERROR;
   1990         scriptRun = uscript_openRun(testString, 0, &err);
   1991 
   1992         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   1993             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   1994         }
   1995 
   1996         if (scriptRun != NULL) {
   1997             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
   1998             uscript_closeRun(scriptRun);
   1999         }
   2000 
   2001         /*
   2002          * Make sure that calling uscript_openRun with a NULL text pointer
   2003          * and a zero text length doesn't return an error.
   2004          */
   2005         err = U_ZERO_ERROR;
   2006         scriptRun = uscript_openRun(NULL, 0, &err);
   2007 
   2008         if (U_FAILURE(err)) {
   2009             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
   2010         }
   2011 
   2012         /* Make sure that the empty iterator doesn't find any runs */
   2013         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
   2014             log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
   2015         }
   2016 
   2017         /*
   2018          * Make sure that calling uscript_setRunText with a NULL text pointer
   2019          * and a non-zero text length returns the correct error.
   2020          */
   2021         err = U_ZERO_ERROR;
   2022         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
   2023 
   2024         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2025             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2026         }
   2027 
   2028         /*
   2029          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
   2030          * and a zero text length returns the correct error.
   2031          */
   2032         err = U_ZERO_ERROR;
   2033         uscript_setRunText(scriptRun, testString, 0, &err);
   2034 
   2035         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2036             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2037         }
   2038 
   2039         /*
   2040          * Now call uscript_setRunText on the empty iterator
   2041          * and make sure that it works.
   2042          */
   2043         err = U_ZERO_ERROR;
   2044         uscript_setRunText(scriptRun, testString, stringLimit, &err);
   2045 
   2046         if (U_FAILURE(err)) {
   2047             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
   2048         } else {
   2049             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
   2050         }
   2051 
   2052         uscript_closeRun(scriptRun);
   2053 
   2054         /*
   2055          * Now open an interator over the testString
   2056          * using uscript_openRun and make sure that it works
   2057          */
   2058         scriptRun = uscript_openRun(testString, stringLimit, &err);
   2059 
   2060         if (U_FAILURE(err)) {
   2061             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
   2062         } else {
   2063             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
   2064         }
   2065 
   2066         /* Now reset the iterator, and make sure
   2067          * that it still works.
   2068          */
   2069         uscript_resetRun(scriptRun);
   2070 
   2071         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
   2072 
   2073         /* Close the iterator */
   2074         uscript_closeRun(scriptRun);
   2075     }
   2076 }
   2077 
   2078 /* test additional, non-core properties */
   2079 static void
   2080 TestAdditionalProperties() {
   2081     /* test data for u_charAge() */
   2082     static const struct {
   2083         UChar32 c;
   2084         UVersionInfo version;
   2085     } charAges[]={
   2086         {0x41,    { 1, 1, 0, 0 }},
   2087         {0xffff,  { 1, 1, 0, 0 }},
   2088         {0x20ab,  { 2, 0, 0, 0 }},
   2089         {0x2fffe, { 2, 0, 0, 0 }},
   2090         {0x20ac,  { 2, 1, 0, 0 }},
   2091         {0xfb1d,  { 3, 0, 0, 0 }},
   2092         {0x3f4,   { 3, 1, 0, 0 }},
   2093         {0x10300, { 3, 1, 0, 0 }},
   2094         {0x220,   { 3, 2, 0, 0 }},
   2095         {0xff60,  { 3, 2, 0, 0 }}
   2096     };
   2097 
   2098     /* test data for u_hasBinaryProperty() */
   2099     static const int32_t
   2100     props[][3]={ /* code point, property, value */
   2101         { 0x0627, UCHAR_ALPHABETIC, TRUE },
   2102         { 0x1034a, UCHAR_ALPHABETIC, TRUE },
   2103         { 0x2028, UCHAR_ALPHABETIC, FALSE },
   2104 
   2105         { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
   2106         { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
   2107 
   2108         { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
   2109         { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
   2110 
   2111         { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
   2112         { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
   2113 
   2114         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
   2115         { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
   2116         { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
   2117         { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
   2118         { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
   2119 
   2120         { 0x058a, UCHAR_DASH, TRUE },
   2121         { 0x007e, UCHAR_DASH, FALSE },
   2122 
   2123         { 0x0c4d, UCHAR_DIACRITIC, TRUE },
   2124         { 0x3000, UCHAR_DIACRITIC, FALSE },
   2125 
   2126         { 0x0e46, UCHAR_EXTENDER, TRUE },
   2127         { 0x0020, UCHAR_EXTENDER, FALSE },
   2128 
   2129 #if !UCONFIG_NO_NORMALIZATION
   2130         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
   2131         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
   2132         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
   2133 
   2134         { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
   2135         { 0x0308, UCHAR_NFD_INERT, FALSE },
   2136 
   2137         { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
   2138         { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
   2139 
   2140         { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
   2141         { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
   2142         { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
   2143         { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
   2144         { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
   2145         { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
   2146 
   2147         { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
   2148         { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
   2149 
   2150         { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
   2151         { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
   2152         { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
   2153         { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
   2154         { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
   2155         { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
   2156 #endif
   2157 
   2158         { 0x0044, UCHAR_HEX_DIGIT, TRUE },
   2159         { 0xff46, UCHAR_HEX_DIGIT, TRUE },
   2160         { 0x0047, UCHAR_HEX_DIGIT, FALSE },
   2161 
   2162         { 0x30fb, UCHAR_HYPHEN, TRUE },
   2163         { 0xfe58, UCHAR_HYPHEN, FALSE },
   2164 
   2165         { 0x2172, UCHAR_ID_CONTINUE, TRUE },
   2166         { 0x0307, UCHAR_ID_CONTINUE, TRUE },
   2167         { 0x005c, UCHAR_ID_CONTINUE, FALSE },
   2168 
   2169         { 0x2172, UCHAR_ID_START, TRUE },
   2170         { 0x007a, UCHAR_ID_START, TRUE },
   2171         { 0x0039, UCHAR_ID_START, FALSE },
   2172 
   2173         { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
   2174         { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
   2175         { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
   2176 
   2177         { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
   2178         { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
   2179 
   2180         { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
   2181         { 0x0345, UCHAR_LOWERCASE, TRUE },
   2182         { 0x0030, UCHAR_LOWERCASE, FALSE },
   2183 
   2184         { 0x1d7a9, UCHAR_MATH, TRUE },
   2185         { 0x2135, UCHAR_MATH, TRUE },
   2186         { 0x0062, UCHAR_MATH, FALSE },
   2187 
   2188         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
   2189         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
   2190         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
   2191 
   2192         { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
   2193         { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
   2194         { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
   2195 
   2196         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
   2197         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
   2198 
   2199         { 0x1d44a, UCHAR_UPPERCASE, TRUE },
   2200         { 0x2162, UCHAR_UPPERCASE, TRUE },
   2201         { 0x0345, UCHAR_UPPERCASE, FALSE },
   2202 
   2203         { 0x0020, UCHAR_WHITE_SPACE, TRUE },
   2204         { 0x202f, UCHAR_WHITE_SPACE, TRUE },
   2205         { 0x3001, UCHAR_WHITE_SPACE, FALSE },
   2206 
   2207         { 0x0711, UCHAR_XID_CONTINUE, TRUE },
   2208         { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
   2209         { 0x007c, UCHAR_XID_CONTINUE, FALSE },
   2210 
   2211         { 0x16ee, UCHAR_XID_START, TRUE },
   2212         { 0x23456, UCHAR_XID_START, TRUE },
   2213         { 0x1d1aa, UCHAR_XID_START, FALSE },
   2214 
   2215         /*
   2216          * Version break:
   2217          * The following properties are only supported starting with the
   2218          * Unicode version indicated in the second field.
   2219          */
   2220         { -1, 0x320, 0 },
   2221 
   2222         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
   2223         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
   2224         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
   2225 
   2226         { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
   2227         { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
   2228         { 0xe0041, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
   2229         { 0xe0100, UCHAR_DEPRECATED, FALSE },
   2230 
   2231         { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
   2232         { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
   2233         { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
   2234         { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
   2235 
   2236         { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
   2237         { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
   2238         { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
   2239         { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
   2240 
   2241         { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
   2242         { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
   2243 
   2244         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
   2245         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
   2246 
   2247         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
   2248         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
   2249 
   2250         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
   2251         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
   2252 
   2253         { 0x2e9b, UCHAR_RADICAL, TRUE },
   2254         { 0x4e00, UCHAR_RADICAL, FALSE },
   2255 
   2256         { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
   2257         { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
   2258 
   2259         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
   2260         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
   2261 
   2262         { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
   2263 
   2264         { 0x002e, UCHAR_S_TERM, TRUE },
   2265         { 0x0061, UCHAR_S_TERM, FALSE },
   2266 
   2267         { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
   2268         { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
   2269         { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
   2270         { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
   2271 
   2272         /* enum/integer type properties */
   2273 
   2274         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
   2275         /* test default Bidi classes for unassigned code points */
   2276         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2277         { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2278         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2279         { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
   2280         { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
   2281         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2282         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2283         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2284         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2285         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2286         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2287 
   2288         { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2289         { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2290         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2291         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2292         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2293         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2294         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2295         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2296 
   2297         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
   2298         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
   2299         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
   2300         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
   2301         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
   2302         { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2303         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
   2304         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
   2305         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2306         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2307         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
   2308 
   2309         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
   2310         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
   2311 
   2312         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
   2313         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
   2314         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
   2315         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2316         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2317         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2318         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
   2319         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
   2320         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
   2321 
   2322         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2323         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
   2324         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2325         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
   2326         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2327         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
   2328         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2329         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2330         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2331         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2332         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2333         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2334         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2335         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
   2336         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2337         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2338         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2339 
   2340         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
   2341         { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
   2342         { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
   2343 
   2344         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
   2345         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
   2346         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
   2347         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
   2348         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
   2349 
   2350         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
   2351         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
   2352         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
   2353         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
   2354         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
   2355         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2356         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2357         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2358 
   2359         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
   2360         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
   2361         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
   2362         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
   2363         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
   2364         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2365         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2366         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2367         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2368         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2369         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2370         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2371         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
   2372         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
   2373         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
   2374         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
   2375 
   2376         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
   2377 
   2378         /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
   2379 
   2380         { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2381         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2382         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2383         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2384         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2385         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2386         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2387 
   2388         { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2389         { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2390         { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2391         { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2392 
   2393         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2394         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2395         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2396         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2397         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2398         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2399 
   2400         { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2401         { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2402         { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2403         { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2404 
   2405         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2406         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2407         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2408         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2409         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2410         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2411         { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2412 
   2413         { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2414         { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2415         { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2416         { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2417 
   2418         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2419         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2420         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2421         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2422 
   2423         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2424         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2425         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2426         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2427         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2428 
   2429         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2430 
   2431         { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
   2432 
   2433         { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
   2434         { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
   2435         { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
   2436 
   2437         { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2438         { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2439         { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2440         { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
   2441         { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
   2442 
   2443         { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
   2444         { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
   2445         { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
   2446 
   2447         { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
   2448         { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
   2449         { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
   2450         { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
   2451 
   2452         { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
   2453         { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
   2454         { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
   2455         { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
   2456         { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
   2457         { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
   2458 
   2459         { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
   2460         { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
   2461         { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
   2462         { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
   2463 
   2464         { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
   2465         { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
   2466         { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
   2467         { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
   2468 
   2469         { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
   2470         { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
   2471         { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
   2472         { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
   2473 
   2474         { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
   2475 
   2476         /* test some script codes >127 */
   2477         { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
   2478         { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
   2479         { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
   2480 
   2481         { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
   2482 
   2483         /* value changed in Unicode 6.0 */
   2484         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
   2485 
   2486         /* undefined UProperty values */
   2487         { 0x61, 0x4a7, 0 },
   2488         { 0x234bc, 0x15ed, 0 }
   2489     };
   2490 
   2491     UVersionInfo version;
   2492     UChar32 c;
   2493     int32_t i, result, uVersion;
   2494     UProperty which;
   2495 
   2496     /* what is our Unicode version? */
   2497     u_getUnicodeVersion(version);
   2498     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
   2499 
   2500     u_charAge(0x20, version);
   2501     if(version[0]==0) {
   2502         /* no additional properties available */
   2503         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
   2504         return;
   2505     }
   2506 
   2507     /* test u_charAge() */
   2508     for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
   2509         u_charAge(charAges[i].c, version);
   2510         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
   2511             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
   2512                 charAges[i].c,
   2513                 version[0], version[1], version[2], version[3],
   2514                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
   2515         }
   2516     }
   2517 
   2518     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
   2519         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
   2520         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
   2521         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
   2522         u_getIntPropertyMinValue(0x2345)!=0
   2523     ) {
   2524         log_err("error: u_getIntPropertyMinValue() wrong\n");
   2525     }
   2526     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
   2527         log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
   2528     }
   2529     if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
   2530         log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
   2531     }
   2532     if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
   2533         log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
   2534     }
   2535     if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
   2536         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
   2537     }
   2538     if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
   2539         log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
   2540     }
   2541     if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
   2542         log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
   2543     }
   2544     if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
   2545         log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
   2546     }
   2547     if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
   2548         log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
   2549     }
   2550     if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
   2551         log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
   2552     }
   2553     if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
   2554         log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
   2555     }
   2556     if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
   2557         log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
   2558     }
   2559     if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
   2560         log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
   2561     }
   2562     if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
   2563         log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
   2564     }
   2565     /*JB#2410*/
   2566     if( u_getIntPropertyMaxValue(0x2345)!=-1) {
   2567         log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
   2568     }
   2569     if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
   2570         log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
   2571     }
   2572     if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
   2573         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
   2574     }
   2575     if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
   2576         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
   2577     }
   2578     if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
   2579         log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
   2580     }
   2581 
   2582     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
   2583     for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
   2584         const char *whichName;
   2585 
   2586         if(props[i][0]<0) {
   2587             /* Unicode version break */
   2588             if(uVersion<props[i][1]) {
   2589                 break; /* do not test properties that are not yet supported */
   2590             } else {
   2591                 continue; /* skip this row */
   2592             }
   2593         }
   2594 
   2595         c=(UChar32)props[i][0];
   2596         which=(UProperty)props[i][1];
   2597         whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
   2598 
   2599         if(which<UCHAR_INT_START) {
   2600             result=u_hasBinaryProperty(c, which);
   2601             if(result!=props[i][2]) {
   2602                 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
   2603                         c, whichName, result, i);
   2604             }
   2605         }
   2606 
   2607         result=u_getIntPropertyValue(c, which);
   2608         if(result!=props[i][2]) {
   2609             log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
   2610                     c, whichName, result, props[i][2], i);
   2611         }
   2612 
   2613         /* test separate functions, too */
   2614         switch((UProperty)props[i][1]) {
   2615         case UCHAR_ALPHABETIC:
   2616             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2617                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
   2618                         props[i][0], result, i);
   2619             }
   2620             break;
   2621         case UCHAR_LOWERCASE:
   2622             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2623                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
   2624                         props[i][0], result, i);
   2625             }
   2626             break;
   2627         case UCHAR_UPPERCASE:
   2628             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2629                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
   2630                         props[i][0], result, i);
   2631             }
   2632             break;
   2633         case UCHAR_WHITE_SPACE:
   2634             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2635                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
   2636                         props[i][0], result, i);
   2637             }
   2638             break;
   2639         default:
   2640             break;
   2641         }
   2642     }
   2643 }
   2644 
   2645 static void
   2646 TestNumericProperties(void) {
   2647     /* see UnicodeData.txt, DerivedNumericValues.txt */
   2648     static const struct {
   2649         UChar32 c;
   2650         int32_t type;
   2651         double numValue;
   2652     } values[]={
   2653         { 0x0F33, U_NT_NUMERIC, -1./2. },
   2654         { 0x0C66, U_NT_DECIMAL, 0 },
   2655         { 0x96f6, U_NT_NUMERIC, 0 },
   2656         { 0xa833, U_NT_NUMERIC, 1./16. },
   2657         { 0x2152, U_NT_NUMERIC, 1./10. },
   2658         { 0x2151, U_NT_NUMERIC, 1./9. },
   2659         { 0x1245f, U_NT_NUMERIC, 1./8. },
   2660         { 0x2150, U_NT_NUMERIC, 1./7. },
   2661         { 0x2159, U_NT_NUMERIC, 1./6. },
   2662         { 0x09f6, U_NT_NUMERIC, 3./16. },
   2663         { 0x2155, U_NT_NUMERIC, 1./5. },
   2664         { 0x00BD, U_NT_NUMERIC, 1./2. },
   2665         { 0x0031, U_NT_DECIMAL, 1. },
   2666         { 0x4e00, U_NT_NUMERIC, 1. },
   2667         { 0x58f1, U_NT_NUMERIC, 1. },
   2668         { 0x10320, U_NT_NUMERIC, 1. },
   2669         { 0x0F2B, U_NT_NUMERIC, 3./2. },
   2670         { 0x00B2, U_NT_DIGIT, 2. },
   2671         { 0x5f10, U_NT_NUMERIC, 2. },
   2672         { 0x1813, U_NT_DECIMAL, 3. },
   2673         { 0x5f0e, U_NT_NUMERIC, 3. },
   2674         { 0x2173, U_NT_NUMERIC, 4. },
   2675         { 0x8086, U_NT_NUMERIC, 4. },
   2676         { 0x278E, U_NT_DIGIT, 5. },
   2677         { 0x1D7F2, U_NT_DECIMAL, 6. },
   2678         { 0x247A, U_NT_DIGIT, 7. },
   2679         { 0x7396, U_NT_NUMERIC, 9. },
   2680         { 0x1372, U_NT_NUMERIC, 10. },
   2681         { 0x216B, U_NT_NUMERIC, 12. },
   2682         { 0x16EE, U_NT_NUMERIC, 17. },
   2683         { 0x249A, U_NT_NUMERIC, 19. },
   2684         { 0x303A, U_NT_NUMERIC, 30. },
   2685         { 0x5345, U_NT_NUMERIC, 30. },
   2686         { 0x32B2, U_NT_NUMERIC, 37. },
   2687         { 0x1375, U_NT_NUMERIC, 40. },
   2688         { 0x10323, U_NT_NUMERIC, 50. },
   2689         { 0x0BF1, U_NT_NUMERIC, 100. },
   2690         { 0x964c, U_NT_NUMERIC, 100. },
   2691         { 0x217E, U_NT_NUMERIC, 500. },
   2692         { 0x2180, U_NT_NUMERIC, 1000. },
   2693         { 0x4edf, U_NT_NUMERIC, 1000. },
   2694         { 0x2181, U_NT_NUMERIC, 5000. },
   2695         { 0x137C, U_NT_NUMERIC, 10000. },
   2696         { 0x4e07, U_NT_NUMERIC, 10000. },
   2697         { 0x4ebf, U_NT_NUMERIC, 100000000. },
   2698         { 0x5146, U_NT_NUMERIC, 1000000000000. },
   2699         { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2700         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2701         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2702         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2703         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2704         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2705         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2706         { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
   2707     };
   2708 
   2709     double nv;
   2710     UChar32 c;
   2711     int32_t i, type;
   2712 
   2713     for(i=0; i<LENGTHOF(values); ++i) {
   2714         c=values[i].c;
   2715         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
   2716         nv=u_getNumericValue(c);
   2717 
   2718         if(type!=values[i].type) {
   2719             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
   2720         }
   2721         if(0.000001 <= fabs(nv - values[i].numValue)) {
   2722             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
   2723         }
   2724     }
   2725 }
   2726 
   2727 /**
   2728  * Test the property names and property value names API.
   2729  */
   2730 static void
   2731 TestPropertyNames(void) {
   2732     int32_t p, v, choice=0, rev;
   2733     UBool atLeastSomething = FALSE;
   2734 
   2735     for (p=0; ; ++p) {
   2736         UProperty propEnum = (UProperty)p;
   2737         UBool sawProp = FALSE;
   2738         if(p > 10 && !atLeastSomething) {
   2739           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
   2740           return;
   2741         }
   2742 
   2743         for (choice=0; ; ++choice) {
   2744             const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
   2745             if (name) {
   2746                 if (!sawProp)
   2747                     log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
   2748                 log_verbose("%d=\"%s\"", choice, name);
   2749                 sawProp = TRUE;
   2750                 atLeastSomething = TRUE;
   2751 
   2752                 /* test reverse mapping */
   2753                 rev = u_getPropertyEnum(name);
   2754                 if (rev != p) {
   2755                     log_err("Property round-trip failure: %d -> %s -> %d\n",
   2756                             p, name, rev);
   2757                 }
   2758             }
   2759             if (!name && choice>0) break;
   2760         }
   2761         if (sawProp) {
   2762             /* looks like a valid property; check the values */
   2763             const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
   2764             int32_t max = 0;
   2765             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
   2766                 max = 255;
   2767             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
   2768                 /* it's far too slow to iterate all the way up to
   2769                    the real max, U_GC_P_MASK */
   2770                 max = U_GC_NL_MASK;
   2771             } else if (p == UCHAR_BLOCK) {
   2772                 /* UBlockCodes, unlike other values, start at 1 */
   2773                 max = 1;
   2774             }
   2775             log_verbose("\n");
   2776             for (v=-1; ; ++v) {
   2777                 UBool sawValue = FALSE;
   2778                 for (choice=0; ; ++choice) {
   2779                     const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
   2780                     if (vname) {
   2781                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
   2782                         log_verbose("%d=\"%s\"", choice, vname);
   2783                         sawValue = TRUE;
   2784 
   2785                         /* test reverse mapping */
   2786                         rev = u_getPropertyValueEnum(propEnum, vname);
   2787                         if (rev != v) {
   2788                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
   2789                                     pname, v, vname, rev);
   2790                         }
   2791                     }
   2792                     if (!vname && choice>0) break;
   2793                 }
   2794                 if (sawValue) {
   2795                     log_verbose("\n");
   2796                 }
   2797                 if (!sawValue && v>=max) break;
   2798             }
   2799         }
   2800         if (!sawProp) {
   2801             if (p>=UCHAR_STRING_LIMIT) {
   2802                 break;
   2803             } else if (p>=UCHAR_DOUBLE_LIMIT) {
   2804                 p = UCHAR_STRING_START - 1;
   2805             } else if (p>=UCHAR_MASK_LIMIT) {
   2806                 p = UCHAR_DOUBLE_START - 1;
   2807             } else if (p>=UCHAR_INT_LIMIT) {
   2808                 p = UCHAR_MASK_START - 1;
   2809             } else if (p>=UCHAR_BINARY_LIMIT) {
   2810                 p = UCHAR_INT_START - 1;
   2811             }
   2812         }
   2813     }
   2814 }
   2815 
   2816 /**
   2817  * Test the property values API.  See JB#2410.
   2818  */
   2819 static void
   2820 TestPropertyValues(void) {
   2821     int32_t i, p, min, max;
   2822     UErrorCode ec;
   2823 
   2824     /* Min should be 0 for everything. */
   2825     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
   2826     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
   2827         UProperty propEnum = (UProperty)p;
   2828         min = u_getIntPropertyMinValue(propEnum);
   2829         if (min != 0) {
   2830             if (p == UCHAR_BLOCK) {
   2831                 /* This is okay...for now.  See JB#2487.
   2832                    TODO Update this for JB#2487. */
   2833             } else {
   2834                 const char* name;
   2835                 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
   2836                 if (name == NULL)
   2837                     name = "<ERROR>";
   2838                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
   2839                         name, min);
   2840             }
   2841         }
   2842     }
   2843 
   2844     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
   2845         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
   2846         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
   2847     }
   2848 
   2849     /* Max should be -1 for invalid properties. */
   2850     max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
   2851     if (max != -1) {
   2852         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
   2853                 max);
   2854     }
   2855 
   2856     /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
   2857     for (i=0; i<2; ++i) {
   2858         int32_t script;
   2859         const char* desc;
   2860         ec = U_ZERO_ERROR;
   2861         switch (i) {
   2862         case 0:
   2863             script = uscript_getScript(-1, &ec);
   2864             desc = "uscript_getScript(-1)";
   2865             break;
   2866         case 1:
   2867             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
   2868             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
   2869             break;
   2870         default:
   2871             log_err("Internal test error. Too many scripts\n");
   2872             return;
   2873         }
   2874         /* We don't explicitly test ec.  It should be U_FAILURE but it
   2875            isn't documented as such. */
   2876         if (script != (int32_t)USCRIPT_INVALID_CODE) {
   2877             log_err("FAIL: %s = %d, exp. 0\n",
   2878                     desc, script);
   2879         }
   2880     }
   2881 }
   2882 
   2883 /* various tests for consistency of UCD data and API behavior */
   2884 static void
   2885 TestConsistency() {
   2886     char buffer[300];
   2887     USet *set1, *set2, *set3, *set4;
   2888     UErrorCode errorCode;
   2889 
   2890     UChar32 start, end;
   2891     int32_t i, length;
   2892 
   2893     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
   2894     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
   2895     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
   2896     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
   2897     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
   2898 
   2899     U_STRING_DECL(mathBlocksPattern,
   2900         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
   2901         1+32+46+46+45+43+1+1); /* +1 for NUL */
   2902     U_STRING_DECL(mathPattern, "[:Math:]", 8);
   2903     U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
   2904     U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
   2905     U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
   2906 
   2907     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
   2908     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
   2909     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
   2910     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
   2911     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
   2912 
   2913     U_STRING_INIT(mathBlocksPattern,
   2914         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
   2915         1+32+46+46+45+43+1+1); /* +1 for NUL */
   2916     U_STRING_INIT(mathPattern, "[:Math:]", 8);
   2917     U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
   2918     U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
   2919     U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
   2920 
   2921     /*
   2922      * It used to be that UCD.html and its precursors said
   2923      * "Those dashes used to mark connections between pieces of words,
   2924      *  plus the Katakana middle dot."
   2925      *
   2926      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
   2927      * but not from Hyphen.
   2928      * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
   2929      * Therefore, do not show errors when testing the Hyphen property.
   2930      */
   2931     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
   2932                 "known to the UTC and not considered errors.\n");
   2933 
   2934     errorCode=U_ZERO_ERROR;
   2935     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
   2936     set2=uset_openPattern(dashPattern, 8, &errorCode);
   2937     if(U_SUCCESS(errorCode)) {
   2938         /* remove the Katakana middle dot(s) from set1 */
   2939         uset_remove(set1, 0x30fb);
   2940         uset_remove(set1, 0xff65); /* halfwidth variant */
   2941         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
   2942     } else {
   2943         log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   2944     }
   2945 
   2946     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
   2947     set3=uset_openPattern(formatPattern, 6, &errorCode);
   2948     set4=uset_openPattern(alphaPattern, 14, &errorCode);
   2949     if(U_SUCCESS(errorCode)) {
   2950         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
   2951         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
   2952         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
   2953     } else {
   2954         log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   2955     }
   2956 
   2957     uset_close(set1);
   2958     uset_close(set2);
   2959     uset_close(set3);
   2960     uset_close(set4);
   2961 
   2962     /*
   2963      * Check that each lowercase character has "small" in its name
   2964      * and not "capital".
   2965      * There are some such characters, some of which seem odd.
   2966      * Use the verbose flag to see these notices.
   2967      */
   2968     errorCode=U_ZERO_ERROR;
   2969     set1=uset_openPattern(lowerPattern, 13, &errorCode);
   2970     if(U_SUCCESS(errorCode)) {
   2971         for(i=0;; ++i) {
   2972             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
   2973             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
   2974                 break; /* done */
   2975             }
   2976             if(U_FAILURE(errorCode)) {
   2977                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
   2978                         i, u_errorName(errorCode));
   2979                 break;
   2980             }
   2981             if(length!=0) {
   2982                 break; /* done with code points, got a string or -1 */
   2983             }
   2984 
   2985             while(start<=end) {
   2986                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
   2987                 if(U_FAILURE(errorCode)) {
   2988                     log_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
   2989                     errorCode=U_ZERO_ERROR;
   2990                     continue;
   2991                 }
   2992                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
   2993                     strstr(buffer, "SMALL CAPITAL")==NULL
   2994                 ) {
   2995                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
   2996                 }
   2997                 ++start;
   2998             }
   2999         }
   3000     } else {
   3001         log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3002     }
   3003     uset_close(set1);
   3004 
   3005     /* verify that all assigned characters in Math blocks are exactly Math characters */
   3006     errorCode=U_ZERO_ERROR;
   3007     set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
   3008     set2=uset_openPattern(mathPattern, 8, &errorCode);
   3009     set3=uset_openPattern(unassignedPattern, 6, &errorCode);
   3010     if(U_SUCCESS(errorCode)) {
   3011         uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
   3012         uset_complement(set3);      /* assigned characters */
   3013         uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
   3014         compareUSets(set1, set2,
   3015                      "[assigned Math block chars]", "[math blocks]&[:Math:]",
   3016                      TRUE);
   3017     } else {
   3018         log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3019     }
   3020     uset_close(set1);
   3021     uset_close(set2);
   3022     uset_close(set3);
   3023 
   3024     /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
   3025     errorCode=U_ZERO_ERROR;
   3026     set1=uset_openPattern(unknownPattern, 14, &errorCode);
   3027     set2=uset_openPattern(reservedPattern, 20, &errorCode);
   3028     if(U_SUCCESS(errorCode)) {
   3029         compareUSets(set1, set2,
   3030                      "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
   3031                      TRUE);
   3032     } else {
   3033         log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3034     }
   3035     uset_close(set1);
   3036     uset_close(set2);
   3037 }
   3038 
/*
 * Starting with ICU4C 3.4, the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and therefore not included
 * in the data package any more.
 * Tests requiring these files are disabled so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 */
#define HARDCODED_DATA_4497 1
   3050 
   3051 /* API coverage for ucase.c */
   3052 static void TestUCase() {
   3053 #if !HARDCODED_DATA_4497
   3054     UDataMemory *pData;
   3055     UCaseProps *csp;
   3056     const UCaseProps *ccsp;
   3057     UErrorCode errorCode;
   3058 
   3059     /* coverage for ucase_openBinary() */
   3060     errorCode=U_ZERO_ERROR;
   3061     pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
   3062     if(U_FAILURE(errorCode)) {
   3063         log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
   3064                     u_errorName(errorCode));
   3065         return;
   3066     }
   3067 
   3068     csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
   3069     if(U_FAILURE(errorCode)) {
   3070         log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
   3071                 u_errorName(errorCode));
   3072         udata_close(pData);
   3073         return;
   3074     }
   3075 
   3076     if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
   3077         log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
   3078     }
   3079 
   3080     ucase_close(csp);
   3081     udata_close(pData);
   3082 
   3083     /* coverage for ucase_getDummy() */
   3084     errorCode=U_ZERO_ERROR;
   3085     ccsp=ucase_getDummy(&errorCode);
   3086     if(ucase_tolower(ccsp, 0x41)!=0x41) {
   3087         log_err("ucase_tolower(dummy, A)!=A\n");
   3088     }
   3089 #endif
   3090 }
   3091 
   3092 /* API coverage for ubidi_props.c */
   3093 static void TestUBiDiProps() {
   3094 #if !HARDCODED_DATA_4497
   3095     UDataMemory *pData;
   3096     UBiDiProps *bdp;
   3097     const UBiDiProps *cbdp;
   3098     UErrorCode errorCode;
   3099 
   3100     /* coverage for ubidi_openBinary() */
   3101     errorCode=U_ZERO_ERROR;
   3102     pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
   3103     if(U_FAILURE(errorCode)) {
   3104         log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
   3105                     u_errorName(errorCode));
   3106         return;
   3107     }
   3108 
   3109     bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
   3110     if(U_FAILURE(errorCode)) {
   3111         log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
   3112                 u_errorName(errorCode));
   3113         udata_close(pData);
   3114         return;
   3115     }
   3116 
   3117     if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
   3118         log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
   3119     }
   3120 
   3121     ubidi_closeProps(bdp);
   3122     udata_close(pData);
   3123 
   3124     /* coverage for ubidi_getDummy() */
   3125     errorCode=U_ZERO_ERROR;
   3126     cbdp=ubidi_getDummy(&errorCode);
   3127     if(ubidi_getClass(cbdp, 0x20)!=0) {
   3128         log_err("ubidi_getClass(dummy, space)!=0\n");
   3129     }
   3130 #endif
   3131 }
   3132 
   3133 /* test case folding, compare return values with CaseFolding.txt ------------ */
   3134 
   3135 /* bit set for which case foldings for a character have been tested already */
   3136 enum {
   3137     CF_SIMPLE=1,
   3138     CF_FULL=2,
   3139     CF_TURKIC=4,
   3140     CF_ALL=7
   3141 };
   3142 
   3143 static void
   3144 testFold(UChar32 c, int which,
   3145          UChar32 simple, UChar32 turkic,
   3146          const UChar *full, int32_t fullLength,
   3147          const UChar *turkicFull, int32_t turkicFullLength) {
   3148     UChar s[2], t[32];
   3149     UChar32 c2;
   3150     int32_t length, length2;
   3151 
   3152     UErrorCode errorCode=U_ZERO_ERROR;
   3153 
   3154     length=0;
   3155     U16_APPEND_UNSAFE(s, length, c);
   3156 
   3157     if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
   3158         log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
   3159     }
   3160     if((which&CF_FULL)!=0) {
   3161         length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);
   3162         if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
   3163             log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
   3164         }
   3165     }
   3166     if((which&CF_TURKIC)!=0) {
   3167         if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
   3168             log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
   3169         }
   3170 
   3171         length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
   3172         if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
   3173             log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
   3174         }
   3175     }
   3176 }
   3177 
   3178 /* test that c case-folds to itself */
   3179 static void
   3180 testFoldToSelf(UChar32 c, int which) {
   3181     UChar s[2];
   3182     int32_t length;
   3183 
   3184     length=0;
   3185     U16_APPEND_UNSAFE(s, length, c);
   3186     testFold(c, which, c, c, s, length, s, length);
   3187 }
   3188 
   3189 struct CaseFoldingData {
   3190     USet *notSeen;
   3191     UChar32 prev, prevSimple;
   3192     UChar prevFull[32];
   3193     int32_t prevFullLength;
   3194     int which;
   3195 };
   3196 typedef struct CaseFoldingData CaseFoldingData;
   3197 
   3198 static void U_CALLCONV
   3199 caseFoldingLineFn(void *context,
   3200                   char *fields[][2], int32_t fieldCount,
   3201                   UErrorCode *pErrorCode) {
   3202     CaseFoldingData *pData=(CaseFoldingData *)context;
   3203     char *end;
   3204     UChar full[32];
   3205     UChar32 c, prev, simple;
   3206     int32_t count;
   3207     int which;
   3208     char status;
   3209 
   3210     /* get code point */
   3211     c=(UChar32)strtoul(u_skipWhitespace(fields[0][0]), &end, 16);
   3212     end=(char *)u_skipWhitespace(end);
   3213     if(end<=fields[0][0] || end!=fields[0][1]) {
   3214         log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
   3215         *pErrorCode=U_PARSE_ERROR;
   3216         return;
   3217     }
   3218 
   3219     /* get the status of this mapping */
   3220     status=*u_skipWhitespace(fields[1][0]);
   3221     if(status!='C' && status!='S' && status!='F' && status!='T') {
   3222         log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
   3223         *pErrorCode=U_PARSE_ERROR;
   3224         return;
   3225     }
   3226 
   3227     /* get the mapping */
   3228     count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
   3229     if(U_FAILURE(*pErrorCode)) {
   3230         log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
   3231         return;
   3232     }
   3233 
   3234     /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
   3235     if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
   3236         simple=c;
   3237     }
   3238 
   3239     if(c!=(prev=pData->prev)) {
   3240         /*
   3241          * Test remaining mappings for the previous code point.
   3242          * If a turkic folding was not mentioned, then it should fold the same
   3243          * as the regular simple case folding.
   3244          */
   3245         UChar s[2];
   3246         int32_t length;
   3247 
   3248         length=0;
   3249         U16_APPEND_UNSAFE(s, length, prev);
   3250         testFold(prev, (~pData->which)&CF_ALL,
   3251                  prev, pData->prevSimple,
   3252                  s, length,
   3253                  pData->prevFull, pData->prevFullLength);
   3254         pData->prev=pData->prevSimple=c;
   3255         length=0;
   3256         U16_APPEND_UNSAFE(pData->prevFull, length, c);
   3257         pData->prevFullLength=length;
   3258         pData->which=0;
   3259     }
   3260 
   3261     /*
   3262      * Turn the status into a bit set of case foldings to test.
   3263      * Remember non-Turkic case foldings as defaults for Turkic mode.
   3264      */
   3265     switch(status) {
   3266     case 'C':
   3267         which=CF_SIMPLE|CF_FULL;
   3268         pData->prevSimple=simple;
   3269         u_memcpy(pData->prevFull, full, count);
   3270         pData->prevFullLength=count;
   3271         break;
   3272     case 'S':
   3273         which=CF_SIMPLE;
   3274         pData->prevSimple=simple;
   3275         break;
   3276     case 'F':
   3277         which=CF_FULL;
   3278         u_memcpy(pData->prevFull, full, count);
   3279         pData->prevFullLength=count;
   3280         break;
   3281     case 'T':
   3282         which=CF_TURKIC;
   3283         break;
   3284     default:
   3285         which=0;
   3286         break; /* won't happen because of test above */
   3287     }
   3288 
   3289     testFold(c, which, simple, simple, full, count, full, count);
   3290 
   3291     /* remember which case foldings of c have been tested */
   3292     pData->which|=which;
   3293 
   3294     /* remove c from the set of ones not mentioned in CaseFolding.txt */
   3295     uset_remove(pData->notSeen, c);
   3296 }
   3297 
   3298 static void
   3299 TestCaseFolding() {
   3300     CaseFoldingData data={ NULL };
   3301     char *fields[3][2];
   3302     UErrorCode errorCode;
   3303 
   3304     static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
   3305 
   3306     errorCode=U_ZERO_ERROR;
   3307     /* test BMP & plane 1 - nothing interesting above */
   3308     data.notSeen=uset_open(0, 0x1ffff);
   3309     data.prevFullLength=1; /* length of full case folding of U+0000 */
   3310 
   3311     parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
   3312     if(U_SUCCESS(errorCode)) {
   3313         int32_t i, start, end;
   3314 
   3315         /* add a pseudo-last line to finish testing of the actual last one */
   3316         fields[0][0]=lastLine;
   3317         fields[0][1]=lastLine+6;
   3318         fields[1][0]=lastLine+7;
   3319         fields[1][1]=lastLine+9;
   3320         fields[2][0]=lastLine+10;
   3321         fields[2][1]=lastLine+17;
   3322         caseFoldingLineFn(&data, fields, 3, &errorCode);
   3323 
   3324         /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
   3325         for(i=0;
   3326             0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
   3327                 U_SUCCESS(errorCode);
   3328             ++i
   3329         ) {
   3330             do {
   3331                 testFoldToSelf(start, CF_ALL);
   3332             } while(++start<=end);
   3333         }
   3334     }
   3335 
   3336     uset_close(data.notSeen);
   3337 }
   3338