Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2012, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /*******************************************************************************
      7 *
      8 * File CUCDTST.C
      9 *
     10 * Modification History:
     11 *        Name                     Description
     12 *     Madhu Katragadda            Ported for C API, added tests for string functions
     13 ********************************************************************************
     14 */
     15 
     16 #include <string.h>
     17 #include <math.h>
     18 #include <stdlib.h>
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/uchar.h"
     22 #include "unicode/putil.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/uloc.h"
     25 #include "unicode/unorm2.h"
     26 
     27 #include "cintltst.h"
     28 #include "putilimp.h"
     29 #include "uparse.h"
     30 #include "ucase.h"
     31 #include "ubidi_props.h"
     32 #include "uprops.h"
     33 #include "uset_imp.h"
     34 #include "usc_impl.h"
     35 #include "udatamem.h" /* for testing ucase_openBinary() */
     36 #include "cucdapi.h"
     37 
     38 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     39 
     40 /* prototypes --------------------------------------------------------------- */
     41 
     42 static void TestUpperLower(void);
     43 static void TestLetterNumber(void);
     44 static void TestMisc(void);
     45 static void TestPOSIX(void);
     46 static void TestControlPrint(void);
     47 static void TestIdentifier(void);
     48 static void TestUnicodeData(void);
     49 static void TestCodeUnit(void);
     50 static void TestCodePoint(void);
     51 static void TestCharLength(void);
     52 static void TestCharNames(void);
     53 static void TestMirroring(void);
     54 static void TestUScriptRunAPI(void);
     55 static void TestAdditionalProperties(void);
     56 static void TestNumericProperties(void);
     57 static void TestPropertyNames(void);
     58 static void TestPropertyValues(void);
     59 static void TestConsistency(void);
     60 static void TestUCase(void);
     61 static void TestUBiDiProps(void);
     62 static void TestCaseFolding(void);
     63 
     64 /* internal methods used */
     65 static int32_t MakeProp(char* str);
     66 static int32_t MakeDir(char* str);
     67 
     68 /* helpers ------------------------------------------------------------------ */
     69 
     70 static void
     71 parseUCDFile(const char *filename,
     72              char *fields[][2], int32_t fieldCount,
     73              UParseLineFn *lineFn, void *context,
     74              UErrorCode *pErrorCode) {
     75     char path[256];
     76     char backupPath[256];
     77 
     78     if(U_FAILURE(*pErrorCode)) {
     79         return;
     80     }
     81 
     82     /* Look inside ICU_DATA first */
     83     strcpy(path, u_getDataDirectory());
     84     strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
     85     strcat(path, filename);
     86 
     87     /* As a fallback, try to guess where the source data was located
     88      *    at the time ICU was built, and look there.
     89      */
     90     strcpy(backupPath, ctest_dataSrcDir());
     91     strcat(backupPath, U_FILE_SEP_STRING);
     92     strcat(backupPath, "unidata" U_FILE_SEP_STRING);
     93     strcat(backupPath, filename);
     94 
     95     u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
     96     if(*pErrorCode==U_FILE_ACCESS_ERROR) {
     97         *pErrorCode=U_ZERO_ERROR;
     98         u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
     99     }
    100     if(U_FAILURE(*pErrorCode)) {
    101         log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
    102     }
    103 }
    104 
    105 /* test data ---------------------------------------------------------------- */
    106 
    107 static const UChar  LAST_CHAR_CODE_IN_FILE = 0xFFFD;
    108 static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
    109 static const int32_t tagValues[] =
    110     {
    111     /* Mn */ U_NON_SPACING_MARK,
    112     /* Mc */ U_COMBINING_SPACING_MARK,
    113     /* Me */ U_ENCLOSING_MARK,
    114     /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    115     /* Nl */ U_LETTER_NUMBER,
    116     /* No */ U_OTHER_NUMBER,
    117     /* Zs */ U_SPACE_SEPARATOR,
    118     /* Zl */ U_LINE_SEPARATOR,
    119     /* Zp */ U_PARAGRAPH_SEPARATOR,
    120     /* Cc */ U_CONTROL_CHAR,
    121     /* Cf */ U_FORMAT_CHAR,
    122     /* Cs */ U_SURROGATE,
    123     /* Co */ U_PRIVATE_USE_CHAR,
    124     /* Cn */ U_UNASSIGNED,
    125     /* Lu */ U_UPPERCASE_LETTER,
    126     /* Ll */ U_LOWERCASE_LETTER,
    127     /* Lt */ U_TITLECASE_LETTER,
    128     /* Lm */ U_MODIFIER_LETTER,
    129     /* Lo */ U_OTHER_LETTER,
    130     /* Pc */ U_CONNECTOR_PUNCTUATION,
    131     /* Pd */ U_DASH_PUNCTUATION,
    132     /* Ps */ U_START_PUNCTUATION,
    133     /* Pe */ U_END_PUNCTUATION,
    134     /* Po */ U_OTHER_PUNCTUATION,
    135     /* Sm */ U_MATH_SYMBOL,
    136     /* Sc */ U_CURRENCY_SYMBOL,
    137     /* Sk */ U_MODIFIER_SYMBOL,
    138     /* So */ U_OTHER_SYMBOL,
    139     /* Pi */ U_INITIAL_PUNCTUATION,
    140     /* Pf */ U_FINAL_PUNCTUATION
    141     };
    142 
    143 static const char dirStrings[][5] = {
    144     "L",
    145     "R",
    146     "EN",
    147     "ES",
    148     "ET",
    149     "AN",
    150     "CS",
    151     "B",
    152     "S",
    153     "WS",
    154     "ON",
    155     "LRE",
    156     "LRO",
    157     "AL",
    158     "RLE",
    159     "RLO",
    160     "PDF",
    161     "NSM",
    162     "BN"
    163 };
    164 
    165 void addUnicodeTest(TestNode** root);
    166 
    167 void addUnicodeTest(TestNode** root)
    168 {
    169     addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
    170     addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
    171     addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
    172     addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
    173     addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
    174     addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
    175     addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
    176     addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
    177     addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
    178     addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
    179     addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
    180     addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
    181     addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
    182     addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
    183     addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
    184     addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
    185     addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
    186     addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
    187     addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
    188     addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
    189     addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
    190     addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
    191     addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
    192     addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
    193     addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
    194 }
    195 
    196 /*==================================================== */
    197 /* test u_toupper() and u_tolower()                    */
    198 /*==================================================== */
    199 static void TestUpperLower()
    200 {
    201     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
    202     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
    203     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
    204     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
    205     int32_t i;
    206 
    207     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
    208     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
    209 
    210 /*
    211 Checks LetterLike Symbols which were previously a source of confusion
    212 [Bertrand A. D. 02/04/98]
    213 */
    214     for (i=0x2100;i<0x2138;i++)
    215     {
    216         /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
    217         if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
    218         {
    219             if (i != (int)u_tolower(i)) /* itself */
    220                 log_err("Failed case conversion with itself: U+%04x\n", i);
    221             if (i != (int)u_toupper(i))
    222                 log_err("Failed case conversion with itself: U+%04x\n", i);
    223         }
    224     }
    225 
    226     for(i=0; i < u_strlen(upper); i++){
    227         if(u_tolower(upper[i]) != lower[i]){
    228             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
    229         }
    230     }
    231 
    232     log_verbose("testing upper lower\n");
    233     for (i = 0; i < 21; i++) {
    234 
    235         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
    236         {
    237             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
    238         }
    239         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
    240          {
    241             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
    242         }
    243         else if (upperTest[i] != u_tolower(lowerTest[i]))
    244         {
    245             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
    246         }
    247         else if (lowerTest[i] != u_toupper(upperTest[i]))
    248          {
    249             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
    250         }
    251         else if (upperTest[i] != u_tolower(upperTest[i]))
    252         {
    253             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
    254         }
    255         else if (lowerTest[i] != u_toupper(lowerTest[i]))
    256         {
    257             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
    258         }
    259     }
    260     log_verbose("done testing upper lower\n");
    261 
    262     log_verbose("testing u_istitle\n");
    263     {
    264         static const UChar expected[] = {
    265             0x1F88,
    266             0x1F89,
    267             0x1F8A,
    268             0x1F8B,
    269             0x1F8C,
    270             0x1F8D,
    271             0x1F8E,
    272             0x1F8F,
    273             0x1F88,
    274             0x1F89,
    275             0x1F8A,
    276             0x1F8B,
    277             0x1F8C,
    278             0x1F8D,
    279             0x1F8E,
    280             0x1F8F,
    281             0x1F98,
    282             0x1F99,
    283             0x1F9A,
    284             0x1F9B,
    285             0x1F9C,
    286             0x1F9D,
    287             0x1F9E,
    288             0x1F9F,
    289             0x1F98,
    290             0x1F99,
    291             0x1F9A,
    292             0x1F9B,
    293             0x1F9C,
    294             0x1F9D,
    295             0x1F9E,
    296             0x1F9F,
    297             0x1FA8,
    298             0x1FA9,
    299             0x1FAA,
    300             0x1FAB,
    301             0x1FAC,
    302             0x1FAD,
    303             0x1FAE,
    304             0x1FAF,
    305             0x1FA8,
    306             0x1FA9,
    307             0x1FAA,
    308             0x1FAB,
    309             0x1FAC,
    310             0x1FAD,
    311             0x1FAE,
    312             0x1FAF,
    313             0x1FBC,
    314             0x1FBC,
    315             0x1FCC,
    316             0x1FCC,
    317             0x1FFC,
    318             0x1FFC,
    319         };
    320         int32_t num = sizeof(expected)/sizeof(expected[0]);
    321         for(i=0; i<num; i++){
    322             if(!u_istitle(expected[i])){
    323                 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
    324             }
    325         }
    326 
    327     }
    328 }
    329 
    330 /* compare two sets and verify that their difference or intersection is empty */
    331 static UBool
    332 showADiffB(const USet *a, const USet *b,
    333            const char *a_name, const char *b_name,
    334            UBool expect, UBool diffIsError) {
    335     USet *aa;
    336     int32_t i, start, end, length;
    337     UErrorCode errorCode;
    338 
    339     /*
    340      * expect:
    341      * TRUE  -> a-b should be empty, that is, b should contain all of a
    342      * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
    343      */
    344     if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
    345         return TRUE;
    346     }
    347 
    348     /* clone a to aa because a is const */
    349     aa=uset_open(1, 0);
    350     if(aa==NULL) {
    351         /* unusual problem - out of memory? */
    352         return FALSE;
    353     }
    354     uset_addAll(aa, a);
    355 
    356     /* compute the set in question */
    357     if(expect) {
    358         /* a-b */
    359         uset_removeAll(aa, b);
    360     } else {
    361         /* a&b */
    362         uset_retainAll(aa, b);
    363     }
    364 
    365     /* aa is not empty because of the initial tests above; show its contents */
    366     errorCode=U_ZERO_ERROR;
    367     i=0;
    368     for(;;) {
    369         length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
    370         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
    371             break; /* done */
    372         }
    373         if(U_FAILURE(errorCode)) {
    374             log_err("error comparing %s with %s at difference item %d: %s\n",
    375                 a_name, b_name, i, u_errorName(errorCode));
    376             break;
    377         }
    378         if(length!=0) {
    379             break; /* done with code points, got a string or -1 */
    380         }
    381 
    382         if(diffIsError) {
    383             if(expect) {
    384                 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
    385             } else {
    386                 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
    387             }
    388         } else {
    389             if(expect) {
    390                 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
    391             } else {
    392                 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
    393             }
    394         }
    395 
    396         ++i;
    397     }
    398 
    399     uset_close(aa);
    400     return FALSE;
    401 }
    402 
    403 static UBool
    404 showAMinusB(const USet *a, const USet *b,
    405             const char *a_name, const char *b_name,
    406             UBool diffIsError) {
    407     return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
    408 }
    409 
    410 static UBool
    411 showAIntersectB(const USet *a, const USet *b,
    412                 const char *a_name, const char *b_name,
    413                 UBool diffIsError) {
    414     return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
    415 }
    416 
    417 static UBool
    418 compareUSets(const USet *a, const USet *b,
    419              const char *a_name, const char *b_name,
    420              UBool diffIsError) {
    421     /*
    422      * Use an arithmetic & not a logical && so that both branches
    423      * are always taken and all differences are shown.
    424      */
    425     return
    426         showAMinusB(a, b, a_name, b_name, diffIsError) &
    427         showAMinusB(b, a, b_name, a_name, diffIsError);
    428 }
    429 
    430 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
    431 static void TestLetterNumber()
    432 {
    433     UChar i = 0x0000;
    434 
    435     log_verbose("Testing for isalpha\n");
    436     for (i = 0x0041; i < 0x005B; i++) {
    437         if (!u_isalpha(i))
    438         {
    439             log_err("Failed isLetter test at  %.4X\n", i);
    440         }
    441     }
    442     for (i = 0x0660; i < 0x066A; i++) {
    443         if (u_isalpha(i))
    444         {
    445             log_err("Failed isLetter test with numbers at %.4X\n", i);
    446         }
    447     }
    448 
    449     log_verbose("Testing for isdigit\n");
    450     for (i = 0x0660; i < 0x066A; i++) {
    451         if (!u_isdigit(i))
    452         {
    453             log_verbose("Failed isNumber test at %.4X\n", i);
    454         }
    455     }
    456 
    457     log_verbose("Testing for isalnum\n");
    458     for (i = 0x0041; i < 0x005B; i++) {
    459         if (!u_isalnum(i))
    460         {
    461             log_err("Failed isAlNum test at  %.4X\n", i);
    462         }
    463     }
    464     for (i = 0x0660; i < 0x066A; i++) {
    465         if (!u_isalnum(i))
    466         {
    467             log_err("Failed isAlNum test at  %.4X\n", i);
    468         }
    469     }
    470 
    471     {
    472         /*
    473          * The following checks work only starting from Unicode 4.0.
    474          * Check the version number here.
    475          */
    476         static UVersionInfo u401={ 4, 0, 1, 0 };
    477         UVersionInfo version;
    478         u_getUnicodeVersion(version);
    479         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
    480             return;
    481         }
    482     }
    483 
    484     {
    485         /*
    486          * Sanity check:
    487          * Verify that exactly the digit characters have decimal digit values.
    488          * This assumption is used in the implementation of u_digit()
    489          * (which checks nt=de)
    490          * compared with the parallel java.lang.Character.digit()
    491          * (which checks Nd).
    492          *
    493          * This was not true in Unicode 3.2 and earlier.
    494          * Unicode 4.0 fixed discrepancies.
    495          * Unicode 4.0.1 re-introduced problems in this area due to an
    496          * unintentionally incomplete last-minute change.
    497          */
    498         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
    499         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
    500 
    501         USet *digits, *decimalValues;
    502         UErrorCode errorCode;
    503 
    504         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
    505         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
    506         errorCode=U_ZERO_ERROR;
    507         digits=uset_openPattern(digitsPattern, 6, &errorCode);
    508         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
    509 
    510         if(U_SUCCESS(errorCode)) {
    511             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
    512         }
    513 
    514         uset_close(digits);
    515         uset_close(decimalValues);
    516     }
    517 }
    518 
    519 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
    520                                 const UChar32 *sampleChars, int32_t sampleCharsLength,
    521                                 UBool expected) {
    522     int32_t i;
    523     for (i = 0; i < sampleCharsLength; ++i) {
    524         UBool result = propFn(sampleChars[i]);
    525         if (result != expected) {
    526             log_err("error: character property function %s(U+%04x)=%d is wrong\n",
    527                     propName, sampleChars[i], result);
    528         }
    529     }
    530 }
    531 
    532 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
    533 static void TestMisc()
    534 {
    535     static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
    536     static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
    537     static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
    538     static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
    539     static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
    540     static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
    541 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
    542     static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
    543     static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
    544     static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
    545     static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
    546 
    547     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
    548 
    549     uint32_t mask;
    550 
    551     int32_t i;
    552     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
    553     UVersionInfo realVersion;
    554 
    555     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
    556 
    557     testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
    558     testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
    559 
    560     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
    561                         sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
    562     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
    563                         sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
    564 
    565     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
    566                         sampleWhiteSpaces, LENGTHOF(sampleWhiteSpaces), TRUE);
    567     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
    568                         sampleNonWhiteSpaces, LENGTHOF(sampleNonWhiteSpaces), FALSE);
    569 
    570     testSampleCharProps(u_isdefined, "u_isdefined",
    571                         sampleDefined, LENGTHOF(sampleDefined), TRUE);
    572     testSampleCharProps(u_isdefined, "u_isdefined",
    573                         sampleUndefined, LENGTHOF(sampleUndefined), FALSE);
    574 
    575     testSampleCharProps(u_isbase, "u_isbase", sampleBase, LENGTHOF(sampleBase), TRUE);
    576     testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, LENGTHOF(sampleNonBase), FALSE);
    577 
    578     testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, LENGTHOF(sampleDigits), TRUE);
    579     testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, LENGTHOF(sampleNonDigits), FALSE);
    580 
    581     for (i = 0; i < LENGTHOF(sampleDigits); i++) {
    582         if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
    583             log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
    584                     sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
    585         }
    586     }
    587 
    588     /* Tests the ICU version #*/
    589     u_getVersion(realVersion);
    590     u_versionToString(realVersion, icuVersion);
    591     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
    592     {
    593         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
    594     }
    595 #if defined(ICU_VERSION)
    596     /* test only happens where we have configure.in with VERSION - sanity check. */
    597     if(strcmp(U_ICU_VERSION, ICU_VERSION))
    598     {
    599         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
    600     }
    601 #endif
    602 
    603     /* test U_GC_... */
    604     if(
    605         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
    606         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
    607         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
    608         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
    609         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
    610         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
    611     ) {
    612         log_err("error: U_GET_GC_MASK does not work properly\n");
    613     }
    614 
    615     mask=0;
    616     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
    617 
    618     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
    619     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
    620     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
    621     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
    622     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
    623 
    624     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
    625     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
    626     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
    627 
    628     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
    629     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
    630     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
    631 
    632     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
    633     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
    634     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
    635 
    636     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
    637     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
    638     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
    639     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
    640 
    641     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
    642     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
    643     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
    644     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
    645     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
    646 
    647     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
    648     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
    649     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
    650     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
    651 
    652     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
    653     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
    654 
    655     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
    656         log_err("error: problems with U_GC_XX_MASK constants\n");
    657     }
    658 
    659     mask=0;
    660     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
    661     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
    662     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
    663     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
    664     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
    665     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
    666     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
    667 
    668     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
    669         log_err("error: problems with U_GC_Y_MASK constants\n");
    670     }
    671     {
    672         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
    673         for(i=0; i<10; i++){
    674             if(digit[i]!=u_forDigit(i,10)){
    675                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
    676             }
    677         }
    678     }
    679 
    680     /* test u_digit() */
    681     {
    682         static const struct {
    683             UChar32 c;
    684             int8_t radix, value;
    685         } data[]={
    686             /* base 16 */
    687             { 0x0031, 16, 1 },
    688             { 0x0038, 16, 8 },
    689             { 0x0043, 16, 12 },
    690             { 0x0066, 16, 15 },
    691             { 0x00e4, 16, -1 },
    692             { 0x0662, 16, 2 },
    693             { 0x06f5, 16, 5 },
    694             { 0xff13, 16, 3 },
    695             { 0xff41, 16, 10 },
    696 
    697             /* base 8 */
    698             { 0x0031, 8, 1 },
    699             { 0x0038, 8, -1 },
    700             { 0x0043, 8, -1 },
    701             { 0x0066, 8, -1 },
    702             { 0x00e4, 8, -1 },
    703             { 0x0662, 8, 2 },
    704             { 0x06f5, 8, 5 },
    705             { 0xff13, 8, 3 },
    706             { 0xff41, 8, -1 },
    707 
    708             /* base 36 */
    709             { 0x5a, 36, 35 },
    710             { 0x7a, 36, 35 },
    711             { 0xff3a, 36, 35 },
    712             { 0xff5a, 36, 35 },
    713 
    714             /* wrong radix values */
    715             { 0x0031, 1, -1 },
    716             { 0xff3a, 37, -1 }
    717         };
    718 
    719         for(i=0; i<LENGTHOF(data); ++i) {
    720             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
    721                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
    722                         data[i].c,
    723                         data[i].radix,
    724                         u_digit(data[i].c, data[i].radix),
    725                         data[i].value);
    726             }
    727         }
    728     }
    729 }
    730 
    731 /* test C/POSIX-style functions --------------------------------------------- */
    732 
    733 /* bit flags */
    734 #define ISAL     1
    735 #define ISLO     2
    736 #define ISUP     4
    737 
    738 #define ISDI     8
    739 #define ISXD  0x10
    740 
    741 #define ISAN  0x20
    742 
    743 #define ISPU  0x40
    744 #define ISGR  0x80
    745 #define ISPR 0x100
    746 
    747 #define ISSP 0x200
    748 #define ISBL 0x400
    749 #define ISCN 0x800
    750 
    751 /* C/POSIX-style functions, in the same order as the bit flags */
    752 typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);
    753 
    754 static const struct {
    755     IsPOSIXClass *fn;
    756     const char *name;
    757 } posixClasses[]={
    758     { u_isalpha, "isalpha" },
    759     { u_islower, "islower" },
    760     { u_isupper, "isupper" },
    761     { u_isdigit, "isdigit" },
    762     { u_isxdigit, "isxdigit" },
    763     { u_isalnum, "isalnum" },
    764     { u_ispunct, "ispunct" },
    765     { u_isgraph, "isgraph" },
    766     { u_isprint, "isprint" },
    767     { u_isspace, "isspace" },
    768     { u_isblank, "isblank" },
    769     { u_iscntrl, "iscntrl" }
    770 };
    771 
    772 static const struct {
    773     UChar32 c;
    774     uint32_t posixResults;
    775 } posixData[]={
    776     { 0x0008,                                                        ISCN },    /* backspace */
    777     { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    778     { 0x000a,                                              ISSP|     ISCN },    /* LF */
    779     { 0x000c,                                              ISSP|     ISCN },    /* FF */
    780     { 0x000d,                                              ISSP|     ISCN },    /* CR */
    781     { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    782     { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    783     { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    784     { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    785     { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    786     { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    787     { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    788     { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    789     { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    790     { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    791     { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    792     { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    793     { 0x0600,                                                        ISCN },    /* arabic number sign */
    794     { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    795     { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    796     { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    797     { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    798     { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    799     { 0x200b,                                                        ISCN },    /* ZWSP */
    800   /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    801     { 0x200e,                                                        ISCN },    /* LRM */
    802     { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    803     { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    804     { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    805     { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    806     { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    807     { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    808     { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    809     { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
    810 };
    811 
    812 static void
    813 TestPOSIX() {
    814     uint32_t mask;
    815     int32_t cl, i;
    816     UBool expect;
    817 
    818     mask=1;
    819     for(cl=0; cl<12; ++cl) {
    820         for(i=0; i<LENGTHOF(posixData); ++i) {
    821             expect=(UBool)((posixData[i].posixResults&mask)!=0);
    822             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
    823                 log_err("u_%s(U+%04x)=%s is wrong\n",
    824                     posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
    825             }
    826         }
    827         mask<<=1;
    828     }
    829 }
    830 
    831 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
    832 static void TestControlPrint()
    833 {
    834     const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
    835     const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
    836     const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
    837     const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
    838     UChar32 c;
    839 
    840     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, LENGTHOF(sampleControl), TRUE);
    841     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, LENGTHOF(sampleNonControl), FALSE);
    842 
    843     testSampleCharProps(u_isprint, "u_isprint",
    844                         samplePrintable, LENGTHOF(samplePrintable), TRUE);
    845     testSampleCharProps(u_isprint, "u_isprint",
    846                         sampleNonPrintable, LENGTHOF(sampleNonPrintable), FALSE);
    847 
    848     /* test all ISO 8 controls */
    849     for(c=0; c<=0x9f; ++c) {
    850         if(c==0x20) {
    851             /* skip ASCII graphic characters and continue with DEL */
    852             c=0x7f;
    853         }
    854         if(!u_iscntrl(c)) {
    855             log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
    856         }
    857         if(!u_isISOControl(c)) {
    858             log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
    859         }
    860         if(u_isprint(c)) {
    861             log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
    862         }
    863     }
    864 
    865     /* test all Latin-1 graphic characters */
    866     for(c=0x20; c<=0xff; ++c) {
    867         if(c==0x7f) {
    868             c=0xa0;
    869         } else if(c==0xad) {
    870             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
    871             ++c;
    872         }
    873         if(!u_isprint(c)) {
    874             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
    875         }
    876     }
    877 }
    878 
    879 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
    880 static void TestIdentifier()
    881 {
    882     const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
    883     const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
    884     const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
    885     const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
    886     const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
    887     const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
    888     const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
    889     const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
    890     const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
    891     const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
    892 
    893     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
    894                         sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
    895     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
    896                         sampleNonJavaIDStart, LENGTHOF(sampleNonJavaIDStart), FALSE);
    897 
    898     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    899                         sampleJavaIDPart, LENGTHOF(sampleJavaIDPart), TRUE);
    900     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    901                         sampleNonJavaIDPart, LENGTHOF(sampleNonJavaIDPart), FALSE);
    902 
    903     /* IDPart should imply IDStart */
    904     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    905                         sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
    906 
    907     testSampleCharProps(u_isIDStart, "u_isIDStart",
    908                         sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
    909     testSampleCharProps(u_isIDStart, "u_isIDStart",
    910                         sampleNonUnicodeIDStart, LENGTHOF(sampleNonUnicodeIDStart), FALSE);
    911 
    912     testSampleCharProps(u_isIDPart, "u_isIDPart",
    913                         sampleUnicodeIDPart, LENGTHOF(sampleUnicodeIDPart), TRUE);
    914     testSampleCharProps(u_isIDPart, "u_isIDPart",
    915                         sampleNonUnicodeIDPart, LENGTHOF(sampleNonUnicodeIDPart), FALSE);
    916 
    917     /* IDPart should imply IDStart */
    918     testSampleCharProps(u_isIDPart, "u_isIDPart",
    919                         sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
    920 
    921     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
    922                         sampleIDIgnore, LENGTHOF(sampleIDIgnore), TRUE);
    923     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
    924                         sampleNonIDIgnore, LENGTHOF(sampleNonIDIgnore), FALSE);
    925 }
    926 
    927 /* for each line of UnicodeData.txt, check some of the properties */
    928 typedef struct UnicodeDataContext {
    929 #if UCONFIG_NO_NORMALIZATION
    930     const void *dummy;
    931 #else
    932     const UNormalizer2 *nfc;
    933     const UNormalizer2 *nfkc;
    934 #endif
    935 } UnicodeDataContext;
    936 
    937 /*
    938  * ### TODO
    939  * This test fails incorrectly if the First or Last code point of a repetitive area
    940  * is overridden, which is allowed and is encouraged for the PUAs.
    941  * Currently, this means that both area First/Last and override lines are
    942  * tested against the properties from the API,
    943  * and the area boundary will not match and cause an error.
    944  *
    945  * This function should detect area boundaries and skip them for the test of individual
    946  * code points' properties.
    947  * Then it should check that the areas contain all the same properties except where overridden.
    948  * For this, it would have had to set a flag for which code points were listed explicitly.
    949  */
    950 static void U_CALLCONV
    951 unicodeDataLineFn(void *context,
    952                   char *fields[][2], int32_t fieldCount,
    953                   UErrorCode *pErrorCode)
    954 {
    955     char buffer[100];
    956     const char *d;
    957     char *end;
    958     uint32_t value;
    959     UChar32 c;
    960     int32_t i;
    961     int8_t type;
    962     int32_t dt;
    963     UChar dm[32], s[32];
    964     int32_t dmLength, length;
    965 
    966 #if !UCONFIG_NO_NORMALIZATION
    967     const UNormalizer2 *nfc, *nfkc;
    968 #endif
    969 
    970     /* get the character code, field 0 */
    971     c=strtoul(fields[0][0], &end, 16);
    972     if(end<=fields[0][0] || end!=fields[0][1]) {
    973         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
    974         return;
    975     }
    976     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
    977         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
    978         return;
    979     }
    980 
    981     /* get general category, field 2 */
    982     *fields[2][1]=0;
    983     type = (int8_t)tagValues[MakeProp(fields[2][0])];
    984     if(u_charType(c)!=type) {
    985         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
    986     }
    987     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
    988         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
    989     }
    990 
    991     /* get canonical combining class, field 3 */
    992     value=strtoul(fields[3][0], &end, 10);
    993     if(end<=fields[3][0] || end!=fields[3][1]) {
    994         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
    995         return;
    996     }
    997     if(value>255) {
    998         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
    999         return;
   1000     }
   1001 #if !UCONFIG_NO_NORMALIZATION
   1002     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
   1003         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
   1004     }
   1005     nfkc=((UnicodeDataContext *)context)->nfkc;
   1006     if(value!=unorm2_getCombiningClass(nfkc, c)) {
   1007         log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
   1008     }
   1009 #endif
   1010 
   1011     /* get BiDi category, field 4 */
   1012     *fields[4][1]=0;
   1013     i=MakeDir(fields[4][0]);
   1014     if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
   1015         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
   1016     }
   1017 
   1018     /* get Decomposition_Type & Decomposition_Mapping, field 5 */
   1019     d=NULL;
   1020     if(fields[5][0]==fields[5][1]) {
   1021         /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
   1022         if(c==0xac00 || c==0xd7a3) {
   1023             dt=U_DT_CANONICAL;
   1024         } else {
   1025             dt=U_DT_NONE;
   1026         }
   1027     } else {
   1028         d=fields[5][0];
   1029         *fields[5][1]=0;
   1030         dt=UCHAR_INVALID_CODE;
   1031         if(*d=='<') {
   1032             end=strchr(++d, '>');
   1033             if(end!=NULL) {
   1034                 *end=0;
   1035                 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
   1036                 d=u_skipWhitespace(end+1);
   1037             }
   1038         } else {
   1039             dt=U_DT_CANONICAL;
   1040         }
   1041     }
   1042     if(dt>U_DT_NONE) {
   1043         if(c==0xac00) {
   1044             dm[0]=0x1100;
   1045             dm[1]=0x1161;
   1046             dm[2]=0;
   1047             dmLength=2;
   1048         } else if(c==0xd7a3) {
   1049             dm[0]=0xd788;
   1050             dm[1]=0x11c2;
   1051             dm[2]=0;
   1052             dmLength=2;
   1053         } else {
   1054             dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
   1055         }
   1056     } else {
   1057         dmLength=-1;
   1058     }
   1059     if(dt<0 || U_FAILURE(*pErrorCode)) {
   1060         log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
   1061         return;
   1062     }
   1063 #if !UCONFIG_NO_NORMALIZATION
   1064     i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
   1065     if(i!=dt) {
   1066         log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
   1067     }
   1068     /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
   1069     length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
   1070     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
   1071         log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
   1072                 "or the Decomposition_Mapping is different (%s)\n",
   1073                 c, length, dmLength, u_errorName(*pErrorCode));
   1074         return;
   1075     }
   1076     /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
   1077     if(dt!=U_DT_CANONICAL) {
   1078         dmLength=-1;
   1079     }
   1080     nfc=((UnicodeDataContext *)context)->nfc;
   1081     length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
   1082     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
   1083         log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
   1084                 "or the Decomposition_Mapping is different (%s)\n",
   1085                 c, length, dmLength, u_errorName(*pErrorCode));
   1086         return;
   1087     }
   1088     /* recompose */
   1089     if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
   1090         UChar32 a, b, composite;
   1091         i=0;
   1092         U16_NEXT(dm, i, dmLength, a);
   1093         U16_NEXT(dm, i, dmLength, b);
   1094         /* i==dmLength */
   1095         composite=unorm2_composePair(nfc, a, b);
   1096         if(composite!=c) {
   1097             log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
   1098                     (long)c, (long)a, (long)b, (long)composite);
   1099         }
   1100         /*
   1101          * Note: NFKC has fewer round-trip mappings than NFC,
   1102          * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
   1103          */
   1104     }
   1105 #endif
   1106 
   1107     /* get ISO Comment, field 11 */
   1108     *fields[11][1]=0;
   1109     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
   1110     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
   1111         log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
   1112             c, u_errorName(*pErrorCode),
   1113             U_FAILURE(*pErrorCode) ? buffer : "[error]",
   1114             fields[11][0]);
   1115     }
   1116 
   1117     /* get uppercase mapping, field 12 */
   1118     if(fields[12][0]!=fields[12][1]) {
   1119         value=strtoul(fields[12][0], &end, 16);
   1120         if(end!=fields[12][1]) {
   1121             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
   1122             return;
   1123         }
   1124         if((UChar32)value!=u_toupper(c)) {
   1125             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
   1126         }
   1127     } else {
   1128         /* no case mapping: the API must map the code point to itself */
   1129         if(c!=u_toupper(c)) {
   1130             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
   1131         }
   1132     }
   1133 
   1134     /* get lowercase mapping, field 13 */
   1135     if(fields[13][0]!=fields[13][1]) {
   1136         value=strtoul(fields[13][0], &end, 16);
   1137         if(end!=fields[13][1]) {
   1138             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
   1139             return;
   1140         }
   1141         if((UChar32)value!=u_tolower(c)) {
   1142             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
   1143         }
   1144     } else {
   1145         /* no case mapping: the API must map the code point to itself */
   1146         if(c!=u_tolower(c)) {
   1147             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
   1148         }
   1149     }
   1150 
   1151     /* get titlecase mapping, field 14 */
   1152     if(fields[14][0]!=fields[14][1]) {
   1153         value=strtoul(fields[14][0], &end, 16);
   1154         if(end!=fields[14][1]) {
   1155             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
   1156             return;
   1157         }
   1158         if((UChar32)value!=u_totitle(c)) {
   1159             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
   1160         }
   1161     } else {
   1162         /* no case mapping: the API must map the code point to itself */
   1163         if(c!=u_totitle(c)) {
   1164             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
   1165         }
   1166     }
   1167 }
   1168 
   1169 static UBool U_CALLCONV
   1170 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
   1171     static const UChar32 test[][2]={
   1172         {0x41, U_UPPERCASE_LETTER},
   1173         {0x308, U_NON_SPACING_MARK},
   1174         {0xfffe, U_GENERAL_OTHER_TYPES},
   1175         {0xe0041, U_FORMAT_CHAR},
   1176         {0xeffff, U_UNASSIGNED}
   1177     };
   1178 
   1179     int32_t i, count;
   1180 
   1181     if(0!=strcmp((const char *)context, "a1")) {
   1182         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
   1183         return FALSE;
   1184     }
   1185 
   1186     count=LENGTHOF(test);
   1187     for(i=0; i<count; ++i) {
   1188         if(start<=test[i][0] && test[i][0]<limit) {
   1189             if(type!=(UCharCategory)test[i][1]) {
   1190                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
   1191                         start, limit, (long)type, test[i][0], test[i][1]);
   1192             }
   1193             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
   1194             return i==(count-1) ? FALSE : TRUE;
   1195         }
   1196     }
   1197 
   1198     if(start>test[count-1][0]) {
   1199         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
   1200                 start, limit, (long)type);
   1201         return FALSE;
   1202     }
   1203 
   1204     return TRUE;
   1205 }
   1206 
   1207 static UBool U_CALLCONV
   1208 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
   1209     /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
   1210     static const int32_t defaultBidi[][2]={ /* { limit, class } */
   1211         { 0x0590, U_LEFT_TO_RIGHT },
   1212         { 0x0600, U_RIGHT_TO_LEFT },
   1213         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
   1214         { 0x08A0, U_RIGHT_TO_LEFT },
   1215         { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
   1216         { 0xFB1D, U_LEFT_TO_RIGHT },
   1217         { 0xFB50, U_RIGHT_TO_LEFT },
   1218         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
   1219         { 0xFE70, U_LEFT_TO_RIGHT },
   1220         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
   1221         { 0x10800, U_LEFT_TO_RIGHT },
   1222         { 0x11000, U_RIGHT_TO_LEFT },
   1223         { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
   1224         { 0x1EE00, U_RIGHT_TO_LEFT },
   1225         { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
   1226         { 0x1F000, U_RIGHT_TO_LEFT },
   1227         { 0x110000, U_LEFT_TO_RIGHT }
   1228     };
   1229 
   1230     UChar32 c;
   1231     int32_t i;
   1232     UCharDirection shouldBeDir;
   1233 
   1234     /*
   1235      * LineBreak.txt specifies:
   1236      *   #  - Assigned characters that are not listed explicitly are given the value
   1237      *   #    "AL".
   1238      *   #  - Unassigned characters are given the value "XX".
   1239      *
   1240      * PUA characters are listed explicitly with "XX".
   1241      * Verify that no assigned character has "XX".
   1242      */
   1243     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
   1244         c=start;
   1245         while(c<limit) {
   1246             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
   1247                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
   1248             }
   1249             ++c;
   1250         }
   1251     }
   1252 
   1253     /*
   1254      * Verify default Bidi classes.
   1255      * For recent Unicode versions, see UCD.html.
   1256      *
   1257      * For older Unicode versions:
   1258      * See table 3-7 "Bidirectional Character Types" in UAX #9.
   1259      * http://www.unicode.org/reports/tr9/
   1260      *
   1261      * See also DerivedBidiClass.txt for Cn code points!
   1262      *
   1263      * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
   1264      * changed some default values.
   1265      * In particular, non-characters and unassigned Default Ignorable Code Points
   1266      * change from L to BN.
   1267      *
   1268      * UCD.html version 4.0.1 does not yet reflect these changes.
   1269      */
   1270     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
   1271         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
   1272         c=start;
   1273         for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
   1274             if((int32_t)c<defaultBidi[i][0]) {
   1275                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
   1276                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
   1277                         shouldBeDir=U_BOUNDARY_NEUTRAL;
   1278                     } else {
   1279                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
   1280                     }
   1281 
   1282                     if( u_charDirection(c)!=shouldBeDir ||
   1283                         u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
   1284                     ) {
   1285                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
   1286                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
   1287                     }
   1288                     ++c;
   1289                 }
   1290             }
   1291         }
   1292     }
   1293 
   1294     return TRUE;
   1295 }
   1296 
   1297 /* tests for several properties */
   1298 static void TestUnicodeData()
   1299 {
   1300     UVersionInfo expectVersionArray;
   1301     UVersionInfo versionArray;
   1302     char *fields[15][2];
   1303     UErrorCode errorCode;
   1304     UChar32 c;
   1305     int8_t type;
   1306 
   1307     UnicodeDataContext context;
   1308 
   1309     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
   1310     u_getUnicodeVersion(versionArray);
   1311     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
   1312     {
   1313         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
   1314         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
   1315     }
   1316 
   1317 #if defined(ICU_UNICODE_VERSION)
   1318     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
   1319     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
   1320     {
   1321          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
   1322     }
   1323 #endif
   1324 
   1325     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
   1326         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
   1327     }
   1328 
   1329     errorCode=U_ZERO_ERROR;
   1330 #if !UCONFIG_NO_NORMALIZATION
   1331     context.nfc=unorm2_getNFCInstance(&errorCode);
   1332     context.nfkc=unorm2_getNFKCInstance(&errorCode);
   1333     if(U_FAILURE(errorCode)) {
   1334         log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
   1335         return;
   1336     }
   1337 #endif
   1338     parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
   1339     if(U_FAILURE(errorCode)) {
   1340         return; /* if we couldn't parse UnicodeData.txt, we should return */
   1341     }
   1342 
   1343     /* sanity check on repeated properties */
   1344     for(c=0xfffe; c<=0x10ffff;) {
   1345         type=u_charType(c);
   1346         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1347             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1348         }
   1349         if(type!=U_UNASSIGNED) {
   1350             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
   1351         }
   1352         if((c&0xffff)==0xfffe) {
   1353             ++c;
   1354         } else {
   1355             c+=0xffff;
   1356         }
   1357     }
   1358 
   1359     /* test that PUA is not "unassigned" */
   1360     for(c=0xe000; c<=0x10fffd;) {
   1361         type=u_charType(c);
   1362         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1363             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1364         }
   1365         if(type==U_UNASSIGNED) {
   1366             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
   1367         } else if(type!=U_PRIVATE_USE_CHAR) {
   1368             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
   1369         }
   1370         if(c==0xf8ff) {
   1371             c=0xf0000;
   1372         } else if(c==0xffffd) {
   1373             c=0x100000;
   1374         } else {
   1375             ++c;
   1376         }
   1377     }
   1378 
   1379     /* test u_enumCharTypes() */
   1380     u_enumCharTypes(enumTypeRange, "a1");
   1381 
   1382     /* check default properties */
   1383     u_enumCharTypes(enumDefaultsRange, NULL);
   1384 }
   1385 
   1386 static void TestCodeUnit(){
   1387     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
   1388 
   1389     int32_t i;
   1390 
   1391     for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
   1392         UChar c=codeunit[i];
   1393         if(i<4){
   1394             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
   1395                 log_err("ERROR: U+%04x is a single", c);
   1396             }
   1397 
   1398         }
   1399         if(i >= 4 && i< 8){
   1400             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
   1401                 log_err("ERROR: U+%04x is a first surrogate", c);
   1402             }
   1403         }
   1404         if(i >= 8 && i< 12){
   1405             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
   1406                 log_err("ERROR: U+%04x is a second surrogate", c);
   1407             }
   1408         }
   1409     }
   1410 
   1411 }
   1412 
   1413 static void TestCodePoint(){
   1414     const UChar32 codePoint[]={
   1415         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
   1416         0xd800,
   1417         0xdbff,
   1418         0xdc00,
   1419         0xdfff,
   1420         0xdc04,
   1421         0xd821,
   1422         /*not a surrogate, valid, isUnicodeChar , not Error*/
   1423         0x20ac,
   1424         0xd7ff,
   1425         0xe000,
   1426         0xe123,
   1427         0x0061,
   1428         0xe065,
   1429         0x20402,
   1430         0x24506,
   1431         0x23456,
   1432         0x20402,
   1433         0x10402,
   1434         0x23456,
   1435         /*not a surrogate, not valid, isUnicodeChar, isError */
   1436         0x0015,
   1437         0x009f,
   1438         /*not a surrogate, not valid, not isUnicodeChar, isError */
   1439         0xffff,
   1440         0xfffe,
   1441     };
   1442     int32_t i;
   1443     for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
   1444         UChar32 c=codePoint[i];
   1445         if(i<6){
   1446             if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
   1447                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1448             }
   1449             if(UTF_IS_VALID(c)){
   1450                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1451             }
   1452             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
   1453                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1454             }
   1455             if(UTF_IS_ERROR(c)){
   1456                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1457             }
   1458         }else if(i >=6 && i<18){
   1459             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1460                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1461             }
   1462             if(!UTF_IS_VALID(c)){
   1463                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1464             }
   1465             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
   1466                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1467             }
   1468             if(UTF_IS_ERROR(c)){
   1469                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1470             }
   1471         }else if(i >=18 && i<20){
   1472             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1473                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1474             }
   1475             if(UTF_IS_VALID(c)){
   1476                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1477             }
   1478             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
   1479                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1480             }
   1481             if(!UTF_IS_ERROR(c)){
   1482                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1483             }
   1484         }
   1485         else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
   1486             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1487                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1488             }
   1489             if(UTF_IS_VALID(c)){
   1490                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1491             }
   1492             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
   1493                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1494             }
   1495             if(!UTF_IS_ERROR(c)){
   1496                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1497             }
   1498         }
   1499     }
   1500 
   1501     if(
   1502         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
   1503         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
   1504         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
   1505         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
   1506     ) {
   1507         log_err("error with U_IS_BMP()\n");
   1508     }
   1509 
   1510     if(
   1511         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
   1512         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
   1513         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
   1514         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
   1515     ) {
   1516         log_err("error with U_IS_SUPPLEMENTARY()\n");
   1517     }
   1518 }
   1519 
   1520 static void TestCharLength()
   1521 {
   1522     const int32_t codepoint[]={
   1523         1, 0x0061,
   1524         1, 0xe065,
   1525         1, 0x20ac,
   1526         2, 0x20402,
   1527         2, 0x23456,
   1528         2, 0x24506,
   1529         2, 0x20402,
   1530         2, 0x10402,
   1531         1, 0xd7ff,
   1532         1, 0xe000
   1533     };
   1534 
   1535     int32_t i;
   1536     UBool multiple;
   1537     for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
   1538         UChar32 c=codepoint[i+1];
   1539         if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
   1540             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
   1541         }
   1542         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
   1543         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
   1544             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
   1545         }
   1546     }
   1547 }
   1548 
   1549 /*internal functions ----*/
   1550 static int32_t MakeProp(char* str)
   1551 {
   1552     int32_t result = 0;
   1553     char* matchPosition =0;
   1554 
   1555     matchPosition = strstr(tagStrings, str);
   1556     if (matchPosition == 0)
   1557     {
   1558         log_err("unrecognized type letter ");
   1559         log_err(str);
   1560     }
   1561     else
   1562         result = (int32_t)((matchPosition - tagStrings) / 2);
   1563     return result;
   1564 }
   1565 
   1566 static int32_t MakeDir(char* str)
   1567 {
   1568     int32_t pos = 0;
   1569     for (pos = 0; pos < 19; pos++) {
   1570         if (strcmp(str, dirStrings[pos]) == 0) {
   1571             return pos;
   1572         }
   1573     }
   1574     return -1;
   1575 }
   1576 
   1577 /* test u_charName() -------------------------------------------------------- */
   1578 
   1579 static const struct {
   1580     uint32_t code;
   1581     const char *name, *oldName, *extName, *alias;
   1582 } names[]={
   1583     {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},
   1584     {0x01a2, "LATIN CAPITAL LETTER OI", "",
   1585              "LATIN CAPITAL LETTER OI",
   1586              "LATIN CAPITAL LETTER GHA"},
   1587     {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
   1588              "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
   1589     {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
   1590              "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
   1591              "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
   1592     {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" },
   1593     {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" },
   1594     {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" },
   1595     {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" },
   1596     {0xd800, "", "", "<lead surrogate-D800>" },
   1597     {0xdc00, "", "", "<trail surrogate-DC00>" },
   1598     {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS" },
   1599     {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },
   1600     {0xffff, "", "", "<noncharacter-FFFF>" },
   1601     {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
   1602               "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
   1603               "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
   1604     {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" }
   1605 };
   1606 
   1607 static UBool
   1608 enumCharNamesFn(void *context,
   1609                 UChar32 code, UCharNameChoice nameChoice,
   1610                 const char *name, int32_t length) {
   1611     int32_t *pCount=(int32_t *)context;
   1612     const char *expected;
   1613     int i;
   1614 
   1615     if(length<=0 || length!=(int32_t)strlen(name)) {
   1616         /* should not be called with an empty string or invalid length */
   1617         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
   1618         return TRUE;
   1619     }
   1620 
   1621     ++*pCount;
   1622     for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
   1623         if(code==(UChar32)names[i].code) {
   1624             switch (nameChoice) {
   1625                 case U_EXTENDED_CHAR_NAME:
   1626                     if(0!=strcmp(name, names[i].extName)) {
   1627                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
   1628                     }
   1629                     break;
   1630                 case U_UNICODE_CHAR_NAME:
   1631                     if(0!=strcmp(name, names[i].name)) {
   1632                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
   1633                     }
   1634                     break;
   1635                 case U_UNICODE_10_CHAR_NAME:
   1636                     expected=names[i].oldName;
   1637                     if(expected[0]==0 || 0!=strcmp(name, expected)) {
   1638                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
   1639                     }
   1640                     break;
   1641                 case U_CHAR_NAME_ALIAS:
   1642                     expected=names[i].alias;
   1643                     if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
   1644                         log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
   1645                     }
   1646                     break;
   1647                 case U_CHAR_NAME_CHOICE_COUNT:
   1648                     break;
   1649             }
   1650             break;
   1651         }
   1652     }
   1653     return TRUE;
   1654 }
   1655 
   1656 struct enumExtCharNamesContext {
   1657     uint32_t length;
   1658     int32_t last;
   1659 };
   1660 
   1661 static UBool
   1662 enumExtCharNamesFn(void *context,
   1663                 UChar32 code, UCharNameChoice nameChoice,
   1664                 const char *name, int32_t length) {
   1665     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
   1666 
   1667     if (ecncp->last != (int32_t) code - 1) {
   1668         if (ecncp->last < 0) {
   1669             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
   1670         } else {
   1671             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
   1672         }
   1673     }
   1674     ecncp->last = (int32_t) code;
   1675 
   1676     if (!*name) {
   1677         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
   1678     }
   1679 
   1680     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
   1681 }
   1682 
   1683 /**
   1684  * This can be made more efficient by moving it into putil.c and having
   1685  * it directly access the ebcdic translation tables.
   1686  * TODO: If we get this method in putil.c, then delete it from here.
   1687  */
   1688 static UChar
   1689 u_charToUChar(char c) {
   1690     UChar uc;
   1691     u_charsToUChars(&c, &uc, 1);
   1692     return uc;
   1693 }
   1694 
   1695 static void
   1696 TestCharNames() {
   1697     static char name[80];
   1698     UErrorCode errorCode=U_ZERO_ERROR;
   1699     struct enumExtCharNamesContext extContext;
   1700     const char *expected;
   1701     int32_t length;
   1702     UChar32 c;
   1703     int32_t i;
   1704 
   1705     log_verbose("Testing uprv_getMaxCharNameLength()\n");
   1706     length=uprv_getMaxCharNameLength();
   1707     if(length==0) {
   1708         /* no names data available */
   1709         return;
   1710     }
   1711     if(length<83) { /* Unicode 3.2 max char name length */
   1712         log_err("uprv_getMaxCharNameLength()=%d is too short");
   1713     }
   1714     /* ### TODO same tests for max ISO comment length as for max name length */
   1715 
   1716     log_verbose("Testing u_charName()\n");
   1717     for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
   1718         /* modern Unicode character name */
   1719         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
   1720         if(U_FAILURE(errorCode)) {
   1721             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
   1722             return;
   1723         }
   1724         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
   1725             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
   1726         }
   1727 
   1728         /* find the modern name */
   1729         if (*names[i].name) {
   1730             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
   1731             if(U_FAILURE(errorCode)) {
   1732                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
   1733                 return;
   1734             }
   1735             if(c!=(UChar32)names[i].code) {
   1736                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
   1737             }
   1738         }
   1739 
   1740         /* Unicode 1.0 character name */
   1741         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
   1742         if(U_FAILURE(errorCode)) {
   1743             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
   1744             return;
   1745         }
   1746         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
   1747             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
   1748         }
   1749 
   1750         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
   1751         if(names[i].oldName[0]!=0 /* && length>0 */) {
   1752             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
   1753             if(U_FAILURE(errorCode)) {
   1754                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
   1755                 return;
   1756             }
   1757             if(c!=(UChar32)names[i].code) {
   1758                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
   1759             }
   1760         }
   1761 
   1762         /* Unicode character name alias */
   1763         length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
   1764         if(U_FAILURE(errorCode)) {
   1765             log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
   1766             return;
   1767         }
   1768         expected=names[i].alias;
   1769         if(expected==NULL) {
   1770             expected="";
   1771         }
   1772         if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
   1773             log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
   1774                     names[i].code, name, length, expected);
   1775         }
   1776 
   1777         /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
   1778         if(expected[0]!=0 /* && length>0 */) {
   1779             c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
   1780             if(U_FAILURE(errorCode)) {
   1781                 log_err("u_charFromName(%s - alias) error %s\n",
   1782                         expected, u_errorName(errorCode));
   1783                 return;
   1784             }
   1785             if(c!=(UChar32)names[i].code) {
   1786                 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
   1787                         expected, c, names[i].code);
   1788             }
   1789         }
   1790     }
   1791 
   1792     /* test u_enumCharNames() */
   1793     length=0;
   1794     errorCode=U_ZERO_ERROR;
   1795     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
   1796     if(U_FAILURE(errorCode) || length<94140) {
   1797         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
   1798     }
   1799 
   1800     extContext.length = 0;
   1801     extContext.last = -1;
   1802     errorCode=U_ZERO_ERROR;
   1803     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
   1804     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
   1805         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
   1806     }
   1807 
   1808     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
   1809     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
   1810         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
   1811     }
   1812 
   1813     /* Test getCharNameCharacters */
   1814     if(!getTestOption(QUICK_OPTION)) {
   1815         enum { BUFSIZE = 256 };
   1816         UErrorCode ec = U_ZERO_ERROR;
   1817         char buf[BUFSIZE];
   1818         int32_t maxLength;
   1819         UChar32 cp;
   1820         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
   1821         int32_t l1, l2;
   1822         UBool map[256];
   1823         UBool ok;
   1824 
   1825         USet* set = uset_open(1, 0); /* empty set */
   1826         USet* dumb = uset_open(1, 0); /* empty set */
   1827 
   1828         /*
   1829          * uprv_getCharNameCharacters() will likely return more lowercase
   1830          * letters than actual character names contain because
   1831          * it includes all the characters in lowercased names of
   1832          * general categories, for the full possible set of extended names.
   1833          */
   1834         {
   1835             USetAdder sa={
   1836                 NULL,
   1837                 uset_add,
   1838                 uset_addRange,
   1839                 uset_addString,
   1840                 NULL /* don't need remove() */
   1841             };
   1842             sa.set=set;
   1843             uprv_getCharNameCharacters(&sa);
   1844         }
   1845 
   1846         /* build set the dumb (but sure-fire) way */
   1847         for (i=0; i<256; ++i) {
   1848             map[i] = FALSE;
   1849         }
   1850 
   1851         maxLength=0;
   1852         for (cp=0; cp<0x110000; ++cp) {
   1853             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
   1854                                      buf, BUFSIZE, &ec);
   1855             if (U_FAILURE(ec)) {
   1856                 log_err("FAIL: u_charName failed when it shouldn't\n");
   1857                 uset_close(set);
   1858                 uset_close(dumb);
   1859                 return;
   1860             }
   1861             if(len>maxLength) {
   1862                 maxLength=len;
   1863             }
   1864 
   1865             for (i=0; i<len; ++i) {
   1866                 if (!map[(uint8_t) buf[i]]) {
   1867                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
   1868                     map[(uint8_t) buf[i]] = TRUE;
   1869                 }
   1870             }
   1871 
   1872             /* test for leading/trailing whitespace */
   1873             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
   1874                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
   1875             }
   1876         }
   1877 
   1878         if(map[(uint8_t)'\t']) {
   1879             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
   1880         }
   1881 
   1882         length=uprv_getMaxCharNameLength();
   1883         if(length!=maxLength) {
   1884             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
   1885                     length, maxLength);
   1886         }
   1887 
   1888         /* compare the sets.  Where is my uset_equals?!! */
   1889         ok=TRUE;
   1890         for(i=0; i<256; ++i) {
   1891             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
   1892                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
   1893                     /* ignore lowercase a-z that are in set but not in dumb */
   1894                     ok=TRUE;
   1895                 } else {
   1896                     ok=FALSE;
   1897                     break;
   1898                 }
   1899             }
   1900         }
   1901 
   1902         l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
   1903         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
   1904         if (U_FAILURE(ec)) {
   1905             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
   1906             uset_close(set);
   1907             uset_close(dumb);
   1908             return;
   1909         }
   1910 
   1911         if (l1 >= BUFSIZE) {
   1912             l1 = BUFSIZE-1;
   1913             pat[l1] = 0;
   1914         }
   1915         if (l2 >= BUFSIZE) {
   1916             l2 = BUFSIZE-1;
   1917             dumbPat[l2] = 0;
   1918         }
   1919 
   1920         if (!ok) {
   1921             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
   1922                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
   1923         } else if(getTestOption(VERBOSITY_OPTION)) {
   1924             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
   1925         }
   1926 
   1927         uset_close(set);
   1928         uset_close(dumb);
   1929     }
   1930 
   1931     /* ### TODO: test error cases and other interesting things */
   1932 }
   1933 
   1934 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
   1935 
   1936 static void
   1937 TestMirroring() {
   1938     USet *set;
   1939     UErrorCode errorCode;
   1940 
   1941     UChar32 start, end, c2, c3;
   1942     int32_t i;
   1943 
   1944     U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
   1945 
   1946     U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
   1947 
   1948     log_verbose("Testing u_isMirrored()\n");
   1949     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
   1950          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
   1951         )
   1952     ) {
   1953         log_err("u_isMirrored() does not work correctly\n");
   1954     }
   1955 
   1956     log_verbose("Testing u_charMirror()\n");
   1957     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
   1958          u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
   1959          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
   1960          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
   1961          u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
   1962          )
   1963     ) {
   1964         log_err("u_charMirror() does not work correctly\n");
   1965     }
   1966 
   1967     /* verify that Bidi_Mirroring_Glyph roundtrips */
   1968     errorCode=U_ZERO_ERROR;
   1969     set=uset_openPattern(mirroredPattern, 17, &errorCode);
   1970 
   1971     if (U_FAILURE(errorCode)) {
   1972         log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
   1973     } else {
   1974         for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
   1975             do {
   1976                 c2=u_charMirror(start);
   1977                 c3=u_charMirror(c2);
   1978                 if(c3!=start) {
   1979                     log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
   1980                 }
   1981             } while(++start<=end);
   1982         }
   1983     }
   1984 
   1985     uset_close(set);
   1986 }
   1987 
   1988 
   1989 struct RunTestData
   1990 {
   1991     const char *runText;
   1992     UScriptCode runCode;
   1993 };
   1994 
   1995 typedef struct RunTestData RunTestData;
   1996 
   1997 static void
   1998 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
   1999                 const char *prefix)
   2000 {
   2001     int32_t run, runStart, runLimit;
   2002     UScriptCode runCode;
   2003 
   2004     /* iterate over all the runs */
   2005     run = 0;
   2006     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
   2007         if (runStart != runStarts[run]) {
   2008             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
   2009                 prefix, run, runStarts[run], runStart);
   2010         }
   2011 
   2012         if (runLimit != runStarts[run + 1]) {
   2013             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
   2014                 prefix, run, runStarts[run + 1], runLimit);
   2015         }
   2016 
   2017         if (runCode != testData[run].runCode) {
   2018             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
   2019                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
   2020         }
   2021 
   2022         run += 1;
   2023 
   2024         /* stop when we've seen all the runs we expect to see */
   2025         if (run >= nRuns) {
   2026             break;
   2027         }
   2028     }
   2029 
   2030     /* Complain if we didn't see then number of runs we expected */
   2031     if (run != nRuns) {
   2032         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
   2033     }
   2034 }
   2035 
   2036 static void
   2037 TestUScriptRunAPI()
   2038 {
   2039     static const RunTestData testData1[] = {
   2040         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
   2041         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
   2042         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
   2043         {"English (", USCRIPT_LATIN},
   2044         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
   2045         {") ", USCRIPT_LATIN},
   2046         {"\\u6F22\\u5B75", USCRIPT_HAN},
   2047         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
   2048         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
   2049         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
   2050     };
   2051 
   2052     static const RunTestData testData2[] = {
   2053        {"((((((((((abc))))))))))", USCRIPT_LATIN}
   2054     };
   2055 
   2056     static const struct {
   2057       const RunTestData *testData;
   2058       int32_t nRuns;
   2059     } testDataEntries[] = {
   2060         {testData1, LENGTHOF(testData1)},
   2061         {testData2, LENGTHOF(testData2)}
   2062     };
   2063 
   2064     static const int32_t nTestEntries = LENGTHOF(testDataEntries);
   2065     int32_t testEntry;
   2066 
   2067     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
   2068         UChar testString[1024];
   2069         int32_t runStarts[256];
   2070         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
   2071         const RunTestData *testData = testDataEntries[testEntry].testData;
   2072 
   2073         int32_t run, stringLimit;
   2074         UScriptRun *scriptRun = NULL;
   2075         UErrorCode err;
   2076 
   2077         /*
   2078          * Fill in the test string and the runStarts array.
   2079          */
   2080         stringLimit = 0;
   2081         for (run = 0; run < nTestRuns; run += 1) {
   2082             runStarts[run] = stringLimit;
   2083             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
   2084             /*stringLimit -= 1;*/
   2085         }
   2086 
   2087         /* The limit of the last run */
   2088         runStarts[nTestRuns] = stringLimit;
   2089 
   2090         /*
   2091          * Make sure that calling uscript_OpenRun with a NULL text pointer
   2092          * and a non-zero text length returns the correct error.
   2093          */
   2094         err = U_ZERO_ERROR;
   2095         scriptRun = uscript_openRun(NULL, stringLimit, &err);
   2096 
   2097         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2098             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2099         }
   2100 
   2101         if (scriptRun != NULL) {
   2102             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
   2103             uscript_closeRun(scriptRun);
   2104         }
   2105 
   2106         /*
   2107          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
   2108          * and a zero text length returns the correct error.
   2109          */
   2110         err = U_ZERO_ERROR;
   2111         scriptRun = uscript_openRun(testString, 0, &err);
   2112 
   2113         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2114             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2115         }
   2116 
   2117         if (scriptRun != NULL) {
   2118             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
   2119             uscript_closeRun(scriptRun);
   2120         }
   2121 
   2122         /*
   2123          * Make sure that calling uscript_openRun with a NULL text pointer
   2124          * and a zero text length doesn't return an error.
   2125          */
   2126         err = U_ZERO_ERROR;
   2127         scriptRun = uscript_openRun(NULL, 0, &err);
   2128 
   2129         if (U_FAILURE(err)) {
   2130             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
   2131         }
   2132 
   2133         /* Make sure that the empty iterator doesn't find any runs */
   2134         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
   2135             log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
   2136         }
   2137 
   2138         /*
   2139          * Make sure that calling uscript_setRunText with a NULL text pointer
   2140          * and a non-zero text length returns the correct error.
   2141          */
   2142         err = U_ZERO_ERROR;
   2143         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
   2144 
   2145         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2146             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2147         }
   2148 
        /*
         * Make sure that calling uscript_setRunText with a non-NULL text pointer
         * and a zero text length returns the correct error.
         */
   2153         err = U_ZERO_ERROR;
   2154         uscript_setRunText(scriptRun, testString, 0, &err);
   2155 
   2156         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2157             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2158         }
   2159 
   2160         /*
   2161          * Now call uscript_setRunText on the empty iterator
   2162          * and make sure that it works.
   2163          */
   2164         err = U_ZERO_ERROR;
   2165         uscript_setRunText(scriptRun, testString, stringLimit, &err);
   2166 
   2167         if (U_FAILURE(err)) {
   2168             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
   2169         } else {
   2170             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
   2171         }
   2172 
   2173         uscript_closeRun(scriptRun);
   2174 
        /*
         * Now open an iterator over the testString
         * using uscript_openRun and make sure that it works
         */
   2179         scriptRun = uscript_openRun(testString, stringLimit, &err);
   2180 
   2181         if (U_FAILURE(err)) {
   2182             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
   2183         } else {
   2184             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
   2185         }
   2186 
   2187         /* Now reset the iterator, and make sure
   2188          * that it still works.
   2189          */
   2190         uscript_resetRun(scriptRun);
   2191 
   2192         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
   2193 
   2194         /* Close the iterator */
   2195         uscript_closeRun(scriptRun);
   2196     }
   2197 }
   2198 
   2199 /* test additional, non-core properties */
   2200 static void
   2201 TestAdditionalProperties() {
   2202     /* test data for u_charAge() */
   2203     static const struct {
   2204         UChar32 c;
   2205         UVersionInfo version;
   2206     } charAges[]={
   2207         {0x41,    { 1, 1, 0, 0 }},
   2208         {0xffff,  { 1, 1, 0, 0 }},
   2209         {0x20ab,  { 2, 0, 0, 0 }},
   2210         {0x2fffe, { 2, 0, 0, 0 }},
   2211         {0x20ac,  { 2, 1, 0, 0 }},
   2212         {0xfb1d,  { 3, 0, 0, 0 }},
   2213         {0x3f4,   { 3, 1, 0, 0 }},
   2214         {0x10300, { 3, 1, 0, 0 }},
   2215         {0x220,   { 3, 2, 0, 0 }},
   2216         {0xff60,  { 3, 2, 0, 0 }}
   2217     };
   2218 
   2219     /* test data for u_hasBinaryProperty() */
   2220     static const int32_t
   2221     props[][3]={ /* code point, property, value */
   2222         { 0x0627, UCHAR_ALPHABETIC, TRUE },
   2223         { 0x1034a, UCHAR_ALPHABETIC, TRUE },
   2224         { 0x2028, UCHAR_ALPHABETIC, FALSE },
   2225 
   2226         { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
   2227         { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
   2228 
   2229         { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
   2230         { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
   2231 
   2232         { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
   2233         { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
   2234 
   2235         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
   2236         { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
   2237         { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
   2238         { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
   2239         { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
   2240 
   2241         { 0x058a, UCHAR_DASH, TRUE },
   2242         { 0x007e, UCHAR_DASH, FALSE },
   2243 
   2244         { 0x0c4d, UCHAR_DIACRITIC, TRUE },
   2245         { 0x3000, UCHAR_DIACRITIC, FALSE },
   2246 
   2247         { 0x0e46, UCHAR_EXTENDER, TRUE },
   2248         { 0x0020, UCHAR_EXTENDER, FALSE },
   2249 
   2250 #if !UCONFIG_NO_NORMALIZATION
   2251         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
   2252         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
   2253         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
   2254 
   2255         { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
   2256         { 0x0308, UCHAR_NFD_INERT, FALSE },
   2257 
   2258         { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
   2259         { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
   2260 
   2261         { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
   2262         { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
   2263         { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
   2264         { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
   2265         { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
   2266         { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
   2267 
   2268         { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
   2269         { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
   2270 
   2271         { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
   2272         { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
   2273         { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
   2274         { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
   2275         { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
   2276         { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
   2277 #endif
   2278 
   2279         { 0x0044, UCHAR_HEX_DIGIT, TRUE },
   2280         { 0xff46, UCHAR_HEX_DIGIT, TRUE },
   2281         { 0x0047, UCHAR_HEX_DIGIT, FALSE },
   2282 
   2283         { 0x30fb, UCHAR_HYPHEN, TRUE },
   2284         { 0xfe58, UCHAR_HYPHEN, FALSE },
   2285 
   2286         { 0x2172, UCHAR_ID_CONTINUE, TRUE },
   2287         { 0x0307, UCHAR_ID_CONTINUE, TRUE },
   2288         { 0x005c, UCHAR_ID_CONTINUE, FALSE },
   2289 
   2290         { 0x2172, UCHAR_ID_START, TRUE },
   2291         { 0x007a, UCHAR_ID_START, TRUE },
   2292         { 0x0039, UCHAR_ID_START, FALSE },
   2293 
   2294         { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
   2295         { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
   2296         { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
   2297 
   2298         { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
   2299         { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
   2300 
   2301         { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
   2302         { 0x0345, UCHAR_LOWERCASE, TRUE },
   2303         { 0x0030, UCHAR_LOWERCASE, FALSE },
   2304 
   2305         { 0x1d7a9, UCHAR_MATH, TRUE },
   2306         { 0x2135, UCHAR_MATH, TRUE },
   2307         { 0x0062, UCHAR_MATH, FALSE },
   2308 
   2309         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
   2310         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
   2311         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
   2312 
   2313         { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
   2314         { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
   2315         { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
   2316 
   2317         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
   2318         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
   2319 
   2320         { 0x1d44a, UCHAR_UPPERCASE, TRUE },
   2321         { 0x2162, UCHAR_UPPERCASE, TRUE },
   2322         { 0x0345, UCHAR_UPPERCASE, FALSE },
   2323 
   2324         { 0x0020, UCHAR_WHITE_SPACE, TRUE },
   2325         { 0x202f, UCHAR_WHITE_SPACE, TRUE },
   2326         { 0x3001, UCHAR_WHITE_SPACE, FALSE },
   2327 
   2328         { 0x0711, UCHAR_XID_CONTINUE, TRUE },
   2329         { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
   2330         { 0x007c, UCHAR_XID_CONTINUE, FALSE },
   2331 
   2332         { 0x16ee, UCHAR_XID_START, TRUE },
   2333         { 0x23456, UCHAR_XID_START, TRUE },
   2334         { 0x1d1aa, UCHAR_XID_START, FALSE },
   2335 
   2336         /*
   2337          * Version break:
   2338          * The following properties are only supported starting with the
   2339          * Unicode version indicated in the second field.
   2340          */
   2341         { -1, 0x320, 0 },
   2342 
   2343         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
   2344         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
   2345         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
   2346 
   2347         { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
   2348         { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
   2349         { 0xe0041, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
   2350         { 0xe0100, UCHAR_DEPRECATED, FALSE },
   2351 
   2352         { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
   2353         { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
   2354         { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
   2355         { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
   2356 
   2357         { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
   2358         { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
   2359         { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
   2360         { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
   2361 
   2362         { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
   2363         { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
   2364 
   2365         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
   2366         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
   2367 
   2368         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
   2369         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
   2370 
   2371         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
   2372         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
   2373 
   2374         { 0x2e9b, UCHAR_RADICAL, TRUE },
   2375         { 0x4e00, UCHAR_RADICAL, FALSE },
   2376 
   2377         { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
   2378         { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
   2379 
   2380         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
   2381         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
   2382 
   2383         { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
   2384 
   2385         { 0x002e, UCHAR_S_TERM, TRUE },
   2386         { 0x0061, UCHAR_S_TERM, FALSE },
   2387 
   2388         { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
   2389         { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
   2390         { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
   2391         { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
   2392 
   2393         /* enum/integer type properties */
   2394 
   2395         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
   2396         /* test default Bidi classes for unassigned code points */
   2397         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2398         { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2399         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2400         { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
   2401         { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
   2402         { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2403         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2404         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2405         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2406         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2407         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2408 
   2409         { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2410         { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2411         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2412         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2413         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2414         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2415         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2416         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2417 
   2418         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
   2419         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
   2420         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
   2421         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
   2422         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
   2423         { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2424         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
   2425         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
   2426         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2427         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2428         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
   2429 
   2430         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
   2431         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
   2432 
   2433         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
   2434         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
   2435         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
   2436         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2437         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2438         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2439         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
   2440         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
   2441         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
   2442 
   2443         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2444         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
   2445         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2446         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
   2447         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2448         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
   2449         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2450         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2451         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2452         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2453         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2454         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2455         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2456         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
   2457         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2458         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2459         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2460 
   2461         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
   2462         { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
   2463         { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
   2464 
   2465         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
   2466         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
   2467         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
   2468         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
   2469         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
   2470 
   2471         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
   2472         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
   2473         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
   2474         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
   2475         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
   2476         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2477         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2478         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2479 
   2480         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
   2481         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
   2482         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
   2483         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
   2484         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
   2485         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2486         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2487         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2488         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2489         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2490         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2491         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2492         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
   2493         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
   2494         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
   2495         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
   2496 
   2497         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
   2498 
   2499         /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
   2500 
   2501         { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2502         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2503         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2504         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2505         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2506         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2507         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2508 
   2509         { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2510         { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2511         { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2512         { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2513 
   2514         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2515         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2516         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2517         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2518         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2519         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2520 
   2521         { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2522         { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2523         { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2524         { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2525 
   2526         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2527         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2528         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2529         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2530         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2531         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2532         { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2533 
   2534         { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2535         { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2536         { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2537         { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2538 
   2539         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2540         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2541         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2542         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2543 
   2544         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2545         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2546         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2547         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2548         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2549 
   2550         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2551 
   2552         { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
   2553 
   2554         { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
   2555         { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
   2556         { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
   2557 
   2558         { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2559         { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2560         { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2561         { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
   2562         { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
   2563 
   2564         { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
   2565         { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
   2566         { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
   2567 
   2568         { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
   2569         { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
   2570         { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
   2571         { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
   2572 
   2573         { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
   2574         { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
   2575         { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
   2576         { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
   2577         { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
   2578         { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
   2579 
   2580         { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
   2581         { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
   2582         { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
   2583         { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
   2584 
   2585         { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
   2586         { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
   2587         { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
   2588         { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
   2589 
   2590         { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
   2591         { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
   2592         { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
   2593         { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
   2594 
   2595         { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
   2596 
   2597         /* unassigned code points in new default Bidi R blocks */
   2598         { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2599         { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2600 
   2601         /* test some script codes >127 */
   2602         { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
   2603         { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
   2604         { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
   2605 
   2606         { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
   2607 
   2608         /* value changed in Unicode 6.0 */
   2609         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
   2610 
   2611         { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
   2612 
   2613         /* unassigned code points in new/changed default Bidi AL blocks */
   2614         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2615         { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2616 
   2617         /* undefined UProperty values */
   2618         { 0x61, 0x4a7, 0 },
   2619         { 0x234bc, 0x15ed, 0 }
   2620     };
   2621 
   2622     UVersionInfo version;
   2623     UChar32 c;
   2624     int32_t i, result, uVersion;
   2625     UProperty which;
   2626 
   2627     /* what is our Unicode version? */
   2628     u_getUnicodeVersion(version);
   2629     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
   2630 
   2631     u_charAge(0x20, version);
   2632     if(version[0]==0) {
   2633         /* no additional properties available */
   2634         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
   2635         return;
   2636     }
   2637 
   2638     /* test u_charAge() */
   2639     for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
   2640         u_charAge(charAges[i].c, version);
   2641         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
   2642             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
   2643                 charAges[i].c,
   2644                 version[0], version[1], version[2], version[3],
   2645                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
   2646         }
   2647     }
   2648 
   2649     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
   2650         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
   2651         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
   2652         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
   2653         u_getIntPropertyMinValue(0x2345)!=0
   2654     ) {
   2655         log_err("error: u_getIntPropertyMinValue() wrong\n");
   2656     }
   2657     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
   2658         log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
   2659     }
   2660     if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
   2661         log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
   2662     }
   2663     if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
   2664         log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
   2665     }
   2666     if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
   2667         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
   2668     }
   2669     if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
   2670         log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
   2671     }
   2672     if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
   2673         log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
   2674     }
   2675     if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
   2676         log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
   2677     }
   2678     if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
   2679         log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
   2680     }
   2681     if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
   2682         log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
   2683     }
   2684     if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
   2685         log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
   2686     }
   2687     if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
   2688         log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
   2689     }
   2690     if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
   2691         log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
   2692     }
   2693     if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
   2694         log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
   2695     }
   2696     /*JB#2410*/
   2697     if( u_getIntPropertyMaxValue(0x2345)!=-1) {
   2698         log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
   2699     }
   2700     if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
   2701         log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
   2702     }
   2703     if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
   2704         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
   2705     }
   2706     if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
   2707         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
   2708     }
   2709     if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
   2710         log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
   2711     }
   2712 
   2713     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
   2714     for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
   2715         const char *whichName;
   2716 
   2717         if(props[i][0]<0) {
   2718             /* Unicode version break */
   2719             if(uVersion<props[i][1]) {
   2720                 break; /* do not test properties that are not yet supported */
   2721             } else {
   2722                 continue; /* skip this row */
   2723             }
   2724         }
   2725 
   2726         c=(UChar32)props[i][0];
   2727         which=(UProperty)props[i][1];
   2728         whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
   2729 
   2730         if(which<UCHAR_INT_START) {
   2731             result=u_hasBinaryProperty(c, which);
   2732             if(result!=props[i][2]) {
   2733                 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
   2734                         c, whichName, result, i);
   2735             }
   2736         }
   2737 
   2738         result=u_getIntPropertyValue(c, which);
   2739         if(result!=props[i][2]) {
   2740             log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
   2741                     c, whichName, result, props[i][2], i);
   2742         }
   2743 
   2744         /* test separate functions, too */
   2745         switch((UProperty)props[i][1]) {
   2746         case UCHAR_ALPHABETIC:
   2747             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2748                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
   2749                         props[i][0], result, i);
   2750             }
   2751             break;
   2752         case UCHAR_LOWERCASE:
   2753             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2754                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
   2755                         props[i][0], result, i);
   2756             }
   2757             break;
   2758         case UCHAR_UPPERCASE:
   2759             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2760                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
   2761                         props[i][0], result, i);
   2762             }
   2763             break;
   2764         case UCHAR_WHITE_SPACE:
   2765             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2766                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
   2767                         props[i][0], result, i);
   2768             }
   2769             break;
   2770         default:
   2771             break;
   2772         }
   2773     }
   2774 }
   2775 
   2776 static void
   2777 TestNumericProperties(void) {
   2778     /* see UnicodeData.txt, DerivedNumericValues.txt */
   2779     static const struct {
   2780         UChar32 c;
   2781         int32_t type;
   2782         double numValue;
   2783     } values[]={
   2784         { 0x12456, U_NT_NUMERIC, -1. },
   2785         { 0x12457, U_NT_NUMERIC, -1. },
   2786         { 0x0F33, U_NT_NUMERIC, -1./2. },
   2787         { 0x0C66, U_NT_DECIMAL, 0 },
   2788         { 0x96f6, U_NT_NUMERIC, 0 },
   2789         { 0xa833, U_NT_NUMERIC, 1./16. },
   2790         { 0x2152, U_NT_NUMERIC, 1./10. },
   2791         { 0x2151, U_NT_NUMERIC, 1./9. },
   2792         { 0x1245f, U_NT_NUMERIC, 1./8. },
   2793         { 0x2150, U_NT_NUMERIC, 1./7. },
   2794         { 0x2159, U_NT_NUMERIC, 1./6. },
   2795         { 0x09f6, U_NT_NUMERIC, 3./16. },
   2796         { 0x2155, U_NT_NUMERIC, 1./5. },
   2797         { 0x00BD, U_NT_NUMERIC, 1./2. },
   2798         { 0x0031, U_NT_DECIMAL, 1. },
   2799         { 0x4e00, U_NT_NUMERIC, 1. },
   2800         { 0x58f1, U_NT_NUMERIC, 1. },
   2801         { 0x10320, U_NT_NUMERIC, 1. },
   2802         { 0x0F2B, U_NT_NUMERIC, 3./2. },
   2803         { 0x00B2, U_NT_DIGIT, 2. },
   2804         { 0x5f10, U_NT_NUMERIC, 2. },
   2805         { 0x1813, U_NT_DECIMAL, 3. },
   2806         { 0x5f0e, U_NT_NUMERIC, 3. },
   2807         { 0x2173, U_NT_NUMERIC, 4. },
   2808         { 0x8086, U_NT_NUMERIC, 4. },
   2809         { 0x278E, U_NT_DIGIT, 5. },
   2810         { 0x1D7F2, U_NT_DECIMAL, 6. },
   2811         { 0x247A, U_NT_DIGIT, 7. },
   2812         { 0x7396, U_NT_NUMERIC, 9. },
   2813         { 0x1372, U_NT_NUMERIC, 10. },
   2814         { 0x216B, U_NT_NUMERIC, 12. },
   2815         { 0x16EE, U_NT_NUMERIC, 17. },
   2816         { 0x249A, U_NT_NUMERIC, 19. },
   2817         { 0x303A, U_NT_NUMERIC, 30. },
   2818         { 0x5345, U_NT_NUMERIC, 30. },
   2819         { 0x32B2, U_NT_NUMERIC, 37. },
   2820         { 0x1375, U_NT_NUMERIC, 40. },
   2821         { 0x10323, U_NT_NUMERIC, 50. },
   2822         { 0x0BF1, U_NT_NUMERIC, 100. },
   2823         { 0x964c, U_NT_NUMERIC, 100. },
   2824         { 0x217E, U_NT_NUMERIC, 500. },
   2825         { 0x2180, U_NT_NUMERIC, 1000. },
   2826         { 0x4edf, U_NT_NUMERIC, 1000. },
   2827         { 0x2181, U_NT_NUMERIC, 5000. },
   2828         { 0x137C, U_NT_NUMERIC, 10000. },
   2829         { 0x4e07, U_NT_NUMERIC, 10000. },
   2830         { 0x12432, U_NT_NUMERIC, 216000. },
   2831         { 0x12433, U_NT_NUMERIC, 432000. },
   2832         { 0x4ebf, U_NT_NUMERIC, 100000000. },
   2833         { 0x5146, U_NT_NUMERIC, 1000000000000. },
   2834         { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2835         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2836         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2837         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2838         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2839         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2840         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2841         { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
   2842     };
   2843 
   2844     double nv;
   2845     UChar32 c;
   2846     int32_t i, type;
   2847 
   2848     for(i=0; i<LENGTHOF(values); ++i) {
   2849         c=values[i].c;
   2850         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
   2851         nv=u_getNumericValue(c);
   2852 
   2853         if(type!=values[i].type) {
   2854             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
   2855         }
   2856         if(0.000001 <= fabs(nv - values[i].numValue)) {
   2857             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
   2858         }
   2859     }
   2860 }
   2861 
   2862 /**
   2863  * Test the property names and property value names API.
   2864  */
   2865 static void
   2866 TestPropertyNames(void) {
   2867     int32_t p, v, choice=0, rev;
   2868     UBool atLeastSomething = FALSE;
   2869 
   2870     for (p=0; ; ++p) {
   2871         UProperty propEnum = (UProperty)p;
   2872         UBool sawProp = FALSE;
   2873         if(p > 10 && !atLeastSomething) {
   2874           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
   2875           return;
   2876         }
   2877 
   2878         for (choice=0; ; ++choice) {
   2879             const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
   2880             if (name) {
   2881                 if (!sawProp)
   2882                     log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
   2883                 log_verbose("%d=\"%s\"", choice, name);
   2884                 sawProp = TRUE;
   2885                 atLeastSomething = TRUE;
   2886 
   2887                 /* test reverse mapping */
   2888                 rev = u_getPropertyEnum(name);
   2889                 if (rev != p) {
   2890                     log_err("Property round-trip failure: %d -> %s -> %d\n",
   2891                             p, name, rev);
   2892                 }
   2893             }
   2894             if (!name && choice>0) break;
   2895         }
   2896         if (sawProp) {
   2897             /* looks like a valid property; check the values */
   2898             const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
   2899             int32_t max = 0;
   2900             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
   2901                 max = 255;
   2902             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
   2903                 /* it's far too slow to iterate all the way up to
   2904                    the real max, U_GC_P_MASK */
   2905                 max = U_GC_NL_MASK;
   2906             } else if (p == UCHAR_BLOCK) {
   2907                 /* UBlockCodes, unlike other values, start at 1 */
   2908                 max = 1;
   2909             }
   2910             log_verbose("\n");
   2911             for (v=-1; ; ++v) {
   2912                 UBool sawValue = FALSE;
   2913                 for (choice=0; ; ++choice) {
   2914                     const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
   2915                     if (vname) {
   2916                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
   2917                         log_verbose("%d=\"%s\"", choice, vname);
   2918                         sawValue = TRUE;
   2919 
   2920                         /* test reverse mapping */
   2921                         rev = u_getPropertyValueEnum(propEnum, vname);
   2922                         if (rev != v) {
   2923                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
   2924                                     pname, v, vname, rev);
   2925                         }
   2926                     }
   2927                     if (!vname && choice>0) break;
   2928                 }
   2929                 if (sawValue) {
   2930                     log_verbose("\n");
   2931                 }
   2932                 if (!sawValue && v>=max) break;
   2933             }
   2934         }
   2935         if (!sawProp) {
   2936             if (p>=UCHAR_STRING_LIMIT) {
   2937                 break;
   2938             } else if (p>=UCHAR_DOUBLE_LIMIT) {
   2939                 p = UCHAR_STRING_START - 1;
   2940             } else if (p>=UCHAR_MASK_LIMIT) {
   2941                 p = UCHAR_DOUBLE_START - 1;
   2942             } else if (p>=UCHAR_INT_LIMIT) {
   2943                 p = UCHAR_MASK_START - 1;
   2944             } else if (p>=UCHAR_BINARY_LIMIT) {
   2945                 p = UCHAR_INT_START - 1;
   2946             }
   2947         }
   2948     }
   2949 }
   2950 
   2951 /**
   2952  * Test the property values API.  See JB#2410.
   2953  */
   2954 static void
   2955 TestPropertyValues(void) {
   2956     int32_t i, p, min, max;
   2957     UErrorCode ec;
   2958 
   2959     /* Min should be 0 for everything. */
   2960     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
   2961     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
   2962         UProperty propEnum = (UProperty)p;
   2963         min = u_getIntPropertyMinValue(propEnum);
   2964         if (min != 0) {
   2965             if (p == UCHAR_BLOCK) {
   2966                 /* This is okay...for now.  See JB#2487.
   2967                    TODO Update this for JB#2487. */
   2968             } else {
   2969                 const char* name;
   2970                 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
   2971                 if (name == NULL)
   2972                     name = "<ERROR>";
   2973                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
   2974                         name, min);
   2975             }
   2976         }
   2977     }
   2978 
   2979     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
   2980         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
   2981         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
   2982     }
   2983 
   2984     /* Max should be -1 for invalid properties. */
   2985     max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
   2986     if (max != -1) {
   2987         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
   2988                 max);
   2989     }
   2990 
   2991     /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
   2992     for (i=0; i<2; ++i) {
   2993         int32_t script;
   2994         const char* desc;
   2995         ec = U_ZERO_ERROR;
   2996         switch (i) {
   2997         case 0:
   2998             script = uscript_getScript(-1, &ec);
   2999             desc = "uscript_getScript(-1)";
   3000             break;
   3001         case 1:
   3002             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
   3003             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
   3004             break;
   3005         default:
   3006             log_err("Internal test error. Too many scripts\n");
   3007             return;
   3008         }
   3009         /* We don't explicitly test ec.  It should be U_FAILURE but it
   3010            isn't documented as such. */
   3011         if (script != (int32_t)USCRIPT_INVALID_CODE) {
   3012             log_err("FAIL: %s = %d, exp. 0\n",
   3013                     desc, script);
   3014         }
   3015     }
   3016 }
   3017 
   3018 /* various tests for consistency of UCD data and API behavior */
   3019 static void
   3020 TestConsistency() {
   3021     char buffer[300];
   3022     USet *set1, *set2, *set3, *set4;
   3023     UErrorCode errorCode;
   3024 
   3025     UChar32 start, end;
   3026     int32_t i, length;
   3027 
   3028     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
   3029     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
   3030     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
   3031     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
   3032     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
   3033 
   3034     U_STRING_DECL(mathBlocksPattern,
   3035         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
   3036         1+32+46+46+45+43+1+1); /* +1 for NUL */
   3037     U_STRING_DECL(mathPattern, "[:Math:]", 8);
   3038     U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
   3039     U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
   3040     U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
   3041 
   3042     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
   3043     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
   3044     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
   3045     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
   3046     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
   3047 
   3048     U_STRING_INIT(mathBlocksPattern,
   3049         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
   3050         1+32+46+46+45+43+1+1); /* +1 for NUL */
   3051     U_STRING_INIT(mathPattern, "[:Math:]", 8);
   3052     U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
   3053     U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
   3054     U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
   3055 
   3056     /*
   3057      * It used to be that UCD.html and its precursors said
   3058      * "Those dashes used to mark connections between pieces of words,
   3059      *  plus the Katakana middle dot."
   3060      *
   3061      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
   3062      * but not from Hyphen.
   3063      * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
   3064      * Therefore, do not show errors when testing the Hyphen property.
   3065      */
   3066     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
   3067                 "known to the UTC and not considered errors.\n");
   3068 
   3069     errorCode=U_ZERO_ERROR;
   3070     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
   3071     set2=uset_openPattern(dashPattern, 8, &errorCode);
   3072     if(U_SUCCESS(errorCode)) {
   3073         /* remove the Katakana middle dot(s) from set1 */
   3074         uset_remove(set1, 0x30fb);
   3075         uset_remove(set1, 0xff65); /* halfwidth variant */
   3076         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
   3077     } else {
   3078         log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3079     }
   3080 
   3081     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
   3082     set3=uset_openPattern(formatPattern, 6, &errorCode);
   3083     set4=uset_openPattern(alphaPattern, 14, &errorCode);
   3084     if(U_SUCCESS(errorCode)) {
   3085         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
   3086         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
   3087         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
   3088     } else {
   3089         log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3090     }
   3091 
   3092     uset_close(set1);
   3093     uset_close(set2);
   3094     uset_close(set3);
   3095     uset_close(set4);
   3096 
   3097     /*
   3098      * Check that each lowercase character has "small" in its name
   3099      * and not "capital".
   3100      * There are some such characters, some of which seem odd.
   3101      * Use the verbose flag to see these notices.
   3102      */
   3103     errorCode=U_ZERO_ERROR;
   3104     set1=uset_openPattern(lowerPattern, 13, &errorCode);
   3105     if(U_SUCCESS(errorCode)) {
   3106         for(i=0;; ++i) {
   3107             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
   3108             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
   3109                 break; /* done */
   3110             }
   3111             if(U_FAILURE(errorCode)) {
   3112                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
   3113                         i, u_errorName(errorCode));
   3114                 break;
   3115             }
   3116             if(length!=0) {
   3117                 break; /* done with code points, got a string or -1 */
   3118             }
   3119 
   3120             while(start<=end) {
   3121                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
   3122                 if(U_FAILURE(errorCode)) {
   3123                     log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
   3124                     errorCode=U_ZERO_ERROR;
   3125                 }
   3126                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
   3127                     strstr(buffer, "SMALL CAPITAL")==NULL
   3128                 ) {
   3129                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
   3130                 }
   3131                 ++start;
   3132             }
   3133         }
   3134     } else {
   3135         log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3136     }
   3137     uset_close(set1);
   3138 
   3139     /* verify that all assigned characters in Math blocks are exactly Math characters */
   3140     errorCode=U_ZERO_ERROR;
   3141     set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
   3142     set2=uset_openPattern(mathPattern, 8, &errorCode);
   3143     set3=uset_openPattern(unassignedPattern, 6, &errorCode);
   3144     if(U_SUCCESS(errorCode)) {
   3145         uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
   3146         uset_complement(set3);      /* assigned characters */
   3147         uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
   3148         compareUSets(set1, set2,
   3149                      "[assigned Math block chars]", "[math blocks]&[:Math:]",
   3150                      TRUE);
   3151     } else {
   3152         log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3153     }
   3154     uset_close(set1);
   3155     uset_close(set2);
   3156     uset_close(set3);
   3157 
   3158     /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
   3159     errorCode=U_ZERO_ERROR;
   3160     set1=uset_openPattern(unknownPattern, 14, &errorCode);
   3161     set2=uset_openPattern(reservedPattern, 20, &errorCode);
   3162     if(U_SUCCESS(errorCode)) {
   3163         compareUSets(set1, set2,
   3164                      "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
   3165                      TRUE);
   3166     } else {
   3167         log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3168     }
   3169     uset_close(set1);
   3170     uset_close(set2);
   3171 }
   3172 
/*
 * Starting with ICU4C 3.4, the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and therefore not included
 * in the data package any more.
 * Tests requiring these files are disabled so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 */
   3183 #define HARDCODED_DATA_4497 1
   3184 
   3185 /* API coverage for ucase.c */
   3186 static void TestUCase() {
   3187 #if !HARDCODED_DATA_4497
   3188     UDataMemory *pData;
   3189     UCaseProps *csp;
   3190     const UCaseProps *ccsp;
   3191     UErrorCode errorCode;
   3192 
   3193     /* coverage for ucase_openBinary() */
   3194     errorCode=U_ZERO_ERROR;
   3195     pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
   3196     if(U_FAILURE(errorCode)) {
   3197         log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
   3198                     u_errorName(errorCode));
   3199         return;
   3200     }
   3201 
   3202     csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
   3203     if(U_FAILURE(errorCode)) {
   3204         log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
   3205                 u_errorName(errorCode));
   3206         udata_close(pData);
   3207         return;
   3208     }
   3209 
   3210     if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
   3211         log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
   3212     }
   3213 
   3214     ucase_close(csp);
   3215     udata_close(pData);
   3216 
   3217     /* coverage for ucase_getDummy() */
   3218     errorCode=U_ZERO_ERROR;
   3219     ccsp=ucase_getDummy(&errorCode);
   3220     if(ucase_tolower(ccsp, 0x41)!=0x41) {
   3221         log_err("ucase_tolower(dummy, A)!=A\n");
   3222     }
   3223 #endif
   3224 }
   3225 
   3226 /* API coverage for ubidi_props.c */
   3227 static void TestUBiDiProps() {
   3228 #if !HARDCODED_DATA_4497
   3229     UDataMemory *pData;
   3230     UBiDiProps *bdp;
   3231     const UBiDiProps *cbdp;
   3232     UErrorCode errorCode;
   3233 
   3234     /* coverage for ubidi_openBinary() */
   3235     errorCode=U_ZERO_ERROR;
   3236     pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
   3237     if(U_FAILURE(errorCode)) {
   3238         log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
   3239                     u_errorName(errorCode));
   3240         return;
   3241     }
   3242 
   3243     bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
   3244     if(U_FAILURE(errorCode)) {
   3245         log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
   3246                 u_errorName(errorCode));
   3247         udata_close(pData);
   3248         return;
   3249     }
   3250 
   3251     if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
   3252         log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
   3253     }
   3254 
   3255     ubidi_closeProps(bdp);
   3256     udata_close(pData);
   3257 
   3258     /* coverage for ubidi_getDummy() */
   3259     errorCode=U_ZERO_ERROR;
   3260     cbdp=ubidi_getDummy(&errorCode);
   3261     if(ubidi_getClass(cbdp, 0x20)!=0) {
   3262         log_err("ubidi_getClass(dummy, space)!=0\n");
   3263     }
   3264 #endif
   3265 }
   3266 
   3267 /* test case folding, compare return values with CaseFolding.txt ------------ */
   3268 
   3269 /* bit set for which case foldings for a character have been tested already */
   3270 enum {
   3271     CF_SIMPLE=1,
   3272     CF_FULL=2,
   3273     CF_TURKIC=4,
   3274     CF_ALL=7
   3275 };
   3276 
   3277 static void
   3278 testFold(UChar32 c, int which,
   3279          UChar32 simple, UChar32 turkic,
   3280          const UChar *full, int32_t fullLength,
   3281          const UChar *turkicFull, int32_t turkicFullLength) {
   3282     UChar s[2], t[32];
   3283     UChar32 c2;
   3284     int32_t length, length2;
   3285 
   3286     UErrorCode errorCode=U_ZERO_ERROR;
   3287 
   3288     length=0;
   3289     U16_APPEND_UNSAFE(s, length, c);
   3290 
   3291     if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
   3292         log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
   3293     }
   3294     if((which&CF_FULL)!=0) {
   3295         length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);
   3296         if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
   3297             log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
   3298         }
   3299     }
   3300     if((which&CF_TURKIC)!=0) {
   3301         if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
   3302             log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
   3303         }
   3304 
   3305         length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
   3306         if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
   3307             log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
   3308         }
   3309     }
   3310 }
   3311 
   3312 /* test that c case-folds to itself */
   3313 static void
   3314 testFoldToSelf(UChar32 c, int which) {
   3315     UChar s[2];
   3316     int32_t length;
   3317 
   3318     length=0;
   3319     U16_APPEND_UNSAFE(s, length, c);
   3320     testFold(c, which, c, c, s, length, s, length);
   3321 }
   3322 
   3323 struct CaseFoldingData {
   3324     USet *notSeen;
   3325     UChar32 prev, prevSimple;
   3326     UChar prevFull[32];
   3327     int32_t prevFullLength;
   3328     int which;
   3329 };
   3330 typedef struct CaseFoldingData CaseFoldingData;
   3331 
   3332 static void U_CALLCONV
   3333 caseFoldingLineFn(void *context,
   3334                   char *fields[][2], int32_t fieldCount,
   3335                   UErrorCode *pErrorCode) {
   3336     CaseFoldingData *pData=(CaseFoldingData *)context;
   3337     char *end;
   3338     UChar full[32];
   3339     UChar32 c, prev, simple;
   3340     int32_t count;
   3341     int which;
   3342     char status;
   3343 
   3344     /* get code point */
   3345     const char *s=u_skipWhitespace(fields[0][0]);
   3346     if(0==strncmp(s, "0000..10FFFF", 12)) {
   3347         /*
   3348          * Ignore the line
   3349          * # @missing: 0000..10FFFF; C; <code point>
   3350          * because maps-to-self is already our default, and this line breaks this parser.
   3351          */
   3352         return;
   3353     }
   3354     c=(UChar32)strtoul(s, &end, 16);
   3355     end=(char *)u_skipWhitespace(end);
   3356     if(end<=fields[0][0] || end!=fields[0][1]) {
   3357         log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
   3358         *pErrorCode=U_PARSE_ERROR;
   3359         return;
   3360     }
   3361 
   3362     /* get the status of this mapping */
   3363     status=*u_skipWhitespace(fields[1][0]);
   3364     if(status!='C' && status!='S' && status!='F' && status!='T') {
   3365         log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
   3366         *pErrorCode=U_PARSE_ERROR;
   3367         return;
   3368     }
   3369 
   3370     /* get the mapping */
   3371     count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
   3372     if(U_FAILURE(*pErrorCode)) {
   3373         log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
   3374         return;
   3375     }
   3376 
   3377     /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
   3378     if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
   3379         simple=c;
   3380     }
   3381 
   3382     if(c!=(prev=pData->prev)) {
   3383         /*
   3384          * Test remaining mappings for the previous code point.
   3385          * If a turkic folding was not mentioned, then it should fold the same
   3386          * as the regular simple case folding.
   3387          */
   3388         UChar prevString[2];
   3389         int32_t length;
   3390 
   3391         length=0;
   3392         U16_APPEND_UNSAFE(prevString, length, prev);
   3393         testFold(prev, (~pData->which)&CF_ALL,
   3394                  prev, pData->prevSimple,
   3395                  prevString, length,
   3396                  pData->prevFull, pData->prevFullLength);
   3397         pData->prev=pData->prevSimple=c;
   3398         length=0;
   3399         U16_APPEND_UNSAFE(pData->prevFull, length, c);
   3400         pData->prevFullLength=length;
   3401         pData->which=0;
   3402     }
   3403 
   3404     /*
   3405      * Turn the status into a bit set of case foldings to test.
   3406      * Remember non-Turkic case foldings as defaults for Turkic mode.
   3407      */
   3408     switch(status) {
   3409     case 'C':
   3410         which=CF_SIMPLE|CF_FULL;
   3411         pData->prevSimple=simple;
   3412         u_memcpy(pData->prevFull, full, count);
   3413         pData->prevFullLength=count;
   3414         break;
   3415     case 'S':
   3416         which=CF_SIMPLE;
   3417         pData->prevSimple=simple;
   3418         break;
   3419     case 'F':
   3420         which=CF_FULL;
   3421         u_memcpy(pData->prevFull, full, count);
   3422         pData->prevFullLength=count;
   3423         break;
   3424     case 'T':
   3425         which=CF_TURKIC;
   3426         break;
   3427     default:
   3428         which=0;
   3429         break; /* won't happen because of test above */
   3430     }
   3431 
   3432     testFold(c, which, simple, simple, full, count, full, count);
   3433 
   3434     /* remember which case foldings of c have been tested */
   3435     pData->which|=which;
   3436 
   3437     /* remove c from the set of ones not mentioned in CaseFolding.txt */
   3438     uset_remove(pData->notSeen, c);
   3439 }
   3440 
   3441 static void
   3442 TestCaseFolding() {
   3443     CaseFoldingData data={ NULL };
   3444     char *fields[3][2];
   3445     UErrorCode errorCode;
   3446 
   3447     static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
   3448 
   3449     errorCode=U_ZERO_ERROR;
   3450     /* test BMP & plane 1 - nothing interesting above */
   3451     data.notSeen=uset_open(0, 0x1ffff);
   3452     data.prevFullLength=1; /* length of full case folding of U+0000 */
   3453 
   3454     parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
   3455     if(U_SUCCESS(errorCode)) {
   3456         int32_t i, start, end;
   3457 
   3458         /* add a pseudo-last line to finish testing of the actual last one */
   3459         fields[0][0]=lastLine;
   3460         fields[0][1]=lastLine+6;
   3461         fields[1][0]=lastLine+7;
   3462         fields[1][1]=lastLine+9;
   3463         fields[2][0]=lastLine+10;
   3464         fields[2][1]=lastLine+17;
   3465         caseFoldingLineFn(&data, fields, 3, &errorCode);
   3466 
   3467         /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
   3468         for(i=0;
   3469             0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
   3470                 U_SUCCESS(errorCode);
   3471             ++i
   3472         ) {
   3473             do {
   3474                 testFoldToSelf(start, CF_ALL);
   3475             } while(++start<=end);
   3476         }
   3477     }
   3478 
   3479     uset_close(data.notSeen);
   3480 }
   3481