Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2014, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /*******************************************************************************
      7 *
      8 * File CUCDTST.C
      9 *
     10 * Modification History:
     11 *        Name                     Description
     12 *     Madhu Katragadda            Ported for C API, added tests for string functions
     13 ********************************************************************************
     14 */
     15 
     16 #include <string.h>
     17 #include <math.h>
     18 #include <stdlib.h>
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/uchar.h"
     22 #include "unicode/putil.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/uloc.h"
     25 #include "unicode/unorm2.h"
     26 
     27 #include "cintltst.h"
     28 #include "putilimp.h"
     29 #include "uparse.h"
     30 #include "ucase.h"
     31 #include "ubidi_props.h"
     32 #include "uprops.h"
     33 #include "uset_imp.h"
     34 #include "usc_impl.h"
     35 #include "udatamem.h" /* for testing ucase_openBinary() */
     36 #include "cucdapi.h"
     37 
     38 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     39 
     40 /* prototypes --------------------------------------------------------------- */
     41 
     42 static void TestUpperLower(void);
     43 static void TestLetterNumber(void);
     44 static void TestMisc(void);
     45 static void TestPOSIX(void);
     46 static void TestControlPrint(void);
     47 static void TestIdentifier(void);
     48 static void TestUnicodeData(void);
     49 static void TestCodeUnit(void);
     50 static void TestCodePoint(void);
     51 static void TestCharLength(void);
     52 static void TestCharNames(void);
     53 static void TestMirroring(void);
     54 static void TestUScriptRunAPI(void);
     55 static void TestAdditionalProperties(void);
     56 static void TestNumericProperties(void);
     57 static void TestPropertyNames(void);
     58 static void TestPropertyValues(void);
     59 static void TestConsistency(void);
     60 static void TestUCase(void);
     61 static void TestUBiDiProps(void);
     62 static void TestCaseFolding(void);
     63 
     64 /* internal methods used */
     65 static int32_t MakeProp(char* str);
     66 static int32_t MakeDir(char* str);
     67 
     68 /* helpers ------------------------------------------------------------------ */
     69 
     70 static void
     71 parseUCDFile(const char *filename,
     72              char *fields[][2], int32_t fieldCount,
     73              UParseLineFn *lineFn, void *context,
     74              UErrorCode *pErrorCode) {
     75     char path[256];
     76     char backupPath[256];
     77 
     78     if(U_FAILURE(*pErrorCode)) {
     79         return;
     80     }
     81 
     82     /* Look inside ICU_DATA first */
     83     strcpy(path, u_getDataDirectory());
     84     strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
     85     strcat(path, filename);
     86 
     87     /* As a fallback, try to guess where the source data was located
     88      *    at the time ICU was built, and look there.
     89      */
     90     strcpy(backupPath, ctest_dataSrcDir());
     91     strcat(backupPath, U_FILE_SEP_STRING);
     92     strcat(backupPath, "unidata" U_FILE_SEP_STRING);
     93     strcat(backupPath, filename);
     94 
     95     u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
     96     if(*pErrorCode==U_FILE_ACCESS_ERROR) {
     97         *pErrorCode=U_ZERO_ERROR;
     98         u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
     99     }
    100     if(U_FAILURE(*pErrorCode)) {
    101         log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
    102     }
    103 }
    104 
    105 /* test data ---------------------------------------------------------------- */
    106 
/*
 * General category lookup tables.
 *
 * tagStrings concatenates the two-letter general category abbreviations
 * without separators; tagValues lists the matching category constants in the
 * same order, so the abbreviation starting at tagStrings[2*i] corresponds to
 * tagValues[i] (see the per-entry comments below).
 * NOTE(review): presumably consumed by MakeProp() - confirm against its
 * definition, which is outside this part of the file.
 */
static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
static const int32_t tagValues[] =
    {
    /* Mn */ U_NON_SPACING_MARK,
    /* Mc */ U_COMBINING_SPACING_MARK,
    /* Me */ U_ENCLOSING_MARK,
    /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    /* Nl */ U_LETTER_NUMBER,
    /* No */ U_OTHER_NUMBER,
    /* Zs */ U_SPACE_SEPARATOR,
    /* Zl */ U_LINE_SEPARATOR,
    /* Zp */ U_PARAGRAPH_SEPARATOR,
    /* Cc */ U_CONTROL_CHAR,
    /* Cf */ U_FORMAT_CHAR,
    /* Cs */ U_SURROGATE,
    /* Co */ U_PRIVATE_USE_CHAR,
    /* Cn */ U_UNASSIGNED,
    /* Lu */ U_UPPERCASE_LETTER,
    /* Ll */ U_LOWERCASE_LETTER,
    /* Lt */ U_TITLECASE_LETTER,
    /* Lm */ U_MODIFIER_LETTER,
    /* Lo */ U_OTHER_LETTER,
    /* Pc */ U_CONNECTOR_PUNCTUATION,
    /* Pd */ U_DASH_PUNCTUATION,
    /* Ps */ U_START_PUNCTUATION,
    /* Pe */ U_END_PUNCTUATION,
    /* Po */ U_OTHER_PUNCTUATION,
    /* Sm */ U_MATH_SYMBOL,
    /* Sc */ U_CURRENCY_SYMBOL,
    /* Sk */ U_MODIFIER_SYMBOL,
    /* So */ U_OTHER_SYMBOL,
    /* Pi */ U_INITIAL_PUNCTUATION,
    /* Pf */ U_FINAL_PUNCTUATION
    };
    141 
/*
 * Bidi class abbreviations, one NUL-terminated string per row
 * (the longest abbreviation has 3 characters, so 5 bytes per row suffice).
 * NOTE(review): the row index presumably equals the UCharDirection value and
 * the table is presumably searched by MakeDir() - confirm against code
 * outside this part of the file.
 */
static const char dirStrings[][5] = {
    "L",
    "R",
    "EN",
    "ES",
    "ET",
    "AN",
    "CS",
    "B",
    "S",
    "WS",
    "ON",
    "LRE",
    "LRO",
    "AL",
    "RLE",
    "RLO",
    "PDF",
    "NSM",
    "BN",
    /* new in Unicode 6.3/ICU 52 */
    "FSI",
    "LRI",
    "RLI",
    "PDI"
};
    168 
    169 void addUnicodeTest(TestNode** root);
    170 
    171 void addUnicodeTest(TestNode** root)
    172 {
    173     addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
    174     addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
    175     addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
    176     addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
    177     addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
    178     addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
    179     addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
    180     addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
    181     addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
    182     addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
    183     addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
    184     addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
    185     addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
    186     addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
    187     addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
    188     addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
    189     addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
    190     addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
    191     addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
    192     addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
    193     addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
    194     addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
    195     addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
    196     addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
    197     addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
    198     addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
    199 }
    200 
    201 /*==================================================== */
    202 /* test u_toupper() and u_tolower()                    */
    203 /*==================================================== */
    204 static void TestUpperLower()
    205 {
    206     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
    207     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
    208     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
    209     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
    210     int32_t i;
    211 
    212     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
    213     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
    214 
    215 /*
    216 Checks LetterLike Symbols which were previously a source of confusion
    217 [Bertrand A. D. 02/04/98]
    218 */
    219     for (i=0x2100;i<0x2138;i++)
    220     {
    221         /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
    222         if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
    223         {
    224             if (i != (int)u_tolower(i)) /* itself */
    225                 log_err("Failed case conversion with itself: U+%04x\n", i);
    226             if (i != (int)u_toupper(i))
    227                 log_err("Failed case conversion with itself: U+%04x\n", i);
    228         }
    229     }
    230 
    231     for(i=0; i < u_strlen(upper); i++){
    232         if(u_tolower(upper[i]) != lower[i]){
    233             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
    234         }
    235     }
    236 
    237     log_verbose("testing upper lower\n");
    238     for (i = 0; i < 21; i++) {
    239 
    240         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
    241         {
    242             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
    243         }
    244         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
    245          {
    246             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
    247         }
    248         else if (upperTest[i] != u_tolower(lowerTest[i]))
    249         {
    250             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
    251         }
    252         else if (lowerTest[i] != u_toupper(upperTest[i]))
    253          {
    254             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
    255         }
    256         else if (upperTest[i] != u_tolower(upperTest[i]))
    257         {
    258             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
    259         }
    260         else if (lowerTest[i] != u_toupper(lowerTest[i]))
    261         {
    262             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
    263         }
    264     }
    265     log_verbose("done testing upper lower\n");
    266 
    267     log_verbose("testing u_istitle\n");
    268     {
    269         static const UChar expected[] = {
    270             0x1F88,
    271             0x1F89,
    272             0x1F8A,
    273             0x1F8B,
    274             0x1F8C,
    275             0x1F8D,
    276             0x1F8E,
    277             0x1F8F,
    278             0x1F88,
    279             0x1F89,
    280             0x1F8A,
    281             0x1F8B,
    282             0x1F8C,
    283             0x1F8D,
    284             0x1F8E,
    285             0x1F8F,
    286             0x1F98,
    287             0x1F99,
    288             0x1F9A,
    289             0x1F9B,
    290             0x1F9C,
    291             0x1F9D,
    292             0x1F9E,
    293             0x1F9F,
    294             0x1F98,
    295             0x1F99,
    296             0x1F9A,
    297             0x1F9B,
    298             0x1F9C,
    299             0x1F9D,
    300             0x1F9E,
    301             0x1F9F,
    302             0x1FA8,
    303             0x1FA9,
    304             0x1FAA,
    305             0x1FAB,
    306             0x1FAC,
    307             0x1FAD,
    308             0x1FAE,
    309             0x1FAF,
    310             0x1FA8,
    311             0x1FA9,
    312             0x1FAA,
    313             0x1FAB,
    314             0x1FAC,
    315             0x1FAD,
    316             0x1FAE,
    317             0x1FAF,
    318             0x1FBC,
    319             0x1FBC,
    320             0x1FCC,
    321             0x1FCC,
    322             0x1FFC,
    323             0x1FFC,
    324         };
    325         int32_t num = sizeof(expected)/sizeof(expected[0]);
    326         for(i=0; i<num; i++){
    327             if(!u_istitle(expected[i])){
    328                 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
    329             }
    330         }
    331 
    332     }
    333 }
    334 
    335 /* compare two sets and verify that their difference or intersection is empty */
    336 static UBool
    337 showADiffB(const USet *a, const USet *b,
    338            const char *a_name, const char *b_name,
    339            UBool expect, UBool diffIsError) {
    340     USet *aa;
    341     int32_t i, start, end, length;
    342     UErrorCode errorCode;
    343 
    344     /*
    345      * expect:
    346      * TRUE  -> a-b should be empty, that is, b should contain all of a
    347      * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
    348      */
    349     if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
    350         return TRUE;
    351     }
    352 
    353     /* clone a to aa because a is const */
    354     aa=uset_open(1, 0);
    355     if(aa==NULL) {
    356         /* unusual problem - out of memory? */
    357         return FALSE;
    358     }
    359     uset_addAll(aa, a);
    360 
    361     /* compute the set in question */
    362     if(expect) {
    363         /* a-b */
    364         uset_removeAll(aa, b);
    365     } else {
    366         /* a&b */
    367         uset_retainAll(aa, b);
    368     }
    369 
    370     /* aa is not empty because of the initial tests above; show its contents */
    371     errorCode=U_ZERO_ERROR;
    372     i=0;
    373     for(;;) {
    374         length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
    375         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
    376             break; /* done */
    377         }
    378         if(U_FAILURE(errorCode)) {
    379             log_err("error comparing %s with %s at difference item %d: %s\n",
    380                 a_name, b_name, i, u_errorName(errorCode));
    381             break;
    382         }
    383         if(length!=0) {
    384             break; /* done with code points, got a string or -1 */
    385         }
    386 
    387         if(diffIsError) {
    388             if(expect) {
    389                 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
    390             } else {
    391                 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
    392             }
    393         } else {
    394             if(expect) {
    395                 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
    396             } else {
    397                 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
    398             }
    399         }
    400 
    401         ++i;
    402     }
    403 
    404     uset_close(aa);
    405     return FALSE;
    406 }
    407 
    408 static UBool
    409 showAMinusB(const USet *a, const USet *b,
    410             const char *a_name, const char *b_name,
    411             UBool diffIsError) {
    412     return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
    413 }
    414 
    415 static UBool
    416 showAIntersectB(const USet *a, const USet *b,
    417                 const char *a_name, const char *b_name,
    418                 UBool diffIsError) {
    419     return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
    420 }
    421 
    422 static UBool
    423 compareUSets(const USet *a, const USet *b,
    424              const char *a_name, const char *b_name,
    425              UBool diffIsError) {
    426     /*
    427      * Use an arithmetic & not a logical && so that both branches
    428      * are always taken and all differences are shown.
    429      */
    430     return
    431         showAMinusB(a, b, a_name, b_name, diffIsError) &
    432         showAMinusB(b, a, b_name, a_name, diffIsError);
    433 }
    434 
    435 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
    436 static void TestLetterNumber()
    437 {
    438     UChar i = 0x0000;
    439 
    440     log_verbose("Testing for isalpha\n");
    441     for (i = 0x0041; i < 0x005B; i++) {
    442         if (!u_isalpha(i))
    443         {
    444             log_err("Failed isLetter test at  %.4X\n", i);
    445         }
    446     }
    447     for (i = 0x0660; i < 0x066A; i++) {
    448         if (u_isalpha(i))
    449         {
    450             log_err("Failed isLetter test with numbers at %.4X\n", i);
    451         }
    452     }
    453 
    454     log_verbose("Testing for isdigit\n");
    455     for (i = 0x0660; i < 0x066A; i++) {
    456         if (!u_isdigit(i))
    457         {
    458             log_verbose("Failed isNumber test at %.4X\n", i);
    459         }
    460     }
    461 
    462     log_verbose("Testing for isalnum\n");
    463     for (i = 0x0041; i < 0x005B; i++) {
    464         if (!u_isalnum(i))
    465         {
    466             log_err("Failed isAlNum test at  %.4X\n", i);
    467         }
    468     }
    469     for (i = 0x0660; i < 0x066A; i++) {
    470         if (!u_isalnum(i))
    471         {
    472             log_err("Failed isAlNum test at  %.4X\n", i);
    473         }
    474     }
    475 
    476     {
    477         /*
    478          * The following checks work only starting from Unicode 4.0.
    479          * Check the version number here.
    480          */
    481         static UVersionInfo u401={ 4, 0, 1, 0 };
    482         UVersionInfo version;
    483         u_getUnicodeVersion(version);
    484         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
    485             return;
    486         }
    487     }
    488 
    489     {
    490         /*
    491          * Sanity check:
    492          * Verify that exactly the digit characters have decimal digit values.
    493          * This assumption is used in the implementation of u_digit()
    494          * (which checks nt=de)
    495          * compared with the parallel java.lang.Character.digit()
    496          * (which checks Nd).
    497          *
    498          * This was not true in Unicode 3.2 and earlier.
    499          * Unicode 4.0 fixed discrepancies.
    500          * Unicode 4.0.1 re-introduced problems in this area due to an
    501          * unintentionally incomplete last-minute change.
    502          */
    503         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
    504         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
    505 
    506         USet *digits, *decimalValues;
    507         UErrorCode errorCode;
    508 
    509         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
    510         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
    511         errorCode=U_ZERO_ERROR;
    512         digits=uset_openPattern(digitsPattern, 6, &errorCode);
    513         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
    514 
    515         if(U_SUCCESS(errorCode)) {
    516             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
    517         }
    518 
    519         uset_close(digits);
    520         uset_close(decimalValues);
    521     }
    522 }
    523 
    524 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
    525                                 const UChar32 *sampleChars, int32_t sampleCharsLength,
    526                                 UBool expected) {
    527     int32_t i;
    528     for (i = 0; i < sampleCharsLength; ++i) {
    529         UBool result = propFn(sampleChars[i]);
    530         if (result != expected) {
    531             log_err("error: character property function %s(U+%04x)=%d is wrong\n",
    532                     propName, sampleChars[i], result);
    533         }
    534     }
    535 }
    536 
    537 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
    538 static void TestMisc()
    539 {
    540     static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
    541     static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
    542     static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
    543     static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
    544     static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
    545     static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
    546 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
    547     static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
    548     static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
    549     static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
    550     static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
    551 
    552     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
    553 
    554     uint32_t mask;
    555 
    556     int32_t i;
    557     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
    558     UVersionInfo realVersion;
    559 
    560     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
    561 
    562     testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
    563     testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
    564 
    565     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
    566                         sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
    567     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
    568                         sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
    569 
    570     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
    571                         sampleWhiteSpaces, LENGTHOF(sampleWhiteSpaces), TRUE);
    572     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
    573                         sampleNonWhiteSpaces, LENGTHOF(sampleNonWhiteSpaces), FALSE);
    574 
    575     testSampleCharProps(u_isdefined, "u_isdefined",
    576                         sampleDefined, LENGTHOF(sampleDefined), TRUE);
    577     testSampleCharProps(u_isdefined, "u_isdefined",
    578                         sampleUndefined, LENGTHOF(sampleUndefined), FALSE);
    579 
    580     testSampleCharProps(u_isbase, "u_isbase", sampleBase, LENGTHOF(sampleBase), TRUE);
    581     testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, LENGTHOF(sampleNonBase), FALSE);
    582 
    583     testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, LENGTHOF(sampleDigits), TRUE);
    584     testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, LENGTHOF(sampleNonDigits), FALSE);
    585 
    586     for (i = 0; i < LENGTHOF(sampleDigits); i++) {
    587         if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
    588             log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
    589                     sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
    590         }
    591     }
    592 
    593     /* Tests the ICU version #*/
    594     u_getVersion(realVersion);
    595     u_versionToString(realVersion, icuVersion);
    596     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
    597     {
    598         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
    599     }
    600 #if defined(ICU_VERSION)
    601     /* test only happens where we have configure.in with VERSION - sanity check. */
    602     if(strcmp(U_ICU_VERSION, ICU_VERSION))
    603     {
    604         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
    605     }
    606 #endif
    607 
    608     /* test U_GC_... */
    609     if(
    610         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
    611         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
    612         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
    613         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
    614         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
    615         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
    616     ) {
    617         log_err("error: U_GET_GC_MASK does not work properly\n");
    618     }
    619 
    620     mask=0;
    621     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
    622 
    623     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
    624     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
    625     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
    626     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
    627     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
    628 
    629     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
    630     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
    631     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
    632 
    633     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
    634     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
    635     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
    636 
    637     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
    638     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
    639     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
    640 
    641     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
    642     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
    643     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
    644     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
    645 
    646     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
    647     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
    648     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
    649     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
    650     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
    651 
    652     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
    653     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
    654     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
    655     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
    656 
    657     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
    658     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
    659 
    660     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
    661         log_err("error: problems with U_GC_XX_MASK constants\n");
    662     }
    663 
    664     mask=0;
    665     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
    666     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
    667     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
    668     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
    669     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
    670     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
    671     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
    672 
    673     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
    674         log_err("error: problems with U_GC_Y_MASK constants\n");
    675     }
    676     {
    677         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
    678         for(i=0; i<10; i++){
    679             if(digit[i]!=u_forDigit(i,10)){
    680                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
    681             }
    682         }
    683     }
    684 
    685     /* test u_digit() */
    686     {
    687         static const struct {
    688             UChar32 c;
    689             int8_t radix, value;
    690         } data[]={
    691             /* base 16 */
    692             { 0x0031, 16, 1 },
    693             { 0x0038, 16, 8 },
    694             { 0x0043, 16, 12 },
    695             { 0x0066, 16, 15 },
    696             { 0x00e4, 16, -1 },
    697             { 0x0662, 16, 2 },
    698             { 0x06f5, 16, 5 },
    699             { 0xff13, 16, 3 },
    700             { 0xff41, 16, 10 },
    701 
    702             /* base 8 */
    703             { 0x0031, 8, 1 },
    704             { 0x0038, 8, -1 },
    705             { 0x0043, 8, -1 },
    706             { 0x0066, 8, -1 },
    707             { 0x00e4, 8, -1 },
    708             { 0x0662, 8, 2 },
    709             { 0x06f5, 8, 5 },
    710             { 0xff13, 8, 3 },
    711             { 0xff41, 8, -1 },
    712 
    713             /* base 36 */
    714             { 0x5a, 36, 35 },
    715             { 0x7a, 36, 35 },
    716             { 0xff3a, 36, 35 },
    717             { 0xff5a, 36, 35 },
    718 
    719             /* wrong radix values */
    720             { 0x0031, 1, -1 },
    721             { 0xff3a, 37, -1 }
    722         };
    723 
    724         for(i=0; i<LENGTHOF(data); ++i) {
    725             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
    726                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
    727                         data[i].c,
    728                         data[i].radix,
    729                         u_digit(data[i].c, data[i].radix),
    730                         data[i].value);
    731             }
    732         }
    733     }
    734 }
    735 
    736 /* test C/POSIX-style functions --------------------------------------------- */
    737 
/*
 * bit flags
 * One bit per C/POSIX-style classification function: the flag with value
 * 1<<i belongs to posixClasses[i] below (ISAL -> isalpha, ISLO -> islower,
 * ... ISCN -> iscntrl). The flags are combined into the expected
 * posixResults value of each test data entry.
 */
#define ISAL     1
#define ISLO     2
#define ISUP     4

#define ISDI     8
#define ISXD  0x10

#define ISAN  0x20

#define ISPU  0x40
#define ISGR  0x80
#define ISPR 0x100

#define ISSP 0x200
#define ISBL 0x400
#define ISCN 0x800

/* C/POSIX-style functions, in the same order as the bit flags */
/* Common function type of the classification functions listed below. */
typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);

/* fn: function under test; name: its POSIX-style name (without the u_ prefix) */
static const struct {
    IsPOSIXClass *fn;
    const char *name;
} posixClasses[]={
    { u_isalpha, "isalpha" },
    { u_islower, "islower" },
    { u_isupper, "isupper" },
    { u_isdigit, "isdigit" },
    { u_isxdigit, "isxdigit" },
    { u_isalnum, "isalnum" },
    { u_ispunct, "ispunct" },
    { u_isgraph, "isgraph" },
    { u_isprint, "isprint" },
    { u_isspace, "isspace" },
    { u_isblank, "isblank" },
    { u_iscntrl, "iscntrl" }
};
    776 
    777 static const struct {
    778     UChar32 c;
    779     uint32_t posixResults;
    780 } posixData[]={
    781     { 0x0008,                                                        ISCN },    /* backspace */
    782     { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    783     { 0x000a,                                              ISSP|     ISCN },    /* LF */
    784     { 0x000c,                                              ISSP|     ISCN },    /* FF */
    785     { 0x000d,                                              ISSP|     ISCN },    /* CR */
    786     { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    787     { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    788     { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    789     { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    790     { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    791     { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    792     { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    793     { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    794     { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    795     { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    796     { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    797     { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    798     { 0x0600,                                                        ISCN },    /* arabic number sign */
    799     { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    800     { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    801     { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    802     { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    803     { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    804     { 0x200b,                                                        ISCN },    /* ZWSP */
    805   /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    806     { 0x200e,                                                        ISCN },    /* LRM */
    807     { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    808     { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    809     { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    810     { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    811     { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    812     { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    813     { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    814     { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
    815 };
    816 
    817 static void
    818 TestPOSIX() {
    819     uint32_t mask;
    820     int32_t cl, i;
    821     UBool expect;
    822 
    823     mask=1;
    824     for(cl=0; cl<12; ++cl) {
    825         for(i=0; i<LENGTHOF(posixData); ++i) {
    826             expect=(UBool)((posixData[i].posixResults&mask)!=0);
    827             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
    828                 log_err("u_%s(U+%04x)=%s is wrong\n",
    829                     posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
    830             }
    831         }
    832         mask<<=1;
    833     }
    834 }
    835 
    836 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
    837 static void TestControlPrint()
    838 {
    839     const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
    840     const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
    841     const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
    842     const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
    843     UChar32 c;
    844 
    845     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, LENGTHOF(sampleControl), TRUE);
    846     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, LENGTHOF(sampleNonControl), FALSE);
    847 
    848     testSampleCharProps(u_isprint, "u_isprint",
    849                         samplePrintable, LENGTHOF(samplePrintable), TRUE);
    850     testSampleCharProps(u_isprint, "u_isprint",
    851                         sampleNonPrintable, LENGTHOF(sampleNonPrintable), FALSE);
    852 
    853     /* test all ISO 8 controls */
    854     for(c=0; c<=0x9f; ++c) {
    855         if(c==0x20) {
    856             /* skip ASCII graphic characters and continue with DEL */
    857             c=0x7f;
    858         }
    859         if(!u_iscntrl(c)) {
    860             log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
    861         }
    862         if(!u_isISOControl(c)) {
    863             log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
    864         }
    865         if(u_isprint(c)) {
    866             log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
    867         }
    868     }
    869 
    870     /* test all Latin-1 graphic characters */
    871     for(c=0x20; c<=0xff; ++c) {
    872         if(c==0x7f) {
    873             c=0xa0;
    874         } else if(c==0xad) {
    875             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
    876             ++c;
    877         }
    878         if(!u_isprint(c)) {
    879             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
    880         }
    881     }
    882 }
    883 
    884 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
    885 static void TestIdentifier()
    886 {
    887     const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
    888     const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
    889     const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
    890     const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
    891     const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
    892     const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
    893     const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
    894     const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
    895     const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
    896     const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
    897 
    898     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
    899                         sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
    900     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
    901                         sampleNonJavaIDStart, LENGTHOF(sampleNonJavaIDStart), FALSE);
    902 
    903     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    904                         sampleJavaIDPart, LENGTHOF(sampleJavaIDPart), TRUE);
    905     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    906                         sampleNonJavaIDPart, LENGTHOF(sampleNonJavaIDPart), FALSE);
    907 
    908     /* IDPart should imply IDStart */
    909     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    910                         sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
    911 
    912     testSampleCharProps(u_isIDStart, "u_isIDStart",
    913                         sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
    914     testSampleCharProps(u_isIDStart, "u_isIDStart",
    915                         sampleNonUnicodeIDStart, LENGTHOF(sampleNonUnicodeIDStart), FALSE);
    916 
    917     testSampleCharProps(u_isIDPart, "u_isIDPart",
    918                         sampleUnicodeIDPart, LENGTHOF(sampleUnicodeIDPart), TRUE);
    919     testSampleCharProps(u_isIDPart, "u_isIDPart",
    920                         sampleNonUnicodeIDPart, LENGTHOF(sampleNonUnicodeIDPart), FALSE);
    921 
    922     /* IDPart should imply IDStart */
    923     testSampleCharProps(u_isIDPart, "u_isIDPart",
    924                         sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
    925 
    926     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
    927                         sampleIDIgnore, LENGTHOF(sampleIDIgnore), TRUE);
    928     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
    929                         sampleNonIDIgnore, LENGTHOF(sampleNonIDIgnore), FALSE);
    930 }
    931 
    932 /* for each line of UnicodeData.txt, check some of the properties */
    933 typedef struct UnicodeDataContext {
    934 #if UCONFIG_NO_NORMALIZATION
    935     const void *dummy;
    936 #else
    937     const UNormalizer2 *nfc;
    938     const UNormalizer2 *nfkc;
    939 #endif
    940 } UnicodeDataContext;
    941 
    942 /*
    943  * ### TODO
    944  * This test fails incorrectly if the First or Last code point of a repetitive area
    945  * is overridden, which is allowed and is encouraged for the PUAs.
    946  * Currently, this means that both area First/Last and override lines are
    947  * tested against the properties from the API,
    948  * and the area boundary will not match and cause an error.
    949  *
    950  * This function should detect area boundaries and skip them for the test of individual
    951  * code points' properties.
    952  * Then it should check that the areas contain all the same properties except where overridden.
    953  * For this, it would have had to set a flag for which code points were listed explicitly.
    954  */
    955 static void U_CALLCONV
    956 unicodeDataLineFn(void *context,
    957                   char *fields[][2], int32_t fieldCount,
    958                   UErrorCode *pErrorCode)
    959 {
    960     char buffer[100];
    961     const char *d;
    962     char *end;
    963     uint32_t value;
    964     UChar32 c;
    965     int32_t i;
    966     int8_t type;
    967     int32_t dt;
    968     UChar dm[32], s[32];
    969     int32_t dmLength, length;
    970 
    971 #if !UCONFIG_NO_NORMALIZATION
    972     const UNormalizer2 *nfc, *nfkc;
    973 #endif
    974 
    975     /* get the character code, field 0 */
    976     c=strtoul(fields[0][0], &end, 16);
    977     if(end<=fields[0][0] || end!=fields[0][1]) {
    978         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
    979         return;
    980     }
    981     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
    982         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
    983         return;
    984     }
    985 
    986     /* get general category, field 2 */
    987     *fields[2][1]=0;
    988     type = (int8_t)tagValues[MakeProp(fields[2][0])];
    989     if(u_charType(c)!=type) {
    990         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
    991     }
    992     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
    993         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
    994     }
    995 
    996     /* get canonical combining class, field 3 */
    997     value=strtoul(fields[3][0], &end, 10);
    998     if(end<=fields[3][0] || end!=fields[3][1]) {
    999         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
   1000         return;
   1001     }
   1002     if(value>255) {
   1003         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
   1004         return;
   1005     }
   1006 #if !UCONFIG_NO_NORMALIZATION
   1007     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
   1008         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
   1009     }
   1010     nfkc=((UnicodeDataContext *)context)->nfkc;
   1011     if(value!=unorm2_getCombiningClass(nfkc, c)) {
   1012         log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
   1013     }
   1014 #endif
   1015 
   1016     /* get BiDi category, field 4 */
   1017     *fields[4][1]=0;
   1018     i=MakeDir(fields[4][0]);
   1019     if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
   1020         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
   1021     }
   1022 
   1023     /* get Decomposition_Type & Decomposition_Mapping, field 5 */
   1024     d=NULL;
   1025     if(fields[5][0]==fields[5][1]) {
   1026         /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
   1027         if(c==0xac00 || c==0xd7a3) {
   1028             dt=U_DT_CANONICAL;
   1029         } else {
   1030             dt=U_DT_NONE;
   1031         }
   1032     } else {
   1033         d=fields[5][0];
   1034         *fields[5][1]=0;
   1035         dt=UCHAR_INVALID_CODE;
   1036         if(*d=='<') {
   1037             end=strchr(++d, '>');
   1038             if(end!=NULL) {
   1039                 *end=0;
   1040                 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
   1041                 d=u_skipWhitespace(end+1);
   1042             }
   1043         } else {
   1044             dt=U_DT_CANONICAL;
   1045         }
   1046     }
   1047     if(dt>U_DT_NONE) {
   1048         if(c==0xac00) {
   1049             dm[0]=0x1100;
   1050             dm[1]=0x1161;
   1051             dm[2]=0;
   1052             dmLength=2;
   1053         } else if(c==0xd7a3) {
   1054             dm[0]=0xd788;
   1055             dm[1]=0x11c2;
   1056             dm[2]=0;
   1057             dmLength=2;
   1058         } else {
   1059             dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
   1060         }
   1061     } else {
   1062         dmLength=-1;
   1063     }
   1064     if(dt<0 || U_FAILURE(*pErrorCode)) {
   1065         log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
   1066         return;
   1067     }
   1068 #if !UCONFIG_NO_NORMALIZATION
   1069     i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
   1070     if(i!=dt) {
   1071         log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
   1072     }
   1073     /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
   1074     length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
   1075     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
   1076         log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
   1077                 "or the Decomposition_Mapping is different (%s)\n",
   1078                 c, length, dmLength, u_errorName(*pErrorCode));
   1079         return;
   1080     }
   1081     /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
   1082     if(dt!=U_DT_CANONICAL) {
   1083         dmLength=-1;
   1084     }
   1085     nfc=((UnicodeDataContext *)context)->nfc;
   1086     length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
   1087     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
   1088         log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
   1089                 "or the Decomposition_Mapping is different (%s)\n",
   1090                 c, length, dmLength, u_errorName(*pErrorCode));
   1091         return;
   1092     }
   1093     /* recompose */
   1094     if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
   1095         UChar32 a, b, composite;
   1096         i=0;
   1097         U16_NEXT(dm, i, dmLength, a);
   1098         U16_NEXT(dm, i, dmLength, b);
   1099         /* i==dmLength */
   1100         composite=unorm2_composePair(nfc, a, b);
   1101         if(composite!=c) {
   1102             log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
   1103                     (long)c, (long)a, (long)b, (long)composite);
   1104         }
   1105         /*
   1106          * Note: NFKC has fewer round-trip mappings than NFC,
   1107          * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
   1108          */
   1109     }
   1110 #endif
   1111 
   1112     /* get ISO Comment, field 11 */
   1113     *fields[11][1]=0;
   1114     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
   1115     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
   1116         log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
   1117             c, u_errorName(*pErrorCode),
   1118             U_FAILURE(*pErrorCode) ? buffer : "[error]",
   1119             fields[11][0]);
   1120     }
   1121 
   1122     /* get uppercase mapping, field 12 */
   1123     if(fields[12][0]!=fields[12][1]) {
   1124         value=strtoul(fields[12][0], &end, 16);
   1125         if(end!=fields[12][1]) {
   1126             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
   1127             return;
   1128         }
   1129         if((UChar32)value!=u_toupper(c)) {
   1130             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
   1131         }
   1132     } else {
   1133         /* no case mapping: the API must map the code point to itself */
   1134         if(c!=u_toupper(c)) {
   1135             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
   1136         }
   1137     }
   1138 
   1139     /* get lowercase mapping, field 13 */
   1140     if(fields[13][0]!=fields[13][1]) {
   1141         value=strtoul(fields[13][0], &end, 16);
   1142         if(end!=fields[13][1]) {
   1143             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
   1144             return;
   1145         }
   1146         if((UChar32)value!=u_tolower(c)) {
   1147             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
   1148         }
   1149     } else {
   1150         /* no case mapping: the API must map the code point to itself */
   1151         if(c!=u_tolower(c)) {
   1152             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
   1153         }
   1154     }
   1155 
   1156     /* get titlecase mapping, field 14 */
   1157     if(fields[14][0]!=fields[14][1]) {
   1158         value=strtoul(fields[14][0], &end, 16);
   1159         if(end!=fields[14][1]) {
   1160             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
   1161             return;
   1162         }
   1163         if((UChar32)value!=u_totitle(c)) {
   1164             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
   1165         }
   1166     } else {
   1167         /* no case mapping: the API must map the code point to itself */
   1168         if(c!=u_totitle(c)) {
   1169             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
   1170         }
   1171     }
   1172 }
   1173 
   1174 static UBool U_CALLCONV
   1175 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
   1176     static const UChar32 test[][2]={
   1177         {0x41, U_UPPERCASE_LETTER},
   1178         {0x308, U_NON_SPACING_MARK},
   1179         {0xfffe, U_GENERAL_OTHER_TYPES},
   1180         {0xe0041, U_FORMAT_CHAR},
   1181         {0xeffff, U_UNASSIGNED}
   1182     };
   1183 
   1184     int32_t i, count;
   1185 
   1186     if(0!=strcmp((const char *)context, "a1")) {
   1187         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
   1188         return FALSE;
   1189     }
   1190 
   1191     count=LENGTHOF(test);
   1192     for(i=0; i<count; ++i) {
   1193         if(start<=test[i][0] && test[i][0]<limit) {
   1194             if(type!=(UCharCategory)test[i][1]) {
   1195                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
   1196                         start, limit, (long)type, test[i][0], test[i][1]);
   1197             }
   1198             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
   1199             return i==(count-1) ? FALSE : TRUE;
   1200         }
   1201     }
   1202 
   1203     if(start>test[count-1][0]) {
   1204         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
   1205                 start, limit, (long)type);
   1206         return FALSE;
   1207     }
   1208 
   1209     return TRUE;
   1210 }
   1211 
   1212 static UBool U_CALLCONV
   1213 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
   1214     /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
   1215     static const int32_t defaultBidi[][2]={ /* { limit, class } */
   1216         { 0x0590, U_LEFT_TO_RIGHT },
   1217         { 0x0600, U_RIGHT_TO_LEFT },
   1218         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
   1219         { 0x08A0, U_RIGHT_TO_LEFT },
   1220         { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
   1221         { 0x20A0, U_LEFT_TO_RIGHT },
   1222         { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR },  /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */
   1223         { 0xFB1D, U_LEFT_TO_RIGHT },
   1224         { 0xFB50, U_RIGHT_TO_LEFT },
   1225         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
   1226         { 0xFE70, U_LEFT_TO_RIGHT },
   1227         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
   1228         { 0x10800, U_LEFT_TO_RIGHT },
   1229         { 0x11000, U_RIGHT_TO_LEFT },
   1230         { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
   1231         { 0x1EE00, U_RIGHT_TO_LEFT },
   1232         { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
   1233         { 0x1F000, U_RIGHT_TO_LEFT },
   1234         { 0x110000, U_LEFT_TO_RIGHT }
   1235     };
   1236 
   1237     UChar32 c;
   1238     int32_t i;
   1239     UCharDirection shouldBeDir;
   1240 
   1241     /*
   1242      * LineBreak.txt specifies:
   1243      *   #  - Assigned characters that are not listed explicitly are given the value
   1244      *   #    "AL".
   1245      *   #  - Unassigned characters are given the value "XX".
   1246      *
   1247      * PUA characters are listed explicitly with "XX".
   1248      * Verify that no assigned character has "XX".
   1249      */
   1250     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
   1251         c=start;
   1252         while(c<limit) {
   1253             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
   1254                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
   1255             }
   1256             ++c;
   1257         }
   1258     }
   1259 
   1260     /*
   1261      * Verify default Bidi classes.
   1262      * For recent Unicode versions, see UCD.html.
   1263      *
   1264      * For older Unicode versions:
   1265      * See table 3-7 "Bidirectional Character Types" in UAX #9.
   1266      * http://www.unicode.org/reports/tr9/
   1267      *
   1268      * See also DerivedBidiClass.txt for Cn code points!
   1269      *
   1270      * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
   1271      * changed some default values.
   1272      * In particular, non-characters and unassigned Default Ignorable Code Points
   1273      * change from L to BN.
   1274      *
   1275      * UCD.html version 4.0.1 does not yet reflect these changes.
   1276      */
   1277     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
   1278         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
   1279         c=start;
   1280         for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
   1281             if((int32_t)c<defaultBidi[i][0]) {
   1282                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
   1283                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
   1284                         shouldBeDir=U_BOUNDARY_NEUTRAL;
   1285                     } else {
   1286                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
   1287                     }
   1288 
   1289                     if( u_charDirection(c)!=shouldBeDir ||
   1290                         u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
   1291                     ) {
   1292                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
   1293                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
   1294                     }
   1295                     ++c;
   1296                 }
   1297             }
   1298         }
   1299     }
   1300 
   1301     return TRUE;
   1302 }
   1303 
   1304 /* tests for several properties */
   1305 static void TestUnicodeData()
   1306 {
   1307     UVersionInfo expectVersionArray;
   1308     UVersionInfo versionArray;
   1309     char *fields[15][2];
   1310     UErrorCode errorCode;
   1311     UChar32 c;
   1312     int8_t type;
   1313 
   1314     UnicodeDataContext context;
   1315 
   1316     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
   1317     u_getUnicodeVersion(versionArray);
   1318     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
   1319     {
   1320         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
   1321         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
   1322     }
   1323 
   1324 #if defined(ICU_UNICODE_VERSION)
   1325     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
   1326     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
   1327     {
   1328          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
   1329     }
   1330 #endif
   1331 
   1332     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
   1333         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
   1334     }
   1335 
   1336     errorCode=U_ZERO_ERROR;
   1337 #if !UCONFIG_NO_NORMALIZATION
   1338     context.nfc=unorm2_getNFCInstance(&errorCode);
   1339     context.nfkc=unorm2_getNFKCInstance(&errorCode);
   1340     if(U_FAILURE(errorCode)) {
   1341         log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
   1342         return;
   1343     }
   1344 #endif
   1345     parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
   1346     if(U_FAILURE(errorCode)) {
   1347         return; /* if we couldn't parse UnicodeData.txt, we should return */
   1348     }
   1349 
   1350     /* sanity check on repeated properties */
   1351     for(c=0xfffe; c<=0x10ffff;) {
   1352         type=u_charType(c);
   1353         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1354             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1355         }
   1356         if(type!=U_UNASSIGNED) {
   1357             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
   1358         }
   1359         if((c&0xffff)==0xfffe) {
   1360             ++c;
   1361         } else {
   1362             c+=0xffff;
   1363         }
   1364     }
   1365 
   1366     /* test that PUA is not "unassigned" */
   1367     for(c=0xe000; c<=0x10fffd;) {
   1368         type=u_charType(c);
   1369         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1370             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1371         }
   1372         if(type==U_UNASSIGNED) {
   1373             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
   1374         } else if(type!=U_PRIVATE_USE_CHAR) {
   1375             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
   1376         }
   1377         if(c==0xf8ff) {
   1378             c=0xf0000;
   1379         } else if(c==0xffffd) {
   1380             c=0x100000;
   1381         } else {
   1382             ++c;
   1383         }
   1384     }
   1385 
   1386     /* test u_enumCharTypes() */
   1387     u_enumCharTypes(enumTypeRange, "a1");
   1388 
   1389     /* check default properties */
   1390     u_enumCharTypes(enumDefaultsRange, NULL);
   1391 }
   1392 
   1393 static void TestCodeUnit(){
   1394     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
   1395 
   1396     int32_t i;
   1397 
   1398     for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
   1399         UChar c=codeunit[i];
   1400         if(i<4){
   1401             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
   1402                 log_err("ERROR: U+%04x is a single", c);
   1403             }
   1404 
   1405         }
   1406         if(i >= 4 && i< 8){
   1407             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
   1408                 log_err("ERROR: U+%04x is a first surrogate", c);
   1409             }
   1410         }
   1411         if(i >= 8 && i< 12){
   1412             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
   1413                 log_err("ERROR: U+%04x is a second surrogate", c);
   1414             }
   1415         }
   1416     }
   1417 
   1418 }
   1419 
   1420 static void TestCodePoint(){
   1421     const UChar32 codePoint[]={
   1422         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
   1423         0xd800,
   1424         0xdbff,
   1425         0xdc00,
   1426         0xdfff,
   1427         0xdc04,
   1428         0xd821,
   1429         /*not a surrogate, valid, isUnicodeChar , not Error*/
   1430         0x20ac,
   1431         0xd7ff,
   1432         0xe000,
   1433         0xe123,
   1434         0x0061,
   1435         0xe065,
   1436         0x20402,
   1437         0x24506,
   1438         0x23456,
   1439         0x20402,
   1440         0x10402,
   1441         0x23456,
   1442         /*not a surrogate, not valid, isUnicodeChar, isError */
   1443         0x0015,
   1444         0x009f,
   1445         /*not a surrogate, not valid, not isUnicodeChar, isError */
   1446         0xffff,
   1447         0xfffe,
   1448     };
   1449     int32_t i;
   1450     for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
   1451         UChar32 c=codePoint[i];
   1452         if(i<6){
   1453             if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
   1454                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1455             }
   1456             if(UTF_IS_VALID(c)){
   1457                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1458             }
   1459             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
   1460                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1461             }
   1462             if(UTF_IS_ERROR(c)){
   1463                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1464             }
   1465         }else if(i >=6 && i<18){
   1466             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1467                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1468             }
   1469             if(!UTF_IS_VALID(c)){
   1470                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1471             }
   1472             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
   1473                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1474             }
   1475             if(UTF_IS_ERROR(c)){
   1476                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1477             }
   1478         }else if(i >=18 && i<20){
   1479             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1480                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1481             }
   1482             if(UTF_IS_VALID(c)){
   1483                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1484             }
   1485             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
   1486                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1487             }
   1488             if(!UTF_IS_ERROR(c)){
   1489                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1490             }
   1491         }
   1492         else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
   1493             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1494                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1495             }
   1496             if(UTF_IS_VALID(c)){
   1497                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1498             }
   1499             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
   1500                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1501             }
   1502             if(!UTF_IS_ERROR(c)){
   1503                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1504             }
   1505         }
   1506     }
   1507 
   1508     if(
   1509         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
   1510         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
   1511         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
   1512         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
   1513     ) {
   1514         log_err("error with U_IS_BMP()\n");
   1515     }
   1516 
   1517     if(
   1518         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
   1519         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
   1520         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
   1521         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
   1522     ) {
   1523         log_err("error with U_IS_SUPPLEMENTARY()\n");
   1524     }
   1525 }
   1526 
   1527 static void TestCharLength()
   1528 {
   1529     const int32_t codepoint[]={
   1530         1, 0x0061,
   1531         1, 0xe065,
   1532         1, 0x20ac,
   1533         2, 0x20402,
   1534         2, 0x23456,
   1535         2, 0x24506,
   1536         2, 0x20402,
   1537         2, 0x10402,
   1538         1, 0xd7ff,
   1539         1, 0xe000
   1540     };
   1541 
   1542     int32_t i;
   1543     UBool multiple;
   1544     for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
   1545         UChar32 c=codepoint[i+1];
   1546         if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
   1547             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
   1548         }
   1549         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
   1550         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
   1551             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
   1552         }
   1553     }
   1554 }
   1555 
   1556 /*internal functions ----*/
   1557 static int32_t MakeProp(char* str)
   1558 {
   1559     int32_t result = 0;
   1560     char* matchPosition =0;
   1561 
   1562     matchPosition = strstr(tagStrings, str);
   1563     if (matchPosition == 0)
   1564     {
   1565         log_err("unrecognized type letter ");
   1566         log_err(str);
   1567     }
   1568     else
   1569         result = (int32_t)((matchPosition - tagStrings) / 2);
   1570     return result;
   1571 }
   1572 
   1573 static int32_t MakeDir(char* str)
   1574 {
   1575     int32_t pos = 0;
   1576     for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) {
   1577         if (strcmp(str, dirStrings[pos]) == 0) {
   1578             return pos;
   1579         }
   1580     }
   1581     return -1;
   1582 }
   1583 
   1584 /* test u_charName() -------------------------------------------------------- */
   1585 
   1586 static const struct {
   1587     uint32_t code;
   1588     const char *name, *oldName, *extName, *alias;
   1589 } names[]={
   1590     {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},
   1591     {0x01a2, "LATIN CAPITAL LETTER OI", "",
   1592              "LATIN CAPITAL LETTER OI",
   1593              "LATIN CAPITAL LETTER GHA"},
   1594     {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
   1595              "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
   1596     {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
   1597              "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
   1598              "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
   1599     {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" },
   1600     {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" },
   1601     {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" },
   1602     {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" },
   1603     {0xd800, "", "", "<lead surrogate-D800>" },
   1604     {0xdc00, "", "", "<trail surrogate-DC00>" },
   1605     {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS" },
   1606     {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },
   1607     {0xffff, "", "", "<noncharacter-FFFF>" },
   1608     {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
   1609               "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
   1610               "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
   1611     {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" }
   1612 };
   1613 
   1614 static UBool
   1615 enumCharNamesFn(void *context,
   1616                 UChar32 code, UCharNameChoice nameChoice,
   1617                 const char *name, int32_t length) {
   1618     int32_t *pCount=(int32_t *)context;
   1619     const char *expected;
   1620     int i;
   1621 
   1622     if(length<=0 || length!=(int32_t)strlen(name)) {
   1623         /* should not be called with an empty string or invalid length */
   1624         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
   1625         return TRUE;
   1626     }
   1627 
   1628     ++*pCount;
   1629     for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
   1630         if(code==(UChar32)names[i].code) {
   1631             switch (nameChoice) {
   1632                 case U_EXTENDED_CHAR_NAME:
   1633                     if(0!=strcmp(name, names[i].extName)) {
   1634                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
   1635                     }
   1636                     break;
   1637                 case U_UNICODE_CHAR_NAME:
   1638                     if(0!=strcmp(name, names[i].name)) {
   1639                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
   1640                     }
   1641                     break;
   1642                 case U_UNICODE_10_CHAR_NAME:
   1643                     expected=names[i].oldName;
   1644                     if(expected[0]==0 || 0!=strcmp(name, expected)) {
   1645                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
   1646                     }
   1647                     break;
   1648                 case U_CHAR_NAME_ALIAS:
   1649                     expected=names[i].alias;
   1650                     if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
   1651                         log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
   1652                     }
   1653                     break;
   1654                 case U_CHAR_NAME_CHOICE_COUNT:
   1655                     break;
   1656             }
   1657             break;
   1658         }
   1659     }
   1660     return TRUE;
   1661 }
   1662 
   1663 struct enumExtCharNamesContext {
   1664     uint32_t length;
   1665     int32_t last;
   1666 };
   1667 
   1668 static UBool
   1669 enumExtCharNamesFn(void *context,
   1670                 UChar32 code, UCharNameChoice nameChoice,
   1671                 const char *name, int32_t length) {
   1672     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
   1673 
   1674     if (ecncp->last != (int32_t) code - 1) {
   1675         if (ecncp->last < 0) {
   1676             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
   1677         } else {
   1678             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
   1679         }
   1680     }
   1681     ecncp->last = (int32_t) code;
   1682 
   1683     if (!*name) {
   1684         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
   1685     }
   1686 
   1687     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
   1688 }
   1689 
   1690 /**
   1691  * This can be made more efficient by moving it into putil.c and having
   1692  * it directly access the ebcdic translation tables.
   1693  * TODO: If we get this method in putil.c, then delete it from here.
   1694  */
   1695 static UChar
   1696 u_charToUChar(char c) {
   1697     UChar uc;
   1698     u_charsToUChars(&c, &uc, 1);
   1699     return uc;
   1700 }
   1701 
   1702 static void
   1703 TestCharNames() {
   1704     static char name[80];
   1705     UErrorCode errorCode=U_ZERO_ERROR;
   1706     struct enumExtCharNamesContext extContext;
   1707     const char *expected;
   1708     int32_t length;
   1709     UChar32 c;
   1710     int32_t i;
   1711 
   1712     log_verbose("Testing uprv_getMaxCharNameLength()\n");
   1713     length=uprv_getMaxCharNameLength();
   1714     if(length==0) {
   1715         /* no names data available */
   1716         return;
   1717     }
   1718     if(length<83) { /* Unicode 3.2 max char name length */
   1719         log_err("uprv_getMaxCharNameLength()=%d is too short");
   1720     }
   1721     /* ### TODO same tests for max ISO comment length as for max name length */
   1722 
   1723     log_verbose("Testing u_charName()\n");
   1724     for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
   1725         /* modern Unicode character name */
   1726         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
   1727         if(U_FAILURE(errorCode)) {
   1728             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
   1729             return;
   1730         }
   1731         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
   1732             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
   1733         }
   1734 
   1735         /* find the modern name */
   1736         if (*names[i].name) {
   1737             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
   1738             if(U_FAILURE(errorCode)) {
   1739                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
   1740                 return;
   1741             }
   1742             if(c!=(UChar32)names[i].code) {
   1743                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
   1744             }
   1745         }
   1746 
   1747         /* Unicode 1.0 character name */
   1748         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
   1749         if(U_FAILURE(errorCode)) {
   1750             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
   1751             return;
   1752         }
   1753         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
   1754             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
   1755         }
   1756 
   1757         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
   1758         if(names[i].oldName[0]!=0 /* && length>0 */) {
   1759             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
   1760             if(U_FAILURE(errorCode)) {
   1761                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
   1762                 return;
   1763             }
   1764             if(c!=(UChar32)names[i].code) {
   1765                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
   1766             }
   1767         }
   1768 
   1769         /* Unicode character name alias */
   1770         length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
   1771         if(U_FAILURE(errorCode)) {
   1772             log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
   1773             return;
   1774         }
   1775         expected=names[i].alias;
   1776         if(expected==NULL) {
   1777             expected="";
   1778         }
   1779         if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
   1780             log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
   1781                     names[i].code, name, length, expected);
   1782         }
   1783 
   1784         /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
   1785         if(expected[0]!=0 /* && length>0 */) {
   1786             c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
   1787             if(U_FAILURE(errorCode)) {
   1788                 log_err("u_charFromName(%s - alias) error %s\n",
   1789                         expected, u_errorName(errorCode));
   1790                 return;
   1791             }
   1792             if(c!=(UChar32)names[i].code) {
   1793                 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
   1794                         expected, c, names[i].code);
   1795             }
   1796         }
   1797     }
   1798 
   1799     /* test u_enumCharNames() */
   1800     length=0;
   1801     errorCode=U_ZERO_ERROR;
   1802     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
   1803     if(U_FAILURE(errorCode) || length<94140) {
   1804         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
   1805     }
   1806 
   1807     extContext.length = 0;
   1808     extContext.last = -1;
   1809     errorCode=U_ZERO_ERROR;
   1810     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
   1811     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
   1812         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
   1813     }
   1814 
   1815     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
   1816     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
   1817         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
   1818     }
   1819 
   1820     /* Test getCharNameCharacters */
   1821     if(!getTestOption(QUICK_OPTION)) {
   1822         enum { BUFSIZE = 256 };
   1823         UErrorCode ec = U_ZERO_ERROR;
   1824         char buf[BUFSIZE];
   1825         int32_t maxLength;
   1826         UChar32 cp;
   1827         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
   1828         int32_t l1, l2;
   1829         UBool map[256];
   1830         UBool ok;
   1831 
   1832         USet* set = uset_open(1, 0); /* empty set */
   1833         USet* dumb = uset_open(1, 0); /* empty set */
   1834 
   1835         /*
   1836          * uprv_getCharNameCharacters() will likely return more lowercase
   1837          * letters than actual character names contain because
   1838          * it includes all the characters in lowercased names of
   1839          * general categories, for the full possible set of extended names.
   1840          */
   1841         {
   1842             USetAdder sa={
   1843                 NULL,
   1844                 uset_add,
   1845                 uset_addRange,
   1846                 uset_addString,
   1847                 NULL /* don't need remove() */
   1848             };
   1849             sa.set=set;
   1850             uprv_getCharNameCharacters(&sa);
   1851         }
   1852 
   1853         /* build set the dumb (but sure-fire) way */
   1854         for (i=0; i<256; ++i) {
   1855             map[i] = FALSE;
   1856         }
   1857 
   1858         maxLength=0;
   1859         for (cp=0; cp<0x110000; ++cp) {
   1860             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
   1861                                      buf, BUFSIZE, &ec);
   1862             if (U_FAILURE(ec)) {
   1863                 log_err("FAIL: u_charName failed when it shouldn't\n");
   1864                 uset_close(set);
   1865                 uset_close(dumb);
   1866                 return;
   1867             }
   1868             if(len>maxLength) {
   1869                 maxLength=len;
   1870             }
   1871 
   1872             for (i=0; i<len; ++i) {
   1873                 if (!map[(uint8_t) buf[i]]) {
   1874                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
   1875                     map[(uint8_t) buf[i]] = TRUE;
   1876                 }
   1877             }
   1878 
   1879             /* test for leading/trailing whitespace */
   1880             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
   1881                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
   1882             }
   1883         }
   1884 
   1885         if(map[(uint8_t)'\t']) {
   1886             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
   1887         }
   1888 
   1889         length=uprv_getMaxCharNameLength();
   1890         if(length!=maxLength) {
   1891             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
   1892                     length, maxLength);
   1893         }
   1894 
   1895         /* compare the sets.  Where is my uset_equals?!! */
   1896         ok=TRUE;
   1897         for(i=0; i<256; ++i) {
   1898             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
   1899                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
   1900                     /* ignore lowercase a-z that are in set but not in dumb */
   1901                     ok=TRUE;
   1902                 } else {
   1903                     ok=FALSE;
   1904                     break;
   1905                 }
   1906             }
   1907         }
   1908 
   1909         l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
   1910         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
   1911         if (U_FAILURE(ec)) {
   1912             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
   1913             uset_close(set);
   1914             uset_close(dumb);
   1915             return;
   1916         }
   1917 
   1918         if (l1 >= BUFSIZE) {
   1919             l1 = BUFSIZE-1;
   1920             pat[l1] = 0;
   1921         }
   1922         if (l2 >= BUFSIZE) {
   1923             l2 = BUFSIZE-1;
   1924             dumbPat[l2] = 0;
   1925         }
   1926 
   1927         if (!ok) {
   1928             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
   1929                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
   1930         } else if(getTestOption(VERBOSITY_OPTION)) {
   1931             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
   1932         }
   1933 
   1934         uset_close(set);
   1935         uset_close(dumb);
   1936     }
   1937 
   1938     /* ### TODO: test error cases and other interesting things */
   1939 }
   1940 
   1941 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
   1942 
   1943 static void
   1944 TestMirroring() {
   1945     USet *set;
   1946     UErrorCode errorCode;
   1947 
   1948     UChar32 start, end, c2, c3;
   1949     int32_t i;
   1950 
   1951     U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
   1952 
   1953     U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
   1954 
   1955     log_verbose("Testing u_isMirrored()\n");
   1956     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
   1957          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
   1958         )
   1959     ) {
   1960         log_err("u_isMirrored() does not work correctly\n");
   1961     }
   1962 
   1963     log_verbose("Testing u_charMirror()\n");
   1964     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
   1965          u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
   1966          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
   1967          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
   1968          u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
   1969          )
   1970     ) {
   1971         log_err("u_charMirror() does not work correctly\n");
   1972     }
   1973 
   1974     /* verify that Bidi_Mirroring_Glyph roundtrips */
   1975     errorCode=U_ZERO_ERROR;
   1976     set=uset_openPattern(mirroredPattern, 17, &errorCode);
   1977 
   1978     if (U_FAILURE(errorCode)) {
   1979         log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
   1980     } else {
   1981         for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
   1982             do {
   1983                 c2=u_charMirror(start);
   1984                 c3=u_charMirror(c2);
   1985                 if(c3!=start) {
   1986                     log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
   1987                 }
   1988                 c3=u_getBidiPairedBracket(start);
   1989                 if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)==U_BPT_NONE) {
   1990                     if(c3!=start) {
   1991                         log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt(c)==None\n",
   1992                                 (long)start);
   1993                     }
   1994                 } else {
   1995                     if(c3!=c2) {
   1996                         log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bmg(c)'\n",
   1997                                 (long)start, (long)c2);
   1998                     }
   1999                 }
   2000             } while(++start<=end);
   2001         }
   2002     }
   2003 
   2004     uset_close(set);
   2005 }
   2006 
   2007 
   2008 struct RunTestData
   2009 {
   2010     const char *runText;
   2011     UScriptCode runCode;
   2012 };
   2013 
   2014 typedef struct RunTestData RunTestData;
   2015 
   2016 static void
   2017 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
   2018                 const char *prefix)
   2019 {
   2020     int32_t run, runStart, runLimit;
   2021     UScriptCode runCode;
   2022 
   2023     /* iterate over all the runs */
   2024     run = 0;
   2025     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
   2026         if (runStart != runStarts[run]) {
   2027             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
   2028                 prefix, run, runStarts[run], runStart);
   2029         }
   2030 
   2031         if (runLimit != runStarts[run + 1]) {
   2032             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
   2033                 prefix, run, runStarts[run + 1], runLimit);
   2034         }
   2035 
   2036         if (runCode != testData[run].runCode) {
   2037             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
   2038                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
   2039         }
   2040 
   2041         run += 1;
   2042 
   2043         /* stop when we've seen all the runs we expect to see */
   2044         if (run >= nRuns) {
   2045             break;
   2046         }
   2047     }
   2048 
   2049     /* Complain if we didn't see then number of runs we expected */
   2050     if (run != nRuns) {
   2051         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
   2052     }
   2053 }
   2054 
   2055 static void
   2056 TestUScriptRunAPI()
   2057 {
   2058     static const RunTestData testData1[] = {
   2059         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
   2060         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
   2061         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
   2062         {"English (", USCRIPT_LATIN},
   2063         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
   2064         {") ", USCRIPT_LATIN},
   2065         {"\\u6F22\\u5B75", USCRIPT_HAN},
   2066         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
   2067         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
   2068         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
   2069     };
   2070 
   2071     static const RunTestData testData2[] = {
   2072        {"((((((((((abc))))))))))", USCRIPT_LATIN}
   2073     };
   2074 
   2075     static const struct {
   2076       const RunTestData *testData;
   2077       int32_t nRuns;
   2078     } testDataEntries[] = {
   2079         {testData1, LENGTHOF(testData1)},
   2080         {testData2, LENGTHOF(testData2)}
   2081     };
   2082 
   2083     static const int32_t nTestEntries = LENGTHOF(testDataEntries);
   2084     int32_t testEntry;
   2085 
   2086     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
   2087         UChar testString[1024];
   2088         int32_t runStarts[256];
   2089         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
   2090         const RunTestData *testData = testDataEntries[testEntry].testData;
   2091 
   2092         int32_t run, stringLimit;
   2093         UScriptRun *scriptRun = NULL;
   2094         UErrorCode err;
   2095 
   2096         /*
   2097          * Fill in the test string and the runStarts array.
   2098          */
   2099         stringLimit = 0;
   2100         for (run = 0; run < nTestRuns; run += 1) {
   2101             runStarts[run] = stringLimit;
   2102             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
   2103             /*stringLimit -= 1;*/
   2104         }
   2105 
   2106         /* The limit of the last run */
   2107         runStarts[nTestRuns] = stringLimit;
   2108 
   2109         /*
   2110          * Make sure that calling uscript_OpenRun with a NULL text pointer
   2111          * and a non-zero text length returns the correct error.
   2112          */
   2113         err = U_ZERO_ERROR;
   2114         scriptRun = uscript_openRun(NULL, stringLimit, &err);
   2115 
   2116         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2117             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2118         }
   2119 
   2120         if (scriptRun != NULL) {
   2121             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
   2122             uscript_closeRun(scriptRun);
   2123         }
   2124 
   2125         /*
   2126          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
   2127          * and a zero text length returns the correct error.
   2128          */
   2129         err = U_ZERO_ERROR;
   2130         scriptRun = uscript_openRun(testString, 0, &err);
   2131 
   2132         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2133             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2134         }
   2135 
   2136         if (scriptRun != NULL) {
   2137             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
   2138             uscript_closeRun(scriptRun);
   2139         }
   2140 
   2141         /*
   2142          * Make sure that calling uscript_openRun with a NULL text pointer
   2143          * and a zero text length doesn't return an error.
   2144          */
   2145         err = U_ZERO_ERROR;
   2146         scriptRun = uscript_openRun(NULL, 0, &err);
   2147 
   2148         if (U_FAILURE(err)) {
   2149             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
   2150         }
   2151 
   2152         /* Make sure that the empty iterator doesn't find any runs */
   2153         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
   2154             log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
   2155         }
   2156 
   2157         /*
   2158          * Make sure that calling uscript_setRunText with a NULL text pointer
   2159          * and a non-zero text length returns the correct error.
   2160          */
   2161         err = U_ZERO_ERROR;
   2162         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
   2163 
   2164         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2165             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2166         }
   2167 
   2168         /*
   2169          * Make sure that calling uscript_setRunText with a non-NULL text pointer
   2170          * and a zero text length returns the correct error.
   2171          */
   2172         err = U_ZERO_ERROR;
   2173         uscript_setRunText(scriptRun, testString, 0, &err);
   2174 
   2175         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2176             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2177         }
   2178 
   2179         /*
   2180          * Now call uscript_setRunText on the empty iterator
   2181          * and make sure that it works.
   2182          */
   2183         err = U_ZERO_ERROR;
   2184         uscript_setRunText(scriptRun, testString, stringLimit, &err);
   2185 
   2186         if (U_FAILURE(err)) {
   2187             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
   2188         } else {
   2189             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
   2190         }
   2191 
   2192         uscript_closeRun(scriptRun);
   2193 
   2194         /*
   2195          * Now open an iterator over the testString
   2196          * using uscript_openRun and make sure that it works
   2197          */
   2198         scriptRun = uscript_openRun(testString, stringLimit, &err);
   2199 
   2200         if (U_FAILURE(err)) {
   2201             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
   2202         } else {
   2203             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
   2204         }
   2205 
   2206         /* Now reset the iterator, and make sure
   2207          * that it still works.
   2208          */
   2209         uscript_resetRun(scriptRun);
   2210 
   2211         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
   2212 
   2213         /* Close the iterator */
   2214         uscript_closeRun(scriptRun);
   2215     }
   2216 }
   2217 
   2218 /* test additional, non-core properties */
   2219 static void
   2220 TestAdditionalProperties() {
   2221     /* test data for u_charAge() */
   2222     static const struct {
   2223         UChar32 c;
   2224         UVersionInfo version;
   2225     } charAges[]={
   2226         {0x41,    { 1, 1, 0, 0 }},
   2227         {0xffff,  { 1, 1, 0, 0 }},
   2228         {0x20ab,  { 2, 0, 0, 0 }},
   2229         {0x2fffe, { 2, 0, 0, 0 }},
   2230         {0x20ac,  { 2, 1, 0, 0 }},
   2231         {0xfb1d,  { 3, 0, 0, 0 }},
   2232         {0x3f4,   { 3, 1, 0, 0 }},
   2233         {0x10300, { 3, 1, 0, 0 }},
   2234         {0x220,   { 3, 2, 0, 0 }},
   2235         {0xff60,  { 3, 2, 0, 0 }}
   2236     };
   2237 
   2238     /* test data for u_hasBinaryProperty() */
   2239     static const int32_t
   2240     props[][3]={ /* code point, property, value */
   2241         { 0x0627, UCHAR_ALPHABETIC, TRUE },
   2242         { 0x1034a, UCHAR_ALPHABETIC, TRUE },
   2243         { 0x2028, UCHAR_ALPHABETIC, FALSE },
   2244 
   2245         { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
   2246         { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
   2247 
   2248         { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
   2249         { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
   2250 
   2251         { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
   2252         { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
   2253 
   2254         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
   2255         { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
   2256         { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
   2257         { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
   2258         { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
   2259 
   2260         { 0x058a, UCHAR_DASH, TRUE },
   2261         { 0x007e, UCHAR_DASH, FALSE },
   2262 
   2263         { 0x0c4d, UCHAR_DIACRITIC, TRUE },
   2264         { 0x3000, UCHAR_DIACRITIC, FALSE },
   2265 
   2266         { 0x0e46, UCHAR_EXTENDER, TRUE },
   2267         { 0x0020, UCHAR_EXTENDER, FALSE },
   2268 
   2269 #if !UCONFIG_NO_NORMALIZATION
   2270         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
   2271         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
   2272         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
   2273 
   2274         { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
   2275         { 0x0308, UCHAR_NFD_INERT, FALSE },
   2276 
   2277         { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
   2278         { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
   2279 
   2280         { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
   2281         { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
   2282         { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
   2283         { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
   2284         { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
   2285         { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
   2286 
   2287         { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
   2288         { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
   2289 
   2290         { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
   2291         { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
   2292         { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
   2293         { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
   2294         { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
   2295         { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
   2296 #endif
   2297 
   2298         { 0x0044, UCHAR_HEX_DIGIT, TRUE },
   2299         { 0xff46, UCHAR_HEX_DIGIT, TRUE },
   2300         { 0x0047, UCHAR_HEX_DIGIT, FALSE },
   2301 
   2302         { 0x30fb, UCHAR_HYPHEN, TRUE },
   2303         { 0xfe58, UCHAR_HYPHEN, FALSE },
   2304 
   2305         { 0x2172, UCHAR_ID_CONTINUE, TRUE },
   2306         { 0x0307, UCHAR_ID_CONTINUE, TRUE },
   2307         { 0x005c, UCHAR_ID_CONTINUE, FALSE },
   2308 
   2309         { 0x2172, UCHAR_ID_START, TRUE },
   2310         { 0x007a, UCHAR_ID_START, TRUE },
   2311         { 0x0039, UCHAR_ID_START, FALSE },
   2312 
   2313         { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
   2314         { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
   2315         { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
   2316 
   2317         { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
   2318         { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
   2319 
   2320         { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
   2321         { 0x0345, UCHAR_LOWERCASE, TRUE },
   2322         { 0x0030, UCHAR_LOWERCASE, FALSE },
   2323 
   2324         { 0x1d7a9, UCHAR_MATH, TRUE },
   2325         { 0x2135, UCHAR_MATH, TRUE },
   2326         { 0x0062, UCHAR_MATH, FALSE },
   2327 
   2328         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
   2329         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
   2330         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
   2331 
   2332         { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
   2333         { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
   2334         { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
   2335 
   2336         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
   2337         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
   2338 
   2339         { 0x1d44a, UCHAR_UPPERCASE, TRUE },
   2340         { 0x2162, UCHAR_UPPERCASE, TRUE },
   2341         { 0x0345, UCHAR_UPPERCASE, FALSE },
   2342 
   2343         { 0x0020, UCHAR_WHITE_SPACE, TRUE },
   2344         { 0x202f, UCHAR_WHITE_SPACE, TRUE },
   2345         { 0x3001, UCHAR_WHITE_SPACE, FALSE },
   2346 
   2347         { 0x0711, UCHAR_XID_CONTINUE, TRUE },
   2348         { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
   2349         { 0x007c, UCHAR_XID_CONTINUE, FALSE },
   2350 
   2351         { 0x16ee, UCHAR_XID_START, TRUE },
   2352         { 0x23456, UCHAR_XID_START, TRUE },
   2353         { 0x1d1aa, UCHAR_XID_START, FALSE },
   2354 
   2355         /*
   2356          * Version break:
   2357          * The following properties are only supported starting with the
   2358          * Unicode version indicated in the second field.
   2359          */
   2360         { -1, 0x320, 0 },
   2361 
   2362         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
   2363         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
   2364         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
   2365 
   2366         { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
   2367         { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
   2368         { 0xe0041, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
   2369         { 0xe0100, UCHAR_DEPRECATED, FALSE },
   2370 
   2371         { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
   2372         { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
   2373         { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
   2374         { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
   2375 
   2376         { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
   2377         { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
   2378         { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
   2379         { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
   2380 
   2381         { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
   2382         { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
   2383 
   2384         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
   2385         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
   2386 
   2387         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
   2388         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
   2389 
   2390         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
   2391         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
   2392 
   2393         { 0x2e9b, UCHAR_RADICAL, TRUE },
   2394         { 0x4e00, UCHAR_RADICAL, FALSE },
   2395 
   2396         { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
   2397         { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
   2398 
   2399         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
   2400         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
   2401 
   2402         { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
   2403 
   2404         { 0x002e, UCHAR_S_TERM, TRUE },
   2405         { 0x0061, UCHAR_S_TERM, FALSE },
   2406 
   2407         { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
   2408         { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
   2409         { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
   2410         { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
   2411 
   2412         /* enum/integer type properties */
   2413 
   2414         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
   2415         /* test default Bidi classes for unassigned code points */
   2416         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2417         { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2418         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2419         { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
   2420         { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
   2421         { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2422         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2423         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2424         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2425         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2426         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2427 
   2428         { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2429         { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2430         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2431         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2432         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2433         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2434         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2435         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2436 
   2437         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
   2438         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
   2439         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
   2440         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
   2441         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
   2442         { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2443         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
   2444         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
   2445         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2446         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2447         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
   2448 
   2449         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
   2450         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
   2451 
   2452         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
   2453         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
   2454         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
   2455         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2456         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2457         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2458         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
   2459         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
   2460         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
   2461 
   2462         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2463         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
   2464         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2465         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
   2466         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2467         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
   2468         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2469         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2470         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2471         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2472         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2473         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2474         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2475         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
   2476         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2477         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2478         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2479 
   2480         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
   2481         { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
   2482         { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
   2483 
   2484         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
   2485         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
   2486         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
   2487         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
   2488         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
   2489 
   2490         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
   2491         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
   2492         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
   2493         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
   2494         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
   2495         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2496         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2497         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2498 
   2499         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
   2500         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
   2501         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
   2502         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
   2503         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
   2504         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2505         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2506         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2507         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2508         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2509         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2510         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2511         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
   2512         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
   2513         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
   2514         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
   2515 
   2516         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
   2517 
   2518         /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
   2519 
   2520         { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2521         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2522         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2523         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2524         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2525         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2526         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2527 
   2528         { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2529         { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2530         { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2531         { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2532 
   2533         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2534         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2535         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2536         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2537         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2538         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2539 
   2540         { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2541         { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2542         { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2543         { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2544 
   2545         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2546         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2547         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2548         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2549         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2550         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2551         { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2552 
   2553         { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2554         { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2555         { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2556         { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2557 
   2558         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2559         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2560         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2561         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2562 
   2563         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2564         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2565         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2566         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2567         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2568 
   2569         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2570 
   2571         { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
   2572 
   2573         { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
   2574         { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
   2575         { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
   2576 
   2577         { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2578         { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2579         { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2580         { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
   2581         { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
   2582 
   2583         { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
   2584         { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
   2585         { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
   2586 
   2587         { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
   2588         { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
   2589         { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
   2590         { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
   2591 
   2592         { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
   2593         { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
   2594         { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
   2595         { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
   2596         { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
   2597         { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
   2598 
   2599         { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
   2600         { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
   2601         { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
   2602         { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
   2603 
   2604         { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
   2605         { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
   2606         { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
   2607         { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
   2608 
   2609         { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
   2610         { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
   2611         { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
   2612         { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
   2613 
   2614         { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
   2615 
   2616         /* unassigned code points in new default Bidi R blocks */
   2617         { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2618         { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2619 
   2620         /* test some script codes >127 */
   2621         { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
   2622         { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
   2623         { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
   2624 
   2625         { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
   2626 
   2627         /* value changed in Unicode 6.0 */
   2628         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
   2629 
   2630         { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
   2631 
   2632         /* unassigned code points in new/changed default Bidi AL blocks */
   2633         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2634         { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2635 
   2636         { -1, 0x630, 0 }, /* version break for Unicode 6.3 */
   2637 
   2638         /* unassigned code points in the currency symbols block now default to ET */
   2639         { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
   2640         { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
   2641 
   2642         /* new property in Unicode 6.3 */
   2643         { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
   2644         { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
   2645         { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
   2646         { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
   2647         { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
   2648         { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
   2649 
   2650         /* undefined UProperty values */
   2651         { 0x61, 0x4a7, 0 },
   2652         { 0x234bc, 0x15ed, 0 }
   2653     };
   2654 
   2655     UVersionInfo version;
   2656     UChar32 c;
   2657     int32_t i, result, uVersion;
   2658     UProperty which;
   2659 
   2660     /* what is our Unicode version? */
   2661     u_getUnicodeVersion(version);
   2662     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
   2663 
   2664     u_charAge(0x20, version);
   2665     if(version[0]==0) {
   2666         /* no additional properties available */
   2667         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
   2668         return;
   2669     }
   2670 
   2671     /* test u_charAge() */
   2672     for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
   2673         u_charAge(charAges[i].c, version);
   2674         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
   2675             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
   2676                 charAges[i].c,
   2677                 version[0], version[1], version[2], version[3],
   2678                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
   2679         }
   2680     }
   2681 
   2682     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
   2683         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
   2684         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
   2685         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
   2686         u_getIntPropertyMinValue(0x2345)!=0
   2687     ) {
   2688         log_err("error: u_getIntPropertyMinValue() wrong\n");
   2689     }
   2690     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
   2691         log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
   2692     }
   2693     if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
   2694         log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
   2695     }
   2696     if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
   2697         log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
   2698     }
   2699     if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
   2700         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
   2701     }
   2702     if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
   2703         log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
   2704     }
   2705     if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
   2706         log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
   2707     }
   2708     if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
   2709         log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
   2710     }
   2711     if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
   2712         log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
   2713     }
   2714     if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
   2715         log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
   2716     }
   2717     if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
   2718         log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
   2719     }
   2720     if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
   2721         log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
   2722     }
   2723     if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
   2724         log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
   2725     }
   2726     if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
   2727         log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
   2728     }
   2729     if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_COUNT-1) {
   2730         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE) wrong\n");
   2731     }
   2732     /*JB#2410*/
   2733     if( u_getIntPropertyMaxValue(0x2345)!=-1) {
   2734         log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
   2735     }
   2736     if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
   2737         log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
   2738     }
   2739     if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
   2740         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
   2741     }
   2742     if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
   2743         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
   2744     }
   2745     if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
   2746         log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
   2747     }
   2748 
   2749     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
   2750     for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
   2751         const char *whichName;
   2752 
   2753         if(props[i][0]<0) {
   2754             /* Unicode version break */
   2755             if(uVersion<props[i][1]) {
   2756                 break; /* do not test properties that are not yet supported */
   2757             } else {
   2758                 continue; /* skip this row */
   2759             }
   2760         }
   2761 
   2762         c=(UChar32)props[i][0];
   2763         which=(UProperty)props[i][1];
   2764         whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
   2765 
   2766         if(which<UCHAR_INT_START) {
   2767             result=u_hasBinaryProperty(c, which);
   2768             if(result!=props[i][2]) {
   2769                 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
   2770                         c, whichName, result, i);
   2771             }
   2772         }
   2773 
   2774         result=u_getIntPropertyValue(c, which);
   2775         if(result!=props[i][2]) {
   2776             log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
   2777                     c, whichName, result, props[i][2], i);
   2778         }
   2779 
   2780         /* test separate functions, too */
   2781         switch((UProperty)props[i][1]) {
   2782         case UCHAR_ALPHABETIC:
   2783             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2784                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
   2785                         props[i][0], result, i);
   2786             }
   2787             break;
   2788         case UCHAR_LOWERCASE:
   2789             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2790                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
   2791                         props[i][0], result, i);
   2792             }
   2793             break;
   2794         case UCHAR_UPPERCASE:
   2795             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2796                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
   2797                         props[i][0], result, i);
   2798             }
   2799             break;
   2800         case UCHAR_WHITE_SPACE:
   2801             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2802                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
   2803                         props[i][0], result, i);
   2804             }
   2805             break;
   2806         default:
   2807             break;
   2808         }
   2809     }
   2810 }
   2811 
   2812 static void
   2813 TestNumericProperties(void) {
   2814     /* see UnicodeData.txt, DerivedNumericValues.txt */
   2815     static const struct {
   2816         UChar32 c;
   2817         int32_t type;
   2818         double numValue;
   2819     } values[]={
   2820         { 0x0F33, U_NT_NUMERIC, -1./2. },
   2821         { 0x0C66, U_NT_DECIMAL, 0 },
   2822         { 0x96f6, U_NT_NUMERIC, 0 },
   2823         { 0xa833, U_NT_NUMERIC, 1./16. },
   2824         { 0x2152, U_NT_NUMERIC, 1./10. },
   2825         { 0x2151, U_NT_NUMERIC, 1./9. },
   2826         { 0x1245f, U_NT_NUMERIC, 1./8. },
   2827         { 0x2150, U_NT_NUMERIC, 1./7. },
   2828         { 0x2159, U_NT_NUMERIC, 1./6. },
   2829         { 0x09f6, U_NT_NUMERIC, 3./16. },
   2830         { 0x2155, U_NT_NUMERIC, 1./5. },
   2831         { 0x00BD, U_NT_NUMERIC, 1./2. },
   2832         { 0x0031, U_NT_DECIMAL, 1. },
   2833         { 0x4e00, U_NT_NUMERIC, 1. },
   2834         { 0x58f1, U_NT_NUMERIC, 1. },
   2835         { 0x10320, U_NT_NUMERIC, 1. },
   2836         { 0x0F2B, U_NT_NUMERIC, 3./2. },
   2837         { 0x00B2, U_NT_DIGIT, 2. },
   2838         { 0x5f10, U_NT_NUMERIC, 2. },
   2839         { 0x1813, U_NT_DECIMAL, 3. },
   2840         { 0x5f0e, U_NT_NUMERIC, 3. },
   2841         { 0x2173, U_NT_NUMERIC, 4. },
   2842         { 0x8086, U_NT_NUMERIC, 4. },
   2843         { 0x278E, U_NT_DIGIT, 5. },
   2844         { 0x1D7F2, U_NT_DECIMAL, 6. },
   2845         { 0x247A, U_NT_DIGIT, 7. },
   2846         { 0x7396, U_NT_NUMERIC, 9. },
   2847         { 0x1372, U_NT_NUMERIC, 10. },
   2848         { 0x216B, U_NT_NUMERIC, 12. },
   2849         { 0x16EE, U_NT_NUMERIC, 17. },
   2850         { 0x249A, U_NT_NUMERIC, 19. },
   2851         { 0x303A, U_NT_NUMERIC, 30. },
   2852         { 0x5345, U_NT_NUMERIC, 30. },
   2853         { 0x32B2, U_NT_NUMERIC, 37. },
   2854         { 0x1375, U_NT_NUMERIC, 40. },
   2855         { 0x10323, U_NT_NUMERIC, 50. },
   2856         { 0x0BF1, U_NT_NUMERIC, 100. },
   2857         { 0x964c, U_NT_NUMERIC, 100. },
   2858         { 0x217E, U_NT_NUMERIC, 500. },
   2859         { 0x2180, U_NT_NUMERIC, 1000. },
   2860         { 0x4edf, U_NT_NUMERIC, 1000. },
   2861         { 0x2181, U_NT_NUMERIC, 5000. },
   2862         { 0x137C, U_NT_NUMERIC, 10000. },
   2863         { 0x4e07, U_NT_NUMERIC, 10000. },
   2864         { 0x12432, U_NT_NUMERIC, 216000. },
   2865         { 0x12433, U_NT_NUMERIC, 432000. },
   2866         { 0x4ebf, U_NT_NUMERIC, 100000000. },
   2867         { 0x5146, U_NT_NUMERIC, 1000000000000. },
   2868         { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2869         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2870         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2871         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2872         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2873         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2874         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2875         { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
   2876     };
   2877 
   2878     double nv;
   2879     UChar32 c;
   2880     int32_t i, type;
   2881 
   2882     for(i=0; i<LENGTHOF(values); ++i) {
   2883         c=values[i].c;
   2884         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
   2885         nv=u_getNumericValue(c);
   2886 
   2887         if(type!=values[i].type) {
   2888             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
   2889         }
   2890         if(0.000001 <= fabs(nv - values[i].numValue)) {
   2891             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
   2892         }
   2893     }
   2894 }
   2895 
   2896 /**
   2897  * Test the property names and property value names API.
   2898  */
   2899 static void
   2900 TestPropertyNames(void) {
   2901     int32_t p, v, choice=0, rev;
   2902     UBool atLeastSomething = FALSE;
   2903 
   2904     for (p=0; ; ++p) {
   2905         UProperty propEnum = (UProperty)p;
   2906         UBool sawProp = FALSE;
   2907         if(p > 10 && !atLeastSomething) {
   2908           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
   2909           return;
   2910         }
   2911 
   2912         for (choice=0; ; ++choice) {
   2913             const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
   2914             if (name) {
   2915                 if (!sawProp)
   2916                     log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
   2917                 log_verbose("%d=\"%s\"", choice, name);
   2918                 sawProp = TRUE;
   2919                 atLeastSomething = TRUE;
   2920 
   2921                 /* test reverse mapping */
   2922                 rev = u_getPropertyEnum(name);
   2923                 if (rev != p) {
   2924                     log_err("Property round-trip failure: %d -> %s -> %d\n",
   2925                             p, name, rev);
   2926                 }
   2927             }
   2928             if (!name && choice>0) break;
   2929         }
   2930         if (sawProp) {
   2931             /* looks like a valid property; check the values */
   2932             const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
   2933             int32_t max = 0;
   2934             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
   2935                 max = 255;
   2936             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
   2937                 /* it's far too slow to iterate all the way up to
   2938                    the real max, U_GC_P_MASK */
   2939                 max = U_GC_NL_MASK;
   2940             } else if (p == UCHAR_BLOCK) {
   2941                 /* UBlockCodes, unlike other values, start at 1 */
   2942                 max = 1;
   2943             }
   2944             log_verbose("\n");
   2945             for (v=-1; ; ++v) {
   2946                 UBool sawValue = FALSE;
   2947                 for (choice=0; ; ++choice) {
   2948                     const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
   2949                     if (vname) {
   2950                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
   2951                         log_verbose("%d=\"%s\"", choice, vname);
   2952                         sawValue = TRUE;
   2953 
   2954                         /* test reverse mapping */
   2955                         rev = u_getPropertyValueEnum(propEnum, vname);
   2956                         if (rev != v) {
   2957                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
   2958                                     pname, v, vname, rev);
   2959                         }
   2960                     }
   2961                     if (!vname && choice>0) break;
   2962                 }
   2963                 if (sawValue) {
   2964                     log_verbose("\n");
   2965                 }
   2966                 if (!sawValue && v>=max) break;
   2967             }
   2968         }
   2969         if (!sawProp) {
   2970             if (p>=UCHAR_STRING_LIMIT) {
   2971                 break;
   2972             } else if (p>=UCHAR_DOUBLE_LIMIT) {
   2973                 p = UCHAR_STRING_START - 1;
   2974             } else if (p>=UCHAR_MASK_LIMIT) {
   2975                 p = UCHAR_DOUBLE_START - 1;
   2976             } else if (p>=UCHAR_INT_LIMIT) {
   2977                 p = UCHAR_MASK_START - 1;
   2978             } else if (p>=UCHAR_BINARY_LIMIT) {
   2979                 p = UCHAR_INT_START - 1;
   2980             }
   2981         }
   2982     }
   2983 }
   2984 
   2985 /**
   2986  * Test the property values API.  See JB#2410.
   2987  */
   2988 static void
   2989 TestPropertyValues(void) {
   2990     int32_t i, p, min, max;
   2991     UErrorCode ec;
   2992 
   2993     /* Min should be 0 for everything. */
   2994     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
   2995     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
   2996         UProperty propEnum = (UProperty)p;
   2997         min = u_getIntPropertyMinValue(propEnum);
   2998         if (min != 0) {
   2999             if (p == UCHAR_BLOCK) {
   3000                 /* This is okay...for now.  See JB#2487.
   3001                    TODO Update this for JB#2487. */
   3002             } else {
   3003                 const char* name;
   3004                 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
   3005                 if (name == NULL)
   3006                     name = "<ERROR>";
   3007                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
   3008                         name, min);
   3009             }
   3010         }
   3011     }
   3012 
   3013     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
   3014         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
   3015         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
   3016     }
   3017 
   3018     /* Max should be -1 for invalid properties. */
   3019     max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
   3020     if (max != -1) {
   3021         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
   3022                 max);
   3023     }
   3024 
   3025     /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
   3026     for (i=0; i<2; ++i) {
   3027         int32_t script;
   3028         const char* desc;
   3029         ec = U_ZERO_ERROR;
   3030         switch (i) {
   3031         case 0:
   3032             script = uscript_getScript(-1, &ec);
   3033             desc = "uscript_getScript(-1)";
   3034             break;
   3035         case 1:
   3036             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
   3037             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
   3038             break;
   3039         default:
   3040             log_err("Internal test error. Too many scripts\n");
   3041             return;
   3042         }
   3043         /* We don't explicitly test ec.  It should be U_FAILURE but it
   3044            isn't documented as such. */
   3045         if (script != (int32_t)USCRIPT_INVALID_CODE) {
   3046             log_err("FAIL: %s = %d, exp. 0\n",
   3047                     desc, script);
   3048         }
   3049     }
   3050 }
   3051 
   3052 /* various tests for consistency of UCD data and API behavior */
   3053 static void
   3054 TestConsistency() {
   3055     char buffer[300];
   3056     USet *set1, *set2, *set3, *set4;
   3057     UErrorCode errorCode;
   3058 
   3059     UChar32 start, end;
   3060     int32_t i, length;
   3061 
   3062     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
   3063     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
   3064     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
   3065     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
   3066     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
   3067 
   3068     U_STRING_DECL(mathBlocksPattern,
   3069         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
   3070         214);
   3071     U_STRING_DECL(mathPattern, "[:Math:]", 8);
   3072     U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
   3073     U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
   3074     U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
   3075 
   3076     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
   3077     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
   3078     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
   3079     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
   3080     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
   3081 
   3082     U_STRING_INIT(mathBlocksPattern,
   3083         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
   3084         214);
   3085     U_STRING_INIT(mathPattern, "[:Math:]", 8);
   3086     U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
   3087     U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
   3088     U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
   3089 
   3090     /*
   3091      * It used to be that UCD.html and its precursors said
   3092      * "Those dashes used to mark connections between pieces of words,
   3093      *  plus the Katakana middle dot."
   3094      *
   3095      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
   3096      * but not from Hyphen.
   3097      * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
   3098      * Therefore, do not show errors when testing the Hyphen property.
   3099      */
   3100     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
   3101                 "known to the UTC and not considered errors.\n");
   3102 
   3103     errorCode=U_ZERO_ERROR;
   3104     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
   3105     set2=uset_openPattern(dashPattern, 8, &errorCode);
   3106     if(U_SUCCESS(errorCode)) {
   3107         /* remove the Katakana middle dot(s) from set1 */
   3108         uset_remove(set1, 0x30fb);
   3109         uset_remove(set1, 0xff65); /* halfwidth variant */
   3110         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
   3111     } else {
   3112         log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3113     }
   3114 
   3115     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
   3116     set3=uset_openPattern(formatPattern, 6, &errorCode);
   3117     set4=uset_openPattern(alphaPattern, 14, &errorCode);
   3118     if(U_SUCCESS(errorCode)) {
   3119         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
   3120         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
   3121         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
   3122     } else {
   3123         log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3124     }
   3125 
   3126     uset_close(set1);
   3127     uset_close(set2);
   3128     uset_close(set3);
   3129     uset_close(set4);
   3130 
   3131     /*
   3132      * Check that each lowercase character has "small" in its name
   3133      * and not "capital".
   3134      * There are some such characters, some of which seem odd.
   3135      * Use the verbose flag to see these notices.
   3136      */
   3137     errorCode=U_ZERO_ERROR;
   3138     set1=uset_openPattern(lowerPattern, 13, &errorCode);
   3139     if(U_SUCCESS(errorCode)) {
   3140         for(i=0;; ++i) {
   3141             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
   3142             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
   3143                 break; /* done */
   3144             }
   3145             if(U_FAILURE(errorCode)) {
   3146                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
   3147                         i, u_errorName(errorCode));
   3148                 break;
   3149             }
   3150             if(length!=0) {
   3151                 break; /* done with code points, got a string or -1 */
   3152             }
   3153 
   3154             while(start<=end) {
   3155                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
   3156                 if(U_FAILURE(errorCode)) {
   3157                     log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
   3158                     errorCode=U_ZERO_ERROR;
   3159                 }
   3160                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
   3161                     strstr(buffer, "SMALL CAPITAL")==NULL
   3162                 ) {
   3163                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
   3164                 }
   3165                 ++start;
   3166             }
   3167         }
   3168     } else {
   3169         log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3170     }
   3171     uset_close(set1);
   3172 
   3173     /* verify that all assigned characters in Math blocks are exactly Math characters */
   3174     errorCode=U_ZERO_ERROR;
   3175     set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
   3176     set2=uset_openPattern(mathPattern, 8, &errorCode);
   3177     set3=uset_openPattern(unassignedPattern, 6, &errorCode);
   3178     if(U_SUCCESS(errorCode)) {
   3179         uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
   3180         uset_complement(set3);      /* assigned characters */
   3181         uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
   3182         compareUSets(set1, set2,
   3183                      "[assigned Math block chars]", "[math blocks]&[:Math:]",
   3184                      TRUE);
   3185     } else {
   3186         log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3187     }
   3188     uset_close(set1);
   3189     uset_close(set2);
   3190     uset_close(set3);
   3191 
   3192     /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
   3193     errorCode=U_ZERO_ERROR;
   3194     set1=uset_openPattern(unknownPattern, 14, &errorCode);
   3195     set2=uset_openPattern(reservedPattern, 20, &errorCode);
   3196     if(U_SUCCESS(errorCode)) {
   3197         compareUSets(set1, set2,
   3198                      "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
   3199                      TRUE);
   3200     } else {
   3201         log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3202     }
   3203     uset_close(set1);
   3204     uset_close(set2);
   3205 }
   3206 
   3207 /*
   3208  * Starting with ICU4C 3.4, the core Unicode properties files
   3209  * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
   3210  * are hardcoded in the common DLL and therefore not included
   3211  * in the data package any more.
   3212  * Tests requiring these files are disabled so that
   3213  * we need not jump through hoops (like adding snapshots of these files
   3214  * to testdata).
   3215  * See Jitterbug 4497.
   3216  */
   3217 #define HARDCODED_DATA_4497 1
   3218 
   3219 /* API coverage for ucase.c */
   3220 static void TestUCase() {
   3221 #if !HARDCODED_DATA_4497
   3222     UDataMemory *pData;
   3223     UCaseProps *csp;
   3224     const UCaseProps *ccsp;
   3225     UErrorCode errorCode;
   3226 
   3227     /* coverage for ucase_openBinary() */
   3228     errorCode=U_ZERO_ERROR;
   3229     pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
   3230     if(U_FAILURE(errorCode)) {
   3231         log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
   3232                     u_errorName(errorCode));
   3233         return;
   3234     }
   3235 
   3236     csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
   3237     if(U_FAILURE(errorCode)) {
   3238         log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
   3239                 u_errorName(errorCode));
   3240         udata_close(pData);
   3241         return;
   3242     }
   3243 
   3244     if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
   3245         log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
   3246     }
   3247 
   3248     ucase_close(csp);
   3249     udata_close(pData);
   3250 
   3251     /* coverage for ucase_getDummy() */
   3252     errorCode=U_ZERO_ERROR;
   3253     ccsp=ucase_getDummy(&errorCode);
   3254     if(ucase_tolower(ccsp, 0x41)!=0x41) {
   3255         log_err("ucase_tolower(dummy, A)!=A\n");
   3256     }
   3257 #endif
   3258 }
   3259 
   3260 /* API coverage for ubidi_props.c */
   3261 static void TestUBiDiProps() {
   3262 #if !HARDCODED_DATA_4497
   3263     UDataMemory *pData;
   3264     UBiDiProps *bdp;
   3265     const UBiDiProps *cbdp;
   3266     UErrorCode errorCode;
   3267 
   3268     /* coverage for ubidi_openBinary() */
   3269     errorCode=U_ZERO_ERROR;
   3270     pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
   3271     if(U_FAILURE(errorCode)) {
   3272         log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
   3273                     u_errorName(errorCode));
   3274         return;
   3275     }
   3276 
   3277     bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
   3278     if(U_FAILURE(errorCode)) {
   3279         log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
   3280                 u_errorName(errorCode));
   3281         udata_close(pData);
   3282         return;
   3283     }
   3284 
   3285     if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
   3286         log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
   3287     }
   3288 
   3289     ubidi_closeProps(bdp);
   3290     udata_close(pData);
   3291 
   3292     /* coverage for ubidi_getDummy() */
   3293     errorCode=U_ZERO_ERROR;
   3294     cbdp=ubidi_getDummy(&errorCode);
   3295     if(ubidi_getClass(cbdp, 0x20)!=0) {
   3296         log_err("ubidi_getClass(dummy, space)!=0\n");
   3297     }
   3298 #endif
   3299 }
   3300 
   3301 /* test case folding, compare return values with CaseFolding.txt ------------ */
   3302 
   3303 /* bit set for which case foldings for a character have been tested already */
   3304 enum {
   3305     CF_SIMPLE=1,
   3306     CF_FULL=2,
   3307     CF_TURKIC=4,
   3308     CF_ALL=7
   3309 };
   3310 
   3311 static void
   3312 testFold(UChar32 c, int which,
   3313          UChar32 simple, UChar32 turkic,
   3314          const UChar *full, int32_t fullLength,
   3315          const UChar *turkicFull, int32_t turkicFullLength) {
   3316     UChar s[2], t[32];
   3317     UChar32 c2;
   3318     int32_t length, length2;
   3319 
   3320     UErrorCode errorCode=U_ZERO_ERROR;
   3321 
   3322     length=0;
   3323     U16_APPEND_UNSAFE(s, length, c);
   3324 
   3325     if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
   3326         log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
   3327     }
   3328     if((which&CF_FULL)!=0) {
   3329         length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);
   3330         if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
   3331             log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
   3332         }
   3333     }
   3334     if((which&CF_TURKIC)!=0) {
   3335         if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
   3336             log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
   3337         }
   3338 
   3339         length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
   3340         if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
   3341             log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
   3342         }
   3343     }
   3344 }
   3345 
   3346 /* test that c case-folds to itself */
   3347 static void
   3348 testFoldToSelf(UChar32 c, int which) {
   3349     UChar s[2];
   3350     int32_t length;
   3351 
   3352     length=0;
   3353     U16_APPEND_UNSAFE(s, length, c);
   3354     testFold(c, which, c, c, s, length, s, length);
   3355 }
   3356 
   3357 struct CaseFoldingData {
   3358     USet *notSeen;
   3359     UChar32 prev, prevSimple;
   3360     UChar prevFull[32];
   3361     int32_t prevFullLength;
   3362     int which;
   3363 };
   3364 typedef struct CaseFoldingData CaseFoldingData;
   3365 
   3366 static void U_CALLCONV
   3367 caseFoldingLineFn(void *context,
   3368                   char *fields[][2], int32_t fieldCount,
   3369                   UErrorCode *pErrorCode) {
   3370     CaseFoldingData *pData=(CaseFoldingData *)context;
   3371     char *end;
   3372     UChar full[32];
   3373     UChar32 c, prev, simple;
   3374     int32_t count;
   3375     int which;
   3376     char status;
   3377 
   3378     /* get code point */
   3379     const char *s=u_skipWhitespace(fields[0][0]);
   3380     if(0==strncmp(s, "0000..10FFFF", 12)) {
   3381         /*
   3382          * Ignore the line
   3383          * # @missing: 0000..10FFFF; C; <code point>
   3384          * because maps-to-self is already our default, and this line breaks this parser.
   3385          */
   3386         return;
   3387     }
   3388     c=(UChar32)strtoul(s, &end, 16);
   3389     end=(char *)u_skipWhitespace(end);
   3390     if(end<=fields[0][0] || end!=fields[0][1]) {
   3391         log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
   3392         *pErrorCode=U_PARSE_ERROR;
   3393         return;
   3394     }
   3395 
   3396     /* get the status of this mapping */
   3397     status=*u_skipWhitespace(fields[1][0]);
   3398     if(status!='C' && status!='S' && status!='F' && status!='T') {
   3399         log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
   3400         *pErrorCode=U_PARSE_ERROR;
   3401         return;
   3402     }
   3403 
   3404     /* get the mapping */
   3405     count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
   3406     if(U_FAILURE(*pErrorCode)) {
   3407         log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
   3408         return;
   3409     }
   3410 
   3411     /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
   3412     if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
   3413         simple=c;
   3414     }
   3415 
   3416     if(c!=(prev=pData->prev)) {
   3417         /*
   3418          * Test remaining mappings for the previous code point.
   3419          * If a turkic folding was not mentioned, then it should fold the same
   3420          * as the regular simple case folding.
   3421          */
   3422         UChar prevString[2];
   3423         int32_t length;
   3424 
   3425         length=0;
   3426         U16_APPEND_UNSAFE(prevString, length, prev);
   3427         testFold(prev, (~pData->which)&CF_ALL,
   3428                  prev, pData->prevSimple,
   3429                  prevString, length,
   3430                  pData->prevFull, pData->prevFullLength);
   3431         pData->prev=pData->prevSimple=c;
   3432         length=0;
   3433         U16_APPEND_UNSAFE(pData->prevFull, length, c);
   3434         pData->prevFullLength=length;
   3435         pData->which=0;
   3436     }
   3437 
   3438     /*
   3439      * Turn the status into a bit set of case foldings to test.
   3440      * Remember non-Turkic case foldings as defaults for Turkic mode.
   3441      */
   3442     switch(status) {
   3443     case 'C':
   3444         which=CF_SIMPLE|CF_FULL;
   3445         pData->prevSimple=simple;
   3446         u_memcpy(pData->prevFull, full, count);
   3447         pData->prevFullLength=count;
   3448         break;
   3449     case 'S':
   3450         which=CF_SIMPLE;
   3451         pData->prevSimple=simple;
   3452         break;
   3453     case 'F':
   3454         which=CF_FULL;
   3455         u_memcpy(pData->prevFull, full, count);
   3456         pData->prevFullLength=count;
   3457         break;
   3458     case 'T':
   3459         which=CF_TURKIC;
   3460         break;
   3461     default:
   3462         which=0;
   3463         break; /* won't happen because of test above */
   3464     }
   3465 
   3466     testFold(c, which, simple, simple, full, count, full, count);
   3467 
   3468     /* remember which case foldings of c have been tested */
   3469     pData->which|=which;
   3470 
   3471     /* remove c from the set of ones not mentioned in CaseFolding.txt */
   3472     uset_remove(pData->notSeen, c);
   3473 }
   3474 
   3475 static void
   3476 TestCaseFolding() {
   3477     CaseFoldingData data={ NULL };
   3478     char *fields[3][2];
   3479     UErrorCode errorCode;
   3480 
   3481     static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
   3482 
   3483     errorCode=U_ZERO_ERROR;
   3484     /* test BMP & plane 1 - nothing interesting above */
   3485     data.notSeen=uset_open(0, 0x1ffff);
   3486     data.prevFullLength=1; /* length of full case folding of U+0000 */
   3487 
   3488     parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
   3489     if(U_SUCCESS(errorCode)) {
   3490         int32_t i, start, end;
   3491 
   3492         /* add a pseudo-last line to finish testing of the actual last one */
   3493         fields[0][0]=lastLine;
   3494         fields[0][1]=lastLine+6;
   3495         fields[1][0]=lastLine+7;
   3496         fields[1][1]=lastLine+9;
   3497         fields[2][0]=lastLine+10;
   3498         fields[2][1]=lastLine+17;
   3499         caseFoldingLineFn(&data, fields, 3, &errorCode);
   3500 
   3501         /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
   3502         for(i=0;
   3503             0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
   3504                 U_SUCCESS(errorCode);
   3505             ++i
   3506         ) {
   3507             do {
   3508                 testFoldToSelf(start, CF_ALL);
   3509             } while(++start<=end);
   3510         }
   3511     }
   3512 
   3513     uset_close(data.notSeen);
   3514 }
   3515