Home | History | Annotate | Download | only in cintltst
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /********************************************************************
      4  * COPYRIGHT:
      5  * Copyright (c) 1997-2016, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  ********************************************************************/
      8 /*******************************************************************************
      9 *
     10 * File CUCDTST.C
     11 *
     12 * Modification History:
     13 *        Name                     Description
     14 *     Madhu Katragadda            Ported for C API, added tests for string functions
     15 ********************************************************************************
     16 */
     17 
     18 #include <string.h>
     19 #include <math.h>
     20 #include <stdlib.h>
     21 
     22 #include "unicode/utypes.h"
     23 #include "unicode/uchar.h"
     24 #include "unicode/putil.h"
     25 #include "unicode/ustring.h"
     26 #include "unicode/uloc.h"
     27 #include "unicode/unorm2.h"
     28 
     29 #include "cintltst.h"
     30 #include "putilimp.h"
     31 #include "uparse.h"
     32 #include "ucase.h"
     33 #include "ubidi_props.h"
     34 #include "uprops.h"
     35 #include "uset_imp.h"
     36 #include "usc_impl.h"
     37 #include "udatamem.h" /* for testing ucase_openBinary() */
     38 #include "cucdapi.h"
     39 #include "cmemory.h"
     40 
     41 /* prototypes --------------------------------------------------------------- */
     42 
     43 static void TestUpperLower(void);
     44 static void TestLetterNumber(void);
     45 static void TestMisc(void);
     46 static void TestPOSIX(void);
     47 static void TestControlPrint(void);
     48 static void TestIdentifier(void);
     49 static void TestUnicodeData(void);
     50 static void TestCodeUnit(void);
     51 static void TestCodePoint(void);
     52 static void TestCharLength(void);
     53 static void TestCharNames(void);
     54 static void TestUCharFromNameUnderflow(void);
     55 static void TestMirroring(void);
     56 static void TestUScriptRunAPI(void);
     57 static void TestAdditionalProperties(void);
     58 static void TestNumericProperties(void);
     59 static void TestPropertyNames(void);
     60 static void TestPropertyValues(void);
     61 static void TestConsistency(void);
     62 static void TestUCase(void);
     63 static void TestUBiDiProps(void);
     64 static void TestCaseFolding(void);
     65 
     66 /* internal methods used */
     67 static int32_t MakeProp(char* str);
     68 static int32_t MakeDir(char* str);
     69 
     70 /* helpers ------------------------------------------------------------------ */
     71 
     72 static void
     73 parseUCDFile(const char *filename,
     74              char *fields[][2], int32_t fieldCount,
     75              UParseLineFn *lineFn, void *context,
     76              UErrorCode *pErrorCode) {
     77     char path[256];
     78     char backupPath[256];
     79 
     80     if(U_FAILURE(*pErrorCode)) {
     81         return;
     82     }
     83 
     84     /* Look inside ICU_DATA first */
     85     strcpy(path, u_getDataDirectory());
     86     strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
     87     strcat(path, filename);
     88 
     89     /* As a fallback, try to guess where the source data was located
     90      *    at the time ICU was built, and look there.
     91      */
     92     strcpy(backupPath, ctest_dataSrcDir());
     93     strcat(backupPath, U_FILE_SEP_STRING);
     94     strcat(backupPath, "unidata" U_FILE_SEP_STRING);
     95     strcat(backupPath, filename);
     96 
     97     u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
     98     if(*pErrorCode==U_FILE_ACCESS_ERROR) {
     99         *pErrorCode=U_ZERO_ERROR;
    100         u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
    101     }
    102     if(U_FAILURE(*pErrorCode)) {
    103         log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
    104     }
    105 }
    106 
    107 /* test data ---------------------------------------------------------------- */
    108 
    109 static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
    110 static const int32_t tagValues[] =
    111     {
    112     /* Mn */ U_NON_SPACING_MARK,
    113     /* Mc */ U_COMBINING_SPACING_MARK,
    114     /* Me */ U_ENCLOSING_MARK,
    115     /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    116     /* Nl */ U_LETTER_NUMBER,
    117     /* No */ U_OTHER_NUMBER,
    118     /* Zs */ U_SPACE_SEPARATOR,
    119     /* Zl */ U_LINE_SEPARATOR,
    120     /* Zp */ U_PARAGRAPH_SEPARATOR,
    121     /* Cc */ U_CONTROL_CHAR,
    122     /* Cf */ U_FORMAT_CHAR,
    123     /* Cs */ U_SURROGATE,
    124     /* Co */ U_PRIVATE_USE_CHAR,
    125     /* Cn */ U_UNASSIGNED,
    126     /* Lu */ U_UPPERCASE_LETTER,
    127     /* Ll */ U_LOWERCASE_LETTER,
    128     /* Lt */ U_TITLECASE_LETTER,
    129     /* Lm */ U_MODIFIER_LETTER,
    130     /* Lo */ U_OTHER_LETTER,
    131     /* Pc */ U_CONNECTOR_PUNCTUATION,
    132     /* Pd */ U_DASH_PUNCTUATION,
    133     /* Ps */ U_START_PUNCTUATION,
    134     /* Pe */ U_END_PUNCTUATION,
    135     /* Po */ U_OTHER_PUNCTUATION,
    136     /* Sm */ U_MATH_SYMBOL,
    137     /* Sc */ U_CURRENCY_SYMBOL,
    138     /* Sk */ U_MODIFIER_SYMBOL,
    139     /* So */ U_OTHER_SYMBOL,
    140     /* Pi */ U_INITIAL_PUNCTUATION,
    141     /* Pf */ U_FINAL_PUNCTUATION
    142     };
    143 
    144 static const char dirStrings[][5] = {
    145     "L",
    146     "R",
    147     "EN",
    148     "ES",
    149     "ET",
    150     "AN",
    151     "CS",
    152     "B",
    153     "S",
    154     "WS",
    155     "ON",
    156     "LRE",
    157     "LRO",
    158     "AL",
    159     "RLE",
    160     "RLO",
    161     "PDF",
    162     "NSM",
    163     "BN",
    164     /* new in Unicode 6.3/ICU 52 */
    165     "FSI",
    166     "LRI",
    167     "RLI",
    168     "PDI"
    169 };
    170 
    171 void addUnicodeTest(TestNode** root);
    172 
    173 void addUnicodeTest(TestNode** root)
    174 {
    175     addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
    176     addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
    177     addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
    178     addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
    179     addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
    180     addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
    181     addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
    182     addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
    183     addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
    184     addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
    185     addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
    186     addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
    187     addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
    188     addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
    189     addTest(root, &TestUCharFromNameUnderflow, "tsutil/cucdtst/TestUCharFromNameUnderflow");
    190     addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
    191     addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
    192     addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
    193     addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
    194     addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
    195     addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
    196     addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
    197     addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
    198     addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
    199     addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
    200     addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
    201     addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
    202 }
    203 
    204 /*==================================================== */
    205 /* test u_toupper() and u_tolower()                    */
    206 /*==================================================== */
    207 static void TestUpperLower()
    208 {
    209     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
    210     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
    211     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
    212     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
    213     int32_t i;
    214 
    215     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
    216     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
    217 
    218 /*
    219 Checks LetterLike Symbols which were previously a source of confusion
    220 [Bertrand A. D. 02/04/98]
    221 */
    222     for (i=0x2100;i<0x2138;i++)
    223     {
    224         /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
    225         if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
    226         {
    227             if (i != (int)u_tolower(i)) /* itself */
    228                 log_err("Failed case conversion with itself: U+%04x\n", i);
    229             if (i != (int)u_toupper(i))
    230                 log_err("Failed case conversion with itself: U+%04x\n", i);
    231         }
    232     }
    233 
    234     for(i=0; i < u_strlen(upper); i++){
    235         if(u_tolower(upper[i]) != lower[i]){
    236             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
    237         }
    238     }
    239 
    240     log_verbose("testing upper lower\n");
    241     for (i = 0; i < 21; i++) {
    242 
    243         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
    244         {
    245             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
    246         }
    247         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
    248          {
    249             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
    250         }
    251         else if (upperTest[i] != u_tolower(lowerTest[i]))
    252         {
    253             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
    254         }
    255         else if (lowerTest[i] != u_toupper(upperTest[i]))
    256          {
    257             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
    258         }
    259         else if (upperTest[i] != u_tolower(upperTest[i]))
    260         {
    261             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
    262         }
    263         else if (lowerTest[i] != u_toupper(lowerTest[i]))
    264         {
    265             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
    266         }
    267     }
    268     log_verbose("done testing upper lower\n");
    269 
    270     log_verbose("testing u_istitle\n");
    271     {
    272         static const UChar expected[] = {
    273             0x1F88,
    274             0x1F89,
    275             0x1F8A,
    276             0x1F8B,
    277             0x1F8C,
    278             0x1F8D,
    279             0x1F8E,
    280             0x1F8F,
    281             0x1F88,
    282             0x1F89,
    283             0x1F8A,
    284             0x1F8B,
    285             0x1F8C,
    286             0x1F8D,
    287             0x1F8E,
    288             0x1F8F,
    289             0x1F98,
    290             0x1F99,
    291             0x1F9A,
    292             0x1F9B,
    293             0x1F9C,
    294             0x1F9D,
    295             0x1F9E,
    296             0x1F9F,
    297             0x1F98,
    298             0x1F99,
    299             0x1F9A,
    300             0x1F9B,
    301             0x1F9C,
    302             0x1F9D,
    303             0x1F9E,
    304             0x1F9F,
    305             0x1FA8,
    306             0x1FA9,
    307             0x1FAA,
    308             0x1FAB,
    309             0x1FAC,
    310             0x1FAD,
    311             0x1FAE,
    312             0x1FAF,
    313             0x1FA8,
    314             0x1FA9,
    315             0x1FAA,
    316             0x1FAB,
    317             0x1FAC,
    318             0x1FAD,
    319             0x1FAE,
    320             0x1FAF,
    321             0x1FBC,
    322             0x1FBC,
    323             0x1FCC,
    324             0x1FCC,
    325             0x1FFC,
    326             0x1FFC,
    327         };
    328         int32_t num = UPRV_LENGTHOF(expected);
    329         for(i=0; i<num; i++){
    330             if(!u_istitle(expected[i])){
    331                 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
    332             }
    333         }
    334 
    335     }
    336 }
    337 
    338 /* compare two sets and verify that their difference or intersection is empty */
    339 static UBool
    340 showADiffB(const USet *a, const USet *b,
    341            const char *a_name, const char *b_name,
    342            UBool expect, UBool diffIsError) {
    343     USet *aa;
    344     int32_t i, start, end, length;
    345     UErrorCode errorCode;
    346 
    347     /*
    348      * expect:
    349      * TRUE  -> a-b should be empty, that is, b should contain all of a
    350      * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
    351      */
    352     if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
    353         return TRUE;
    354     }
    355 
    356     /* clone a to aa because a is const */
    357     aa=uset_open(1, 0);
    358     if(aa==NULL) {
    359         /* unusual problem - out of memory? */
    360         return FALSE;
    361     }
    362     uset_addAll(aa, a);
    363 
    364     /* compute the set in question */
    365     if(expect) {
    366         /* a-b */
    367         uset_removeAll(aa, b);
    368     } else {
    369         /* a&b */
    370         uset_retainAll(aa, b);
    371     }
    372 
    373     /* aa is not empty because of the initial tests above; show its contents */
    374     errorCode=U_ZERO_ERROR;
    375     i=0;
    376     for(;;) {
    377         length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
    378         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
    379             break; /* done */
    380         }
    381         if(U_FAILURE(errorCode)) {
    382             log_err("error comparing %s with %s at difference item %d: %s\n",
    383                 a_name, b_name, i, u_errorName(errorCode));
    384             break;
    385         }
    386         if(length!=0) {
    387             break; /* done with code points, got a string or -1 */
    388         }
    389 
    390         if(diffIsError) {
    391             if(expect) {
    392                 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
    393             } else {
    394                 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
    395             }
    396         } else {
    397             if(expect) {
    398                 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
    399             } else {
    400                 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
    401             }
    402         }
    403 
    404         ++i;
    405     }
    406 
    407     uset_close(aa);
    408     return FALSE;
    409 }
    410 
    411 static UBool
    412 showAMinusB(const USet *a, const USet *b,
    413             const char *a_name, const char *b_name,
    414             UBool diffIsError) {
    415     return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
    416 }
    417 
    418 static UBool
    419 showAIntersectB(const USet *a, const USet *b,
    420                 const char *a_name, const char *b_name,
    421                 UBool diffIsError) {
    422     return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
    423 }
    424 
    425 static UBool
    426 compareUSets(const USet *a, const USet *b,
    427              const char *a_name, const char *b_name,
    428              UBool diffIsError) {
    429     /*
    430      * Use an arithmetic & not a logical && so that both branches
    431      * are always taken and all differences are shown.
    432      */
    433     return
    434         showAMinusB(a, b, a_name, b_name, diffIsError) &
    435         showAMinusB(b, a, b_name, a_name, diffIsError);
    436 }
    437 
    438 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
    439 static void TestLetterNumber()
    440 {
    441     UChar i = 0x0000;
    442 
    443     log_verbose("Testing for isalpha\n");
    444     for (i = 0x0041; i < 0x005B; i++) {
    445         if (!u_isalpha(i))
    446         {
    447             log_err("Failed isLetter test at  %.4X\n", i);
    448         }
    449     }
    450     for (i = 0x0660; i < 0x066A; i++) {
    451         if (u_isalpha(i))
    452         {
    453             log_err("Failed isLetter test with numbers at %.4X\n", i);
    454         }
    455     }
    456 
    457     log_verbose("Testing for isdigit\n");
    458     for (i = 0x0660; i < 0x066A; i++) {
    459         if (!u_isdigit(i))
    460         {
    461             log_verbose("Failed isNumber test at %.4X\n", i);
    462         }
    463     }
    464 
    465     log_verbose("Testing for isalnum\n");
    466     for (i = 0x0041; i < 0x005B; i++) {
    467         if (!u_isalnum(i))
    468         {
    469             log_err("Failed isAlNum test at  %.4X\n", i);
    470         }
    471     }
    472     for (i = 0x0660; i < 0x066A; i++) {
    473         if (!u_isalnum(i))
    474         {
    475             log_err("Failed isAlNum test at  %.4X\n", i);
    476         }
    477     }
    478 
    479     {
    480         /*
    481          * The following checks work only starting from Unicode 4.0.
    482          * Check the version number here.
    483          */
    484         static UVersionInfo u401={ 4, 0, 1, 0 };
    485         UVersionInfo version;
    486         u_getUnicodeVersion(version);
    487         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
    488             return;
    489         }
    490     }
    491 
    492     {
    493         /*
    494          * Sanity check:
    495          * Verify that exactly the digit characters have decimal digit values.
    496          * This assumption is used in the implementation of u_digit()
    497          * (which checks nt=de)
    498          * compared with the parallel java.lang.Character.digit()
    499          * (which checks Nd).
    500          *
    501          * This was not true in Unicode 3.2 and earlier.
    502          * Unicode 4.0 fixed discrepancies.
    503          * Unicode 4.0.1 re-introduced problems in this area due to an
    504          * unintentionally incomplete last-minute change.
    505          */
    506         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
    507         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
    508 
    509         USet *digits, *decimalValues;
    510         UErrorCode errorCode;
    511 
    512         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
    513         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
    514         errorCode=U_ZERO_ERROR;
    515         digits=uset_openPattern(digitsPattern, 6, &errorCode);
    516         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
    517 
    518         if(U_SUCCESS(errorCode)) {
    519             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
    520         }
    521 
    522         uset_close(digits);
    523         uset_close(decimalValues);
    524     }
    525 }
    526 
    527 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
    528                                 const UChar32 *sampleChars, int32_t sampleCharsLength,
    529                                 UBool expected) {
    530     int32_t i;
    531     for (i = 0; i < sampleCharsLength; ++i) {
    532         UBool result = propFn(sampleChars[i]);
    533         if (result != expected) {
    534             log_err("error: character property function %s(U+%04x)=%d is wrong\n",
    535                     propName, sampleChars[i], result);
    536         }
    537     }
    538 }
    539 
    540 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
    541 static void TestMisc()
    542 {
    543     static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
    544     static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
    545     static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
    546     static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
    547     static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
    548     static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
    549 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
    550     static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
    551     static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
    552     static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
    553     static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
    554 
    555     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
    556 
    557     uint32_t mask;
    558 
    559     int32_t i;
    560     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
    561     UVersionInfo realVersion;
    562 
    563     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
    564 
    565     testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);
    566     testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);
    567 
    568     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
    569                         sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);
    570     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
    571                         sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);
    572 
    573     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
    574                         sampleWhiteSpaces, UPRV_LENGTHOF(sampleWhiteSpaces), TRUE);
    575     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
    576                         sampleNonWhiteSpaces, UPRV_LENGTHOF(sampleNonWhiteSpaces), FALSE);
    577 
    578     testSampleCharProps(u_isdefined, "u_isdefined",
    579                         sampleDefined, UPRV_LENGTHOF(sampleDefined), TRUE);
    580     testSampleCharProps(u_isdefined, "u_isdefined",
    581                         sampleUndefined, UPRV_LENGTHOF(sampleUndefined), FALSE);
    582 
    583     testSampleCharProps(u_isbase, "u_isbase", sampleBase, UPRV_LENGTHOF(sampleBase), TRUE);
    584     testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, UPRV_LENGTHOF(sampleNonBase), FALSE);
    585 
    586     testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, UPRV_LENGTHOF(sampleDigits), TRUE);
    587     testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, UPRV_LENGTHOF(sampleNonDigits), FALSE);
    588 
    589     for (i = 0; i < UPRV_LENGTHOF(sampleDigits); i++) {
    590         if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
    591             log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
    592                     sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
    593         }
    594     }
    595 
    596     /* Tests the ICU version #*/
    597     u_getVersion(realVersion);
    598     u_versionToString(realVersion, icuVersion);
    599     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
    600     {
    601         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
    602     }
    603 #if defined(ICU_VERSION)
    604     /* test only happens where we have configure.in with VERSION - sanity check. */
    605     if(strcmp(U_ICU_VERSION, ICU_VERSION))
    606     {
    607         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
    608     }
    609 #endif
    610 
    611     /* test U_GC_... */
    612     if(
    613         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
    614         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
    615         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
    616         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
    617         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
    618         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
    619     ) {
    620         log_err("error: U_GET_GC_MASK does not work properly\n");
    621     }
    622 
    623     mask=0;
    624     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
    625 
    626     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
    627     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
    628     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
    629     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
    630     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
    631 
    632     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
    633     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
    634     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
    635 
    636     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
    637     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
    638     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
    639 
    640     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
    641     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
    642     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
    643 
    644     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
    645     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
    646     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
    647     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
    648 
    649     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
    650     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
    651     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
    652     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
    653     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
    654 
    655     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
    656     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
    657     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
    658     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
    659 
    660     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
    661     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
    662 
    663     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
    664         log_err("error: problems with U_GC_XX_MASK constants\n");
    665     }
    666 
    667     mask=0;
    668     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
    669     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
    670     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
    671     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
    672     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
    673     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
    674     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
    675 
    676     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
    677         log_err("error: problems with U_GC_Y_MASK constants\n");
    678     }
    679     {
    680         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
    681         for(i=0; i<10; i++){
    682             if(digit[i]!=u_forDigit(i,10)){
    683                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
    684             }
    685         }
    686     }
    687 
    688     /* test u_digit() */
    689     {
    690         static const struct {
    691             UChar32 c;
    692             int8_t radix, value;
    693         } data[]={
    694             /* base 16 */
    695             { 0x0031, 16, 1 },
    696             { 0x0038, 16, 8 },
    697             { 0x0043, 16, 12 },
    698             { 0x0066, 16, 15 },
    699             { 0x00e4, 16, -1 },
    700             { 0x0662, 16, 2 },
    701             { 0x06f5, 16, 5 },
    702             { 0xff13, 16, 3 },
    703             { 0xff41, 16, 10 },
    704 
    705             /* base 8 */
    706             { 0x0031, 8, 1 },
    707             { 0x0038, 8, -1 },
    708             { 0x0043, 8, -1 },
    709             { 0x0066, 8, -1 },
    710             { 0x00e4, 8, -1 },
    711             { 0x0662, 8, 2 },
    712             { 0x06f5, 8, 5 },
    713             { 0xff13, 8, 3 },
    714             { 0xff41, 8, -1 },
    715 
    716             /* base 36 */
    717             { 0x5a, 36, 35 },
    718             { 0x7a, 36, 35 },
    719             { 0xff3a, 36, 35 },
    720             { 0xff5a, 36, 35 },
    721 
    722             /* wrong radix values */
    723             { 0x0031, 1, -1 },
    724             { 0xff3a, 37, -1 }
    725         };
    726 
    727         for(i=0; i<UPRV_LENGTHOF(data); ++i) {
    728             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
    729                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
    730                         data[i].c,
    731                         data[i].radix,
    732                         u_digit(data[i].c, data[i].radix),
    733                         data[i].value);
    734             }
    735         }
    736     }
    737 }
    738 
    739 /* test C/POSIX-style functions --------------------------------------------- */
    740 
/*
 * Bit flags for the expected results stored in posixData[], one flag per
 * C/POSIX-style character class.
 * The flag with value 1<<k belongs to posixClasses[k]: TestPOSIX() starts with
 * mask 1 and shifts it left once per posixClasses[] entry, so the order of the
 * flags and the order of the table must stay in sync.
 */
#define ISAL     1
#define ISLO     2
#define ISUP     4

#define ISDI     8
#define ISXD  0x10

#define ISAN  0x20

#define ISPU  0x40
#define ISGR  0x80
#define ISPR 0x100

#define ISSP 0x200
#define ISBL 0x400
#define ISCN 0x800

/* C/POSIX-style functions, in the same order as the bit flags */
/* Signature shared by all of the class-test functions listed in posixClasses[]. */
typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);

/* Function under test plus its name (without the "u_" prefix) for error messages. */
static const struct {
    IsPOSIXClass *fn;
    const char *name;
} posixClasses[]={
    { u_isalpha, "isalpha" },
    { u_islower, "islower" },
    { u_isupper, "isupper" },
    { u_isdigit, "isdigit" },
    { u_isxdigit, "isxdigit" },
    { u_isalnum, "isalnum" },
    { u_ispunct, "ispunct" },
    { u_isgraph, "isgraph" },
    { u_isprint, "isprint" },
    { u_isspace, "isspace" },
    { u_isblank, "isblank" },
    { u_iscntrl, "iscntrl" }
};
    779 
/*
 * Expected results for TestPOSIX(): for each sample code point c,
 * posixResults is the OR of the IS* flags of exactly those class functions
 * that are expected to return TRUE for c. A flag that is absent means that
 * the corresponding function is expected to return FALSE.
 * The columns are aligned so that each flag always appears in the same position.
 */
static const struct {
    UChar32 c;
    uint32_t posixResults;
} posixData[]={
    { 0x0008,                                                        ISCN },    /* backspace */
    { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    { 0x000a,                                              ISSP|     ISCN },    /* LF */
    { 0x000c,                                              ISSP|     ISCN },    /* FF */
    { 0x000d,                                              ISSP|     ISCN },    /* CR */
    { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    { 0x0600,                                                        ISCN },    /* arabic number sign */
    { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    { 0x200b,                                                        ISCN },    /* ZWSP */
  /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    { 0x200e,                                                        ISCN },    /* LRM */
    { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
};
    819 
    820 static void
    821 TestPOSIX() {
    822     uint32_t mask;
    823     int32_t cl, i;
    824     UBool expect;
    825 
    826     mask=1;
    827     for(cl=0; cl<12; ++cl) {
    828         for(i=0; i<UPRV_LENGTHOF(posixData); ++i) {
    829             expect=(UBool)((posixData[i].posixResults&mask)!=0);
    830             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
    831                 log_err("u_%s(U+%04x)=%s is wrong\n",
    832                     posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
    833             }
    834         }
    835         mask<<=1;
    836     }
    837 }
    838 
    839 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
    840 static void TestControlPrint()
    841 {
    842     const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
    843     const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
    844     const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
    845     const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
    846     UChar32 c;
    847 
    848     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, UPRV_LENGTHOF(sampleControl), TRUE);
    849     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, UPRV_LENGTHOF(sampleNonControl), FALSE);
    850 
    851     testSampleCharProps(u_isprint, "u_isprint",
    852                         samplePrintable, UPRV_LENGTHOF(samplePrintable), TRUE);
    853     testSampleCharProps(u_isprint, "u_isprint",
    854                         sampleNonPrintable, UPRV_LENGTHOF(sampleNonPrintable), FALSE);
    855 
    856     /* test all ISO 8 controls */
    857     for(c=0; c<=0x9f; ++c) {
    858         if(c==0x20) {
    859             /* skip ASCII graphic characters and continue with DEL */
    860             c=0x7f;
    861         }
    862         if(!u_iscntrl(c)) {
    863             log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
    864         }
    865         if(!u_isISOControl(c)) {
    866             log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
    867         }
    868         if(u_isprint(c)) {
    869             log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
    870         }
    871     }
    872 
    873     /* test all Latin-1 graphic characters */
    874     for(c=0x20; c<=0xff; ++c) {
    875         if(c==0x7f) {
    876             c=0xa0;
    877         } else if(c==0xad) {
    878             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
    879             ++c;
    880         }
    881         if(!u_isprint(c)) {
    882             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
    883         }
    884     }
    885 }
    886 
    887 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
    888 static void TestIdentifier()
    889 {
    890     const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
    891     const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
    892     const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
    893     const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
    894     const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
    895     const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
    896     const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
    897     const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
    898     const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
    899     const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
    900 
    901     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
    902                         sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE);
    903     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
    904                         sampleNonJavaIDStart, UPRV_LENGTHOF(sampleNonJavaIDStart), FALSE);
    905 
    906     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    907                         sampleJavaIDPart, UPRV_LENGTHOF(sampleJavaIDPart), TRUE);
    908     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    909                         sampleNonJavaIDPart, UPRV_LENGTHOF(sampleNonJavaIDPart), FALSE);
    910 
    911     /* IDPart should imply IDStart */
    912     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    913                         sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE);
    914 
    915     testSampleCharProps(u_isIDStart, "u_isIDStart",
    916                         sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE);
    917     testSampleCharProps(u_isIDStart, "u_isIDStart",
    918                         sampleNonUnicodeIDStart, UPRV_LENGTHOF(sampleNonUnicodeIDStart), FALSE);
    919 
    920     testSampleCharProps(u_isIDPart, "u_isIDPart",
    921                         sampleUnicodeIDPart, UPRV_LENGTHOF(sampleUnicodeIDPart), TRUE);
    922     testSampleCharProps(u_isIDPart, "u_isIDPart",
    923                         sampleNonUnicodeIDPart, UPRV_LENGTHOF(sampleNonUnicodeIDPart), FALSE);
    924 
    925     /* IDPart should imply IDStart */
    926     testSampleCharProps(u_isIDPart, "u_isIDPart",
    927                         sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE);
    928 
    929     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
    930                         sampleIDIgnore, UPRV_LENGTHOF(sampleIDIgnore), TRUE);
    931     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
    932                         sampleNonIDIgnore, UPRV_LENGTHOF(sampleNonIDIgnore), FALSE);
    933 }
    934 
    935 /* for each line of UnicodeData.txt, check some of the properties */
/*
 * Context that TestUnicodeData() hands to parseUCDFile() and that
 * unicodeDataLineFn() receives back for each line:
 * the normalizer instances used for the combining class and decomposition checks.
 * When normalization is configured out, the struct only has a placeholder member.
 */
typedef struct UnicodeDataContext {
#if UCONFIG_NO_NORMALIZATION
    const void *dummy;
#else
    const UNormalizer2 *nfc;
    const UNormalizer2 *nfkc;
#endif
} UnicodeDataContext;
    944 
    945 /*
    946  * ### TODO
    947  * This test fails incorrectly if the First or Last code point of a repetitive area
    948  * is overridden, which is allowed and is encouraged for the PUAs.
    949  * Currently, this means that both area First/Last and override lines are
    950  * tested against the properties from the API,
    951  * and the area boundary will not match and cause an error.
    952  *
    953  * This function should detect area boundaries and skip them for the test of individual
    954  * code points' properties.
    955  * Then it should check that the areas contain all the same properties except where overridden.
    956  * For this, it would have had to set a flag for which code points were listed explicitly.
    957  */
    958 static void U_CALLCONV
    959 unicodeDataLineFn(void *context,
    960                   char *fields[][2], int32_t fieldCount,
    961                   UErrorCode *pErrorCode)
    962 {
    963     char buffer[100];
    964     const char *d;
    965     char *end;
    966     uint32_t value;
    967     UChar32 c;
    968     int32_t i;
    969     int8_t type;
    970     int32_t dt;
    971     UChar dm[32], s[32];
    972     int32_t dmLength, length;
    973 
    974 #if !UCONFIG_NO_NORMALIZATION
    975     const UNormalizer2 *nfc, *nfkc;
    976 #endif
    977 
    978     /* get the character code, field 0 */
    979     c=strtoul(fields[0][0], &end, 16);
    980     if(end<=fields[0][0] || end!=fields[0][1]) {
    981         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
    982         return;
    983     }
    984     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
    985         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
    986         return;
    987     }
    988 
    989     /* get general category, field 2 */
    990     *fields[2][1]=0;
    991     type = (int8_t)tagValues[MakeProp(fields[2][0])];
    992     if(u_charType(c)!=type) {
    993         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
    994     }
    995     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
    996         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
    997     }
    998 
    999     /* get canonical combining class, field 3 */
   1000     value=strtoul(fields[3][0], &end, 10);
   1001     if(end<=fields[3][0] || end!=fields[3][1]) {
   1002         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
   1003         return;
   1004     }
   1005     if(value>255) {
   1006         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
   1007         return;
   1008     }
   1009 #if !UCONFIG_NO_NORMALIZATION
   1010     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
   1011         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
   1012     }
   1013     nfkc=((UnicodeDataContext *)context)->nfkc;
   1014     if(value!=unorm2_getCombiningClass(nfkc, c)) {
   1015         log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
   1016     }
   1017 #endif
   1018 
   1019     /* get BiDi category, field 4 */
   1020     *fields[4][1]=0;
   1021     i=MakeDir(fields[4][0]);
   1022     if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
   1023         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
   1024     }
   1025 
   1026     /* get Decomposition_Type & Decomposition_Mapping, field 5 */
   1027     d=NULL;
   1028     if(fields[5][0]==fields[5][1]) {
   1029         /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
   1030         if(c==0xac00 || c==0xd7a3) {
   1031             dt=U_DT_CANONICAL;
   1032         } else {
   1033             dt=U_DT_NONE;
   1034         }
   1035     } else {
   1036         d=fields[5][0];
   1037         *fields[5][1]=0;
   1038         dt=UCHAR_INVALID_CODE;
   1039         if(*d=='<') {
   1040             end=strchr(++d, '>');
   1041             if(end!=NULL) {
   1042                 *end=0;
   1043                 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
   1044                 d=u_skipWhitespace(end+1);
   1045             }
   1046         } else {
   1047             dt=U_DT_CANONICAL;
   1048         }
   1049     }
   1050     if(dt>U_DT_NONE) {
   1051         if(c==0xac00) {
   1052             dm[0]=0x1100;
   1053             dm[1]=0x1161;
   1054             dm[2]=0;
   1055             dmLength=2;
   1056         } else if(c==0xd7a3) {
   1057             dm[0]=0xd788;
   1058             dm[1]=0x11c2;
   1059             dm[2]=0;
   1060             dmLength=2;
   1061         } else {
   1062             dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
   1063         }
   1064     } else {
   1065         dmLength=-1;
   1066     }
   1067     if(dt<0 || U_FAILURE(*pErrorCode)) {
   1068         log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
   1069         return;
   1070     }
   1071 #if !UCONFIG_NO_NORMALIZATION
   1072     i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
   1073     if(i!=dt) {
   1074         log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
   1075     }
   1076     /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
   1077     length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
   1078     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
   1079         log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
   1080                 "or the Decomposition_Mapping is different (%s)\n",
   1081                 c, length, dmLength, u_errorName(*pErrorCode));
   1082         return;
   1083     }
   1084     /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
   1085     if(dt!=U_DT_CANONICAL) {
   1086         dmLength=-1;
   1087     }
   1088     nfc=((UnicodeDataContext *)context)->nfc;
   1089     length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
   1090     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
   1091         log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
   1092                 "or the Decomposition_Mapping is different (%s)\n",
   1093                 c, length, dmLength, u_errorName(*pErrorCode));
   1094         return;
   1095     }
   1096     /* recompose */
   1097     if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
   1098         UChar32 a, b, composite;
   1099         i=0;
   1100         U16_NEXT(dm, i, dmLength, a);
   1101         U16_NEXT(dm, i, dmLength, b);
   1102         /* i==dmLength */
   1103         composite=unorm2_composePair(nfc, a, b);
   1104         if(composite!=c) {
   1105             log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
   1106                     (long)c, (long)a, (long)b, (long)composite);
   1107         }
   1108         /*
   1109          * Note: NFKC has fewer round-trip mappings than NFC,
   1110          * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
   1111          */
   1112     }
   1113 #endif
   1114 
   1115     /* get ISO Comment, field 11 */
   1116     *fields[11][1]=0;
   1117     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
   1118     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
   1119         log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
   1120             c, u_errorName(*pErrorCode),
   1121             U_FAILURE(*pErrorCode) ? buffer : "[error]",
   1122             fields[11][0]);
   1123     }
   1124 
   1125     /* get uppercase mapping, field 12 */
   1126     if(fields[12][0]!=fields[12][1]) {
   1127         value=strtoul(fields[12][0], &end, 16);
   1128         if(end!=fields[12][1]) {
   1129             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
   1130             return;
   1131         }
   1132         if((UChar32)value!=u_toupper(c)) {
   1133             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
   1134         }
   1135     } else {
   1136         /* no case mapping: the API must map the code point to itself */
   1137         if(c!=u_toupper(c)) {
   1138             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
   1139         }
   1140     }
   1141 
   1142     /* get lowercase mapping, field 13 */
   1143     if(fields[13][0]!=fields[13][1]) {
   1144         value=strtoul(fields[13][0], &end, 16);
   1145         if(end!=fields[13][1]) {
   1146             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
   1147             return;
   1148         }
   1149         if((UChar32)value!=u_tolower(c)) {
   1150             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
   1151         }
   1152     } else {
   1153         /* no case mapping: the API must map the code point to itself */
   1154         if(c!=u_tolower(c)) {
   1155             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
   1156         }
   1157     }
   1158 
   1159     /* get titlecase mapping, field 14 */
   1160     if(fields[14][0]!=fields[14][1]) {
   1161         value=strtoul(fields[14][0], &end, 16);
   1162         if(end!=fields[14][1]) {
   1163             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
   1164             return;
   1165         }
   1166         if((UChar32)value!=u_totitle(c)) {
   1167             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
   1168         }
   1169     } else {
   1170         /* no case mapping: the API must map the code point to itself */
   1171         if(c!=u_totitle(c)) {
   1172             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
   1173         }
   1174     }
   1175 }
   1176 
   1177 static UBool U_CALLCONV
   1178 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
   1179     static const UChar32 test[][2]={
   1180         {0x41, U_UPPERCASE_LETTER},
   1181         {0x308, U_NON_SPACING_MARK},
   1182         {0xfffe, U_GENERAL_OTHER_TYPES},
   1183         {0xe0041, U_FORMAT_CHAR},
   1184         {0xeffff, U_UNASSIGNED}
   1185     };
   1186 
   1187     int32_t i, count;
   1188 
   1189     if(0!=strcmp((const char *)context, "a1")) {
   1190         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
   1191         return FALSE;
   1192     }
   1193 
   1194     count=UPRV_LENGTHOF(test);
   1195     for(i=0; i<count; ++i) {
   1196         if(start<=test[i][0] && test[i][0]<limit) {
   1197             if(type!=(UCharCategory)test[i][1]) {
   1198                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
   1199                         start, limit, (long)type, test[i][0], test[i][1]);
   1200             }
   1201             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
   1202             return i==(count-1) ? FALSE : TRUE;
   1203         }
   1204     }
   1205 
   1206     if(start>test[count-1][0]) {
   1207         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
   1208                 start, limit, (long)type);
   1209         return FALSE;
   1210     }
   1211 
   1212     return TRUE;
   1213 }
   1214 
   1215 static UBool U_CALLCONV
   1216 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
   1217     /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
   1218     static const int32_t defaultBidi[][2]={ /* { limit, class } */
   1219         { 0x0590, U_LEFT_TO_RIGHT },
   1220         { 0x0600, U_RIGHT_TO_LEFT },
   1221         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
   1222         { 0x08A0, U_RIGHT_TO_LEFT },
   1223         { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
   1224         { 0x20A0, U_LEFT_TO_RIGHT },
   1225         { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR },  /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */
   1226         { 0xFB1D, U_LEFT_TO_RIGHT },
   1227         { 0xFB50, U_RIGHT_TO_LEFT },
   1228         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
   1229         { 0xFE70, U_LEFT_TO_RIGHT },
   1230         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
   1231         { 0x10800, U_LEFT_TO_RIGHT },
   1232         { 0x11000, U_RIGHT_TO_LEFT },
   1233         { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
   1234         { 0x1EE00, U_RIGHT_TO_LEFT },
   1235         { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
   1236         { 0x1F000, U_RIGHT_TO_LEFT },
   1237         { 0x110000, U_LEFT_TO_RIGHT }
   1238     };
   1239 
   1240     UChar32 c;
   1241     int32_t i;
   1242     UCharDirection shouldBeDir;
   1243 
   1244     /*
   1245      * LineBreak.txt specifies:
   1246      *   #  - Assigned characters that are not listed explicitly are given the value
   1247      *   #    "AL".
   1248      *   #  - Unassigned characters are given the value "XX".
   1249      *
   1250      * PUA characters are listed explicitly with "XX".
   1251      * Verify that no assigned character has "XX".
   1252      */
   1253     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
   1254         c=start;
   1255         while(c<limit) {
   1256             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
   1257                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
   1258             }
   1259             ++c;
   1260         }
   1261     }
   1262 
   1263     /*
   1264      * Verify default Bidi classes.
   1265      * For recent Unicode versions, see UCD.html.
   1266      *
   1267      * For older Unicode versions:
   1268      * See table 3-7 "Bidirectional Character Types" in UAX #9.
   1269      * http://www.unicode.org/reports/tr9/
   1270      *
   1271      * See also DerivedBidiClass.txt for Cn code points!
   1272      *
   1273      * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
   1274      * changed some default values.
   1275      * In particular, non-characters and unassigned Default Ignorable Code Points
   1276      * change from L to BN.
   1277      *
   1278      * UCD.html version 4.0.1 does not yet reflect these changes.
   1279      */
   1280     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
   1281         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
   1282         c=start;
   1283         for(i=0; i<UPRV_LENGTHOF(defaultBidi) && c<limit; ++i) {
   1284             if((int32_t)c<defaultBidi[i][0]) {
   1285                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
   1286                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
   1287                         shouldBeDir=U_BOUNDARY_NEUTRAL;
   1288                     } else {
   1289                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
   1290                     }
   1291 
   1292                     if( u_charDirection(c)!=shouldBeDir ||
   1293                         u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
   1294                     ) {
   1295                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
   1296                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
   1297                     }
   1298                     ++c;
   1299                 }
   1300             }
   1301         }
   1302     }
   1303 
   1304     return TRUE;
   1305 }
   1306 
   1307 /* tests for several properties */
   1308 static void TestUnicodeData()
   1309 {
   1310     UVersionInfo expectVersionArray;
   1311     UVersionInfo versionArray;
   1312     char *fields[15][2];
   1313     UErrorCode errorCode;
   1314     UChar32 c;
   1315     int8_t type;
   1316 
   1317     UnicodeDataContext context;
   1318 
   1319     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
   1320     u_getUnicodeVersion(versionArray);
   1321     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
   1322     {
   1323         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
   1324         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
   1325     }
   1326 
   1327 #if defined(ICU_UNICODE_VERSION)
   1328     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
   1329     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
   1330     {
   1331          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
   1332     }
   1333 #endif
   1334 
   1335     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
   1336         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
   1337     }
   1338 
   1339     errorCode=U_ZERO_ERROR;
   1340 #if !UCONFIG_NO_NORMALIZATION
   1341     context.nfc=unorm2_getNFCInstance(&errorCode);
   1342     context.nfkc=unorm2_getNFKCInstance(&errorCode);
   1343     if(U_FAILURE(errorCode)) {
   1344         log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
   1345         return;
   1346     }
   1347 #endif
   1348     parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
   1349     if(U_FAILURE(errorCode)) {
   1350         return; /* if we couldn't parse UnicodeData.txt, we should return */
   1351     }
   1352 
   1353     /* sanity check on repeated properties */
   1354     for(c=0xfffe; c<=0x10ffff;) {
   1355         type=u_charType(c);
   1356         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1357             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1358         }
   1359         if(type!=U_UNASSIGNED) {
   1360             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
   1361         }
   1362         if((c&0xffff)==0xfffe) {
   1363             ++c;
   1364         } else {
   1365             c+=0xffff;
   1366         }
   1367     }
   1368 
   1369     /* test that PUA is not "unassigned" */
   1370     for(c=0xe000; c<=0x10fffd;) {
   1371         type=u_charType(c);
   1372         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1373             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1374         }
   1375         if(type==U_UNASSIGNED) {
   1376             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
   1377         } else if(type!=U_PRIVATE_USE_CHAR) {
   1378             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
   1379         }
   1380         if(c==0xf8ff) {
   1381             c=0xf0000;
   1382         } else if(c==0xffffd) {
   1383             c=0x100000;
   1384         } else {
   1385             ++c;
   1386         }
   1387     }
   1388 
   1389     /* test u_enumCharTypes() */
   1390     u_enumCharTypes(enumTypeRange, "a1");
   1391 
   1392     /* check default properties */
   1393     u_enumCharTypes(enumDefaultsRange, NULL);
   1394 }
   1395 
   1396 static void TestCodeUnit(){
   1397     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
   1398 
   1399     int32_t i;
   1400 
   1401     for(i=0; i<UPRV_LENGTHOF(codeunit); i++){
   1402         UChar c=codeunit[i];
   1403         if(i<4){
   1404             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
   1405                 log_err("ERROR: U+%04x is a single", c);
   1406             }
   1407 
   1408         }
   1409         if(i >= 4 && i< 8){
   1410             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
   1411                 log_err("ERROR: U+%04x is a first surrogate", c);
   1412             }
   1413         }
   1414         if(i >= 8 && i< 12){
   1415             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
   1416                 log_err("ERROR: U+%04x is a second surrogate", c);
   1417             }
   1418         }
   1419     }
   1420 
   1421 }
   1422 
   1423 static void TestCodePoint(){
   1424     const UChar32 codePoint[]={
   1425         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
   1426         0xd800,
   1427         0xdbff,
   1428         0xdc00,
   1429         0xdfff,
   1430         0xdc04,
   1431         0xd821,
   1432         /*not a surrogate, valid, isUnicodeChar , not Error*/
   1433         0x20ac,
   1434         0xd7ff,
   1435         0xe000,
   1436         0xe123,
   1437         0x0061,
   1438         0xe065,
   1439         0x20402,
   1440         0x24506,
   1441         0x23456,
   1442         0x20402,
   1443         0x10402,
   1444         0x23456,
   1445         /*not a surrogate, not valid, isUnicodeChar, isError */
   1446         0x0015,
   1447         0x009f,
   1448         /*not a surrogate, not valid, not isUnicodeChar, isError */
   1449         0xffff,
   1450         0xfffe,
   1451     };
   1452     int32_t i;
   1453     for(i=0; i<UPRV_LENGTHOF(codePoint); i++){
   1454         UChar32 c=codePoint[i];
   1455         if(i<6){
   1456             if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
   1457                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1458             }
   1459             if(UTF_IS_VALID(c)){
   1460                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1461             }
   1462             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
   1463                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1464             }
   1465             if(UTF_IS_ERROR(c)){
   1466                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1467             }
   1468         }else if(i >=6 && i<18){
   1469             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1470                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1471             }
   1472             if(!UTF_IS_VALID(c)){
   1473                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1474             }
   1475             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
   1476                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1477             }
   1478             if(UTF_IS_ERROR(c)){
   1479                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1480             }
   1481         }else if(i >=18 && i<20){
   1482             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1483                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1484             }
   1485             if(UTF_IS_VALID(c)){
   1486                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1487             }
   1488             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
   1489                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1490             }
   1491             if(!UTF_IS_ERROR(c)){
   1492                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1493             }
   1494         }
   1495         else if(i >=18 && i<UPRV_LENGTHOF(codePoint)){
   1496             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1497                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1498             }
   1499             if(UTF_IS_VALID(c)){
   1500                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1501             }
   1502             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
   1503                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1504             }
   1505             if(!UTF_IS_ERROR(c)){
   1506                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1507             }
   1508         }
   1509     }
   1510 
   1511     if(
   1512         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
   1513         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
   1514         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
   1515         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
   1516     ) {
   1517         log_err("error with U_IS_BMP()\n");
   1518     }
   1519 
   1520     if(
   1521         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
   1522         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
   1523         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
   1524         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
   1525     ) {
   1526         log_err("error with U_IS_SUPPLEMENTARY()\n");
   1527     }
   1528 }
   1529 
   1530 static void TestCharLength()
   1531 {
   1532     const int32_t codepoint[]={
   1533         1, 0x0061,
   1534         1, 0xe065,
   1535         1, 0x20ac,
   1536         2, 0x20402,
   1537         2, 0x23456,
   1538         2, 0x24506,
   1539         2, 0x20402,
   1540         2, 0x10402,
   1541         1, 0xd7ff,
   1542         1, 0xe000
   1543     };
   1544 
   1545     int32_t i;
   1546     UBool multiple;
   1547     for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){
   1548         UChar32 c=codepoint[i+1];
   1549         if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
   1550             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
   1551         }
   1552         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
   1553         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
   1554             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
   1555         }
   1556     }
   1557 }
   1558 
   1559 /*internal functions ----*/
   1560 static int32_t MakeProp(char* str)
   1561 {
   1562     int32_t result = 0;
   1563     char* matchPosition =0;
   1564 
   1565     matchPosition = strstr(tagStrings, str);
   1566     if (matchPosition == 0)
   1567     {
   1568         log_err("unrecognized type letter ");
   1569         log_err(str);
   1570     }
   1571     else
   1572         result = (int32_t)((matchPosition - tagStrings) / 2);
   1573     return result;
   1574 }
   1575 
   1576 static int32_t MakeDir(char* str)
   1577 {
   1578     int32_t pos = 0;
   1579     for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) {
   1580         if (strcmp(str, dirStrings[pos]) == 0) {
   1581             return pos;
   1582         }
   1583     }
   1584     return -1;
   1585 }
   1586 
   1587 /* test u_charName() -------------------------------------------------------- */
   1588 
   1589 static const struct {
   1590     uint32_t code;
   1591     const char *name, *oldName, *extName, *alias;
   1592 } names[]={
   1593     {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},
   1594     {0x01a2, "LATIN CAPITAL LETTER OI", "",
   1595              "LATIN CAPITAL LETTER OI",
   1596              "LATIN CAPITAL LETTER GHA"},
   1597     {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
   1598              "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
   1599     {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
   1600              "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
   1601              "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
   1602     {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" },
   1603     {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" },
   1604     {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" },
   1605     {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" },
   1606     {0xd800, "", "", "<lead surrogate-D800>" },
   1607     {0xdc00, "", "", "<trail surrogate-DC00>" },
   1608     {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS" },
   1609     {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },
   1610     {0xffff, "", "", "<noncharacter-FFFF>" },
   1611     {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
   1612               "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
   1613               "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
   1614     {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" }
   1615 };
   1616 
   1617 static UBool
   1618 enumCharNamesFn(void *context,
   1619                 UChar32 code, UCharNameChoice nameChoice,
   1620                 const char *name, int32_t length) {
   1621     int32_t *pCount=(int32_t *)context;
   1622     const char *expected;
   1623     int i;
   1624 
   1625     if(length<=0 || length!=(int32_t)strlen(name)) {
   1626         /* should not be called with an empty string or invalid length */
   1627         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
   1628         return TRUE;
   1629     }
   1630 
   1631     ++*pCount;
   1632     for(i=0; i<UPRV_LENGTHOF(names); ++i) {
   1633         if(code==(UChar32)names[i].code) {
   1634             switch (nameChoice) {
   1635                 case U_EXTENDED_CHAR_NAME:
   1636                     if(0!=strcmp(name, names[i].extName)) {
   1637                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
   1638                     }
   1639                     break;
   1640                 case U_UNICODE_CHAR_NAME:
   1641                     if(0!=strcmp(name, names[i].name)) {
   1642                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
   1643                     }
   1644                     break;
   1645                 case U_UNICODE_10_CHAR_NAME:
   1646                     expected=names[i].oldName;
   1647                     if(expected[0]==0 || 0!=strcmp(name, expected)) {
   1648                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
   1649                     }
   1650                     break;
   1651                 case U_CHAR_NAME_ALIAS:
   1652                     expected=names[i].alias;
   1653                     if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
   1654                         log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
   1655                     }
   1656                     break;
   1657                 case U_CHAR_NAME_CHOICE_COUNT:
   1658                     break;
   1659             }
   1660             break;
   1661         }
   1662     }
   1663     return TRUE;
   1664 }
   1665 
   1666 struct enumExtCharNamesContext {
   1667     uint32_t length;
   1668     int32_t last;
   1669 };
   1670 
   1671 static UBool
   1672 enumExtCharNamesFn(void *context,
   1673                 UChar32 code, UCharNameChoice nameChoice,
   1674                 const char *name, int32_t length) {
   1675     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
   1676 
   1677     if (ecncp->last != (int32_t) code - 1) {
   1678         if (ecncp->last < 0) {
   1679             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
   1680         } else {
   1681             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
   1682         }
   1683     }
   1684     ecncp->last = (int32_t) code;
   1685 
   1686     if (!*name) {
   1687         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
   1688     }
   1689 
   1690     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
   1691 }
   1692 
   1693 /**
   1694  * This can be made more efficient by moving it into putil.c and having
   1695  * it directly access the ebcdic translation tables.
   1696  * TODO: If we get this method in putil.c, then delete it from here.
   1697  */
   1698 static UChar
   1699 u_charToUChar(char c) {
   1700     UChar uc;
   1701     u_charsToUChars(&c, &uc, 1);
   1702     return uc;
   1703 }
   1704 
   1705 static void
   1706 TestCharNames() {
   1707     static char name[80];
   1708     UErrorCode errorCode=U_ZERO_ERROR;
   1709     struct enumExtCharNamesContext extContext;
   1710     const char *expected;
   1711     int32_t length;
   1712     UChar32 c;
   1713     int32_t i;
   1714 
   1715     log_verbose("Testing uprv_getMaxCharNameLength()\n");
   1716     length=uprv_getMaxCharNameLength();
   1717     if(length==0) {
   1718         /* no names data available */
   1719         return;
   1720     }
   1721     if(length<83) { /* Unicode 3.2 max char name length */
   1722         log_err("uprv_getMaxCharNameLength()=%d is too short");
   1723     }
   1724     /* ### TODO same tests for max ISO comment length as for max name length */
   1725 
   1726     log_verbose("Testing u_charName()\n");
   1727     for(i=0; i<UPRV_LENGTHOF(names); ++i) {
   1728         /* modern Unicode character name */
   1729         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
   1730         if(U_FAILURE(errorCode)) {
   1731             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
   1732             return;
   1733         }
   1734         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
   1735             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
   1736         }
   1737 
   1738         /* find the modern name */
   1739         if (*names[i].name) {
   1740             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
   1741             if(U_FAILURE(errorCode)) {
   1742                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
   1743                 return;
   1744             }
   1745             if(c!=(UChar32)names[i].code) {
   1746                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
   1747             }
   1748         }
   1749 
   1750         /* Unicode 1.0 character name */
   1751         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
   1752         if(U_FAILURE(errorCode)) {
   1753             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
   1754             return;
   1755         }
   1756         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
   1757             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
   1758         }
   1759 
   1760         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
   1761         if(names[i].oldName[0]!=0 /* && length>0 */) {
   1762             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
   1763             if(U_FAILURE(errorCode)) {
   1764                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
   1765                 return;
   1766             }
   1767             if(c!=(UChar32)names[i].code) {
   1768                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
   1769             }
   1770         }
   1771 
   1772         /* Unicode character name alias */
   1773         length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
   1774         if(U_FAILURE(errorCode)) {
   1775             log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
   1776             return;
   1777         }
   1778         expected=names[i].alias;
   1779         if(expected==NULL) {
   1780             expected="";
   1781         }
   1782         if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
   1783             log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
   1784                     names[i].code, name, length, expected);
   1785         }
   1786 
   1787         /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
   1788         if(expected[0]!=0 /* && length>0 */) {
   1789             c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
   1790             if(U_FAILURE(errorCode)) {
   1791                 log_err("u_charFromName(%s - alias) error %s\n",
   1792                         expected, u_errorName(errorCode));
   1793                 return;
   1794             }
   1795             if(c!=(UChar32)names[i].code) {
   1796                 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
   1797                         expected, c, names[i].code);
   1798             }
   1799         }
   1800     }
   1801 
   1802     /* test u_enumCharNames() */
   1803     length=0;
   1804     errorCode=U_ZERO_ERROR;
   1805     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
   1806     if(U_FAILURE(errorCode) || length<94140) {
   1807         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
   1808     }
   1809 
   1810     extContext.length = 0;
   1811     extContext.last = -1;
   1812     errorCode=U_ZERO_ERROR;
   1813     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
   1814     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
   1815         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
   1816     }
   1817 
   1818     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
   1819     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
   1820         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
   1821     }
   1822 
   1823     /* Test getCharNameCharacters */
   1824     if(!getTestOption(QUICK_OPTION)) {
   1825         enum { BUFSIZE = 256 };
   1826         UErrorCode ec = U_ZERO_ERROR;
   1827         char buf[BUFSIZE];
   1828         int32_t maxLength;
   1829         UChar32 cp;
   1830         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
   1831         int32_t l1, l2;
   1832         UBool map[256];
   1833         UBool ok;
   1834 
   1835         USet* set = uset_open(1, 0); /* empty set */
   1836         USet* dumb = uset_open(1, 0); /* empty set */
   1837 
   1838         /*
   1839          * uprv_getCharNameCharacters() will likely return more lowercase
   1840          * letters than actual character names contain because
   1841          * it includes all the characters in lowercased names of
   1842          * general categories, for the full possible set of extended names.
   1843          */
   1844         {
   1845             USetAdder sa={
   1846                 NULL,
   1847                 uset_add,
   1848                 uset_addRange,
   1849                 uset_addString,
   1850                 NULL /* don't need remove() */
   1851             };
   1852             sa.set=set;
   1853             uprv_getCharNameCharacters(&sa);
   1854         }
   1855 
   1856         /* build set the dumb (but sure-fire) way */
   1857         for (i=0; i<256; ++i) {
   1858             map[i] = FALSE;
   1859         }
   1860 
   1861         maxLength=0;
   1862         for (cp=0; cp<0x110000; ++cp) {
   1863             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
   1864                                      buf, BUFSIZE, &ec);
   1865             if (U_FAILURE(ec)) {
   1866                 log_err("FAIL: u_charName failed when it shouldn't\n");
   1867                 uset_close(set);
   1868                 uset_close(dumb);
   1869                 return;
   1870             }
   1871             if(len>maxLength) {
   1872                 maxLength=len;
   1873             }
   1874 
   1875             for (i=0; i<len; ++i) {
   1876                 if (!map[(uint8_t) buf[i]]) {
   1877                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
   1878                     map[(uint8_t) buf[i]] = TRUE;
   1879                 }
   1880             }
   1881 
   1882             /* test for leading/trailing whitespace */
   1883             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
   1884                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
   1885             }
   1886         }
   1887 
   1888         if(map[(uint8_t)'\t']) {
   1889             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
   1890         }
   1891 
   1892         length=uprv_getMaxCharNameLength();
   1893         if(length!=maxLength) {
   1894             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
   1895                     length, maxLength);
   1896         }
   1897 
   1898         /* compare the sets.  Where is my uset_equals?!! */
   1899         ok=TRUE;
   1900         for(i=0; i<256; ++i) {
   1901             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
   1902                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
   1903                     /* ignore lowercase a-z that are in set but not in dumb */
   1904                     ok=TRUE;
   1905                 } else {
   1906                     ok=FALSE;
   1907                     break;
   1908                 }
   1909             }
   1910         }
   1911 
   1912         l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
   1913         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
   1914         if (U_FAILURE(ec)) {
   1915             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
   1916             uset_close(set);
   1917             uset_close(dumb);
   1918             return;
   1919         }
   1920 
   1921         if (l1 >= BUFSIZE) {
   1922             l1 = BUFSIZE-1;
   1923             pat[l1] = 0;
   1924         }
   1925         if (l2 >= BUFSIZE) {
   1926             l2 = BUFSIZE-1;
   1927             dumbPat[l2] = 0;
   1928         }
   1929 
   1930         if (!ok) {
   1931             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
   1932                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
   1933         } else if(getTestOption(VERBOSITY_OPTION)) {
   1934             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
   1935         }
   1936 
   1937         uset_close(set);
   1938         uset_close(dumb);
   1939     }
   1940 
   1941     /* ### TODO: test error cases and other interesting things */
   1942 }
   1943 
   1944 static void
   1945 TestUCharFromNameUnderflow() {
   1946     // Ticket #10889: Underflow crash when there is no dash.
   1947     UErrorCode errorCode=U_ZERO_ERROR;
   1948     UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<NO BREAK SPACE>", &errorCode);
   1949     if(U_SUCCESS(errorCode)) {
   1950         log_err("u_charFromName(<NO BREAK SPACE>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
   1951     }
   1952 
   1953     // Test related edge cases.
   1954     errorCode=U_ZERO_ERROR;
   1955     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode);
   1956     if(U_SUCCESS(errorCode)) {
   1957         log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
   1958     }
   1959 
   1960     errorCode=U_ZERO_ERROR;
   1961     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control->", &errorCode);
   1962     if(U_SUCCESS(errorCode)) {
   1963         log_err("u_charFromName(<control->) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
   1964     }
   1965 
   1966     errorCode=U_ZERO_ERROR;
   1967     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control-111111>", &errorCode);
   1968     if(U_SUCCESS(errorCode)) {
   1969         log_err("u_charFromName(<control-111111>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
   1970     }
   1971 }
   1972 
   1973 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
   1974 
   1975 static void
   1976 TestMirroring() {
   1977     USet *set;
   1978     UErrorCode errorCode;
   1979 
   1980     UChar32 start, end, c2, c3;
   1981     int32_t i;
   1982 
   1983     U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
   1984 
   1985     U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
   1986 
   1987     log_verbose("Testing u_isMirrored()\n");
   1988     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
   1989          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
   1990         )
   1991     ) {
   1992         log_err("u_isMirrored() does not work correctly\n");
   1993     }
   1994 
   1995     log_verbose("Testing u_charMirror()\n");
   1996     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
   1997          u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
   1998          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
   1999          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
   2000          u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
   2001          )
   2002     ) {
   2003         log_err("u_charMirror() does not work correctly\n");
   2004     }
   2005 
   2006     /* verify that Bidi_Mirroring_Glyph roundtrips */
   2007     errorCode=U_ZERO_ERROR;
   2008     set=uset_openPattern(mirroredPattern, 17, &errorCode);
   2009 
   2010     if (U_FAILURE(errorCode)) {
   2011         log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
   2012     } else {
   2013         for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
   2014             do {
   2015                 c2=u_charMirror(start);
   2016                 c3=u_charMirror(c2);
   2017                 if(c3!=start) {
   2018                     log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
   2019                 }
   2020                 c3=u_getBidiPairedBracket(start);
   2021                 if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)==U_BPT_NONE) {
   2022                     if(c3!=start) {
   2023                         log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt(c)==None\n",
   2024                                 (long)start);
   2025                     }
   2026                 } else {
   2027                     if(c3!=c2) {
   2028                         log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bmg(c)'\n",
   2029                                 (long)start, (long)c2);
   2030                     }
   2031                 }
   2032             } while(++start<=end);
   2033         }
   2034     }
   2035 
   2036     uset_close(set);
   2037 }
   2038 
   2039 
   2040 struct RunTestData
   2041 {
   2042     const char *runText;
   2043     UScriptCode runCode;
   2044 };
   2045 
   2046 typedef struct RunTestData RunTestData;
   2047 
   2048 static void
   2049 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
   2050                 const char *prefix)
   2051 {
   2052     int32_t run, runStart, runLimit;
   2053     UScriptCode runCode;
   2054 
   2055     /* iterate over all the runs */
   2056     run = 0;
   2057     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
   2058         if (runStart != runStarts[run]) {
   2059             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
   2060                 prefix, run, runStarts[run], runStart);
   2061         }
   2062 
   2063         if (runLimit != runStarts[run + 1]) {
   2064             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
   2065                 prefix, run, runStarts[run + 1], runLimit);
   2066         }
   2067 
   2068         if (runCode != testData[run].runCode) {
   2069             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
   2070                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
   2071         }
   2072 
   2073         run += 1;
   2074 
   2075         /* stop when we've seen all the runs we expect to see */
   2076         if (run >= nRuns) {
   2077             break;
   2078         }
   2079     }
   2080 
   2081     /* Complain if we didn't see then number of runs we expected */
   2082     if (run != nRuns) {
   2083         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
   2084     }
   2085 }
   2086 
   2087 static void
   2088 TestUScriptRunAPI()
   2089 {
   2090     static const RunTestData testData1[] = {
   2091         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
   2092         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
   2093         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
   2094         {"English (", USCRIPT_LATIN},
   2095         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
   2096         {") ", USCRIPT_LATIN},
   2097         {"\\u6F22\\u5B75", USCRIPT_HAN},
   2098         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
   2099         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
   2100         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
   2101     };
   2102 
   2103     static const RunTestData testData2[] = {
   2104        {"((((((((((abc))))))))))", USCRIPT_LATIN}
   2105     };
   2106 
   2107     static const struct {
   2108       const RunTestData *testData;
   2109       int32_t nRuns;
   2110     } testDataEntries[] = {
   2111         {testData1, UPRV_LENGTHOF(testData1)},
   2112         {testData2, UPRV_LENGTHOF(testData2)}
   2113     };
   2114 
   2115     static const int32_t nTestEntries = UPRV_LENGTHOF(testDataEntries);
   2116     int32_t testEntry;
   2117 
   2118     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
   2119         UChar testString[1024];
   2120         int32_t runStarts[256];
   2121         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
   2122         const RunTestData *testData = testDataEntries[testEntry].testData;
   2123 
   2124         int32_t run, stringLimit;
   2125         UScriptRun *scriptRun = NULL;
   2126         UErrorCode err;
   2127 
   2128         /*
   2129          * Fill in the test string and the runStarts array.
   2130          */
   2131         stringLimit = 0;
   2132         for (run = 0; run < nTestRuns; run += 1) {
   2133             runStarts[run] = stringLimit;
   2134             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
   2135             /*stringLimit -= 1;*/
   2136         }
   2137 
   2138         /* The limit of the last run */
   2139         runStarts[nTestRuns] = stringLimit;
   2140 
   2141         /*
   2142          * Make sure that calling uscript_openRun with a NULL text pointer
   2143          * and a non-zero text length returns the correct error.
   2144          */
   2145         err = U_ZERO_ERROR;
   2146         scriptRun = uscript_openRun(NULL, stringLimit, &err);
   2147 
   2148         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2149             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2150         }
   2151 
   2152         if (scriptRun != NULL) {
   2153             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
   2154             uscript_closeRun(scriptRun);
   2155         }
   2156 
   2157         /*
   2158          * Make sure that calling uscript_openRun with a non-NULL text pointer
   2159          * and a zero text length returns the correct error.
   2160          */
   2161         err = U_ZERO_ERROR;
   2162         scriptRun = uscript_openRun(testString, 0, &err);
   2163 
   2164         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2165             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2166         }
   2167 
   2168         if (scriptRun != NULL) {
   2169             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
   2170             uscript_closeRun(scriptRun);
   2171         }
   2172 
   2173         /*
   2174          * Make sure that calling uscript_openRun with a NULL text pointer
   2175          * and a zero text length doesn't return an error.
   2176          */
   2177         err = U_ZERO_ERROR;
   2178         scriptRun = uscript_openRun(NULL, 0, &err);
   2179 
   2180         if (U_FAILURE(err)) {
   2181             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
   2182         }
   2183 
   2184         /* Make sure that the empty iterator doesn't find any runs */
   2185         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
   2186             log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
   2187         }
   2188 
   2189         /*
   2190          * Make sure that calling uscript_setRunText with a NULL text pointer
   2191          * and a non-zero text length returns the correct error.
   2192          */
   2193         err = U_ZERO_ERROR;
   2194         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
   2195 
   2196         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2197             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2198         }
   2199 
   2200         /*
   2201          * Make sure that calling uscript_setRunText with a non-NULL text pointer
   2202          * and a zero text length returns the correct error.
   2203          */
   2204         err = U_ZERO_ERROR;
   2205         uscript_setRunText(scriptRun, testString, 0, &err);
   2206 
   2207         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2208             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2209         }
   2210 
   2211         /*
   2212          * Now call uscript_setRunText on the empty iterator
   2213          * and make sure that it works.
   2214          */
   2215         err = U_ZERO_ERROR;
   2216         uscript_setRunText(scriptRun, testString, stringLimit, &err);
   2217 
   2218         if (U_FAILURE(err)) {
   2219             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
   2220         } else {
   2221             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
   2222         }
   2223 
   2224         uscript_closeRun(scriptRun);
   2225 
   2226         /*
   2227          * Now open an iterator over the testString
   2228          * using uscript_openRun and make sure that it works
   2229          */
   2230         scriptRun = uscript_openRun(testString, stringLimit, &err);
   2231 
   2232         if (U_FAILURE(err)) {
   2233             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
   2234         } else {
   2235             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
   2236         }
   2237 
   2238         /* Now reset the iterator, and make sure
   2239          * that it still works.
   2240          */
   2241         uscript_resetRun(scriptRun);
   2242 
   2243         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
   2244 
   2245         /* Close the iterator */
   2246         uscript_closeRun(scriptRun);
   2247     }
   2248 }
   2249 
   2250 /* test additional, non-core properties */
   2251 static void
   2252 TestAdditionalProperties() {
   2253     /* test data for u_charAge() */
   2254     static const struct {
   2255         UChar32 c;
   2256         UVersionInfo version;
   2257     } charAges[]={
   2258         {0x41,    { 1, 1, 0, 0 }},
   2259         {0xffff,  { 1, 1, 0, 0 }},
   2260         {0x20ab,  { 2, 0, 0, 0 }},
   2261         {0x2fffe, { 2, 0, 0, 0 }},
   2262         {0x20ac,  { 2, 1, 0, 0 }},
   2263         {0xfb1d,  { 3, 0, 0, 0 }},
   2264         {0x3f4,   { 3, 1, 0, 0 }},
   2265         {0x10300, { 3, 1, 0, 0 }},
   2266         {0x220,   { 3, 2, 0, 0 }},
   2267         {0xff60,  { 3, 2, 0, 0 }}
   2268     };
   2269 
   2270     /* test data for u_hasBinaryProperty() */
   2271     static const int32_t
   2272     props[][3]={ /* code point, property, value */
   2273         { 0x0627, UCHAR_ALPHABETIC, TRUE },
   2274         { 0x1034a, UCHAR_ALPHABETIC, TRUE },
   2275         { 0x2028, UCHAR_ALPHABETIC, FALSE },
   2276 
   2277         { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
   2278         { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
   2279 
   2280         { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
   2281         { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
   2282 
   2283         { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
   2284         { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
   2285 
   2286         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
   2287         { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
   2288         { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
   2289         { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
   2290         { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
   2291 
   2292         { 0x058a, UCHAR_DASH, TRUE },
   2293         { 0x007e, UCHAR_DASH, FALSE },
   2294 
   2295         { 0x0c4d, UCHAR_DIACRITIC, TRUE },
   2296         { 0x3000, UCHAR_DIACRITIC, FALSE },
   2297 
   2298         { 0x0e46, UCHAR_EXTENDER, TRUE },
   2299         { 0x0020, UCHAR_EXTENDER, FALSE },
   2300 
   2301 #if !UCONFIG_NO_NORMALIZATION
   2302         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
   2303         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
   2304         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
   2305 
   2306         { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
   2307         { 0x0308, UCHAR_NFD_INERT, FALSE },
   2308 
   2309         { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
   2310         { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
   2311 
   2312         { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
   2313         { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
   2314         { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
   2315         { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
   2316         { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
   2317         { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
   2318 
   2319         { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
   2320         { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
   2321 
   2322         { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
   2323         { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
   2324         { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
   2325         { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
   2326         { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
   2327         { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
   2328 #endif
   2329 
   2330         { 0x0044, UCHAR_HEX_DIGIT, TRUE },
   2331         { 0xff46, UCHAR_HEX_DIGIT, TRUE },
   2332         { 0x0047, UCHAR_HEX_DIGIT, FALSE },
   2333 
   2334         { 0x30fb, UCHAR_HYPHEN, TRUE },
   2335         { 0xfe58, UCHAR_HYPHEN, FALSE },
   2336 
   2337         { 0x2172, UCHAR_ID_CONTINUE, TRUE },
   2338         { 0x0307, UCHAR_ID_CONTINUE, TRUE },
   2339         { 0x005c, UCHAR_ID_CONTINUE, FALSE },
   2340 
   2341         { 0x2172, UCHAR_ID_START, TRUE },
   2342         { 0x007a, UCHAR_ID_START, TRUE },
   2343         { 0x0039, UCHAR_ID_START, FALSE },
   2344 
   2345         { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
   2346         { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
   2347         { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
   2348 
   2349         { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
   2350         { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
   2351 
   2352         { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
   2353         { 0x0345, UCHAR_LOWERCASE, TRUE },
   2354         { 0x0030, UCHAR_LOWERCASE, FALSE },
   2355 
   2356         { 0x1d7a9, UCHAR_MATH, TRUE },
   2357         { 0x2135, UCHAR_MATH, TRUE },
   2358         { 0x0062, UCHAR_MATH, FALSE },
   2359 
   2360         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
   2361         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
   2362         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
   2363 
   2364         { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
   2365         { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
   2366         { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
   2367 
   2368         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
   2369         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
   2370 
   2371         { 0x1d44a, UCHAR_UPPERCASE, TRUE },
   2372         { 0x2162, UCHAR_UPPERCASE, TRUE },
   2373         { 0x0345, UCHAR_UPPERCASE, FALSE },
   2374 
   2375         { 0x0020, UCHAR_WHITE_SPACE, TRUE },
   2376         { 0x202f, UCHAR_WHITE_SPACE, TRUE },
   2377         { 0x3001, UCHAR_WHITE_SPACE, FALSE },
   2378 
   2379         { 0x0711, UCHAR_XID_CONTINUE, TRUE },
   2380         { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
   2381         { 0x007c, UCHAR_XID_CONTINUE, FALSE },
   2382 
   2383         { 0x16ee, UCHAR_XID_START, TRUE },
   2384         { 0x23456, UCHAR_XID_START, TRUE },
   2385         { 0x1d1aa, UCHAR_XID_START, FALSE },
   2386 
   2387         /*
   2388          * Version break:
   2389          * The following properties are only supported starting with the
   2390          * Unicode version indicated in the second field.
   2391          */
   2392         { -1, 0x320, 0 },
   2393 
   2394         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
   2395         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
   2396         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
   2397 
   2398         { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
   2399         { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
   2400         { 0xe0001, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
   2401         { 0xe0100, UCHAR_DEPRECATED, FALSE },
   2402 
   2403         { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
   2404         { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
   2405         { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
   2406         { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
   2407 
   2408         { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
   2409         { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
   2410         { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
   2411         { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
   2412 
   2413         { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
   2414         { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
   2415 
   2416         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
   2417         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
   2418 
   2419         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
   2420         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
   2421 
   2422         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
   2423         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
   2424 
   2425         { 0x2e9b, UCHAR_RADICAL, TRUE },
   2426         { 0x4e00, UCHAR_RADICAL, FALSE },
   2427 
   2428         { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
   2429         { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
   2430 
   2431         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
   2432         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
   2433 
   2434         { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
   2435 
   2436         { 0x002e, UCHAR_S_TERM, TRUE },
   2437         { 0x0061, UCHAR_S_TERM, FALSE },
   2438 
   2439         { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
   2440         { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
   2441         { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
   2442         { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
   2443 
   2444         /* enum/integer type properties */
   2445 
   2446         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
   2447         /* test default Bidi classes for unassigned code points */
   2448         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2449         { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2450         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2451         { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
   2452         { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
   2453         { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2454         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2455         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2456         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2457         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2458         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2459 
   2460         { 0x061d, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2461         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2462         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2463         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2464         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2465         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2466         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2467 
   2468         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
   2469         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
   2470         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
   2471         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
   2472         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
   2473         { 0x1CBF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2474         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
   2475         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
   2476         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2477         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2478         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
   2479 
   2480         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
   2481         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
   2482 
   2483         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
   2484         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
   2485         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
   2486         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2487         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2488         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2489         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
   2490         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
   2491         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
   2492 
   2493         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2494         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
   2495         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2496         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
   2497         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2498         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
   2499         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2500         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2501         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2502         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2503         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2504         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2505         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2506         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
   2507         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2508         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2509         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2510 
   2511         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
   2512         { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
   2513         { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
   2514 
   2515         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
   2516         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
   2517         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
   2518         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
   2519         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
   2520 
   2521         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
   2522         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
   2523         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
   2524         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
   2525         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
   2526         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2527         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2528         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2529 
   2530         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
   2531         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
   2532         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
   2533         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
   2534         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
   2535         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2536         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2537         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2538         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2539         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2540         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2541         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2542         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
   2543         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
   2544         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
   2545         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
   2546 
   2547         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
   2548 
   2549         /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
   2550 
   2551         { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2552         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2553         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2554         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2555         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2556         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2557         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2558 
   2559         { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2560         { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2561         { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2562         { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2563 
   2564         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2565         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2566         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2567         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2568         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2569         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2570 
   2571         { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2572         { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2573         { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2574         { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2575 
   2576         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2577         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2578         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2579         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2580         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2581         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2582         { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2583 
   2584         { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2585         { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2586         { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2587         { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2588 
   2589         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2590         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2591         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2592         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2593 
   2594         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2595         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2596         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2597         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2598         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2599 
   2600         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2601 
   2602         { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
   2603 
   2604         { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
   2605         { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
   2606         { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
   2607 
   2608         { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2609         { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2610         { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2611         { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
   2612         { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
   2613 
   2614         { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
   2615         { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
   2616         { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
   2617 
   2618         { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
   2619         { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
   2620         { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
   2621         { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
   2622 
   2623         { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
   2624         { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
   2625         { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
   2626         { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
   2627         { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
   2628         { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
   2629 
   2630         { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
   2631         { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
   2632         { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
   2633         { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
   2634 
   2635         { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
   2636         { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
   2637         { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
   2638         { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
   2639 
   2640         { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
   2641         { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
   2642         { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
   2643         { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
   2644 
   2645         { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
   2646 
   2647         /* unassigned code points in new default Bidi R blocks */
   2648         { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2649         { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2650 
   2651         /* test some script codes >127 */
   2652         { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
   2653         { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
   2654         { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
   2655 
   2656         { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
   2657 
   2658         /* value changed in Unicode 6.0 */
   2659         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
   2660 
   2661         { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
   2662 
   2663         /* unassigned code points in new/changed default Bidi AL blocks */
   2664         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2665         { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2666 
   2667         { -1, 0x630, 0 }, /* version break for Unicode 6.3 */
   2668 
   2669         /* unassigned code points in the currency symbols block now default to ET */
   2670         { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
   2671         { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
   2672 
   2673         /* new property in Unicode 6.3 */
   2674         { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
   2675         { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
   2676         { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
   2677         { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
   2678         { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
   2679         { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
   2680 
   2681         { -1, 0x700, 0 }, /* version break for Unicode 7.0 */
   2682 
   2683         /* new character range with Joining_Group values */
   2684         { 0x10ABF, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
   2685         { 0x10AC0, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_ALEPH },
   2686         { 0x10AC1, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_BETH },
   2687         { 0x10AEF, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_HUNDRED },
   2688         { 0x10AF0, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
   2689 
   2690         /* undefined UProperty values */
   2691         { 0x61, 0x4a7, 0 },
   2692         { 0x234bc, 0x15ed, 0 }
   2693     };
   2694 
   2695     UVersionInfo version;
   2696     UChar32 c;
   2697     int32_t i, result, uVersion;
   2698     UProperty which;
   2699 
   2700     /* what is our Unicode version? */
   2701     u_getUnicodeVersion(version);
   2702     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
   2703 
   2704     u_charAge(0x20, version);
   2705     if(version[0]==0) {
   2706         /* no additional properties available */
   2707         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
   2708         return;
   2709     }
   2710 
   2711     /* test u_charAge() */
   2712     for(i=0; i<UPRV_LENGTHOF(charAges); ++i) {
   2713         u_charAge(charAges[i].c, version);
   2714         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
   2715             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
   2716                 charAges[i].c,
   2717                 version[0], version[1], version[2], version[3],
   2718                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
   2719         }
   2720     }
   2721 
   2722     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
   2723         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
   2724         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
   2725         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
   2726         u_getIntPropertyMinValue(0x2345)!=0
   2727     ) {
   2728         log_err("error: u_getIntPropertyMinValue() wrong\n");
   2729     }
   2730     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
   2731         log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
   2732     }
   2733     if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
   2734         log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
   2735     }
   2736     if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
   2737         log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
   2738     }
   2739     if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
   2740         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
   2741     }
   2742     if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
   2743         log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
   2744     }
   2745     if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
   2746         log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
   2747     }
   2748     if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
   2749         log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
   2750     }
   2751     if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
   2752         log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
   2753     }
   2754     if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
   2755         log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
   2756     }
   2757     if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
   2758         log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
   2759     }
   2760     if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
   2761         log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
   2762     }
   2763     if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
   2764         log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
   2765     }
   2766     if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
   2767         log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
   2768     }
   2769     if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_COUNT-1) {
   2770         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE) wrong\n");
   2771     }
   2772     /*JB#2410*/
   2773     if( u_getIntPropertyMaxValue(0x2345)!=-1) {
   2774         log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
   2775     }
   2776     if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
   2777         log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
   2778     }
   2779     if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
   2780         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
   2781     }
   2782     if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
   2783         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
   2784     }
   2785     if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
   2786         log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
   2787     }
   2788 
   2789     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
   2790     for(i=0; i<UPRV_LENGTHOF(props); ++i) {
   2791         const char *whichName;
   2792 
   2793         if(props[i][0]<0) {
   2794             /* Unicode version break */
   2795             if(uVersion<props[i][1]) {
   2796                 break; /* do not test properties that are not yet supported */
   2797             } else {
   2798                 continue; /* skip this row */
   2799             }
   2800         }
   2801 
   2802         c=(UChar32)props[i][0];
   2803         which=(UProperty)props[i][1];
   2804         whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
   2805 
   2806         if(which<UCHAR_INT_START) {
   2807             result=u_hasBinaryProperty(c, which);
   2808             if(result!=props[i][2]) {
   2809                 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
   2810                         c, whichName, result, i);
   2811             }
   2812         }
   2813 
   2814         result=u_getIntPropertyValue(c, which);
   2815         if(result!=props[i][2]) {
   2816             log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
   2817                     c, whichName, result, props[i][2], i);
   2818         }
   2819 
   2820         /* test separate functions, too */
   2821         switch((UProperty)props[i][1]) {
   2822         case UCHAR_ALPHABETIC:
   2823             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2824                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
   2825                         props[i][0], result, i);
   2826             }
   2827             break;
   2828         case UCHAR_LOWERCASE:
   2829             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2830                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
   2831                         props[i][0], result, i);
   2832             }
   2833             break;
   2834         case UCHAR_UPPERCASE:
   2835             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2836                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
   2837                         props[i][0], result, i);
   2838             }
   2839             break;
   2840         case UCHAR_WHITE_SPACE:
   2841             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2842                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
   2843                         props[i][0], result, i);
   2844             }
   2845             break;
   2846         default:
   2847             break;
   2848         }
   2849     }
   2850 }
   2851 
   2852 static void
   2853 TestNumericProperties(void) {
   2854     /* see UnicodeData.txt, DerivedNumericValues.txt */
   2855     static const struct {
   2856         UChar32 c;
   2857         int32_t type;
   2858         double numValue;
   2859     } values[]={
   2860         { 0x0F33, U_NT_NUMERIC, -1./2. },
   2861         { 0x0C66, U_NT_DECIMAL, 0 },
   2862         { 0x96f6, U_NT_NUMERIC, 0 },
   2863         { 0xa833, U_NT_NUMERIC, 1./16. },
   2864         { 0x2152, U_NT_NUMERIC, 1./10. },
   2865         { 0x2151, U_NT_NUMERIC, 1./9. },
   2866         { 0x1245f, U_NT_NUMERIC, 1./8. },
   2867         { 0x2150, U_NT_NUMERIC, 1./7. },
   2868         { 0x2159, U_NT_NUMERIC, 1./6. },
   2869         { 0x09f6, U_NT_NUMERIC, 3./16. },
   2870         { 0x2155, U_NT_NUMERIC, 1./5. },
   2871         { 0x00BD, U_NT_NUMERIC, 1./2. },
   2872         { 0x0031, U_NT_DECIMAL, 1. },
   2873         { 0x4e00, U_NT_NUMERIC, 1. },
   2874         { 0x58f1, U_NT_NUMERIC, 1. },
   2875         { 0x10320, U_NT_NUMERIC, 1. },
   2876         { 0x0F2B, U_NT_NUMERIC, 3./2. },
   2877         { 0x00B2, U_NT_DIGIT, 2. },
   2878         { 0x5f10, U_NT_NUMERIC, 2. },
   2879         { 0x1813, U_NT_DECIMAL, 3. },
   2880         { 0x5f0e, U_NT_NUMERIC, 3. },
   2881         { 0x2173, U_NT_NUMERIC, 4. },
   2882         { 0x8086, U_NT_NUMERIC, 4. },
   2883         { 0x278E, U_NT_DIGIT, 5. },
   2884         { 0x1D7F2, U_NT_DECIMAL, 6. },
   2885         { 0x247A, U_NT_DIGIT, 7. },
   2886         { 0x7396, U_NT_NUMERIC, 9. },
   2887         { 0x1372, U_NT_NUMERIC, 10. },
   2888         { 0x216B, U_NT_NUMERIC, 12. },
   2889         { 0x16EE, U_NT_NUMERIC, 17. },
   2890         { 0x249A, U_NT_NUMERIC, 19. },
   2891         { 0x303A, U_NT_NUMERIC, 30. },
   2892         { 0x5345, U_NT_NUMERIC, 30. },
   2893         { 0x32B2, U_NT_NUMERIC, 37. },
   2894         { 0x1375, U_NT_NUMERIC, 40. },
   2895         { 0x10323, U_NT_NUMERIC, 50. },
   2896         { 0x0BF1, U_NT_NUMERIC, 100. },
   2897         { 0x964c, U_NT_NUMERIC, 100. },
   2898         { 0x217E, U_NT_NUMERIC, 500. },
   2899         { 0x2180, U_NT_NUMERIC, 1000. },
   2900         { 0x4edf, U_NT_NUMERIC, 1000. },
   2901         { 0x2181, U_NT_NUMERIC, 5000. },
   2902         { 0x137C, U_NT_NUMERIC, 10000. },
   2903         { 0x4e07, U_NT_NUMERIC, 10000. },
   2904         { 0x12432, U_NT_NUMERIC, 216000. },
   2905         { 0x12433, U_NT_NUMERIC, 432000. },
   2906         { 0x4ebf, U_NT_NUMERIC, 100000000. },
   2907         { 0x5146, U_NT_NUMERIC, 1000000000000. },
   2908         { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2909         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2910         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2911         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2912         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2913         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2914         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2915         { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
   2916     };
   2917 
   2918     double nv;
   2919     UChar32 c;
   2920     int32_t i, type;
   2921 
   2922     for(i=0; i<UPRV_LENGTHOF(values); ++i) {
   2923         c=values[i].c;
   2924         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
   2925         nv=u_getNumericValue(c);
   2926 
   2927         if(type!=values[i].type) {
   2928             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
   2929         }
   2930         if(0.000001 <= fabs(nv - values[i].numValue)) {
   2931             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
   2932         }
   2933     }
   2934 }
   2935 
   2936 /**
   2937  * Test the property names and property value names API.
   2938  */
   2939 static void
   2940 TestPropertyNames(void) {
   2941     int32_t p, v, choice=0, rev;
   2942     UBool atLeastSomething = FALSE;
   2943 
   2944     for (p=0; ; ++p) {
   2945         UProperty propEnum = (UProperty)p;
   2946         UBool sawProp = FALSE;
   2947         if(p > 10 && !atLeastSomething) {
   2948           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
   2949           return;
   2950         }
   2951 
   2952         for (choice=0; ; ++choice) {
   2953             const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
   2954             if (name) {
   2955                 if (!sawProp)
   2956                     log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
   2957                 log_verbose("%d=\"%s\"", choice, name);
   2958                 sawProp = TRUE;
   2959                 atLeastSomething = TRUE;
   2960 
   2961                 /* test reverse mapping */
   2962                 rev = u_getPropertyEnum(name);
   2963                 if (rev != p) {
   2964                     log_err("Property round-trip failure: %d -> %s -> %d\n",
   2965                             p, name, rev);
   2966                 }
   2967             }
   2968             if (!name && choice>0) break;
   2969         }
   2970         if (sawProp) {
   2971             /* looks like a valid property; check the values */
   2972             const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
   2973             int32_t max = 0;
   2974             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
   2975                 max = 255;
   2976             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
   2977                 /* it's far too slow to iterate all the way up to
   2978                    the real max, U_GC_P_MASK */
   2979                 max = U_GC_NL_MASK;
   2980             } else if (p == UCHAR_BLOCK) {
   2981                 /* UBlockCodes, unlike other values, start at 1 */
   2982                 max = 1;
   2983             }
   2984             log_verbose("\n");
   2985             for (v=-1; ; ++v) {
   2986                 UBool sawValue = FALSE;
   2987                 for (choice=0; ; ++choice) {
   2988                     const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
   2989                     if (vname) {
   2990                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
   2991                         log_verbose("%d=\"%s\"", choice, vname);
   2992                         sawValue = TRUE;
   2993 
   2994                         /* test reverse mapping */
   2995                         rev = u_getPropertyValueEnum(propEnum, vname);
   2996                         if (rev != v) {
   2997                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
   2998                                     pname, v, vname, rev);
   2999                         }
   3000                     }
   3001                     if (!vname && choice>0) break;
   3002                 }
   3003                 if (sawValue) {
   3004                     log_verbose("\n");
   3005                 }
   3006                 if (!sawValue && v>=max) break;
   3007             }
   3008         }
   3009         if (!sawProp) {
   3010             if (p>=UCHAR_STRING_LIMIT) {
   3011                 break;
   3012             } else if (p>=UCHAR_DOUBLE_LIMIT) {
   3013                 p = UCHAR_STRING_START - 1;
   3014             } else if (p>=UCHAR_MASK_LIMIT) {
   3015                 p = UCHAR_DOUBLE_START - 1;
   3016             } else if (p>=UCHAR_INT_LIMIT) {
   3017                 p = UCHAR_MASK_START - 1;
   3018             } else if (p>=UCHAR_BINARY_LIMIT) {
   3019                 p = UCHAR_INT_START - 1;
   3020             }
   3021         }
   3022     }
   3023 }
   3024 
   3025 /**
   3026  * Test the property values API.  See JB#2410.
   3027  */
   3028 static void
   3029 TestPropertyValues(void) {
   3030     int32_t i, p, min, max;
   3031     UErrorCode ec;
   3032 
   3033     /* Min should be 0 for everything. */
   3034     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
   3035     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
   3036         UProperty propEnum = (UProperty)p;
   3037         min = u_getIntPropertyMinValue(propEnum);
   3038         if (min != 0) {
   3039             if (p == UCHAR_BLOCK) {
   3040                 /* This is okay...for now.  See JB#2487.
   3041                    TODO Update this for JB#2487. */
   3042             } else {
   3043                 const char* name;
   3044                 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
   3045                 if (name == NULL)
   3046                     name = "<ERROR>";
   3047                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
   3048                         name, min);
   3049             }
   3050         }
   3051     }
   3052 
   3053     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
   3054         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
   3055         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
   3056     }
   3057 
   3058     /* Max should be -1 for invalid properties. */
   3059     max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
   3060     if (max != -1) {
   3061         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
   3062                 max);
   3063     }
   3064 
   3065     /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
   3066     for (i=0; i<2; ++i) {
   3067         int32_t script;
   3068         const char* desc;
   3069         ec = U_ZERO_ERROR;
   3070         switch (i) {
   3071         case 0:
   3072             script = uscript_getScript(-1, &ec);
   3073             desc = "uscript_getScript(-1)";
   3074             break;
   3075         case 1:
   3076             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
   3077             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
   3078             break;
   3079         default:
   3080             log_err("Internal test error. Too many scripts\n");
   3081             return;
   3082         }
   3083         /* We don't explicitly test ec.  It should be U_FAILURE but it
   3084            isn't documented as such. */
   3085         if (script != (int32_t)USCRIPT_INVALID_CODE) {
   3086             log_err("FAIL: %s = %d, exp. 0\n",
   3087                     desc, script);
   3088         }
   3089     }
   3090 }
   3091 
   3092 /* various tests for consistency of UCD data and API behavior */
   3093 static void
   3094 TestConsistency() {
   3095     char buffer[300];
   3096     USet *set1, *set2, *set3, *set4;
   3097     UErrorCode errorCode;
   3098 
   3099     UChar32 start, end;
   3100     int32_t i, length;
   3101 
   3102     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
   3103     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
   3104     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
   3105     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
   3106     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
   3107 
   3108     U_STRING_DECL(mathBlocksPattern,
   3109         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
   3110         214);
   3111     U_STRING_DECL(mathPattern, "[:Math:]", 8);
   3112     U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
   3113     U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
   3114     U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
   3115 
   3116     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
   3117     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
   3118     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
   3119     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
   3120     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
   3121 
   3122     U_STRING_INIT(mathBlocksPattern,
   3123         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
   3124         214);
   3125     U_STRING_INIT(mathPattern, "[:Math:]", 8);
   3126     U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
   3127     U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
   3128     U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
   3129 
   3130     /*
   3131      * It used to be that UCD.html and its precursors said
   3132      * "Those dashes used to mark connections between pieces of words,
   3133      *  plus the Katakana middle dot."
   3134      *
   3135      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
   3136      * but not from Hyphen.
   3137      * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
   3138      * Therefore, do not show errors when testing the Hyphen property.
   3139      */
   3140     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
   3141                 "known to the UTC and not considered errors.\n");
   3142 
   3143     errorCode=U_ZERO_ERROR;
   3144     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
   3145     set2=uset_openPattern(dashPattern, 8, &errorCode);
   3146     if(U_SUCCESS(errorCode)) {
   3147         /* remove the Katakana middle dot(s) from set1 */
   3148         uset_remove(set1, 0x30fb);
   3149         uset_remove(set1, 0xff65); /* halfwidth variant */
   3150         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
   3151     } else {
   3152         log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3153     }
   3154 
   3155     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
   3156     set3=uset_openPattern(formatPattern, 6, &errorCode);
   3157     set4=uset_openPattern(alphaPattern, 14, &errorCode);
   3158     if(U_SUCCESS(errorCode)) {
   3159         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
   3160         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
   3161         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
   3162     } else {
   3163         log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3164     }
   3165 
   3166     uset_close(set1);
   3167     uset_close(set2);
   3168     uset_close(set3);
   3169     uset_close(set4);
   3170 
   3171     /*
   3172      * Check that each lowercase character has "small" in its name
   3173      * and not "capital".
   3174      * There are some such characters, some of which seem odd.
   3175      * Use the verbose flag to see these notices.
   3176      */
   3177     errorCode=U_ZERO_ERROR;
   3178     set1=uset_openPattern(lowerPattern, 13, &errorCode);
   3179     if(U_SUCCESS(errorCode)) {
   3180         for(i=0;; ++i) {
   3181             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
   3182             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
   3183                 break; /* done */
   3184             }
   3185             if(U_FAILURE(errorCode)) {
   3186                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
   3187                         i, u_errorName(errorCode));
   3188                 break;
   3189             }
   3190             if(length!=0) {
   3191                 break; /* done with code points, got a string or -1 */
   3192             }
   3193 
   3194             while(start<=end) {
   3195                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
   3196                 if(U_FAILURE(errorCode)) {
   3197                     log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
   3198                     errorCode=U_ZERO_ERROR;
   3199                 }
   3200                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
   3201                     strstr(buffer, "SMALL CAPITAL")==NULL
   3202                 ) {
   3203                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
   3204                 }
   3205                 ++start;
   3206             }
   3207         }
   3208     } else {
   3209         log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3210     }
   3211     uset_close(set1);
   3212 
   3213     /* verify that all assigned characters in Math blocks are exactly Math characters */
   3214     errorCode=U_ZERO_ERROR;
   3215     set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
   3216     set2=uset_openPattern(mathPattern, 8, &errorCode);
   3217     set3=uset_openPattern(unassignedPattern, 6, &errorCode);
   3218     if(U_SUCCESS(errorCode)) {
   3219         uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
   3220         uset_complement(set3);      /* assigned characters */
   3221         uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
   3222         compareUSets(set1, set2,
   3223                      "[assigned Math block chars]", "[math blocks]&[:Math:]",
   3224                      TRUE);
   3225     } else {
   3226         log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3227     }
   3228     uset_close(set1);
   3229     uset_close(set2);
   3230     uset_close(set3);
   3231 
   3232     /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
   3233     errorCode=U_ZERO_ERROR;
   3234     set1=uset_openPattern(unknownPattern, 14, &errorCode);
   3235     set2=uset_openPattern(reservedPattern, 20, &errorCode);
   3236     if(U_SUCCESS(errorCode)) {
   3237         compareUSets(set1, set2,
   3238                      "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
   3239                      TRUE);
   3240     } else {
   3241         log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3242     }
   3243     uset_close(set1);
   3244     uset_close(set2);
   3245 }
   3246 
/*
 * Starting with ICU4C 3.4, the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and therefore not included
 * in the data package any more.
 * Tests requiring these files are disabled so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 *
 * While this macro is nonzero, the bodies of TestUCase() and
 * TestUBiDiProps() are compiled out via #if !HARDCODED_DATA_4497.
 */
#define HARDCODED_DATA_4497 1
   3258 
   3259 /* API coverage for ucase.c */
   3260 static void TestUCase() {
   3261 #if !HARDCODED_DATA_4497
   3262     UDataMemory *pData;
   3263     UCaseProps *csp;
   3264     const UCaseProps *ccsp;
   3265     UErrorCode errorCode;
   3266 
   3267     /* coverage for ucase_openBinary() */
   3268     errorCode=U_ZERO_ERROR;
   3269     pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
   3270     if(U_FAILURE(errorCode)) {
   3271         log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
   3272                     u_errorName(errorCode));
   3273         return;
   3274     }
   3275 
   3276     csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
   3277     if(U_FAILURE(errorCode)) {
   3278         log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
   3279                 u_errorName(errorCode));
   3280         udata_close(pData);
   3281         return;
   3282     }
   3283 
   3284     if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
   3285         log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
   3286     }
   3287 
   3288     ucase_close(csp);
   3289     udata_close(pData);
   3290 
   3291     /* coverage for ucase_getDummy() */
   3292     errorCode=U_ZERO_ERROR;
   3293     ccsp=ucase_getDummy(&errorCode);
   3294     if(ucase_tolower(ccsp, 0x41)!=0x41) {
   3295         log_err("ucase_tolower(dummy, A)!=A\n");
   3296     }
   3297 #endif
   3298 }
   3299 
   3300 /* API coverage for ubidi_props.c */
   3301 static void TestUBiDiProps() {
   3302 #if !HARDCODED_DATA_4497
   3303     UDataMemory *pData;
   3304     UBiDiProps *bdp;
   3305     const UBiDiProps *cbdp;
   3306     UErrorCode errorCode;
   3307 
   3308     /* coverage for ubidi_openBinary() */
   3309     errorCode=U_ZERO_ERROR;
   3310     pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
   3311     if(U_FAILURE(errorCode)) {
   3312         log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
   3313                     u_errorName(errorCode));
   3314         return;
   3315     }
   3316 
   3317     bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
   3318     if(U_FAILURE(errorCode)) {
   3319         log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
   3320                 u_errorName(errorCode));
   3321         udata_close(pData);
   3322         return;
   3323     }
   3324 
   3325     if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
   3326         log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
   3327     }
   3328 
   3329     ubidi_closeProps(bdp);
   3330     udata_close(pData);
   3331 
   3332     /* coverage for ubidi_getDummy() */
   3333     errorCode=U_ZERO_ERROR;
   3334     cbdp=ubidi_getDummy(&errorCode);
   3335     if(ubidi_getClass(cbdp, 0x20)!=0) {
   3336         log_err("ubidi_getClass(dummy, space)!=0\n");
   3337     }
   3338 #endif
   3339 }
   3340 
   3341 /* test case folding, compare return values with CaseFolding.txt ------------ */
   3342 
   3343 /* bit set for which case foldings for a character have been tested already */
   3344 enum {
   3345     CF_SIMPLE=1,
   3346     CF_FULL=2,
   3347     CF_TURKIC=4,
   3348     CF_ALL=7
   3349 };
   3350 
   3351 static void
   3352 testFold(UChar32 c, int which,
   3353          UChar32 simple, UChar32 turkic,
   3354          const UChar *full, int32_t fullLength,
   3355          const UChar *turkicFull, int32_t turkicFullLength) {
   3356     UChar s[2], t[32];
   3357     UChar32 c2;
   3358     int32_t length, length2;
   3359 
   3360     UErrorCode errorCode=U_ZERO_ERROR;
   3361 
   3362     length=0;
   3363     U16_APPEND_UNSAFE(s, length, c);
   3364 
   3365     if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
   3366         log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
   3367     }
   3368     if((which&CF_FULL)!=0) {
   3369         length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, 0, &errorCode);
   3370         if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
   3371             log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
   3372         }
   3373     }
   3374     if((which&CF_TURKIC)!=0) {
   3375         if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
   3376             log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
   3377         }
   3378 
   3379         length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
   3380         if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
   3381             log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
   3382         }
   3383     }
   3384 }
   3385 
   3386 /* test that c case-folds to itself */
   3387 static void
   3388 testFoldToSelf(UChar32 c, int which) {
   3389     UChar s[2];
   3390     int32_t length;
   3391 
   3392     length=0;
   3393     U16_APPEND_UNSAFE(s, length, c);
   3394     testFold(c, which, c, c, s, length, s, length);
   3395 }
   3396 
   3397 struct CaseFoldingData {
   3398     USet *notSeen;
   3399     UChar32 prev, prevSimple;
   3400     UChar prevFull[32];
   3401     int32_t prevFullLength;
   3402     int which;
   3403 };
   3404 typedef struct CaseFoldingData CaseFoldingData;
   3405 
   3406 static void U_CALLCONV
   3407 caseFoldingLineFn(void *context,
   3408                   char *fields[][2], int32_t fieldCount,
   3409                   UErrorCode *pErrorCode) {
   3410     CaseFoldingData *pData=(CaseFoldingData *)context;
   3411     char *end;
   3412     UChar full[32];
   3413     UChar32 c, prev, simple;
   3414     int32_t count;
   3415     int which;
   3416     char status;
   3417 
   3418     /* get code point */
   3419     const char *s=u_skipWhitespace(fields[0][0]);
   3420     if(0==strncmp(s, "0000..10FFFF", 12)) {
   3421         /*
   3422          * Ignore the line
   3423          * # @missing: 0000..10FFFF; C; <code point>
   3424          * because maps-to-self is already our default, and this line breaks this parser.
   3425          */
   3426         return;
   3427     }
   3428     c=(UChar32)strtoul(s, &end, 16);
   3429     end=(char *)u_skipWhitespace(end);
   3430     if(end<=fields[0][0] || end!=fields[0][1]) {
   3431         log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
   3432         *pErrorCode=U_PARSE_ERROR;
   3433         return;
   3434     }
   3435 
   3436     /* get the status of this mapping */
   3437     status=*u_skipWhitespace(fields[1][0]);
   3438     if(status!='C' && status!='S' && status!='F' && status!='T') {
   3439         log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
   3440         *pErrorCode=U_PARSE_ERROR;
   3441         return;
   3442     }
   3443 
   3444     /* get the mapping */
   3445     count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
   3446     if(U_FAILURE(*pErrorCode)) {
   3447         log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
   3448         return;
   3449     }
   3450 
   3451     /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
   3452     if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
   3453         simple=c;
   3454     }
   3455 
   3456     if(c!=(prev=pData->prev)) {
   3457         /*
   3458          * Test remaining mappings for the previous code point.
   3459          * If a turkic folding was not mentioned, then it should fold the same
   3460          * as the regular simple case folding.
   3461          */
   3462         UChar prevString[2];
   3463         int32_t length;
   3464 
   3465         length=0;
   3466         U16_APPEND_UNSAFE(prevString, length, prev);
   3467         testFold(prev, (~pData->which)&CF_ALL,
   3468                  prev, pData->prevSimple,
   3469                  prevString, length,
   3470                  pData->prevFull, pData->prevFullLength);
   3471         pData->prev=pData->prevSimple=c;
   3472         length=0;
   3473         U16_APPEND_UNSAFE(pData->prevFull, length, c);
   3474         pData->prevFullLength=length;
   3475         pData->which=0;
   3476     }
   3477 
   3478     /*
   3479      * Turn the status into a bit set of case foldings to test.
   3480      * Remember non-Turkic case foldings as defaults for Turkic mode.
   3481      */
   3482     switch(status) {
   3483     case 'C':
   3484         which=CF_SIMPLE|CF_FULL;
   3485         pData->prevSimple=simple;
   3486         u_memcpy(pData->prevFull, full, count);
   3487         pData->prevFullLength=count;
   3488         break;
   3489     case 'S':
   3490         which=CF_SIMPLE;
   3491         pData->prevSimple=simple;
   3492         break;
   3493     case 'F':
   3494         which=CF_FULL;
   3495         u_memcpy(pData->prevFull, full, count);
   3496         pData->prevFullLength=count;
   3497         break;
   3498     case 'T':
   3499         which=CF_TURKIC;
   3500         break;
   3501     default:
   3502         which=0;
   3503         break; /* won't happen because of test above */
   3504     }
   3505 
   3506     testFold(c, which, simple, simple, full, count, full, count);
   3507 
   3508     /* remember which case foldings of c have been tested */
   3509     pData->which|=which;
   3510 
   3511     /* remove c from the set of ones not mentioned in CaseFolding.txt */
   3512     uset_remove(pData->notSeen, c);
   3513 }
   3514 
   3515 static void
   3516 TestCaseFolding() {
   3517     CaseFoldingData data={ NULL };
   3518     char *fields[3][2];
   3519     UErrorCode errorCode;
   3520 
   3521     static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
   3522 
   3523     errorCode=U_ZERO_ERROR;
   3524     /* test BMP & plane 1 - nothing interesting above */
   3525     data.notSeen=uset_open(0, 0x1ffff);
   3526     data.prevFullLength=1; /* length of full case folding of U+0000 */
   3527 
   3528     parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
   3529     if(U_SUCCESS(errorCode)) {
   3530         int32_t i, start, end;
   3531 
   3532         /* add a pseudo-last line to finish testing of the actual last one */
   3533         fields[0][0]=lastLine;
   3534         fields[0][1]=lastLine+6;
   3535         fields[1][0]=lastLine+7;
   3536         fields[1][1]=lastLine+9;
   3537         fields[2][0]=lastLine+10;
   3538         fields[2][1]=lastLine+17;
   3539         caseFoldingLineFn(&data, fields, 3, &errorCode);
   3540 
   3541         /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
   3542         for(i=0;
   3543             0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
   3544                 U_SUCCESS(errorCode);
   3545             ++i
   3546         ) {
   3547             do {
   3548                 testFoldToSelf(start, CF_ALL);
   3549             } while(++start<=end);
   3550         }
   3551     }
   3552 
   3553     uset_close(data.notSeen);
   3554 }
   3555