Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2015, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /*******************************************************************************
      7 *
      8 * File CUCDTST.C
      9 *
     10 * Modification History:
     11 *        Name                     Description
     12 *     Madhu Katragadda            Ported for C API, added tests for string functions
     13 ********************************************************************************
     14 */
     15 
     16 #include <string.h>
     17 #include <math.h>
     18 #include <stdlib.h>
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/uchar.h"
     22 #include "unicode/putil.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/uloc.h"
     25 #include "unicode/unorm2.h"
     26 
     27 #include "cintltst.h"
     28 #include "putilimp.h"
     29 #include "uparse.h"
     30 #include "ucase.h"
     31 #include "ubidi_props.h"
     32 #include "uprops.h"
     33 #include "uset_imp.h"
     34 #include "usc_impl.h"
     35 #include "udatamem.h" /* for testing ucase_openBinary() */
     36 #include "cucdapi.h"
     37 #include "cmemory.h"
     38 
     39 /* prototypes --------------------------------------------------------------- */
     40 
     41 static void TestUpperLower(void);
     42 static void TestLetterNumber(void);
     43 static void TestMisc(void);
     44 static void TestPOSIX(void);
     45 static void TestControlPrint(void);
     46 static void TestIdentifier(void);
     47 static void TestUnicodeData(void);
     48 static void TestCodeUnit(void);
     49 static void TestCodePoint(void);
     50 static void TestCharLength(void);
     51 static void TestCharNames(void);
     52 static void TestUCharFromNameUnderflow(void);
     53 static void TestMirroring(void);
     54 static void TestUScriptRunAPI(void);
     55 static void TestAdditionalProperties(void);
     56 static void TestNumericProperties(void);
     57 static void TestPropertyNames(void);
     58 static void TestPropertyValues(void);
     59 static void TestConsistency(void);
     60 static void TestUCase(void);
     61 static void TestUBiDiProps(void);
     62 static void TestCaseFolding(void);
     63 
     64 /* internal methods used */
     65 static int32_t MakeProp(char* str);
     66 static int32_t MakeDir(char* str);
     67 
     68 /* helpers ------------------------------------------------------------------ */
     69 
     70 static void
     71 parseUCDFile(const char *filename,
     72              char *fields[][2], int32_t fieldCount,
     73              UParseLineFn *lineFn, void *context,
     74              UErrorCode *pErrorCode) {
     75     char path[256];
     76     char backupPath[256];
     77 
     78     if(U_FAILURE(*pErrorCode)) {
     79         return;
     80     }
     81 
     82     /* Look inside ICU_DATA first */
     83     strcpy(path, u_getDataDirectory());
     84     strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
     85     strcat(path, filename);
     86 
     87     /* As a fallback, try to guess where the source data was located
     88      *    at the time ICU was built, and look there.
     89      */
     90     strcpy(backupPath, ctest_dataSrcDir());
     91     strcat(backupPath, U_FILE_SEP_STRING);
     92     strcat(backupPath, "unidata" U_FILE_SEP_STRING);
     93     strcat(backupPath, filename);
     94 
     95     u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
     96     if(*pErrorCode==U_FILE_ACCESS_ERROR) {
     97         *pErrorCode=U_ZERO_ERROR;
     98         u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
     99     }
    100     if(U_FAILURE(*pErrorCode)) {
    101         log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
    102     }
    103 }
    104 
    105 /* test data ---------------------------------------------------------------- */
    106 
        /*
         * Two-letter general category abbreviations, concatenated without separators.
         * The abbreviation starting at offset 2*i belongs to tagValues[i];
         * the per-entry comments in tagValues repeat the abbreviations in the same order.
         */
    107 static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
        /* General category constants, parallel to the abbreviations in tagStrings. */
    108 static const int32_t tagValues[] =
    109     {
    110     /* Mn */ U_NON_SPACING_MARK,
    111     /* Mc */ U_COMBINING_SPACING_MARK,
    112     /* Me */ U_ENCLOSING_MARK,
    113     /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    114     /* Nl */ U_LETTER_NUMBER,
    115     /* No */ U_OTHER_NUMBER,
    116     /* Zs */ U_SPACE_SEPARATOR,
    117     /* Zl */ U_LINE_SEPARATOR,
    118     /* Zp */ U_PARAGRAPH_SEPARATOR,
    119     /* Cc */ U_CONTROL_CHAR,
    120     /* Cf */ U_FORMAT_CHAR,
    121     /* Cs */ U_SURROGATE,
    122     /* Co */ U_PRIVATE_USE_CHAR,
    123     /* Cn */ U_UNASSIGNED,
    124     /* Lu */ U_UPPERCASE_LETTER,
    125     /* Ll */ U_LOWERCASE_LETTER,
    126     /* Lt */ U_TITLECASE_LETTER,
    127     /* Lm */ U_MODIFIER_LETTER,
    128     /* Lo */ U_OTHER_LETTER,
    129     /* Pc */ U_CONNECTOR_PUNCTUATION,
    130     /* Pd */ U_DASH_PUNCTUATION,
    131     /* Ps */ U_START_PUNCTUATION,
    132     /* Pe */ U_END_PUNCTUATION,
    133     /* Po */ U_OTHER_PUNCTUATION,
    134     /* Sm */ U_MATH_SYMBOL,
    135     /* Sc */ U_CURRENCY_SYMBOL,
    136     /* Sk */ U_MODIFIER_SYMBOL,
    137     /* So */ U_OTHER_SYMBOL,
    138     /* Pi */ U_INITIAL_PUNCTUATION,
    139     /* Pf */ U_FINAL_PUNCTUATION
    140     };
    141 
        /*
         * Short names of the bidirectional classes.
         * Each entry has room for 4 characters plus the terminating NUL;
         * the longest names listed here have 3 characters.
         * NOTE(review): the order presumably matches the UCharDirection enum values
         * so that the array index can be used as the direction value - confirm
         * against the code that searches this table.
         */
    142 static const char dirStrings[][5] = {
    143     "L",
    144     "R",
    145     "EN",
    146     "ES",
    147     "ET",
    148     "AN",
    149     "CS",
    150     "B",
    151     "S",
    152     "WS",
    153     "ON",
    154     "LRE",
    155     "LRO",
    156     "AL",
    157     "RLE",
    158     "RLO",
    159     "PDF",
    160     "NSM",
    161     "BN",
    162     /* new in Unicode 6.3/ICU 52 */
    163     "FSI",
    164     "LRI",
    165     "RLI",
    166     "PDI"
    167 };
    168 
    169 void addUnicodeTest(TestNode** root);
    170 
    171 void addUnicodeTest(TestNode** root)
    172 {
    173     addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
    174     addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
    175     addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
    176     addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
    177     addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
    178     addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
    179     addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
    180     addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
    181     addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
    182     addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
    183     addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
    184     addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
    185     addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
    186     addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
    187     addTest(root, &TestUCharFromNameUnderflow, "tsutil/cucdtst/TestUCharFromNameUnderflow");
    188     addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
    189     addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
    190     addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
    191     addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
    192     addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
    193     addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
    194     addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
    195     addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
    196     addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
    197     addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
    198     addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
    199     addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
    200 }
    201 
    202 /*==================================================== */
    203 /* test u_toupper() and u_tolower()                    */
    204 /*==================================================== */
    205 static void TestUpperLower()
    206 {
    207     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
    208     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
    209     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
    210     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
    211     int32_t i;
    212 
    213     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
    214     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
    215 
    216 /*
    217 Checks LetterLike Symbols which were previously a source of confusion
    218 [Bertrand A. D. 02/04/98]
    219 */
    220     for (i=0x2100;i<0x2138;i++)
    221     {
    222         /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
    223         if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
    224         {
    225             if (i != (int)u_tolower(i)) /* itself */
    226                 log_err("Failed case conversion with itself: U+%04x\n", i);
    227             if (i != (int)u_toupper(i))
    228                 log_err("Failed case conversion with itself: U+%04x\n", i);
    229         }
    230     }
    231 
    232     for(i=0; i < u_strlen(upper); i++){
    233         if(u_tolower(upper[i]) != lower[i]){
    234             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
    235         }
    236     }
    237 
    238     log_verbose("testing upper lower\n");
    239     for (i = 0; i < 21; i++) {
    240 
    241         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
    242         {
    243             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
    244         }
    245         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
    246          {
    247             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
    248         }
    249         else if (upperTest[i] != u_tolower(lowerTest[i]))
    250         {
    251             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
    252         }
    253         else if (lowerTest[i] != u_toupper(upperTest[i]))
    254          {
    255             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
    256         }
    257         else if (upperTest[i] != u_tolower(upperTest[i]))
    258         {
    259             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
    260         }
    261         else if (lowerTest[i] != u_toupper(lowerTest[i]))
    262         {
    263             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
    264         }
    265     }
    266     log_verbose("done testing upper lower\n");
    267 
    268     log_verbose("testing u_istitle\n");
    269     {
    270         static const UChar expected[] = {
    271             0x1F88,
    272             0x1F89,
    273             0x1F8A,
    274             0x1F8B,
    275             0x1F8C,
    276             0x1F8D,
    277             0x1F8E,
    278             0x1F8F,
    279             0x1F88,
    280             0x1F89,
    281             0x1F8A,
    282             0x1F8B,
    283             0x1F8C,
    284             0x1F8D,
    285             0x1F8E,
    286             0x1F8F,
    287             0x1F98,
    288             0x1F99,
    289             0x1F9A,
    290             0x1F9B,
    291             0x1F9C,
    292             0x1F9D,
    293             0x1F9E,
    294             0x1F9F,
    295             0x1F98,
    296             0x1F99,
    297             0x1F9A,
    298             0x1F9B,
    299             0x1F9C,
    300             0x1F9D,
    301             0x1F9E,
    302             0x1F9F,
    303             0x1FA8,
    304             0x1FA9,
    305             0x1FAA,
    306             0x1FAB,
    307             0x1FAC,
    308             0x1FAD,
    309             0x1FAE,
    310             0x1FAF,
    311             0x1FA8,
    312             0x1FA9,
    313             0x1FAA,
    314             0x1FAB,
    315             0x1FAC,
    316             0x1FAD,
    317             0x1FAE,
    318             0x1FAF,
    319             0x1FBC,
    320             0x1FBC,
    321             0x1FCC,
    322             0x1FCC,
    323             0x1FFC,
    324             0x1FFC,
    325         };
    326         int32_t num = sizeof(expected)/sizeof(expected[0]);
    327         for(i=0; i<num; i++){
    328             if(!u_istitle(expected[i])){
    329                 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
    330             }
    331         }
    332 
    333     }
    334 }
    335 
    336 /* compare two sets and verify that their difference or intersection is empty */
    337 static UBool
    338 showADiffB(const USet *a, const USet *b,
    339            const char *a_name, const char *b_name,
    340            UBool expect, UBool diffIsError) {
    341     USet *aa;
    342     int32_t i, start, end, length;
    343     UErrorCode errorCode;
    344 
    345     /*
    346      * expect:
    347      * TRUE  -> a-b should be empty, that is, b should contain all of a
    348      * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
    349      */
    350     if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
    351         return TRUE;
    352     }
    353 
    354     /* clone a to aa because a is const */
    355     aa=uset_open(1, 0);
    356     if(aa==NULL) {
    357         /* unusual problem - out of memory? */
    358         return FALSE;
    359     }
    360     uset_addAll(aa, a);
    361 
    362     /* compute the set in question */
    363     if(expect) {
    364         /* a-b */
    365         uset_removeAll(aa, b);
    366     } else {
    367         /* a&b */
    368         uset_retainAll(aa, b);
    369     }
    370 
    371     /* aa is not empty because of the initial tests above; show its contents */
    372     errorCode=U_ZERO_ERROR;
    373     i=0;
    374     for(;;) {
    375         length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
    376         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
    377             break; /* done */
    378         }
    379         if(U_FAILURE(errorCode)) {
    380             log_err("error comparing %s with %s at difference item %d: %s\n",
    381                 a_name, b_name, i, u_errorName(errorCode));
    382             break;
    383         }
    384         if(length!=0) {
    385             break; /* done with code points, got a string or -1 */
    386         }
    387 
    388         if(diffIsError) {
    389             if(expect) {
    390                 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
    391             } else {
    392                 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
    393             }
    394         } else {
    395             if(expect) {
    396                 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
    397             } else {
    398                 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
    399             }
    400         }
    401 
    402         ++i;
    403     }
    404 
    405     uset_close(aa);
    406     return FALSE;
    407 }
    408 
    409 static UBool
    410 showAMinusB(const USet *a, const USet *b,
    411             const char *a_name, const char *b_name,
    412             UBool diffIsError) {
    413     return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
    414 }
    415 
    416 static UBool
    417 showAIntersectB(const USet *a, const USet *b,
    418                 const char *a_name, const char *b_name,
    419                 UBool diffIsError) {
    420     return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
    421 }
    422 
    423 static UBool
    424 compareUSets(const USet *a, const USet *b,
    425              const char *a_name, const char *b_name,
    426              UBool diffIsError) {
    427     /*
    428      * Use an arithmetic & not a logical && so that both branches
    429      * are always taken and all differences are shown.
    430      */
    431     return
    432         showAMinusB(a, b, a_name, b_name, diffIsError) &
    433         showAMinusB(b, a, b_name, a_name, diffIsError);
    434 }
    435 
    436 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
    437 static void TestLetterNumber()
    438 {
    439     UChar i = 0x0000;
    440 
    441     log_verbose("Testing for isalpha\n");
    442     for (i = 0x0041; i < 0x005B; i++) {
    443         if (!u_isalpha(i))
    444         {
    445             log_err("Failed isLetter test at  %.4X\n", i);
    446         }
    447     }
    448     for (i = 0x0660; i < 0x066A; i++) {
    449         if (u_isalpha(i))
    450         {
    451             log_err("Failed isLetter test with numbers at %.4X\n", i);
    452         }
    453     }
    454 
    455     log_verbose("Testing for isdigit\n");
    456     for (i = 0x0660; i < 0x066A; i++) {
    457         if (!u_isdigit(i))
    458         {
    459             log_verbose("Failed isNumber test at %.4X\n", i);
    460         }
    461     }
    462 
    463     log_verbose("Testing for isalnum\n");
    464     for (i = 0x0041; i < 0x005B; i++) {
    465         if (!u_isalnum(i))
    466         {
    467             log_err("Failed isAlNum test at  %.4X\n", i);
    468         }
    469     }
    470     for (i = 0x0660; i < 0x066A; i++) {
    471         if (!u_isalnum(i))
    472         {
    473             log_err("Failed isAlNum test at  %.4X\n", i);
    474         }
    475     }
    476 
    477     {
    478         /*
    479          * The following checks work only starting from Unicode 4.0.
    480          * Check the version number here.
    481          */
    482         static UVersionInfo u401={ 4, 0, 1, 0 };
    483         UVersionInfo version;
    484         u_getUnicodeVersion(version);
    485         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
    486             return;
    487         }
    488     }
    489 
    490     {
    491         /*
    492          * Sanity check:
    493          * Verify that exactly the digit characters have decimal digit values.
    494          * This assumption is used in the implementation of u_digit()
    495          * (which checks nt=de)
    496          * compared with the parallel java.lang.Character.digit()
    497          * (which checks Nd).
    498          *
    499          * This was not true in Unicode 3.2 and earlier.
    500          * Unicode 4.0 fixed discrepancies.
    501          * Unicode 4.0.1 re-introduced problems in this area due to an
    502          * unintentionally incomplete last-minute change.
    503          */
    504         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
    505         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
    506 
    507         USet *digits, *decimalValues;
    508         UErrorCode errorCode;
    509 
    510         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
    511         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
    512         errorCode=U_ZERO_ERROR;
    513         digits=uset_openPattern(digitsPattern, 6, &errorCode);
    514         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
    515 
    516         if(U_SUCCESS(errorCode)) {
    517             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
    518         }
    519 
    520         uset_close(digits);
    521         uset_close(decimalValues);
    522     }
    523 }
    524 
    525 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
    526                                 const UChar32 *sampleChars, int32_t sampleCharsLength,
    527                                 UBool expected) {
    528     int32_t i;
    529     for (i = 0; i < sampleCharsLength; ++i) {
    530         UBool result = propFn(sampleChars[i]);
    531         if (result != expected) {
    532             log_err("error: character property function %s(U+%04x)=%d is wrong\n",
    533                     propName, sampleChars[i], result);
    534         }
    535     }
    536 }
    537 
    538 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
    539 static void TestMisc()
    540 {
    541     static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
    542     static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
    543     static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
    544     static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
    545     static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
    546     static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
    547 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
    548     static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
    549     static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
    550     static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
    551     static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
    552 
    553     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
    554 
    555     uint32_t mask;
    556 
    557     int32_t i;
    558     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
    559     UVersionInfo realVersion;
    560 
    561     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
    562 
    563     testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);
    564     testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);
    565 
    566     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
    567                         sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);
    568     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
    569                         sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);
    570 
    571     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
    572                         sampleWhiteSpaces, UPRV_LENGTHOF(sampleWhiteSpaces), TRUE);
    573     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
    574                         sampleNonWhiteSpaces, UPRV_LENGTHOF(sampleNonWhiteSpaces), FALSE);
    575 
    576     testSampleCharProps(u_isdefined, "u_isdefined",
    577                         sampleDefined, UPRV_LENGTHOF(sampleDefined), TRUE);
    578     testSampleCharProps(u_isdefined, "u_isdefined",
    579                         sampleUndefined, UPRV_LENGTHOF(sampleUndefined), FALSE);
    580 
    581     testSampleCharProps(u_isbase, "u_isbase", sampleBase, UPRV_LENGTHOF(sampleBase), TRUE);
    582     testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, UPRV_LENGTHOF(sampleNonBase), FALSE);
    583 
    584     testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, UPRV_LENGTHOF(sampleDigits), TRUE);
    585     testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, UPRV_LENGTHOF(sampleNonDigits), FALSE);
    586 
    587     for (i = 0; i < UPRV_LENGTHOF(sampleDigits); i++) {
    588         if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
    589             log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
    590                     sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
    591         }
    592     }
    593 
    594     /* Tests the ICU version #*/
    595     u_getVersion(realVersion);
    596     u_versionToString(realVersion, icuVersion);
    597     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
    598     {
    599         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
    600     }
    601 #if defined(ICU_VERSION)
    602     /* test only happens where we have configure.in with VERSION - sanity check. */
    603     if(strcmp(U_ICU_VERSION, ICU_VERSION))
    604     {
    605         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
    606     }
    607 #endif
    608 
    609     /* test U_GC_... */
    610     if(
    611         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
    612         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
    613         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
    614         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
    615         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
    616         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
    617     ) {
    618         log_err("error: U_GET_GC_MASK does not work properly\n");
    619     }
    620 
    621     mask=0;
    622     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
    623 
    624     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
    625     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
    626     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
    627     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
    628     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
    629 
    630     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
    631     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
    632     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
    633 
    634     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
    635     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
    636     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
    637 
    638     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
    639     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
    640     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
    641 
    642     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
    643     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
    644     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
    645     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
    646 
    647     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
    648     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
    649     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
    650     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
    651     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
    652 
    653     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
    654     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
    655     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
    656     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
    657 
    658     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
    659     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
    660 
    661     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
    662         log_err("error: problems with U_GC_XX_MASK constants\n");
    663     }
    664 
    665     mask=0;
    666     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
    667     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
    668     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
    669     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
    670     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
    671     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
    672     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
    673 
    674     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
    675         log_err("error: problems with U_GC_Y_MASK constants\n");
    676     }
    677     {
    678         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
    679         for(i=0; i<10; i++){
    680             if(digit[i]!=u_forDigit(i,10)){
    681                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
    682             }
    683         }
    684     }
    685 
    686     /* test u_digit() */
    687     {
    688         static const struct {
    689             UChar32 c;
    690             int8_t radix, value;
    691         } data[]={
    692             /* base 16 */
    693             { 0x0031, 16, 1 },
    694             { 0x0038, 16, 8 },
    695             { 0x0043, 16, 12 },
    696             { 0x0066, 16, 15 },
    697             { 0x00e4, 16, -1 },
    698             { 0x0662, 16, 2 },
    699             { 0x06f5, 16, 5 },
    700             { 0xff13, 16, 3 },
    701             { 0xff41, 16, 10 },
    702 
    703             /* base 8 */
    704             { 0x0031, 8, 1 },
    705             { 0x0038, 8, -1 },
    706             { 0x0043, 8, -1 },
    707             { 0x0066, 8, -1 },
    708             { 0x00e4, 8, -1 },
    709             { 0x0662, 8, 2 },
    710             { 0x06f5, 8, 5 },
    711             { 0xff13, 8, 3 },
    712             { 0xff41, 8, -1 },
    713 
    714             /* base 36 */
    715             { 0x5a, 36, 35 },
    716             { 0x7a, 36, 35 },
    717             { 0xff3a, 36, 35 },
    718             { 0xff5a, 36, 35 },
    719 
    720             /* wrong radix values */
    721             { 0x0031, 1, -1 },
    722             { 0xff3a, 37, -1 }
    723         };
    724 
    725         for(i=0; i<UPRV_LENGTHOF(data); ++i) {
    726             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
    727                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
    728                         data[i].c,
    729                         data[i].radix,
    730                         u_digit(data[i].c, data[i].radix),
    731                         data[i].value);
    732             }
    733         }
    734     }
    735 }
    736 
    737 /* test C/POSIX-style functions --------------------------------------------- */
    738 
    739 /* bit flags */
        /*
         * One distinct bit per C/POSIX-style character class function.
         * The flags are combined with | into the posixResults values of posixData.
         * The bit positions 0..11 follow the order of the posixClasses entries
         * (ISAL=bit 0 for isalpha ... ISCN=bit 11 for iscntrl).
         */
    740 #define ISAL     1
    741 #define ISLO     2
    742 #define ISUP     4
    743 
    744 #define ISDI     8
    745 #define ISXD  0x10
    746 
    747 #define ISAN  0x20
    748 
    749 #define ISPU  0x40
    750 #define ISGR  0x80
    751 #define ISPR 0x100
    752 
    753 #define ISSP 0x200
    754 #define ISBL 0x400
    755 #define ISCN 0x800
    756 
    757 /* C/POSIX-style functions, in the same order as the bit flags */
        /* Function type shared by all of the character class functions listed below. */
    758 typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);
    759 
        /*
         * Table of character class functions with their names.
         * Entry number n belongs to the bit flag with the value 1<<n
         * (entry 0 = ISAL ... entry 11 = ISCN).
         */
    760 static const struct {
    761     IsPOSIXClass *fn;
    762     const char *name;
    763 } posixClasses[]={
    764     { u_isalpha, "isalpha" },
    765     { u_islower, "islower" },
    766     { u_isupper, "isupper" },
    767     { u_isdigit, "isdigit" },
    768     { u_isxdigit, "isxdigit" },
    769     { u_isalnum, "isalnum" },
    770     { u_ispunct, "ispunct" },
    771     { u_isgraph, "isgraph" },
    772     { u_isprint, "isprint" },
    773     { u_isspace, "isspace" },
    774     { u_isblank, "isblank" },
    775     { u_iscntrl, "iscntrl" }
    776 };
    777 
    778 static const struct {
    779     UChar32 c;
    780     uint32_t posixResults;
    781 } posixData[]={
    782     { 0x0008,                                                        ISCN },    /* backspace */
    783     { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    784     { 0x000a,                                              ISSP|     ISCN },    /* LF */
    785     { 0x000c,                                              ISSP|     ISCN },    /* FF */
    786     { 0x000d,                                              ISSP|     ISCN },    /* CR */
    787     { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    788     { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    789     { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    790     { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    791     { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    792     { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    793     { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    794     { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    795     { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    796     { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    797     { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    798     { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    799     { 0x0600,                                                        ISCN },    /* arabic number sign */
    800     { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    801     { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    802     { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    803     { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    804     { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    805     { 0x200b,                                                        ISCN },    /* ZWSP */
    806   /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    807     { 0x200e,                                                        ISCN },    /* LRM */
    808     { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    809     { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    810     { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    811     { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    812     { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    813     { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    814     { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    815     { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
    816 };
    817 
    818 static void
    819 TestPOSIX() {
    820     uint32_t mask;
    821     int32_t cl, i;
    822     UBool expect;
    823 
    824     mask=1;
    825     for(cl=0; cl<12; ++cl) {
    826         for(i=0; i<UPRV_LENGTHOF(posixData); ++i) {
    827             expect=(UBool)((posixData[i].posixResults&mask)!=0);
    828             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
    829                 log_err("u_%s(U+%04x)=%s is wrong\n",
    830                     posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
    831             }
    832         }
    833         mask<<=1;
    834     }
    835 }
    836 
    837 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
    838 static void TestControlPrint()
    839 {
    840     const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
    841     const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
    842     const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
    843     const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
    844     UChar32 c;
    845 
    846     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, UPRV_LENGTHOF(sampleControl), TRUE);
    847     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, UPRV_LENGTHOF(sampleNonControl), FALSE);
    848 
    849     testSampleCharProps(u_isprint, "u_isprint",
    850                         samplePrintable, UPRV_LENGTHOF(samplePrintable), TRUE);
    851     testSampleCharProps(u_isprint, "u_isprint",
    852                         sampleNonPrintable, UPRV_LENGTHOF(sampleNonPrintable), FALSE);
    853 
    854     /* test all ISO 8 controls */
    855     for(c=0; c<=0x9f; ++c) {
    856         if(c==0x20) {
    857             /* skip ASCII graphic characters and continue with DEL */
    858             c=0x7f;
    859         }
    860         if(!u_iscntrl(c)) {
    861             log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
    862         }
    863         if(!u_isISOControl(c)) {
    864             log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
    865         }
    866         if(u_isprint(c)) {
    867             log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
    868         }
    869     }
    870 
    871     /* test all Latin-1 graphic characters */
    872     for(c=0x20; c<=0xff; ++c) {
    873         if(c==0x7f) {
    874             c=0xa0;
    875         } else if(c==0xad) {
    876             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
    877             ++c;
    878         }
    879         if(!u_isprint(c)) {
    880             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
    881         }
    882     }
    883 }
    884 
    885 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
    886 static void TestIdentifier()
    887 {
    888     const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
    889     const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
    890     const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
    891     const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
    892     const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
    893     const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
    894     const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
    895     const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
    896     const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
    897     const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
    898 
    899     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
    900                         sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE);
    901     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
    902                         sampleNonJavaIDStart, UPRV_LENGTHOF(sampleNonJavaIDStart), FALSE);
    903 
    904     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    905                         sampleJavaIDPart, UPRV_LENGTHOF(sampleJavaIDPart), TRUE);
    906     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    907                         sampleNonJavaIDPart, UPRV_LENGTHOF(sampleNonJavaIDPart), FALSE);
    908 
    909     /* IDPart should imply IDStart */
    910     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    911                         sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE);
    912 
    913     testSampleCharProps(u_isIDStart, "u_isIDStart",
    914                         sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE);
    915     testSampleCharProps(u_isIDStart, "u_isIDStart",
    916                         sampleNonUnicodeIDStart, UPRV_LENGTHOF(sampleNonUnicodeIDStart), FALSE);
    917 
    918     testSampleCharProps(u_isIDPart, "u_isIDPart",
    919                         sampleUnicodeIDPart, UPRV_LENGTHOF(sampleUnicodeIDPart), TRUE);
    920     testSampleCharProps(u_isIDPart, "u_isIDPart",
    921                         sampleNonUnicodeIDPart, UPRV_LENGTHOF(sampleNonUnicodeIDPart), FALSE);
    922 
    923     /* IDPart should imply IDStart */
    924     testSampleCharProps(u_isIDPart, "u_isIDPart",
    925                         sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE);
    926 
    927     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
    928                         sampleIDIgnore, UPRV_LENGTHOF(sampleIDIgnore), TRUE);
    929     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
    930                         sampleNonIDIgnore, UPRV_LENGTHOF(sampleNonIDIgnore), FALSE);
    931 }
    932 
    933 /* for each line of UnicodeData.txt, check some of the properties */
    934 typedef struct UnicodeDataContext {
    935 #if UCONFIG_NO_NORMALIZATION
    936     const void *dummy;
    937 #else
    938     const UNormalizer2 *nfc;
    939     const UNormalizer2 *nfkc;
    940 #endif
    941 } UnicodeDataContext;
    942 
    943 /*
    944  * ### TODO
    945  * This test fails incorrectly if the First or Last code point of a repetitive area
    946  * is overridden, which is allowed and is encouraged for the PUAs.
    947  * Currently, this means that both area First/Last and override lines are
    948  * tested against the properties from the API,
    949  * and the area boundary will not match and cause an error.
    950  *
    951  * This function should detect area boundaries and skip them for the test of individual
    952  * code points' properties.
    953  * Then it should check that the areas contain all the same properties except where overridden.
    954  * For this, it would have had to set a flag for which code points were listed explicitly.
    955  */
    956 static void U_CALLCONV
    957 unicodeDataLineFn(void *context,
    958                   char *fields[][2], int32_t fieldCount,
    959                   UErrorCode *pErrorCode)
    960 {
    961     char buffer[100];
    962     const char *d;
    963     char *end;
    964     uint32_t value;
    965     UChar32 c;
    966     int32_t i;
    967     int8_t type;
    968     int32_t dt;
    969     UChar dm[32], s[32];
    970     int32_t dmLength, length;
    971 
    972 #if !UCONFIG_NO_NORMALIZATION
    973     const UNormalizer2 *nfc, *nfkc;
    974 #endif
    975 
    976     /* get the character code, field 0 */
    977     c=strtoul(fields[0][0], &end, 16);
    978     if(end<=fields[0][0] || end!=fields[0][1]) {
    979         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
    980         return;
    981     }
    982     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
    983         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
    984         return;
    985     }
    986 
    987     /* get general category, field 2 */
    988     *fields[2][1]=0;
    989     type = (int8_t)tagValues[MakeProp(fields[2][0])];
    990     if(u_charType(c)!=type) {
    991         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
    992     }
    993     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
    994         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
    995     }
    996 
    997     /* get canonical combining class, field 3 */
    998     value=strtoul(fields[3][0], &end, 10);
    999     if(end<=fields[3][0] || end!=fields[3][1]) {
   1000         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
   1001         return;
   1002     }
   1003     if(value>255) {
   1004         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
   1005         return;
   1006     }
   1007 #if !UCONFIG_NO_NORMALIZATION
   1008     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
   1009         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
   1010     }
   1011     nfkc=((UnicodeDataContext *)context)->nfkc;
   1012     if(value!=unorm2_getCombiningClass(nfkc, c)) {
   1013         log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
   1014     }
   1015 #endif
   1016 
   1017     /* get BiDi category, field 4 */
   1018     *fields[4][1]=0;
   1019     i=MakeDir(fields[4][0]);
   1020     if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
   1021         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
   1022     }
   1023 
   1024     /* get Decomposition_Type & Decomposition_Mapping, field 5 */
   1025     d=NULL;
   1026     if(fields[5][0]==fields[5][1]) {
   1027         /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
   1028         if(c==0xac00 || c==0xd7a3) {
   1029             dt=U_DT_CANONICAL;
   1030         } else {
   1031             dt=U_DT_NONE;
   1032         }
   1033     } else {
   1034         d=fields[5][0];
   1035         *fields[5][1]=0;
   1036         dt=UCHAR_INVALID_CODE;
   1037         if(*d=='<') {
   1038             end=strchr(++d, '>');
   1039             if(end!=NULL) {
   1040                 *end=0;
   1041                 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
   1042                 d=u_skipWhitespace(end+1);
   1043             }
   1044         } else {
   1045             dt=U_DT_CANONICAL;
   1046         }
   1047     }
   1048     if(dt>U_DT_NONE) {
   1049         if(c==0xac00) {
   1050             dm[0]=0x1100;
   1051             dm[1]=0x1161;
   1052             dm[2]=0;
   1053             dmLength=2;
   1054         } else if(c==0xd7a3) {
   1055             dm[0]=0xd788;
   1056             dm[1]=0x11c2;
   1057             dm[2]=0;
   1058             dmLength=2;
   1059         } else {
   1060             dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
   1061         }
   1062     } else {
   1063         dmLength=-1;
   1064     }
   1065     if(dt<0 || U_FAILURE(*pErrorCode)) {
   1066         log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
   1067         return;
   1068     }
   1069 #if !UCONFIG_NO_NORMALIZATION
   1070     i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
   1071     if(i!=dt) {
   1072         log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
   1073     }
   1074     /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
   1075     length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
   1076     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
   1077         log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
   1078                 "or the Decomposition_Mapping is different (%s)\n",
   1079                 c, length, dmLength, u_errorName(*pErrorCode));
   1080         return;
   1081     }
   1082     /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
   1083     if(dt!=U_DT_CANONICAL) {
   1084         dmLength=-1;
   1085     }
   1086     nfc=((UnicodeDataContext *)context)->nfc;
   1087     length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
   1088     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
   1089         log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
   1090                 "or the Decomposition_Mapping is different (%s)\n",
   1091                 c, length, dmLength, u_errorName(*pErrorCode));
   1092         return;
   1093     }
   1094     /* recompose */
   1095     if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
   1096         UChar32 a, b, composite;
   1097         i=0;
   1098         U16_NEXT(dm, i, dmLength, a);
   1099         U16_NEXT(dm, i, dmLength, b);
   1100         /* i==dmLength */
   1101         composite=unorm2_composePair(nfc, a, b);
   1102         if(composite!=c) {
   1103             log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
   1104                     (long)c, (long)a, (long)b, (long)composite);
   1105         }
   1106         /*
   1107          * Note: NFKC has fewer round-trip mappings than NFC,
   1108          * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
   1109          */
   1110     }
   1111 #endif
   1112 
   1113     /* get ISO Comment, field 11 */
   1114     *fields[11][1]=0;
   1115     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
   1116     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
   1117         log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
   1118             c, u_errorName(*pErrorCode),
   1119             U_FAILURE(*pErrorCode) ? buffer : "[error]",
   1120             fields[11][0]);
   1121     }
   1122 
   1123     /* get uppercase mapping, field 12 */
   1124     if(fields[12][0]!=fields[12][1]) {
   1125         value=strtoul(fields[12][0], &end, 16);
   1126         if(end!=fields[12][1]) {
   1127             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
   1128             return;
   1129         }
   1130         if((UChar32)value!=u_toupper(c)) {
   1131             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
   1132         }
   1133     } else {
   1134         /* no case mapping: the API must map the code point to itself */
   1135         if(c!=u_toupper(c)) {
   1136             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
   1137         }
   1138     }
   1139 
   1140     /* get lowercase mapping, field 13 */
   1141     if(fields[13][0]!=fields[13][1]) {
   1142         value=strtoul(fields[13][0], &end, 16);
   1143         if(end!=fields[13][1]) {
   1144             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
   1145             return;
   1146         }
   1147         if((UChar32)value!=u_tolower(c)) {
   1148             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
   1149         }
   1150     } else {
   1151         /* no case mapping: the API must map the code point to itself */
   1152         if(c!=u_tolower(c)) {
   1153             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
   1154         }
   1155     }
   1156 
   1157     /* get titlecase mapping, field 14 */
   1158     if(fields[14][0]!=fields[14][1]) {
   1159         value=strtoul(fields[14][0], &end, 16);
   1160         if(end!=fields[14][1]) {
   1161             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
   1162             return;
   1163         }
   1164         if((UChar32)value!=u_totitle(c)) {
   1165             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
   1166         }
   1167     } else {
   1168         /* no case mapping: the API must map the code point to itself */
   1169         if(c!=u_totitle(c)) {
   1170             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
   1171         }
   1172     }
   1173 }
   1174 
   1175 static UBool U_CALLCONV
   1176 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
   1177     static const UChar32 test[][2]={
   1178         {0x41, U_UPPERCASE_LETTER},
   1179         {0x308, U_NON_SPACING_MARK},
   1180         {0xfffe, U_GENERAL_OTHER_TYPES},
   1181         {0xe0041, U_FORMAT_CHAR},
   1182         {0xeffff, U_UNASSIGNED}
   1183     };
   1184 
   1185     int32_t i, count;
   1186 
   1187     if(0!=strcmp((const char *)context, "a1")) {
   1188         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
   1189         return FALSE;
   1190     }
   1191 
   1192     count=UPRV_LENGTHOF(test);
   1193     for(i=0; i<count; ++i) {
   1194         if(start<=test[i][0] && test[i][0]<limit) {
   1195             if(type!=(UCharCategory)test[i][1]) {
   1196                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
   1197                         start, limit, (long)type, test[i][0], test[i][1]);
   1198             }
   1199             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
   1200             return i==(count-1) ? FALSE : TRUE;
   1201         }
   1202     }
   1203 
   1204     if(start>test[count-1][0]) {
   1205         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
   1206                 start, limit, (long)type);
   1207         return FALSE;
   1208     }
   1209 
   1210     return TRUE;
   1211 }
   1212 
   1213 static UBool U_CALLCONV
   1214 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
   1215     /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
   1216     static const int32_t defaultBidi[][2]={ /* { limit, class } */
   1217         { 0x0590, U_LEFT_TO_RIGHT },
   1218         { 0x0600, U_RIGHT_TO_LEFT },
   1219         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
   1220         { 0x08A0, U_RIGHT_TO_LEFT },
   1221         { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
   1222         { 0x20A0, U_LEFT_TO_RIGHT },
   1223         { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR },  /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */
   1224         { 0xFB1D, U_LEFT_TO_RIGHT },
   1225         { 0xFB50, U_RIGHT_TO_LEFT },
   1226         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
   1227         { 0xFE70, U_LEFT_TO_RIGHT },
   1228         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
   1229         { 0x10800, U_LEFT_TO_RIGHT },
   1230         { 0x11000, U_RIGHT_TO_LEFT },
   1231         { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
   1232         { 0x1EE00, U_RIGHT_TO_LEFT },
   1233         { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
   1234         { 0x1F000, U_RIGHT_TO_LEFT },
   1235         { 0x110000, U_LEFT_TO_RIGHT }
   1236     };
   1237 
   1238     UChar32 c;
   1239     int32_t i;
   1240     UCharDirection shouldBeDir;
   1241 
   1242     /*
   1243      * LineBreak.txt specifies:
   1244      *   #  - Assigned characters that are not listed explicitly are given the value
   1245      *   #    "AL".
   1246      *   #  - Unassigned characters are given the value "XX".
   1247      *
   1248      * PUA characters are listed explicitly with "XX".
   1249      * Verify that no assigned character has "XX".
   1250      */
   1251     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
   1252         c=start;
   1253         while(c<limit) {
   1254             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
   1255                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
   1256             }
   1257             ++c;
   1258         }
   1259     }
   1260 
   1261     /*
   1262      * Verify default Bidi classes.
   1263      * For recent Unicode versions, see UCD.html.
   1264      *
   1265      * For older Unicode versions:
   1266      * See table 3-7 "Bidirectional Character Types" in UAX #9.
   1267      * http://www.unicode.org/reports/tr9/
   1268      *
   1269      * See also DerivedBidiClass.txt for Cn code points!
   1270      *
   1271      * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
   1272      * changed some default values.
   1273      * In particular, non-characters and unassigned Default Ignorable Code Points
   1274      * change from L to BN.
   1275      *
   1276      * UCD.html version 4.0.1 does not yet reflect these changes.
   1277      */
   1278     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
   1279         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
   1280         c=start;
   1281         for(i=0; i<UPRV_LENGTHOF(defaultBidi) && c<limit; ++i) {
   1282             if((int32_t)c<defaultBidi[i][0]) {
   1283                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
   1284                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
   1285                         shouldBeDir=U_BOUNDARY_NEUTRAL;
   1286                     } else {
   1287                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
   1288                     }
   1289 
   1290                     if( u_charDirection(c)!=shouldBeDir ||
   1291                         u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
   1292                     ) {
   1293                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
   1294                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
   1295                     }
   1296                     ++c;
   1297                 }
   1298             }
   1299         }
   1300     }
   1301 
   1302     return TRUE;
   1303 }
   1304 
   1305 /* tests for several properties */
   1306 static void TestUnicodeData()
   1307 {
   1308     UVersionInfo expectVersionArray;
   1309     UVersionInfo versionArray;
   1310     char *fields[15][2];
   1311     UErrorCode errorCode;
   1312     UChar32 c;
   1313     int8_t type;
   1314 
   1315     UnicodeDataContext context;
   1316 
   1317     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
   1318     u_getUnicodeVersion(versionArray);
   1319     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
   1320     {
   1321         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
   1322         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
   1323     }
   1324 
   1325 #if defined(ICU_UNICODE_VERSION)
   1326     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
   1327     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
   1328     {
   1329          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
   1330     }
   1331 #endif
   1332 
   1333     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
   1334         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
   1335     }
   1336 
   1337     errorCode=U_ZERO_ERROR;
   1338 #if !UCONFIG_NO_NORMALIZATION
   1339     context.nfc=unorm2_getNFCInstance(&errorCode);
   1340     context.nfkc=unorm2_getNFKCInstance(&errorCode);
   1341     if(U_FAILURE(errorCode)) {
   1342         log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
   1343         return;
   1344     }
   1345 #endif
   1346     parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
   1347     if(U_FAILURE(errorCode)) {
   1348         return; /* if we couldn't parse UnicodeData.txt, we should return */
   1349     }
   1350 
   1351     /* sanity check on repeated properties */
   1352     for(c=0xfffe; c<=0x10ffff;) {
   1353         type=u_charType(c);
   1354         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1355             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1356         }
   1357         if(type!=U_UNASSIGNED) {
   1358             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
   1359         }
   1360         if((c&0xffff)==0xfffe) {
   1361             ++c;
   1362         } else {
   1363             c+=0xffff;
   1364         }
   1365     }
   1366 
   1367     /* test that PUA is not "unassigned" */
   1368     for(c=0xe000; c<=0x10fffd;) {
   1369         type=u_charType(c);
   1370         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1371             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1372         }
   1373         if(type==U_UNASSIGNED) {
   1374             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
   1375         } else if(type!=U_PRIVATE_USE_CHAR) {
   1376             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
   1377         }
   1378         if(c==0xf8ff) {
   1379             c=0xf0000;
   1380         } else if(c==0xffffd) {
   1381             c=0x100000;
   1382         } else {
   1383             ++c;
   1384         }
   1385     }
   1386 
   1387     /* test u_enumCharTypes() */
   1388     u_enumCharTypes(enumTypeRange, "a1");
   1389 
   1390     /* check default properties */
   1391     u_enumCharTypes(enumDefaultsRange, NULL);
   1392 }
   1393 
   1394 static void TestCodeUnit(){
   1395     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
   1396 
   1397     int32_t i;
   1398 
   1399     for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
   1400         UChar c=codeunit[i];
   1401         if(i<4){
   1402             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
   1403                 log_err("ERROR: U+%04x is a single", c);
   1404             }
   1405 
   1406         }
   1407         if(i >= 4 && i< 8){
   1408             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
   1409                 log_err("ERROR: U+%04x is a first surrogate", c);
   1410             }
   1411         }
   1412         if(i >= 8 && i< 12){
   1413             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
   1414                 log_err("ERROR: U+%04x is a second surrogate", c);
   1415             }
   1416         }
   1417     }
   1418 
   1419 }
   1420 
   1421 static void TestCodePoint(){
   1422     const UChar32 codePoint[]={
   1423         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
   1424         0xd800,
   1425         0xdbff,
   1426         0xdc00,
   1427         0xdfff,
   1428         0xdc04,
   1429         0xd821,
   1430         /*not a surrogate, valid, isUnicodeChar , not Error*/
   1431         0x20ac,
   1432         0xd7ff,
   1433         0xe000,
   1434         0xe123,
   1435         0x0061,
   1436         0xe065,
   1437         0x20402,
   1438         0x24506,
   1439         0x23456,
   1440         0x20402,
   1441         0x10402,
   1442         0x23456,
   1443         /*not a surrogate, not valid, isUnicodeChar, isError */
   1444         0x0015,
   1445         0x009f,
   1446         /*not a surrogate, not valid, not isUnicodeChar, isError */
   1447         0xffff,
   1448         0xfffe,
   1449     };
   1450     int32_t i;
   1451     for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
   1452         UChar32 c=codePoint[i];
   1453         if(i<6){
   1454             if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
   1455                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1456             }
   1457             if(UTF_IS_VALID(c)){
   1458                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1459             }
   1460             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
   1461                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1462             }
   1463             if(UTF_IS_ERROR(c)){
   1464                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1465             }
   1466         }else if(i >=6 && i<18){
   1467             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1468                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1469             }
   1470             if(!UTF_IS_VALID(c)){
   1471                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1472             }
   1473             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
   1474                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1475             }
   1476             if(UTF_IS_ERROR(c)){
   1477                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1478             }
   1479         }else if(i >=18 && i<20){
   1480             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1481                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1482             }
   1483             if(UTF_IS_VALID(c)){
   1484                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1485             }
   1486             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
   1487                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1488             }
   1489             if(!UTF_IS_ERROR(c)){
   1490                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1491             }
   1492         }
   1493         else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
   1494             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1495                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1496             }
   1497             if(UTF_IS_VALID(c)){
   1498                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1499             }
   1500             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
   1501                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1502             }
   1503             if(!UTF_IS_ERROR(c)){
   1504                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1505             }
   1506         }
   1507     }
   1508 
   1509     if(
   1510         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
   1511         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
   1512         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
   1513         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
   1514     ) {
   1515         log_err("error with U_IS_BMP()\n");
   1516     }
   1517 
   1518     if(
   1519         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
   1520         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
   1521         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
   1522         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
   1523     ) {
   1524         log_err("error with U_IS_SUPPLEMENTARY()\n");
   1525     }
   1526 }
   1527 
   1528 static void TestCharLength()
   1529 {
   1530     const int32_t codepoint[]={
   1531         1, 0x0061,
   1532         1, 0xe065,
   1533         1, 0x20ac,
   1534         2, 0x20402,
   1535         2, 0x23456,
   1536         2, 0x24506,
   1537         2, 0x20402,
   1538         2, 0x10402,
   1539         1, 0xd7ff,
   1540         1, 0xe000
   1541     };
   1542 
   1543     int32_t i;
   1544     UBool multiple;
   1545     for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
   1546         UChar32 c=codepoint[i+1];
   1547         if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
   1548             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
   1549         }
   1550         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
   1551         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
   1552             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
   1553         }
   1554     }
   1555 }
   1556 
   1557 /*internal functions ----*/
   1558 static int32_t MakeProp(char* str)
   1559 {
   1560     int32_t result = 0;
   1561     char* matchPosition =0;
   1562 
   1563     matchPosition = strstr(tagStrings, str);
   1564     if (matchPosition == 0)
   1565     {
   1566         log_err("unrecognized type letter ");
   1567         log_err(str);
   1568     }
   1569     else
   1570         result = (int32_t)((matchPosition - tagStrings) / 2);
   1571     return result;
   1572 }
   1573 
   1574 static int32_t MakeDir(char* str)
   1575 {
   1576     int32_t pos = 0;
   1577     for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) {
   1578         if (strcmp(str, dirStrings[pos]) == 0) {
   1579             return pos;
   1580         }
   1581     }
   1582     return -1;
   1583 }
   1584 
   1585 /* test u_charName() -------------------------------------------------------- */
   1586 
   1587 static const struct {
   1588     uint32_t code;
   1589     const char *name, *oldName, *extName, *alias;
   1590 } names[]={
   1591     {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},
   1592     {0x01a2, "LATIN CAPITAL LETTER OI", "",
   1593              "LATIN CAPITAL LETTER OI",
   1594              "LATIN CAPITAL LETTER GHA"},
   1595     {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
   1596              "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
   1597     {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
   1598              "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
   1599              "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
   1600     {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" },
   1601     {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" },
   1602     {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" },
   1603     {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" },
   1604     {0xd800, "", "", "<lead surrogate-D800>" },
   1605     {0xdc00, "", "", "<trail surrogate-DC00>" },
   1606     {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS" },
   1607     {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },
   1608     {0xffff, "", "", "<noncharacter-FFFF>" },
   1609     {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
   1610               "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
   1611               "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
   1612     {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" }
   1613 };
   1614 
   1615 static UBool
   1616 enumCharNamesFn(void *context,
   1617                 UChar32 code, UCharNameChoice nameChoice,
   1618                 const char *name, int32_t length) {
   1619     int32_t *pCount=(int32_t *)context;
   1620     const char *expected;
   1621     int i;
   1622 
   1623     if(length<=0 || length!=(int32_t)strlen(name)) {
   1624         /* should not be called with an empty string or invalid length */
   1625         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
   1626         return TRUE;
   1627     }
   1628 
   1629     ++*pCount;
   1630     for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
   1631         if(code==(UChar32)names[i].code) {
   1632             switch (nameChoice) {
   1633                 case U_EXTENDED_CHAR_NAME:
   1634                     if(0!=strcmp(name, names[i].extName)) {
   1635                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
   1636                     }
   1637                     break;
   1638                 case U_UNICODE_CHAR_NAME:
   1639                     if(0!=strcmp(name, names[i].name)) {
   1640                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
   1641                     }
   1642                     break;
   1643                 case U_UNICODE_10_CHAR_NAME:
   1644                     expected=names[i].oldName;
   1645                     if(expected[0]==0 || 0!=strcmp(name, expected)) {
   1646                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
   1647                     }
   1648                     break;
   1649                 case U_CHAR_NAME_ALIAS:
   1650                     expected=names[i].alias;
   1651                     if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
   1652                         log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
   1653                     }
   1654                     break;
   1655                 case U_CHAR_NAME_CHOICE_COUNT:
   1656                     break;
   1657             }
   1658             break;
   1659         }
   1660     }
   1661     return TRUE;
   1662 }
   1663 
   1664 struct enumExtCharNamesContext {
   1665     uint32_t length;
   1666     int32_t last;
   1667 };
   1668 
   1669 static UBool
   1670 enumExtCharNamesFn(void *context,
   1671                 UChar32 code, UCharNameChoice nameChoice,
   1672                 const char *name, int32_t length) {
   1673     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
   1674 
   1675     if (ecncp->last != (int32_t) code - 1) {
   1676         if (ecncp->last < 0) {
   1677             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
   1678         } else {
   1679             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
   1680         }
   1681     }
   1682     ecncp->last = (int32_t) code;
   1683 
   1684     if (!*name) {
   1685         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
   1686     }
   1687 
   1688     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
   1689 }
   1690 
   1691 /**
   1692  * This can be made more efficient by moving it into putil.c and having
   1693  * it directly access the ebcdic translation tables.
   1694  * TODO: If we get this method in putil.c, then delete it from here.
   1695  */
   1696 static UChar
   1697 u_charToUChar(char c) {
   1698     UChar uc;
   1699     u_charsToUChars(&c, &uc, 1);
   1700     return uc;
   1701 }
   1702 
   1703 static void
   1704 TestCharNames() {
   1705     static char name[80];
   1706     UErrorCode errorCode=U_ZERO_ERROR;
   1707     struct enumExtCharNamesContext extContext;
   1708     const char *expected;
   1709     int32_t length;
   1710     UChar32 c;
   1711     int32_t i;
   1712 
   1713     log_verbose("Testing uprv_getMaxCharNameLength()\n");
   1714     length=uprv_getMaxCharNameLength();
   1715     if(length==0) {
   1716         /* no names data available */
   1717         return;
   1718     }
   1719     if(length<83) { /* Unicode 3.2 max char name length */
   1720         log_err("uprv_getMaxCharNameLength()=%d is too short");
   1721     }
   1722     /* ### TODO same tests for max ISO comment length as for max name length */
   1723 
   1724     log_verbose("Testing u_charName()\n");
   1725     for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
   1726         /* modern Unicode character name */
   1727         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
   1728         if(U_FAILURE(errorCode)) {
   1729             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
   1730             return;
   1731         }
   1732         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
   1733             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
   1734         }
   1735 
   1736         /* find the modern name */
   1737         if (*names[i].name) {
   1738             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
   1739             if(U_FAILURE(errorCode)) {
   1740                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
   1741                 return;
   1742             }
   1743             if(c!=(UChar32)names[i].code) {
   1744                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
   1745             }
   1746         }
   1747 
   1748         /* Unicode 1.0 character name */
   1749         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
   1750         if(U_FAILURE(errorCode)) {
   1751             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
   1752             return;
   1753         }
   1754         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
   1755             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
   1756         }
   1757 
   1758         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
   1759         if(names[i].oldName[0]!=0 /* && length>0 */) {
   1760             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
   1761             if(U_FAILURE(errorCode)) {
   1762                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
   1763                 return;
   1764             }
   1765             if(c!=(UChar32)names[i].code) {
   1766                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
   1767             }
   1768         }
   1769 
   1770         /* Unicode character name alias */
   1771         length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
   1772         if(U_FAILURE(errorCode)) {
   1773             log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
   1774             return;
   1775         }
   1776         expected=names[i].alias;
   1777         if(expected==NULL) {
   1778             expected="";
   1779         }
   1780         if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
   1781             log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
   1782                     names[i].code, name, length, expected);
   1783         }
   1784 
   1785         /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
   1786         if(expected[0]!=0 /* && length>0 */) {
   1787             c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
   1788             if(U_FAILURE(errorCode)) {
   1789                 log_err("u_charFromName(%s - alias) error %s\n",
   1790                         expected, u_errorName(errorCode));
   1791                 return;
   1792             }
   1793             if(c!=(UChar32)names[i].code) {
   1794                 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
   1795                         expected, c, names[i].code);
   1796             }
   1797         }
   1798     }
   1799 
   1800     /* test u_enumCharNames() */
   1801     length=0;
   1802     errorCode=U_ZERO_ERROR;
   1803     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
   1804     if(U_FAILURE(errorCode) || length<94140) {
   1805         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
   1806     }
   1807 
   1808     extContext.length = 0;
   1809     extContext.last = -1;
   1810     errorCode=U_ZERO_ERROR;
   1811     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
   1812     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
   1813         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
   1814     }
   1815 
   1816     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
   1817     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
   1818         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
   1819     }
   1820 
   1821     /* Test getCharNameCharacters */
   1822     if(!getTestOption(QUICK_OPTION)) {
   1823         enum { BUFSIZE = 256 };
   1824         UErrorCode ec = U_ZERO_ERROR;
   1825         char buf[BUFSIZE];
   1826         int32_t maxLength;
   1827         UChar32 cp;
   1828         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
   1829         int32_t l1, l2;
   1830         UBool map[256];
   1831         UBool ok;
   1832 
   1833         USet* set = uset_open(1, 0); /* empty set */
   1834         USet* dumb = uset_open(1, 0); /* empty set */
   1835 
   1836         /*
   1837          * uprv_getCharNameCharacters() will likely return more lowercase
   1838          * letters than actual character names contain because
   1839          * it includes all the characters in lowercased names of
   1840          * general categories, for the full possible set of extended names.
   1841          */
   1842         {
   1843             USetAdder sa={
   1844                 NULL,
   1845                 uset_add,
   1846                 uset_addRange,
   1847                 uset_addString,
   1848                 NULL /* don't need remove() */
   1849             };
   1850             sa.set=set;
   1851             uprv_getCharNameCharacters(&sa);
   1852         }
   1853 
   1854         /* build set the dumb (but sure-fire) way */
   1855         for (i=0; i<256; ++i) {
   1856             map[i] = FALSE;
   1857         }
   1858 
   1859         maxLength=0;
   1860         for (cp=0; cp<0x110000; ++cp) {
   1861             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
   1862                                      buf, BUFSIZE, &ec);
   1863             if (U_FAILURE(ec)) {
   1864                 log_err("FAIL: u_charName failed when it shouldn't\n");
   1865                 uset_close(set);
   1866                 uset_close(dumb);
   1867                 return;
   1868             }
   1869             if(len>maxLength) {
   1870                 maxLength=len;
   1871             }
   1872 
   1873             for (i=0; i<len; ++i) {
   1874                 if (!map[(uint8_t) buf[i]]) {
   1875                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
   1876                     map[(uint8_t) buf[i]] = TRUE;
   1877                 }
   1878             }
   1879 
   1880             /* test for leading/trailing whitespace */
   1881             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
   1882                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
   1883             }
   1884         }
   1885 
   1886         if(map[(uint8_t)'\t']) {
   1887             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
   1888         }
   1889 
   1890         length=uprv_getMaxCharNameLength();
   1891         if(length!=maxLength) {
   1892             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
   1893                     length, maxLength);
   1894         }
   1895 
   1896         /* compare the sets.  Where is my uset_equals?!! */
   1897         ok=TRUE;
   1898         for(i=0; i<256; ++i) {
   1899             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
   1900                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
   1901                     /* ignore lowercase a-z that are in set but not in dumb */
   1902                     ok=TRUE;
   1903                 } else {
   1904                     ok=FALSE;
   1905                     break;
   1906                 }
   1907             }
   1908         }
   1909 
   1910         l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
   1911         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
   1912         if (U_FAILURE(ec)) {
   1913             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
   1914             uset_close(set);
   1915             uset_close(dumb);
   1916             return;
   1917         }
   1918 
   1919         if (l1 >= BUFSIZE) {
   1920             l1 = BUFSIZE-1;
   1921             pat[l1] = 0;
   1922         }
   1923         if (l2 >= BUFSIZE) {
   1924             l2 = BUFSIZE-1;
   1925             dumbPat[l2] = 0;
   1926         }
   1927 
   1928         if (!ok) {
   1929             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
   1930                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
   1931         } else if(getTestOption(VERBOSITY_OPTION)) {
   1932             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
   1933         }
   1934 
   1935         uset_close(set);
   1936         uset_close(dumb);
   1937     }
   1938 
   1939     /* ### TODO: test error cases and other interesting things */
   1940 }
   1941 
   1942 static void
   1943 TestUCharFromNameUnderflow() {
   1944     // Ticket #10889: Underflow crash when there is no dash.
   1945     UErrorCode errorCode=U_ZERO_ERROR;
   1946     UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<NO BREAK SPACE>", &errorCode);
   1947     if(U_SUCCESS(errorCode)) {
   1948         log_err("u_charFromName(<NO BREAK SPACE>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
   1949     }
   1950 
   1951     // Test related edge cases.
   1952     errorCode=U_ZERO_ERROR;
   1953     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode);
   1954     if(U_SUCCESS(errorCode)) {
   1955         log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
   1956     }
   1957 
   1958     errorCode=U_ZERO_ERROR;
   1959     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control->", &errorCode);
   1960     if(U_SUCCESS(errorCode)) {
   1961         log_err("u_charFromName(<control->) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
   1962     }
   1963 
   1964     errorCode=U_ZERO_ERROR;
   1965     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control-111111>", &errorCode);
   1966     if(U_SUCCESS(errorCode)) {
   1967         log_err("u_charFromName(<control-111111>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
   1968     }
   1969 }
   1970 
   1971 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
   1972 
   1973 static void
   1974 TestMirroring() {
   1975     USet *set;
   1976     UErrorCode errorCode;
   1977 
   1978     UChar32 start, end, c2, c3;
   1979     int32_t i;
   1980 
   1981     U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
   1982 
   1983     U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
   1984 
   1985     log_verbose("Testing u_isMirrored()\n");
   1986     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
   1987          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
   1988         )
   1989     ) {
   1990         log_err("u_isMirrored() does not work correctly\n");
   1991     }
   1992 
   1993     log_verbose("Testing u_charMirror()\n");
   1994     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
   1995          u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
   1996          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
   1997          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
   1998          u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
   1999          )
   2000     ) {
   2001         log_err("u_charMirror() does not work correctly\n");
   2002     }
   2003 
   2004     /* verify that Bidi_Mirroring_Glyph roundtrips */
   2005     errorCode=U_ZERO_ERROR;
   2006     set=uset_openPattern(mirroredPattern, 17, &errorCode);
   2007 
   2008     if (U_FAILURE(errorCode)) {
   2009         log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
   2010     } else {
   2011         for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
   2012             do {
   2013                 c2=u_charMirror(start);
   2014                 c3=u_charMirror(c2);
   2015                 if(c3!=start) {
   2016                     log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
   2017                 }
   2018                 c3=u_getBidiPairedBracket(start);
   2019                 if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)==U_BPT_NONE) {
   2020                     if(c3!=start) {
   2021                         log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt(c)==None\n",
   2022                                 (long)start);
   2023                     }
   2024                 } else {
   2025                     if(c3!=c2) {
   2026                         log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bmg(c)'\n",
   2027                                 (long)start, (long)c2);
   2028                     }
   2029                 }
   2030             } while(++start<=end);
   2031         }
   2032     }
   2033 
   2034     uset_close(set);
   2035 }
   2036 
   2037 
   2038 struct RunTestData
   2039 {
   2040     const char *runText;
   2041     UScriptCode runCode;
   2042 };
   2043 
   2044 typedef struct RunTestData RunTestData;
   2045 
   2046 static void
   2047 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
   2048                 const char *prefix)
   2049 {
   2050     int32_t run, runStart, runLimit;
   2051     UScriptCode runCode;
   2052 
   2053     /* iterate over all the runs */
   2054     run = 0;
   2055     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
   2056         if (runStart != runStarts[run]) {
   2057             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
   2058                 prefix, run, runStarts[run], runStart);
   2059         }
   2060 
   2061         if (runLimit != runStarts[run + 1]) {
   2062             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
   2063                 prefix, run, runStarts[run + 1], runLimit);
   2064         }
   2065 
   2066         if (runCode != testData[run].runCode) {
   2067             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
   2068                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
   2069         }
   2070 
   2071         run += 1;
   2072 
   2073         /* stop when we've seen all the runs we expect to see */
   2074         if (run >= nRuns) {
   2075             break;
   2076         }
   2077     }
   2078 
   2079     /* Complain if we didn't see then number of runs we expected */
   2080     if (run != nRuns) {
   2081         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
   2082     }
   2083 }
   2084 
   2085 static void
   2086 TestUScriptRunAPI()
   2087 {
   2088     static const RunTestData testData1[] = {
   2089         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
   2090         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
   2091         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
   2092         {"English (", USCRIPT_LATIN},
   2093         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
   2094         {") ", USCRIPT_LATIN},
   2095         {"\\u6F22\\u5B75", USCRIPT_HAN},
   2096         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
   2097         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
   2098         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
   2099     };
   2100 
   2101     static const RunTestData testData2[] = {
   2102        {"((((((((((abc))))))))))", USCRIPT_LATIN}
   2103     };
   2104 
   2105     static const struct {
   2106       const RunTestData *testData;
   2107       int32_t nRuns;
   2108     } testDataEntries[] = {
   2109         {testData1, UPRV_LENGTHOF(testData1)},
   2110         {testData2, UPRV_LENGTHOF(testData2)}
   2111     };
   2112 
   2113     static const int32_t nTestEntries = UPRV_LENGTHOF(testDataEntries);
   2114     int32_t testEntry;
   2115 
   2116     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
   2117         UChar testString[1024];
   2118         int32_t runStarts[256];
   2119         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
   2120         const RunTestData *testData = testDataEntries[testEntry].testData;
   2121 
   2122         int32_t run, stringLimit;
   2123         UScriptRun *scriptRun = NULL;
   2124         UErrorCode err;
   2125 
   2126         /*
   2127          * Fill in the test string and the runStarts array.
   2128          */
   2129         stringLimit = 0;
   2130         for (run = 0; run < nTestRuns; run += 1) {
   2131             runStarts[run] = stringLimit;
   2132             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
   2133             /*stringLimit -= 1;*/
   2134         }
   2135 
   2136         /* The limit of the last run */
   2137         runStarts[nTestRuns] = stringLimit;
   2138 
   2139         /*
   2140          * Make sure that calling uscript_openRun with a NULL text pointer
   2141          * and a non-zero text length returns the correct error.
   2142          */
   2143         err = U_ZERO_ERROR;
   2144         scriptRun = uscript_openRun(NULL, stringLimit, &err);
   2145 
   2146         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2147             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2148         }
   2149 
   2150         if (scriptRun != NULL) {
   2151             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
   2152             uscript_closeRun(scriptRun);
   2153         }
   2154 
   2155         /*
   2156          * Make sure that calling uscript_openRun with a non-NULL text pointer
   2157          * and a zero text length returns the correct error.
   2158          */
   2159         err = U_ZERO_ERROR;
   2160         scriptRun = uscript_openRun(testString, 0, &err);
   2161 
   2162         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2163             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2164         }
   2165 
   2166         if (scriptRun != NULL) {
   2167             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
   2168             uscript_closeRun(scriptRun);
   2169         }
   2170 
   2171         /*
   2172          * Make sure that calling uscript_openRun with a NULL text pointer
   2173          * and a zero text length doesn't return an error.
   2174          */
   2175         err = U_ZERO_ERROR;
   2176         scriptRun = uscript_openRun(NULL, 0, &err);
   2177 
   2178         if (U_FAILURE(err)) {
   2179             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
   2180         }
   2181 
   2182         /* Make sure that the empty iterator doesn't find any runs */
   2183         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
   2184             log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
   2185         }
   2186 
   2187         /*
   2188          * Make sure that calling uscript_setRunText with a NULL text pointer
   2189          * and a non-zero text length returns the correct error.
   2190          */
   2191         err = U_ZERO_ERROR;
   2192         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
   2193 
   2194         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2195             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2196         }
   2197 
   2198         /*
   2199          * Make sure that calling uscript_setRunText with a non-NULL text pointer
   2200          * and a zero text length returns the correct error.
   2201          */
   2202         err = U_ZERO_ERROR;
   2203         uscript_setRunText(scriptRun, testString, 0, &err);
   2204 
   2205         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2206             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2207         }
   2208 
   2209         /*
   2210          * Now call uscript_setRunText on the empty iterator
   2211          * and make sure that it works.
   2212          */
   2213         err = U_ZERO_ERROR;
   2214         uscript_setRunText(scriptRun, testString, stringLimit, &err);
   2215 
   2216         if (U_FAILURE(err)) {
   2217             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
   2218         } else {
   2219             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
   2220         }
   2221 
   2222         uscript_closeRun(scriptRun);
   2223 
   2224         /*
   2225          * Now open an iterator over the testString
   2226          * using uscript_openRun and make sure that it works
   2227          */
   2228         scriptRun = uscript_openRun(testString, stringLimit, &err);
   2229 
   2230         if (U_FAILURE(err)) {
   2231             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
   2232         } else {
   2233             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
   2234         }
   2235 
   2236         /* Now reset the iterator, and make sure
   2237          * that it still works.
   2238          */
   2239         uscript_resetRun(scriptRun);
   2240 
   2241         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
   2242 
   2243         /* Close the iterator */
   2244         uscript_closeRun(scriptRun);
   2245     }
   2246 }
   2247 
   2248 /* test additional, non-core properties */
   2249 static void
   2250 TestAdditionalProperties() {
   2251     /* test data for u_charAge() */
   2252     static const struct {
   2253         UChar32 c;
   2254         UVersionInfo version;
   2255     } charAges[]={
   2256         {0x41,    { 1, 1, 0, 0 }},
   2257         {0xffff,  { 1, 1, 0, 0 }},
   2258         {0x20ab,  { 2, 0, 0, 0 }},
   2259         {0x2fffe, { 2, 0, 0, 0 }},
   2260         {0x20ac,  { 2, 1, 0, 0 }},
   2261         {0xfb1d,  { 3, 0, 0, 0 }},
   2262         {0x3f4,   { 3, 1, 0, 0 }},
   2263         {0x10300, { 3, 1, 0, 0 }},
   2264         {0x220,   { 3, 2, 0, 0 }},
   2265         {0xff60,  { 3, 2, 0, 0 }}
   2266     };
   2267 
   2268     /* test data for u_hasBinaryProperty() */
   2269     static const int32_t
   2270     props[][3]={ /* code point, property, value */
   2271         { 0x0627, UCHAR_ALPHABETIC, TRUE },
   2272         { 0x1034a, UCHAR_ALPHABETIC, TRUE },
   2273         { 0x2028, UCHAR_ALPHABETIC, FALSE },
   2274 
   2275         { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
   2276         { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
   2277 
   2278         { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
   2279         { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
   2280 
   2281         { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
   2282         { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
   2283 
   2284         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
   2285         { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
   2286         { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
   2287         { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
   2288         { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
   2289 
   2290         { 0x058a, UCHAR_DASH, TRUE },
   2291         { 0x007e, UCHAR_DASH, FALSE },
   2292 
   2293         { 0x0c4d, UCHAR_DIACRITIC, TRUE },
   2294         { 0x3000, UCHAR_DIACRITIC, FALSE },
   2295 
   2296         { 0x0e46, UCHAR_EXTENDER, TRUE },
   2297         { 0x0020, UCHAR_EXTENDER, FALSE },
   2298 
   2299 #if !UCONFIG_NO_NORMALIZATION
   2300         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
   2301         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
   2302         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
   2303 
   2304         { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
   2305         { 0x0308, UCHAR_NFD_INERT, FALSE },
   2306 
   2307         { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
   2308         { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
   2309 
   2310         { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
   2311         { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
   2312         { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
   2313         { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
   2314         { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
   2315         { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
   2316 
   2317         { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
   2318         { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
   2319 
   2320         { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
   2321         { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
   2322         { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
   2323         { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
   2324         { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
   2325         { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
   2326 #endif
   2327 
   2328         { 0x0044, UCHAR_HEX_DIGIT, TRUE },
   2329         { 0xff46, UCHAR_HEX_DIGIT, TRUE },
   2330         { 0x0047, UCHAR_HEX_DIGIT, FALSE },
   2331 
   2332         { 0x30fb, UCHAR_HYPHEN, TRUE },
   2333         { 0xfe58, UCHAR_HYPHEN, FALSE },
   2334 
   2335         { 0x2172, UCHAR_ID_CONTINUE, TRUE },
   2336         { 0x0307, UCHAR_ID_CONTINUE, TRUE },
   2337         { 0x005c, UCHAR_ID_CONTINUE, FALSE },
   2338 
   2339         { 0x2172, UCHAR_ID_START, TRUE },
   2340         { 0x007a, UCHAR_ID_START, TRUE },
   2341         { 0x0039, UCHAR_ID_START, FALSE },
   2342 
   2343         { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
   2344         { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
   2345         { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
   2346 
   2347         { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
   2348         { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
   2349 
   2350         { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
   2351         { 0x0345, UCHAR_LOWERCASE, TRUE },
   2352         { 0x0030, UCHAR_LOWERCASE, FALSE },
   2353 
   2354         { 0x1d7a9, UCHAR_MATH, TRUE },
   2355         { 0x2135, UCHAR_MATH, TRUE },
   2356         { 0x0062, UCHAR_MATH, FALSE },
   2357 
   2358         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
   2359         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
   2360         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
   2361 
   2362         { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
   2363         { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
   2364         { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
   2365 
   2366         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
   2367         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
   2368 
   2369         { 0x1d44a, UCHAR_UPPERCASE, TRUE },
   2370         { 0x2162, UCHAR_UPPERCASE, TRUE },
   2371         { 0x0345, UCHAR_UPPERCASE, FALSE },
   2372 
   2373         { 0x0020, UCHAR_WHITE_SPACE, TRUE },
   2374         { 0x202f, UCHAR_WHITE_SPACE, TRUE },
   2375         { 0x3001, UCHAR_WHITE_SPACE, FALSE },
   2376 
   2377         { 0x0711, UCHAR_XID_CONTINUE, TRUE },
   2378         { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
   2379         { 0x007c, UCHAR_XID_CONTINUE, FALSE },
   2380 
   2381         { 0x16ee, UCHAR_XID_START, TRUE },
   2382         { 0x23456, UCHAR_XID_START, TRUE },
   2383         { 0x1d1aa, UCHAR_XID_START, FALSE },
   2384 
   2385         /*
   2386          * Version break:
   2387          * The following properties are only supported starting with the
   2388          * Unicode version indicated in the second field.
   2389          */
   2390         { -1, 0x320, 0 },
   2391 
   2392         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
   2393         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
   2394         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
   2395 
   2396         { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
   2397         { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
   2398         { 0xe0001, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
   2399         { 0xe0100, UCHAR_DEPRECATED, FALSE },
   2400 
   2401         { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
   2402         { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
   2403         { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
   2404         { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
   2405 
   2406         { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
   2407         { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
   2408         { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
   2409         { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
   2410 
   2411         { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
   2412         { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
   2413 
   2414         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
   2415         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
   2416 
   2417         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
   2418         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
   2419 
   2420         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
   2421         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
   2422 
   2423         { 0x2e9b, UCHAR_RADICAL, TRUE },
   2424         { 0x4e00, UCHAR_RADICAL, FALSE },
   2425 
   2426         { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
   2427         { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
   2428 
   2429         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
   2430         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
   2431 
   2432         { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
   2433 
   2434         { 0x002e, UCHAR_S_TERM, TRUE },
   2435         { 0x0061, UCHAR_S_TERM, FALSE },
   2436 
   2437         { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
   2438         { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
   2439         { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
   2440         { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
   2441 
   2442         /* enum/integer type properties */
   2443 
   2444         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
   2445         /* test default Bidi classes for unassigned code points */
   2446         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2447         { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2448         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2449         { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
   2450         { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
   2451         { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2452         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2453         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2454         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2455         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2456         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2457 
   2458         { 0x061d, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2459         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2460         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2461         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2462         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2463         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2464         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2465 
   2466         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
   2467         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
   2468         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
   2469         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
   2470         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
   2471         { 0x1CBF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2472         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
   2473         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
   2474         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2475         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2476         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
   2477 
   2478         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
   2479         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
   2480 
   2481         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
   2482         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
   2483         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
   2484         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2485         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2486         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2487         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
   2488         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
   2489         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
   2490 
   2491         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2492         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
   2493         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2494         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
   2495         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2496         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
   2497         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2498         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2499         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2500         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2501         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2502         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2503         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2504         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
   2505         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2506         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2507         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2508 
   2509         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
   2510         { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
   2511         { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
   2512 
   2513         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
   2514         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
   2515         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
   2516         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
   2517         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
   2518 
   2519         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
   2520         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
   2521         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
   2522         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
   2523         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
   2524         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2525         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2526         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2527 
   2528         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
   2529         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
   2530         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
   2531         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
   2532         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
   2533         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2534         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2535         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2536         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2537         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2538         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2539         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2540         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
   2541         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
   2542         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
   2543         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
   2544 
   2545         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
   2546 
   2547         /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
   2548 
   2549         { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2550         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2551         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2552         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2553         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2554         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2555         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2556 
   2557         { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2558         { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2559         { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2560         { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2561 
   2562         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2563         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2564         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2565         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2566         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2567         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2568 
   2569         { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2570         { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2571         { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2572         { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2573 
   2574         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2575         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2576         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2577         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2578         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2579         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2580         { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2581 
   2582         { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2583         { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2584         { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2585         { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2586 
   2587         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2588         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2589         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2590         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2591 
   2592         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2593         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2594         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2595         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2596         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2597 
   2598         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2599 
   2600         { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
   2601 
   2602         { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
   2603         { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
   2604         { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
   2605 
   2606         { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2607         { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2608         { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2609         { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
   2610         { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
   2611 
   2612         { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
   2613         { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
   2614         { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
   2615 
   2616         { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
   2617         { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
   2618         { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
   2619         { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
   2620 
   2621         { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
   2622         { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
   2623         { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
   2624         { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
   2625         { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
   2626         { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
   2627 
   2628         { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
   2629         { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
   2630         { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
   2631         { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
   2632 
   2633         { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
   2634         { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
   2635         { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
   2636         { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
   2637 
   2638         { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
   2639         { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
   2640         { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
   2641         { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
   2642 
   2643         { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
   2644 
   2645         /* unassigned code points in new default Bidi R blocks */
   2646         { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2647         { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2648 
   2649         /* test some script codes >127 */
   2650         { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
   2651         { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
   2652         { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
   2653 
   2654         { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
   2655 
   2656         /* value changed in Unicode 6.0 */
   2657         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
   2658 
   2659         { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
   2660 
   2661         /* unassigned code points in new/changed default Bidi AL blocks */
   2662         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2663         { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2664 
   2665         { -1, 0x630, 0 }, /* version break for Unicode 6.3 */
   2666 
   2667         /* unassigned code points in the currency symbols block now default to ET */
   2668         { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
   2669         { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
   2670 
   2671         /* new property in Unicode 6.3 */
   2672         { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
   2673         { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
   2674         { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
   2675         { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
   2676         { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
   2677         { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
   2678 
   2679         { -1, 0x700, 0 }, /* version break for Unicode 7.0 */
   2680 
   2681         /* new character range with Joining_Group values */
   2682         { 0x10ABF, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
   2683         { 0x10AC0, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_ALEPH },
   2684         { 0x10AC1, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_BETH },
   2685         { 0x10AEF, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_HUNDRED },
   2686         { 0x10AF0, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
   2687 
   2688         /* undefined UProperty values */
   2689         { 0x61, 0x4a7, 0 },
   2690         { 0x234bc, 0x15ed, 0 }
   2691     };
   2692 
   2693     UVersionInfo version;
   2694     UChar32 c;
   2695     int32_t i, result, uVersion;
   2696     UProperty which;
   2697 
   2698     /* what is our Unicode version? */
   2699     u_getUnicodeVersion(version);
   2700     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
   2701 
   2702     u_charAge(0x20, version);
   2703     if(version[0]==0) {
   2704         /* no additional properties available */
   2705         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
   2706         return;
   2707     }
   2708 
   2709     /* test u_charAge() */
   2710     for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
   2711         u_charAge(charAges[i].c, version);
   2712         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
   2713             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
   2714                 charAges[i].c,
   2715                 version[0], version[1], version[2], version[3],
   2716                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
   2717         }
   2718     }
   2719 
   2720     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
   2721         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
   2722         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
   2723         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
   2724         u_getIntPropertyMinValue(0x2345)!=0
   2725     ) {
   2726         log_err("error: u_getIntPropertyMinValue() wrong\n");
   2727     }
   2728     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
   2729         log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
   2730     }
   2731     if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
   2732         log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
   2733     }
   2734     if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
   2735         log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
   2736     }
   2737     if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
   2738         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
   2739     }
   2740     if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
   2741         log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
   2742     }
   2743     if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
   2744         log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
   2745     }
   2746     if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
   2747         log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
   2748     }
   2749     if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
   2750         log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
   2751     }
   2752     if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
   2753         log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
   2754     }
   2755     if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
   2756         log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
   2757     }
   2758     if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
   2759         log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
   2760     }
   2761     if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
   2762         log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
   2763     }
   2764     if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
   2765         log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
   2766     }
   2767     if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_COUNT-1) {
   2768         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE) wrong\n");
   2769     }
   2770     /*JB#2410*/
   2771     if( u_getIntPropertyMaxValue(0x2345)!=-1) {
   2772         log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
   2773     }
   2774     if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
   2775         log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
   2776     }
   2777     if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
   2778         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
   2779     }
   2780     if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
   2781         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
   2782     }
   2783     if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
   2784         log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
   2785     }
   2786 
   2787     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
   2788     for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
   2789         const char *whichName;
   2790 
   2791         if(props[i][0]<0) {
   2792             /* Unicode version break */
   2793             if(uVersion<props[i][1]) {
   2794                 break; /* do not test properties that are not yet supported */
   2795             } else {
   2796                 continue; /* skip this row */
   2797             }
   2798         }
   2799 
   2800         c=(UChar32)props[i][0];
   2801         which=(UProperty)props[i][1];
   2802         whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
   2803 
   2804         if(which<UCHAR_INT_START) {
   2805             result=u_hasBinaryProperty(c, which);
   2806             if(result!=props[i][2]) {
   2807                 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
   2808                         c, whichName, result, i);
   2809             }
   2810         }
   2811 
   2812         result=u_getIntPropertyValue(c, which);
   2813         if(result!=props[i][2]) {
   2814             log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
   2815                     c, whichName, result, props[i][2], i);
   2816         }
   2817 
   2818         /* test separate functions, too */
   2819         switch((UProperty)props[i][1]) {
   2820         case UCHAR_ALPHABETIC:
   2821             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2822                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
   2823                         props[i][0], result, i);
   2824             }
   2825             break;
   2826         case UCHAR_LOWERCASE:
   2827             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2828                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
   2829                         props[i][0], result, i);
   2830             }
   2831             break;
   2832         case UCHAR_UPPERCASE:
   2833             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2834                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
   2835                         props[i][0], result, i);
   2836             }
   2837             break;
   2838         case UCHAR_WHITE_SPACE:
   2839             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2840                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
   2841                         props[i][0], result, i);
   2842             }
   2843             break;
   2844         default:
   2845             break;
   2846         }
   2847     }
   2848 }
   2849 
   2850 static void
   2851 TestNumericProperties(void) {
   2852     /* see UnicodeData.txt, DerivedNumericValues.txt */
   2853     static const struct {
   2854         UChar32 c;
   2855         int32_t type;
   2856         double numValue;
   2857     } values[]={
   2858         { 0x0F33, U_NT_NUMERIC, -1./2. },
   2859         { 0x0C66, U_NT_DECIMAL, 0 },
   2860         { 0x96f6, U_NT_NUMERIC, 0 },
   2861         { 0xa833, U_NT_NUMERIC, 1./16. },
   2862         { 0x2152, U_NT_NUMERIC, 1./10. },
   2863         { 0x2151, U_NT_NUMERIC, 1./9. },
   2864         { 0x1245f, U_NT_NUMERIC, 1./8. },
   2865         { 0x2150, U_NT_NUMERIC, 1./7. },
   2866         { 0x2159, U_NT_NUMERIC, 1./6. },
   2867         { 0x09f6, U_NT_NUMERIC, 3./16. },
   2868         { 0x2155, U_NT_NUMERIC, 1./5. },
   2869         { 0x00BD, U_NT_NUMERIC, 1./2. },
   2870         { 0x0031, U_NT_DECIMAL, 1. },
   2871         { 0x4e00, U_NT_NUMERIC, 1. },
   2872         { 0x58f1, U_NT_NUMERIC, 1. },
   2873         { 0x10320, U_NT_NUMERIC, 1. },
   2874         { 0x0F2B, U_NT_NUMERIC, 3./2. },
   2875         { 0x00B2, U_NT_DIGIT, 2. },
   2876         { 0x5f10, U_NT_NUMERIC, 2. },
   2877         { 0x1813, U_NT_DECIMAL, 3. },
   2878         { 0x5f0e, U_NT_NUMERIC, 3. },
   2879         { 0x2173, U_NT_NUMERIC, 4. },
   2880         { 0x8086, U_NT_NUMERIC, 4. },
   2881         { 0x278E, U_NT_DIGIT, 5. },
   2882         { 0x1D7F2, U_NT_DECIMAL, 6. },
   2883         { 0x247A, U_NT_DIGIT, 7. },
   2884         { 0x7396, U_NT_NUMERIC, 9. },
   2885         { 0x1372, U_NT_NUMERIC, 10. },
   2886         { 0x216B, U_NT_NUMERIC, 12. },
   2887         { 0x16EE, U_NT_NUMERIC, 17. },
   2888         { 0x249A, U_NT_NUMERIC, 19. },
   2889         { 0x303A, U_NT_NUMERIC, 30. },
   2890         { 0x5345, U_NT_NUMERIC, 30. },
   2891         { 0x32B2, U_NT_NUMERIC, 37. },
   2892         { 0x1375, U_NT_NUMERIC, 40. },
   2893         { 0x10323, U_NT_NUMERIC, 50. },
   2894         { 0x0BF1, U_NT_NUMERIC, 100. },
   2895         { 0x964c, U_NT_NUMERIC, 100. },
   2896         { 0x217E, U_NT_NUMERIC, 500. },
   2897         { 0x2180, U_NT_NUMERIC, 1000. },
   2898         { 0x4edf, U_NT_NUMERIC, 1000. },
   2899         { 0x2181, U_NT_NUMERIC, 5000. },
   2900         { 0x137C, U_NT_NUMERIC, 10000. },
   2901         { 0x4e07, U_NT_NUMERIC, 10000. },
   2902         { 0x12432, U_NT_NUMERIC, 216000. },
   2903         { 0x12433, U_NT_NUMERIC, 432000. },
   2904         { 0x4ebf, U_NT_NUMERIC, 100000000. },
   2905         { 0x5146, U_NT_NUMERIC, 1000000000000. },
   2906         { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2907         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2908         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2909         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2910         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2911         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2912         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2913         { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
   2914     };
   2915 
   2916     double nv;
   2917     UChar32 c;
   2918     int32_t i, type;
   2919 
   2920     for(i=0; i<UPRV_LENGTHOF(values); ++i) {
   2921         c=values[i].c;
   2922         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
   2923         nv=u_getNumericValue(c);
   2924 
   2925         if(type!=values[i].type) {
   2926             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
   2927         }
   2928         if(0.000001 <= fabs(nv - values[i].numValue)) {
   2929             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
   2930         }
   2931     }
   2932 }
   2933 
   2934 /**
   2935  * Test the property names and property value names API.
   2936  */
   2937 static void
   2938 TestPropertyNames(void) {
   2939     int32_t p, v, choice=0, rev;
   2940     UBool atLeastSomething = FALSE;
   2941 
   2942     for (p=0; ; ++p) {
   2943         UProperty propEnum = (UProperty)p;
   2944         UBool sawProp = FALSE;
   2945         if(p > 10 && !atLeastSomething) {
   2946           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
   2947           return;
   2948         }
   2949 
   2950         for (choice=0; ; ++choice) {
   2951             const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
   2952             if (name) {
   2953                 if (!sawProp)
   2954                     log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
   2955                 log_verbose("%d=\"%s\"", choice, name);
   2956                 sawProp = TRUE;
   2957                 atLeastSomething = TRUE;
   2958 
   2959                 /* test reverse mapping */
   2960                 rev = u_getPropertyEnum(name);
   2961                 if (rev != p) {
   2962                     log_err("Property round-trip failure: %d -> %s -> %d\n",
   2963                             p, name, rev);
   2964                 }
   2965             }
   2966             if (!name && choice>0) break;
   2967         }
   2968         if (sawProp) {
   2969             /* looks like a valid property; check the values */
   2970             const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
   2971             int32_t max = 0;
   2972             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
   2973                 max = 255;
   2974             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
   2975                 /* it's far too slow to iterate all the way up to
   2976                    the real max, U_GC_P_MASK */
   2977                 max = U_GC_NL_MASK;
   2978             } else if (p == UCHAR_BLOCK) {
   2979                 /* UBlockCodes, unlike other values, start at 1 */
   2980                 max = 1;
   2981             }
   2982             log_verbose("\n");
   2983             for (v=-1; ; ++v) {
   2984                 UBool sawValue = FALSE;
   2985                 for (choice=0; ; ++choice) {
   2986                     const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
   2987                     if (vname) {
   2988                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
   2989                         log_verbose("%d=\"%s\"", choice, vname);
   2990                         sawValue = TRUE;
   2991 
   2992                         /* test reverse mapping */
   2993                         rev = u_getPropertyValueEnum(propEnum, vname);
   2994                         if (rev != v) {
   2995                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
   2996                                     pname, v, vname, rev);
   2997                         }
   2998                     }
   2999                     if (!vname && choice>0) break;
   3000                 }
   3001                 if (sawValue) {
   3002                     log_verbose("\n");
   3003                 }
   3004                 if (!sawValue && v>=max) break;
   3005             }
   3006         }
   3007         if (!sawProp) {
   3008             if (p>=UCHAR_STRING_LIMIT) {
   3009                 break;
   3010             } else if (p>=UCHAR_DOUBLE_LIMIT) {
   3011                 p = UCHAR_STRING_START - 1;
   3012             } else if (p>=UCHAR_MASK_LIMIT) {
   3013                 p = UCHAR_DOUBLE_START - 1;
   3014             } else if (p>=UCHAR_INT_LIMIT) {
   3015                 p = UCHAR_MASK_START - 1;
   3016             } else if (p>=UCHAR_BINARY_LIMIT) {
   3017                 p = UCHAR_INT_START - 1;
   3018             }
   3019         }
   3020     }
   3021 }
   3022 
   3023 /**
   3024  * Test the property values API.  See JB#2410.
   3025  */
   3026 static void
   3027 TestPropertyValues(void) {
   3028     int32_t i, p, min, max;
   3029     UErrorCode ec;
   3030 
   3031     /* Min should be 0 for everything. */
   3032     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
   3033     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
   3034         UProperty propEnum = (UProperty)p;
   3035         min = u_getIntPropertyMinValue(propEnum);
   3036         if (min != 0) {
   3037             if (p == UCHAR_BLOCK) {
   3038                 /* This is okay...for now.  See JB#2487.
   3039                    TODO Update this for JB#2487. */
   3040             } else {
   3041                 const char* name;
   3042                 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
   3043                 if (name == NULL)
   3044                     name = "<ERROR>";
   3045                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
   3046                         name, min);
   3047             }
   3048         }
   3049     }
   3050 
   3051     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
   3052         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
   3053         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
   3054     }
   3055 
   3056     /* Max should be -1 for invalid properties. */
   3057     max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
   3058     if (max != -1) {
   3059         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
   3060                 max);
   3061     }
   3062 
   3063     /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
   3064     for (i=0; i<2; ++i) {
   3065         int32_t script;
   3066         const char* desc;
   3067         ec = U_ZERO_ERROR;
   3068         switch (i) {
   3069         case 0:
   3070             script = uscript_getScript(-1, &ec);
   3071             desc = "uscript_getScript(-1)";
   3072             break;
   3073         case 1:
   3074             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
   3075             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
   3076             break;
   3077         default:
   3078             log_err("Internal test error. Too many scripts\n");
   3079             return;
   3080         }
   3081         /* We don't explicitly test ec.  It should be U_FAILURE but it
   3082            isn't documented as such. */
   3083         if (script != (int32_t)USCRIPT_INVALID_CODE) {
   3084             log_err("FAIL: %s = %d, exp. 0\n",
   3085                     desc, script);
   3086         }
   3087     }
   3088 }
   3089 
   3090 /* various tests for consistency of UCD data and API behavior */
   3091 static void
   3092 TestConsistency() {
   3093     char buffer[300];
   3094     USet *set1, *set2, *set3, *set4;
   3095     UErrorCode errorCode;
   3096 
   3097     UChar32 start, end;
   3098     int32_t i, length;
   3099 
   3100     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
   3101     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
   3102     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
   3103     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
   3104     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
   3105 
   3106     U_STRING_DECL(mathBlocksPattern,
   3107         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
   3108         214);
   3109     U_STRING_DECL(mathPattern, "[:Math:]", 8);
   3110     U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
   3111     U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
   3112     U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
   3113 
   3114     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
   3115     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
   3116     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
   3117     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
   3118     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
   3119 
   3120     U_STRING_INIT(mathBlocksPattern,
   3121         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
   3122         214);
   3123     U_STRING_INIT(mathPattern, "[:Math:]", 8);
   3124     U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
   3125     U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
   3126     U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
   3127 
   3128     /*
   3129      * It used to be that UCD.html and its precursors said
   3130      * "Those dashes used to mark connections between pieces of words,
   3131      *  plus the Katakana middle dot."
   3132      *
   3133      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
   3134      * but not from Hyphen.
   3135      * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
   3136      * Therefore, do not show errors when testing the Hyphen property.
   3137      */
   3138     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
   3139                 "known to the UTC and not considered errors.\n");
   3140 
   3141     errorCode=U_ZERO_ERROR;
   3142     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
   3143     set2=uset_openPattern(dashPattern, 8, &errorCode);
   3144     if(U_SUCCESS(errorCode)) {
   3145         /* remove the Katakana middle dot(s) from set1 */
   3146         uset_remove(set1, 0x30fb);
   3147         uset_remove(set1, 0xff65); /* halfwidth variant */
   3148         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
   3149     } else {
   3150         log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3151     }
   3152 
   3153     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
   3154     set3=uset_openPattern(formatPattern, 6, &errorCode);
   3155     set4=uset_openPattern(alphaPattern, 14, &errorCode);
   3156     if(U_SUCCESS(errorCode)) {
   3157         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
   3158         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
   3159         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
   3160     } else {
   3161         log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3162     }
   3163 
   3164     uset_close(set1);
   3165     uset_close(set2);
   3166     uset_close(set3);
   3167     uset_close(set4);
   3168 
   3169     /*
   3170      * Check that each lowercase character has "small" in its name
   3171      * and not "capital".
   3172      * There are some such characters, some of which seem odd.
   3173      * Use the verbose flag to see these notices.
   3174      */
   3175     errorCode=U_ZERO_ERROR;
   3176     set1=uset_openPattern(lowerPattern, 13, &errorCode);
   3177     if(U_SUCCESS(errorCode)) {
   3178         for(i=0;; ++i) {
   3179             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
   3180             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
   3181                 break; /* done */
   3182             }
   3183             if(U_FAILURE(errorCode)) {
   3184                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
   3185                         i, u_errorName(errorCode));
   3186                 break;
   3187             }
   3188             if(length!=0) {
   3189                 break; /* done with code points, got a string or -1 */
   3190             }
   3191 
   3192             while(start<=end) {
   3193                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
   3194                 if(U_FAILURE(errorCode)) {
   3195                     log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
   3196                     errorCode=U_ZERO_ERROR;
   3197                 }
   3198                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
   3199                     strstr(buffer, "SMALL CAPITAL")==NULL
   3200                 ) {
   3201                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
   3202                 }
   3203                 ++start;
   3204             }
   3205         }
   3206     } else {
   3207         log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3208     }
   3209     uset_close(set1);
   3210 
   3211     /* verify that all assigned characters in Math blocks are exactly Math characters */
   3212     errorCode=U_ZERO_ERROR;
   3213     set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
   3214     set2=uset_openPattern(mathPattern, 8, &errorCode);
   3215     set3=uset_openPattern(unassignedPattern, 6, &errorCode);
   3216     if(U_SUCCESS(errorCode)) {
   3217         uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
   3218         uset_complement(set3);      /* assigned characters */
   3219         uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
   3220         compareUSets(set1, set2,
   3221                      "[assigned Math block chars]", "[math blocks]&[:Math:]",
   3222                      TRUE);
   3223     } else {
   3224         log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3225     }
   3226     uset_close(set1);
   3227     uset_close(set2);
   3228     uset_close(set3);
   3229 
   3230     /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
   3231     errorCode=U_ZERO_ERROR;
   3232     set1=uset_openPattern(unknownPattern, 14, &errorCode);
   3233     set2=uset_openPattern(reservedPattern, 20, &errorCode);
   3234     if(U_SUCCESS(errorCode)) {
   3235         compareUSets(set1, set2,
   3236                      "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
   3237                      TRUE);
   3238     } else {
   3239         log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3240     }
   3241     uset_close(set1);
   3242     uset_close(set2);
   3243 }
   3244 
   3245 /*
   3246  * Starting with ICU4C 3.4, the core Unicode properties files
   3247  * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
   3248  * are hardcoded in the common DLL and therefore not included
   3249  * in the data package any more.
   3250  * Tests requiring these files are disabled so that
   3251  * we need not jump through hoops (like adding snapshots of these files
   3252  * to testdata).
   3253  * See Jitterbug 4497.
   3254  */
   3255 #define HARDCODED_DATA_4497 1
   3256 
   3257 /* API coverage for ucase.c */
   3258 static void TestUCase() {
   3259 #if !HARDCODED_DATA_4497
   3260     UDataMemory *pData;
   3261     UCaseProps *csp;
   3262     const UCaseProps *ccsp;
   3263     UErrorCode errorCode;
   3264 
   3265     /* coverage for ucase_openBinary() */
   3266     errorCode=U_ZERO_ERROR;
   3267     pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
   3268     if(U_FAILURE(errorCode)) {
   3269         log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
   3270                     u_errorName(errorCode));
   3271         return;
   3272     }
   3273 
   3274     csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
   3275     if(U_FAILURE(errorCode)) {
   3276         log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
   3277                 u_errorName(errorCode));
   3278         udata_close(pData);
   3279         return;
   3280     }
   3281 
   3282     if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
   3283         log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
   3284     }
   3285 
   3286     ucase_close(csp);
   3287     udata_close(pData);
   3288 
   3289     /* coverage for ucase_getDummy() */
   3290     errorCode=U_ZERO_ERROR;
   3291     ccsp=ucase_getDummy(&errorCode);
   3292     if(ucase_tolower(ccsp, 0x41)!=0x41) {
   3293         log_err("ucase_tolower(dummy, A)!=A\n");
   3294     }
   3295 #endif
   3296 }
   3297 
   3298 /* API coverage for ubidi_props.c */
   3299 static void TestUBiDiProps() {
   3300 #if !HARDCODED_DATA_4497
   3301     UDataMemory *pData;
   3302     UBiDiProps *bdp;
   3303     const UBiDiProps *cbdp;
   3304     UErrorCode errorCode;
   3305 
   3306     /* coverage for ubidi_openBinary() */
   3307     errorCode=U_ZERO_ERROR;
   3308     pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
   3309     if(U_FAILURE(errorCode)) {
   3310         log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
   3311                     u_errorName(errorCode));
   3312         return;
   3313     }
   3314 
   3315     bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
   3316     if(U_FAILURE(errorCode)) {
   3317         log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
   3318                 u_errorName(errorCode));
   3319         udata_close(pData);
   3320         return;
   3321     }
   3322 
   3323     if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
   3324         log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
   3325     }
   3326 
   3327     ubidi_closeProps(bdp);
   3328     udata_close(pData);
   3329 
   3330     /* coverage for ubidi_getDummy() */
   3331     errorCode=U_ZERO_ERROR;
   3332     cbdp=ubidi_getDummy(&errorCode);
   3333     if(ubidi_getClass(cbdp, 0x20)!=0) {
   3334         log_err("ubidi_getClass(dummy, space)!=0\n");
   3335     }
   3336 #endif
   3337 }
   3338 
   3339 /* test case folding, compare return values with CaseFolding.txt ------------ */
   3340 
   /*
    * Bit flags recording which case foldings of a character have already been
    * tested; CF_ALL is the union of the three individual flags.
    */
   enum {
       CF_SIMPLE = 1 << 0,
       CF_FULL   = 1 << 1,
       CF_TURKIC = 1 << 2,
       CF_ALL    = CF_SIMPLE | CF_FULL | CF_TURKIC
   };
   3348 
   3349 static void
   3350 testFold(UChar32 c, int which,
   3351          UChar32 simple, UChar32 turkic,
   3352          const UChar *full, int32_t fullLength,
   3353          const UChar *turkicFull, int32_t turkicFullLength) {
   3354     UChar s[2], t[32];
   3355     UChar32 c2;
   3356     int32_t length, length2;
   3357 
   3358     UErrorCode errorCode=U_ZERO_ERROR;
   3359 
   3360     length=0;
   3361     U16_APPEND_UNSAFE(s, length, c);
   3362 
   3363     if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
   3364         log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
   3365     }
   3366     if((which&CF_FULL)!=0) {
   3367         length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, 0, &errorCode);
   3368         if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
   3369             log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
   3370         }
   3371     }
   3372     if((which&CF_TURKIC)!=0) {
   3373         if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
   3374             log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
   3375         }
   3376 
   3377         length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
   3378         if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
   3379             log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
   3380         }
   3381     }
   3382 }
   3383 
   3384 /* test that c case-folds to itself */
   3385 static void
   3386 testFoldToSelf(UChar32 c, int which) {
   3387     UChar s[2];
   3388     int32_t length;
   3389 
   3390     length=0;
   3391     U16_APPEND_UNSAFE(s, length, c);
   3392     testFold(c, which, c, c, s, length, s, length);
   3393 }
   3394 
   3395 struct CaseFoldingData {
   3396     USet *notSeen;
   3397     UChar32 prev, prevSimple;
   3398     UChar prevFull[32];
   3399     int32_t prevFullLength;
   3400     int which;
   3401 };
   3402 typedef struct CaseFoldingData CaseFoldingData;
   3403 
   3404 static void U_CALLCONV
   3405 caseFoldingLineFn(void *context,
   3406                   char *fields[][2], int32_t fieldCount,
   3407                   UErrorCode *pErrorCode) {
   3408     CaseFoldingData *pData=(CaseFoldingData *)context;
   3409     char *end;
   3410     UChar full[32];
   3411     UChar32 c, prev, simple;
   3412     int32_t count;
   3413     int which;
   3414     char status;
   3415 
   3416     /* get code point */
   3417     const char *s=u_skipWhitespace(fields[0][0]);
   3418     if(0==strncmp(s, "0000..10FFFF", 12)) {
   3419         /*
   3420          * Ignore the line
   3421          * # @missing: 0000..10FFFF; C; <code point>
   3422          * because maps-to-self is already our default, and this line breaks this parser.
   3423          */
   3424         return;
   3425     }
   3426     c=(UChar32)strtoul(s, &end, 16);
   3427     end=(char *)u_skipWhitespace(end);
   3428     if(end<=fields[0][0] || end!=fields[0][1]) {
   3429         log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
   3430         *pErrorCode=U_PARSE_ERROR;
   3431         return;
   3432     }
   3433 
   3434     /* get the status of this mapping */
   3435     status=*u_skipWhitespace(fields[1][0]);
   3436     if(status!='C' && status!='S' && status!='F' && status!='T') {
   3437         log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
   3438         *pErrorCode=U_PARSE_ERROR;
   3439         return;
   3440     }
   3441 
   3442     /* get the mapping */
   3443     count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
   3444     if(U_FAILURE(*pErrorCode)) {
   3445         log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
   3446         return;
   3447     }
   3448 
   3449     /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
   3450     if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
   3451         simple=c;
   3452     }
   3453 
   3454     if(c!=(prev=pData->prev)) {
   3455         /*
   3456          * Test remaining mappings for the previous code point.
   3457          * If a turkic folding was not mentioned, then it should fold the same
   3458          * as the regular simple case folding.
   3459          */
   3460         UChar prevString[2];
   3461         int32_t length;
   3462 
   3463         length=0;
   3464         U16_APPEND_UNSAFE(prevString, length, prev);
   3465         testFold(prev, (~pData->which)&CF_ALL,
   3466                  prev, pData->prevSimple,
   3467                  prevString, length,
   3468                  pData->prevFull, pData->prevFullLength);
   3469         pData->prev=pData->prevSimple=c;
   3470         length=0;
   3471         U16_APPEND_UNSAFE(pData->prevFull, length, c);
   3472         pData->prevFullLength=length;
   3473         pData->which=0;
   3474     }
   3475 
   3476     /*
   3477      * Turn the status into a bit set of case foldings to test.
   3478      * Remember non-Turkic case foldings as defaults for Turkic mode.
   3479      */
   3480     switch(status) {
   3481     case 'C':
   3482         which=CF_SIMPLE|CF_FULL;
   3483         pData->prevSimple=simple;
   3484         u_memcpy(pData->prevFull, full, count);
   3485         pData->prevFullLength=count;
   3486         break;
   3487     case 'S':
   3488         which=CF_SIMPLE;
   3489         pData->prevSimple=simple;
   3490         break;
   3491     case 'F':
   3492         which=CF_FULL;
   3493         u_memcpy(pData->prevFull, full, count);
   3494         pData->prevFullLength=count;
   3495         break;
   3496     case 'T':
   3497         which=CF_TURKIC;
   3498         break;
   3499     default:
   3500         which=0;
   3501         break; /* won't happen because of test above */
   3502     }
   3503 
   3504     testFold(c, which, simple, simple, full, count, full, count);
   3505 
   3506     /* remember which case foldings of c have been tested */
   3507     pData->which|=which;
   3508 
   3509     /* remove c from the set of ones not mentioned in CaseFolding.txt */
   3510     uset_remove(pData->notSeen, c);
   3511 }
   3512 
   3513 static void
   3514 TestCaseFolding() {
   3515     CaseFoldingData data={ NULL };
   3516     char *fields[3][2];
   3517     UErrorCode errorCode;
   3518 
   3519     static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
   3520 
   3521     errorCode=U_ZERO_ERROR;
   3522     /* test BMP & plane 1 - nothing interesting above */
   3523     data.notSeen=uset_open(0, 0x1ffff);
   3524     data.prevFullLength=1; /* length of full case folding of U+0000 */
   3525 
   3526     parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
   3527     if(U_SUCCESS(errorCode)) {
   3528         int32_t i, start, end;
   3529 
   3530         /* add a pseudo-last line to finish testing of the actual last one */
   3531         fields[0][0]=lastLine;
   3532         fields[0][1]=lastLine+6;
   3533         fields[1][0]=lastLine+7;
   3534         fields[1][1]=lastLine+9;
   3535         fields[2][0]=lastLine+10;
   3536         fields[2][1]=lastLine+17;
   3537         caseFoldingLineFn(&data, fields, 3, &errorCode);
   3538 
   3539         /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
   3540         for(i=0;
   3541             0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
   3542                 U_SUCCESS(errorCode);
   3543             ++i
   3544         ) {
   3545             do {
   3546                 testFoldToSelf(start, CF_ALL);
   3547             } while(++start<=end);
   3548         }
   3549     }
   3550 
   3551     uset_close(data.notSeen);
   3552 }
   3553