Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2010, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /*******************************************************************************
      7 *
      8 * File CUCDTST.C
      9 *
     10 * Modification History:
     11 *        Name                     Description
     12 *     Madhu Katragadda            Ported for C API, added tests for string functions
     13 ********************************************************************************
     14 */
     15 
     16 #include <string.h>
     17 #include <math.h>
     18 #include <stdlib.h>
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/uchar.h"
     22 #include "unicode/putil.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/uloc.h"
     25 #include "unicode/unorm2.h"
     26 
     27 #include "cintltst.h"
     28 #include "putilimp.h"
     29 #include "uparse.h"
     30 #include "ucase.h"
     31 #include "ubidi_props.h"
     32 #include "uprops.h"
     33 #include "uset_imp.h"
     34 #include "usc_impl.h"
     35 #include "unormimp.h"
     36 #include "udatamem.h" /* for testing ucase_openBinary() */
     37 #include "cucdapi.h"
     38 
     39 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     40 
     41 /* prototypes --------------------------------------------------------------- */
     42 
     43 static void TestUpperLower(void);
     44 static void TestLetterNumber(void);
     45 static void TestMisc(void);
     46 static void TestPOSIX(void);
     47 static void TestControlPrint(void);
     48 static void TestIdentifier(void);
     49 static void TestUnicodeData(void);
     50 static void TestCodeUnit(void);
     51 static void TestCodePoint(void);
     52 static void TestCharLength(void);
     53 static void TestCharNames(void);
     54 static void TestMirroring(void);
     55 static void TestUScriptRunAPI(void);
     56 static void TestAdditionalProperties(void);
     57 static void TestNumericProperties(void);
     58 static void TestPropertyNames(void);
     59 static void TestPropertyValues(void);
     60 static void TestConsistency(void);
     61 static void TestUCase(void);
     62 static void TestUBiDiProps(void);
     63 static void TestCaseFolding(void);
     64 
     65 /* internal methods used */
     66 static int32_t MakeProp(char* str);
     67 static int32_t MakeDir(char* str);
     68 
     69 /* helpers ------------------------------------------------------------------ */
     70 
     71 static void
     72 parseUCDFile(const char *filename,
     73              char *fields[][2], int32_t fieldCount,
     74              UParseLineFn *lineFn, void *context,
     75              UErrorCode *pErrorCode) {
     76     char path[256];
     77     char backupPath[256];
     78 
     79     if(U_FAILURE(*pErrorCode)) {
     80         return;
     81     }
     82 
     83     /* Look inside ICU_DATA first */
     84     strcpy(path, u_getDataDirectory());
     85     strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
     86     strcat(path, filename);
     87 
     88     /* As a fallback, try to guess where the source data was located
     89      *    at the time ICU was built, and look there.
     90      */
     91     strcpy(backupPath, ctest_dataSrcDir());
     92     strcat(backupPath, U_FILE_SEP_STRING);
     93     strcat(backupPath, "unidata" U_FILE_SEP_STRING);
     94     strcat(backupPath, filename);
     95 
     96     u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
     97     if(*pErrorCode==U_FILE_ACCESS_ERROR) {
     98         *pErrorCode=U_ZERO_ERROR;
     99         u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
    100     }
    101     if(U_FAILURE(*pErrorCode)) {
    102         log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
    103     }
    104 }
    105 
    106 /* test data ---------------------------------------------------------------- */
    107 
    108 static const UChar  LAST_CHAR_CODE_IN_FILE = 0xFFFD;
    109 static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
    110 static const int32_t tagValues[] =
    111     {
    112     /* Mn */ U_NON_SPACING_MARK,
    113     /* Mc */ U_COMBINING_SPACING_MARK,
    114     /* Me */ U_ENCLOSING_MARK,
    115     /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    116     /* Nl */ U_LETTER_NUMBER,
    117     /* No */ U_OTHER_NUMBER,
    118     /* Zs */ U_SPACE_SEPARATOR,
    119     /* Zl */ U_LINE_SEPARATOR,
    120     /* Zp */ U_PARAGRAPH_SEPARATOR,
    121     /* Cc */ U_CONTROL_CHAR,
    122     /* Cf */ U_FORMAT_CHAR,
    123     /* Cs */ U_SURROGATE,
    124     /* Co */ U_PRIVATE_USE_CHAR,
    125     /* Cn */ U_UNASSIGNED,
    126     /* Lu */ U_UPPERCASE_LETTER,
    127     /* Ll */ U_LOWERCASE_LETTER,
    128     /* Lt */ U_TITLECASE_LETTER,
    129     /* Lm */ U_MODIFIER_LETTER,
    130     /* Lo */ U_OTHER_LETTER,
    131     /* Pc */ U_CONNECTOR_PUNCTUATION,
    132     /* Pd */ U_DASH_PUNCTUATION,
    133     /* Ps */ U_START_PUNCTUATION,
    134     /* Pe */ U_END_PUNCTUATION,
    135     /* Po */ U_OTHER_PUNCTUATION,
    136     /* Sm */ U_MATH_SYMBOL,
    137     /* Sc */ U_CURRENCY_SYMBOL,
    138     /* Sk */ U_MODIFIER_SYMBOL,
    139     /* So */ U_OTHER_SYMBOL,
    140     /* Pi */ U_INITIAL_PUNCTUATION,
    141     /* Pf */ U_FINAL_PUNCTUATION
    142     };
    143 
    144 static const char dirStrings[][5] = {
    145     "L",
    146     "R",
    147     "EN",
    148     "ES",
    149     "ET",
    150     "AN",
    151     "CS",
    152     "B",
    153     "S",
    154     "WS",
    155     "ON",
    156     "LRE",
    157     "LRO",
    158     "AL",
    159     "RLE",
    160     "RLO",
    161     "PDF",
    162     "NSM",
    163     "BN"
    164 };
    165 
    166 void addUnicodeTest(TestNode** root);
    167 
    168 void addUnicodeTest(TestNode** root)
    169 {
    170     addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
    171     addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
    172     addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
    173     addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
    174     addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
    175     addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
    176     addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
    177     addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
    178     addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
    179     addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
    180     addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
    181     addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
    182     addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
    183     addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
    184     addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
    185     addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
    186     addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
    187     addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
    188     addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
    189     addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
    190     addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
    191     addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
    192     addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
    193 }
    194 
    195 /*==================================================== */
    196 /* test u_toupper() and u_tolower()                    */
    197 /*==================================================== */
    198 static void TestUpperLower()
    199 {
    200     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
    201     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
    202     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
    203     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
    204     int32_t i;
    205 
    206     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
    207     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
    208 
    209 /*
    210 Checks LetterLike Symbols which were previously a source of confusion
    211 [Bertrand A. D. 02/04/98]
    212 */
    213     for (i=0x2100;i<0x2138;i++)
    214     {
    215         /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
    216         if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
    217         {
    218             if (i != (int)u_tolower(i)) /* itself */
    219                 log_err("Failed case conversion with itself: U+%04x\n", i);
    220             if (i != (int)u_toupper(i))
    221                 log_err("Failed case conversion with itself: U+%04x\n", i);
    222         }
    223     }
    224 
    225     for(i=0; i < u_strlen(upper); i++){
    226         if(u_tolower(upper[i]) != lower[i]){
    227             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
    228         }
    229     }
    230 
    231     log_verbose("testing upper lower\n");
    232     for (i = 0; i < 21; i++) {
    233 
    234         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
    235         {
    236             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
    237         }
    238         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
    239          {
    240             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
    241         }
    242         else if (upperTest[i] != u_tolower(lowerTest[i]))
    243         {
    244             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
    245         }
    246         else if (lowerTest[i] != u_toupper(upperTest[i]))
    247          {
    248             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
    249         }
    250         else if (upperTest[i] != u_tolower(upperTest[i]))
    251         {
    252             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
    253         }
    254         else if (lowerTest[i] != u_toupper(lowerTest[i]))
    255         {
    256             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
    257         }
    258     }
    259     log_verbose("done testing upper lower\n");
    260 
    261     log_verbose("testing u_istitle\n");
    262     {
    263         static const UChar expected[] = {
    264             0x1F88,
    265             0x1F89,
    266             0x1F8A,
    267             0x1F8B,
    268             0x1F8C,
    269             0x1F8D,
    270             0x1F8E,
    271             0x1F8F,
    272             0x1F88,
    273             0x1F89,
    274             0x1F8A,
    275             0x1F8B,
    276             0x1F8C,
    277             0x1F8D,
    278             0x1F8E,
    279             0x1F8F,
    280             0x1F98,
    281             0x1F99,
    282             0x1F9A,
    283             0x1F9B,
    284             0x1F9C,
    285             0x1F9D,
    286             0x1F9E,
    287             0x1F9F,
    288             0x1F98,
    289             0x1F99,
    290             0x1F9A,
    291             0x1F9B,
    292             0x1F9C,
    293             0x1F9D,
    294             0x1F9E,
    295             0x1F9F,
    296             0x1FA8,
    297             0x1FA9,
    298             0x1FAA,
    299             0x1FAB,
    300             0x1FAC,
    301             0x1FAD,
    302             0x1FAE,
    303             0x1FAF,
    304             0x1FA8,
    305             0x1FA9,
    306             0x1FAA,
    307             0x1FAB,
    308             0x1FAC,
    309             0x1FAD,
    310             0x1FAE,
    311             0x1FAF,
    312             0x1FBC,
    313             0x1FBC,
    314             0x1FCC,
    315             0x1FCC,
    316             0x1FFC,
    317             0x1FFC,
    318         };
    319         int32_t num = sizeof(expected)/sizeof(expected[0]);
    320         for(i=0; i<num; i++){
    321             if(!u_istitle(expected[i])){
    322                 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
    323             }
    324         }
    325 
    326     }
    327 }
    328 
    329 /* compare two sets and verify that their difference or intersection is empty */
    330 static UBool
    331 showADiffB(const USet *a, const USet *b,
    332            const char *a_name, const char *b_name,
    333            UBool expect, UBool diffIsError) {
    334     USet *aa;
    335     int32_t i, start, end, length;
    336     UErrorCode errorCode;
    337 
    338     /*
    339      * expect:
    340      * TRUE  -> a-b should be empty, that is, b should contain all of a
    341      * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
    342      */
    343     if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
    344         return TRUE;
    345     }
    346 
    347     /* clone a to aa because a is const */
    348     aa=uset_open(1, 0);
    349     if(aa==NULL) {
    350         /* unusual problem - out of memory? */
    351         return FALSE;
    352     }
    353     uset_addAll(aa, a);
    354 
    355     /* compute the set in question */
    356     if(expect) {
    357         /* a-b */
    358         uset_removeAll(aa, b);
    359     } else {
    360         /* a&b */
    361         uset_retainAll(aa, b);
    362     }
    363 
    364     /* aa is not empty because of the initial tests above; show its contents */
    365     errorCode=U_ZERO_ERROR;
    366     i=0;
    367     for(;;) {
    368         length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
    369         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
    370             break; /* done */
    371         }
    372         if(U_FAILURE(errorCode)) {
    373             log_err("error comparing %s with %s at difference item %d: %s\n",
    374                 a_name, b_name, i, u_errorName(errorCode));
    375             break;
    376         }
    377         if(length!=0) {
    378             break; /* done with code points, got a string or -1 */
    379         }
    380 
    381         if(diffIsError) {
    382             if(expect) {
    383                 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
    384             } else {
    385                 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
    386             }
    387         } else {
    388             if(expect) {
    389                 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
    390             } else {
    391                 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
    392             }
    393         }
    394 
    395         ++i;
    396     }
    397 
    398     uset_close(aa);
    399     return FALSE;
    400 }
    401 
    402 static UBool
    403 showAMinusB(const USet *a, const USet *b,
    404             const char *a_name, const char *b_name,
    405             UBool diffIsError) {
    406     return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
    407 }
    408 
    409 static UBool
    410 showAIntersectB(const USet *a, const USet *b,
    411                 const char *a_name, const char *b_name,
    412                 UBool diffIsError) {
    413     return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
    414 }
    415 
    416 static UBool
    417 compareUSets(const USet *a, const USet *b,
    418              const char *a_name, const char *b_name,
    419              UBool diffIsError) {
    420     /*
    421      * Use an arithmetic & not a logical && so that both branches
    422      * are always taken and all differences are shown.
    423      */
    424     return
    425         showAMinusB(a, b, a_name, b_name, diffIsError) &
    426         showAMinusB(b, a, b_name, a_name, diffIsError);
    427 }
    428 
    429 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
    430 static void TestLetterNumber()
    431 {
    432     UChar i = 0x0000;
    433 
    434     log_verbose("Testing for isalpha\n");
    435     for (i = 0x0041; i < 0x005B; i++) {
    436         if (!u_isalpha(i))
    437         {
    438             log_err("Failed isLetter test at  %.4X\n", i);
    439         }
    440     }
    441     for (i = 0x0660; i < 0x066A; i++) {
    442         if (u_isalpha(i))
    443         {
    444             log_err("Failed isLetter test with numbers at %.4X\n", i);
    445         }
    446     }
    447 
    448     log_verbose("Testing for isdigit\n");
    449     for (i = 0x0660; i < 0x066A; i++) {
    450         if (!u_isdigit(i))
    451         {
    452             log_verbose("Failed isNumber test at %.4X\n", i);
    453         }
    454     }
    455 
    456     log_verbose("Testing for isalnum\n");
    457     for (i = 0x0041; i < 0x005B; i++) {
    458         if (!u_isalnum(i))
    459         {
    460             log_err("Failed isAlNum test at  %.4X\n", i);
    461         }
    462     }
    463     for (i = 0x0660; i < 0x066A; i++) {
    464         if (!u_isalnum(i))
    465         {
    466             log_err("Failed isAlNum test at  %.4X\n", i);
    467         }
    468     }
    469 
    470     {
    471         /*
    472          * The following checks work only starting from Unicode 4.0.
    473          * Check the version number here.
    474          */
    475         static UVersionInfo u401={ 4, 0, 1, 0 };
    476         UVersionInfo version;
    477         u_getUnicodeVersion(version);
    478         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
    479             return;
    480         }
    481     }
    482 
    483     {
    484         /*
    485          * Sanity check:
    486          * Verify that exactly the digit characters have decimal digit values.
    487          * This assumption is used in the implementation of u_digit()
    488          * (which checks nt=de)
    489          * compared with the parallel java.lang.Character.digit()
    490          * (which checks Nd).
    491          *
    492          * This was not true in Unicode 3.2 and earlier.
    493          * Unicode 4.0 fixed discrepancies.
    494          * Unicode 4.0.1 re-introduced problems in this area due to an
    495          * unintentionally incomplete last-minute change.
    496          */
    497         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
    498         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
    499 
    500         USet *digits, *decimalValues;
    501         UErrorCode errorCode;
    502 
    503         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
    504         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
    505         errorCode=U_ZERO_ERROR;
    506         digits=uset_openPattern(digitsPattern, 6, &errorCode);
    507         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
    508 
    509         if(U_SUCCESS(errorCode)) {
    510             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
    511         }
    512 
    513         uset_close(digits);
    514         uset_close(decimalValues);
    515     }
    516 }
    517 
    518 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
    519                                 const UChar32 *sampleChars, int32_t sampleCharsLength,
    520                                 UBool expected) {
    521     int32_t i;
    522     for (i = 0; i < sampleCharsLength; ++i) {
    523         UBool result = propFn(sampleChars[i]);
    524         if (result != expected) {
    525             log_err("error: character property function %s(U+%04x)=%d is wrong\n",
    526                     propName, sampleChars[i], result);
    527         }
    528     }
    529 }
    530 
    531 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
    532 static void TestMisc()
    533 {
    534     static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
    535     static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
    536     static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
    537     static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
    538     static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
    539     static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
    540 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
    541     static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
    542     static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
    543     static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
    544     static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
    545 
    546     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
    547 
    548     uint32_t mask;
    549 
    550     int32_t i;
    551     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
    552     UVersionInfo realVersion;
    553 
    554     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
    555 
    556     testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
    557     testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
    558 
    559     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
    560                         sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
    561     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
    562                         sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
    563 
    564     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
    565                         sampleWhiteSpaces, LENGTHOF(sampleWhiteSpaces), TRUE);
    566     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
    567                         sampleNonWhiteSpaces, LENGTHOF(sampleNonWhiteSpaces), FALSE);
    568 
    569     testSampleCharProps(u_isdefined, "u_isdefined",
    570                         sampleDefined, LENGTHOF(sampleDefined), TRUE);
    571     testSampleCharProps(u_isdefined, "u_isdefined",
    572                         sampleUndefined, LENGTHOF(sampleUndefined), FALSE);
    573 
    574     testSampleCharProps(u_isbase, "u_isbase", sampleBase, LENGTHOF(sampleBase), TRUE);
    575     testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, LENGTHOF(sampleNonBase), FALSE);
    576 
    577     testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, LENGTHOF(sampleDigits), TRUE);
    578     testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, LENGTHOF(sampleNonDigits), FALSE);
    579 
    580     for (i = 0; i < LENGTHOF(sampleDigits); i++) {
    581         if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
    582             log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
    583                     sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
    584         }
    585     }
    586 
    587     /* Tests the ICU version #*/
    588     u_getVersion(realVersion);
    589     u_versionToString(realVersion, icuVersion);
    590     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
    591     {
    592         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
    593     }
    594 #if defined(ICU_VERSION)
    595     /* test only happens where we have configure.in with VERSION - sanity check. */
    596     if(strcmp(U_ICU_VERSION, ICU_VERSION))
    597     {
    598         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
    599     }
    600 #endif
    601 
    602     /* test U_GC_... */
    603     if(
    604         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
    605         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
    606         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
    607         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
    608         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
    609         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
    610     ) {
    611         log_err("error: U_GET_GC_MASK does not work properly\n");
    612     }
    613 
    614     mask=0;
    615     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
    616 
    617     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
    618     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
    619     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
    620     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
    621     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
    622 
    623     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
    624     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
    625     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
    626 
    627     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
    628     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
    629     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
    630 
    631     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
    632     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
    633     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
    634 
    635     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
    636     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
    637     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
    638     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
    639 
    640     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
    641     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
    642     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
    643     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
    644     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
    645 
    646     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
    647     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
    648     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
    649     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
    650 
    651     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
    652     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
    653 
    654     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
    655         log_err("error: problems with U_GC_XX_MASK constants\n");
    656     }
    657 
    658     mask=0;
    659     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
    660     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
    661     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
    662     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
    663     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
    664     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
    665     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
    666 
    667     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
    668         log_err("error: problems with U_GC_Y_MASK constants\n");
    669     }
    670     {
    671         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
    672         for(i=0; i<10; i++){
    673             if(digit[i]!=u_forDigit(i,10)){
    674                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
    675             }
    676         }
    677     }
    678 
    679     /* test u_digit() */
    680     {
    681         static const struct {
    682             UChar32 c;
    683             int8_t radix, value;
    684         } data[]={
    685             /* base 16 */
    686             { 0x0031, 16, 1 },
    687             { 0x0038, 16, 8 },
    688             { 0x0043, 16, 12 },
    689             { 0x0066, 16, 15 },
    690             { 0x00e4, 16, -1 },
    691             { 0x0662, 16, 2 },
    692             { 0x06f5, 16, 5 },
    693             { 0xff13, 16, 3 },
    694             { 0xff41, 16, 10 },
    695 
    696             /* base 8 */
    697             { 0x0031, 8, 1 },
    698             { 0x0038, 8, -1 },
    699             { 0x0043, 8, -1 },
    700             { 0x0066, 8, -1 },
    701             { 0x00e4, 8, -1 },
    702             { 0x0662, 8, 2 },
    703             { 0x06f5, 8, 5 },
    704             { 0xff13, 8, 3 },
    705             { 0xff41, 8, -1 },
    706 
    707             /* base 36 */
    708             { 0x5a, 36, 35 },
    709             { 0x7a, 36, 35 },
    710             { 0xff3a, 36, 35 },
    711             { 0xff5a, 36, 35 },
    712 
    713             /* wrong radix values */
    714             { 0x0031, 1, -1 },
    715             { 0xff3a, 37, -1 }
    716         };
    717 
    718         for(i=0; i<LENGTHOF(data); ++i) {
    719             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
    720                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
    721                         data[i].c,
    722                         data[i].radix,
    723                         u_digit(data[i].c, data[i].radix),
    724                         data[i].value);
    725             }
    726         }
    727     }
    728 }
    729 
    730 /* test C/POSIX-style functions --------------------------------------------- */
    731 
    732 /* bit flags */
    733 #define ISAL     1
    734 #define ISLO     2
    735 #define ISUP     4
    736 
    737 #define ISDI     8
    738 #define ISXD  0x10
    739 
    740 #define ISAN  0x20
    741 
    742 #define ISPU  0x40
    743 #define ISGR  0x80
    744 #define ISPR 0x100
    745 
    746 #define ISSP 0x200
    747 #define ISBL 0x400
    748 #define ISCN 0x800
    749 
    750 /* C/POSIX-style functions, in the same order as the bit flags */
    751 typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);
    752 
    753 static const struct {
    754     IsPOSIXClass *fn;
    755     const char *name;
    756 } posixClasses[]={
    757     { u_isalpha, "isalpha" },
    758     { u_islower, "islower" },
    759     { u_isupper, "isupper" },
    760     { u_isdigit, "isdigit" },
    761     { u_isxdigit, "isxdigit" },
    762     { u_isalnum, "isalnum" },
    763     { u_ispunct, "ispunct" },
    764     { u_isgraph, "isgraph" },
    765     { u_isprint, "isprint" },
    766     { u_isspace, "isspace" },
    767     { u_isblank, "isblank" },
    768     { u_iscntrl, "iscntrl" }
    769 };
    770 
    771 static const struct {
    772     UChar32 c;
    773     uint32_t posixResults;
    774 } posixData[]={
    775     { 0x0008,                                                        ISCN },    /* backspace */
    776     { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    777     { 0x000a,                                              ISSP|     ISCN },    /* LF */
    778     { 0x000c,                                              ISSP|     ISCN },    /* FF */
    779     { 0x000d,                                              ISSP|     ISCN },    /* CR */
    780     { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    781     { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    782     { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    783     { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    784     { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    785     { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    786     { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    787     { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    788     { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    789     { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    790     { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    791     { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    792     { 0x0600,                                                        ISCN },    /* arabic number sign */
    793     { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    794     { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    795     { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    796     { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    797     { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    798     { 0x200b,                                                        ISCN },    /* ZWSP */
    799   /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    800     { 0x200e,                                                        ISCN },    /* LRM */
    801     { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    802     { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    803     { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    804     { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    805     { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    806     { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    807     { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    808     { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
    809 };
    810 
    811 static void
    812 TestPOSIX() {
    813     uint32_t mask;
    814     int32_t cl, i;
    815     UBool expect;
    816 
    817     mask=1;
    818     for(cl=0; cl<12; ++cl) {
    819         for(i=0; i<LENGTHOF(posixData); ++i) {
    820             expect=(UBool)((posixData[i].posixResults&mask)!=0);
    821             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
    822                 log_err("u_%s(U+%04x)=%s is wrong\n",
    823                     posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
    824             }
    825         }
    826         mask<<=1;
    827     }
    828 }
    829 
    830 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
    831 static void TestControlPrint()
    832 {
    833     const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
    834     const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
    835     const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
    836     const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
    837     UChar32 c;
    838 
    839     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, LENGTHOF(sampleControl), TRUE);
    840     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, LENGTHOF(sampleNonControl), FALSE);
    841 
    842     testSampleCharProps(u_isprint, "u_isprint",
    843                         samplePrintable, LENGTHOF(samplePrintable), TRUE);
    844     testSampleCharProps(u_isprint, "u_isprint",
    845                         sampleNonPrintable, LENGTHOF(sampleNonPrintable), FALSE);
    846 
    847     /* test all ISO 8 controls */
    848     for(c=0; c<=0x9f; ++c) {
    849         if(c==0x20) {
    850             /* skip ASCII graphic characters and continue with DEL */
    851             c=0x7f;
    852         }
    853         if(!u_iscntrl(c)) {
    854             log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
    855         }
    856         if(!u_isISOControl(c)) {
    857             log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
    858         }
    859         if(u_isprint(c)) {
    860             log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
    861         }
    862     }
    863 
    864     /* test all Latin-1 graphic characters */
    865     for(c=0x20; c<=0xff; ++c) {
    866         if(c==0x7f) {
    867             c=0xa0;
    868         } else if(c==0xad) {
    869             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
    870             ++c;
    871         }
    872         if(!u_isprint(c)) {
    873             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
    874         }
    875     }
    876 }
    877 
    878 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
    879 static void TestIdentifier()
    880 {
    881     const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
    882     const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
    883     const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
    884     const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
    885     const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
    886     const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
    887     const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
    888     const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
    889     const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
    890     const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
    891 
    892     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
    893                         sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
    894     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
    895                         sampleNonJavaIDStart, LENGTHOF(sampleNonJavaIDStart), FALSE);
    896 
    897     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    898                         sampleJavaIDPart, LENGTHOF(sampleJavaIDPart), TRUE);
    899     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    900                         sampleNonJavaIDPart, LENGTHOF(sampleNonJavaIDPart), FALSE);
    901 
    902     /* IDPart should imply IDStart */
    903     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
    904                         sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
    905 
    906     testSampleCharProps(u_isIDStart, "u_isIDStart",
    907                         sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
    908     testSampleCharProps(u_isIDStart, "u_isIDStart",
    909                         sampleNonUnicodeIDStart, LENGTHOF(sampleNonUnicodeIDStart), FALSE);
    910 
    911     testSampleCharProps(u_isIDPart, "u_isIDPart",
    912                         sampleUnicodeIDPart, LENGTHOF(sampleUnicodeIDPart), TRUE);
    913     testSampleCharProps(u_isIDPart, "u_isIDPart",
    914                         sampleNonUnicodeIDPart, LENGTHOF(sampleNonUnicodeIDPart), FALSE);
    915 
    916     /* IDPart should imply IDStart */
    917     testSampleCharProps(u_isIDPart, "u_isIDPart",
    918                         sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
    919 
    920     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
    921                         sampleIDIgnore, LENGTHOF(sampleIDIgnore), TRUE);
    922     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
    923                         sampleNonIDIgnore, LENGTHOF(sampleNonIDIgnore), FALSE);
    924 }
    925 
    926 /* for each line of UnicodeData.txt, check some of the properties */
    927 /*
    928  * ### TODO
    929  * This test fails incorrectly if the First or Last code point of a repetitive area
    930  * is overridden, which is allowed and is encouraged for the PUAs.
    931  * Currently, this means that both area First/Last and override lines are
    932  * tested against the properties from the API,
    933  * and the area boundary will not match and cause an error.
    934  *
    935  * This function should detect area boundaries and skip them for the test of individual
    936  * code points' properties.
    937  * Then it should check that the areas contain all the same properties except where overridden.
    938  * For this, it would have had to set a flag for which code points were listed explicitly.
    939  */
    940 static void U_CALLCONV
    941 unicodeDataLineFn(void *context,
    942                   char *fields[][2], int32_t fieldCount,
    943                   UErrorCode *pErrorCode)
    944 {
    945     char buffer[100];
    946     char *end;
    947     uint32_t value;
    948     UChar32 c;
    949     int32_t i;
    950     int8_t type;
    951 
    952     /* get the character code, field 0 */
    953     c=strtoul(fields[0][0], &end, 16);
    954     if(end<=fields[0][0] || end!=fields[0][1]) {
    955         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
    956         return;
    957     }
    958     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
    959         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
    960         return;
    961     }
    962 
    963     /* get general category, field 2 */
    964     *fields[2][1]=0;
    965     type = (int8_t)tagValues[MakeProp(fields[2][0])];
    966     if(u_charType(c)!=type) {
    967         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
    968     }
    969     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
    970         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
    971     }
    972 
    973     /* get canonical combining class, field 3 */
    974     value=strtoul(fields[3][0], &end, 10);
    975     if(end<=fields[3][0] || end!=fields[3][1]) {
    976         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
    977         return;
    978     }
    979     if(value>255) {
    980         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
    981         return;
    982     }
    983 #if !UCONFIG_NO_NORMALIZATION
    984     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
    985         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
    986     }
    987 #endif
    988 
    989     /* get BiDi category, field 4 */
    990     *fields[4][1]=0;
    991     i=MakeDir(fields[4][0]);
    992     if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
    993         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
    994     }
    995 
    996     /* get ISO Comment, field 11 */
    997     *fields[11][1]=0;
    998     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
    999     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
   1000         log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
   1001             c, u_errorName(*pErrorCode),
   1002             U_FAILURE(*pErrorCode) ? buffer : "[error]",
   1003             fields[11][0]);
   1004     }
   1005 
   1006     /* get uppercase mapping, field 12 */
   1007     if(fields[12][0]!=fields[12][1]) {
   1008         value=strtoul(fields[12][0], &end, 16);
   1009         if(end!=fields[12][1]) {
   1010             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
   1011             return;
   1012         }
   1013         if((UChar32)value!=u_toupper(c)) {
   1014             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
   1015         }
   1016     } else {
   1017         /* no case mapping: the API must map the code point to itself */
   1018         if(c!=u_toupper(c)) {
   1019             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
   1020         }
   1021     }
   1022 
   1023     /* get lowercase mapping, field 13 */
   1024     if(fields[13][0]!=fields[13][1]) {
   1025         value=strtoul(fields[13][0], &end, 16);
   1026         if(end!=fields[13][1]) {
   1027             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
   1028             return;
   1029         }
   1030         if((UChar32)value!=u_tolower(c)) {
   1031             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
   1032         }
   1033     } else {
   1034         /* no case mapping: the API must map the code point to itself */
   1035         if(c!=u_tolower(c)) {
   1036             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
   1037         }
   1038     }
   1039 
   1040     /* get titlecase mapping, field 14 */
   1041     if(fields[14][0]!=fields[14][1]) {
   1042         value=strtoul(fields[14][0], &end, 16);
   1043         if(end!=fields[14][1]) {
   1044             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
   1045             return;
   1046         }
   1047         if((UChar32)value!=u_totitle(c)) {
   1048             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
   1049         }
   1050     } else {
   1051         /* no case mapping: the API must map the code point to itself */
   1052         if(c!=u_totitle(c)) {
   1053             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
   1054         }
   1055     }
   1056 }
   1057 
   1058 static UBool U_CALLCONV
   1059 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
   1060     static const UChar32 test[][2]={
   1061         {0x41, U_UPPERCASE_LETTER},
   1062         {0x308, U_NON_SPACING_MARK},
   1063         {0xfffe, U_GENERAL_OTHER_TYPES},
   1064         {0xe0041, U_FORMAT_CHAR},
   1065         {0xeffff, U_UNASSIGNED}
   1066     };
   1067 
   1068     int32_t i, count;
   1069 
   1070     if(0!=strcmp((const char *)context, "a1")) {
   1071         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
   1072         return FALSE;
   1073     }
   1074 
   1075     count=LENGTHOF(test);
   1076     for(i=0; i<count; ++i) {
   1077         if(start<=test[i][0] && test[i][0]<limit) {
   1078             if(type!=(UCharCategory)test[i][1]) {
   1079                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
   1080                         start, limit, (long)type, test[i][0], test[i][1]);
   1081             }
   1082             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
   1083             return i==(count-1) ? FALSE : TRUE;
   1084         }
   1085     }
   1086 
   1087     if(start>test[count-1][0]) {
   1088         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
   1089                 start, limit, (long)type);
   1090         return FALSE;
   1091     }
   1092 
   1093     return TRUE;
   1094 }
   1095 
   1096 static UBool U_CALLCONV
   1097 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
   1098     /* default Bidi classes for unassigned code points */
   1099     static const int32_t defaultBidi[][2]={ /* { limit, class } */
   1100         { 0x0590, U_LEFT_TO_RIGHT },
   1101         { 0x0600, U_RIGHT_TO_LEFT },
   1102         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
   1103         { 0x0900, U_RIGHT_TO_LEFT },
   1104         { 0xFB1D, U_LEFT_TO_RIGHT },
   1105         { 0xFB50, U_RIGHT_TO_LEFT },
   1106         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
   1107         { 0xFE70, U_LEFT_TO_RIGHT },
   1108         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
   1109         { 0x10800, U_LEFT_TO_RIGHT },
   1110         { 0x11000, U_RIGHT_TO_LEFT },
   1111         { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
   1112         { 0x1F000, U_RIGHT_TO_LEFT },
   1113         { 0x110000, U_LEFT_TO_RIGHT }
   1114     };
   1115 
   1116     UChar32 c;
   1117     int32_t i;
   1118     UCharDirection shouldBeDir;
   1119 
   1120     /*
   1121      * LineBreak.txt specifies:
   1122      *   #  - Assigned characters that are not listed explicitly are given the value
   1123      *   #    "AL".
   1124      *   #  - Unassigned characters are given the value "XX".
   1125      *
   1126      * PUA characters are listed explicitly with "XX".
   1127      * Verify that no assigned character has "XX".
   1128      */
   1129     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
   1130         c=start;
   1131         while(c<limit) {
   1132             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
   1133                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
   1134             }
   1135             ++c;
   1136         }
   1137     }
   1138 
   1139     /*
   1140      * Verify default Bidi classes.
   1141      * For recent Unicode versions, see UCD.html.
   1142      *
   1143      * For older Unicode versions:
   1144      * See table 3-7 "Bidirectional Character Types" in UAX #9.
   1145      * http://www.unicode.org/reports/tr9/
   1146      *
   1147      * See also DerivedBidiClass.txt for Cn code points!
   1148      *
   1149      * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
   1150      * changed some default values.
   1151      * In particular, non-characters and unassigned Default Ignorable Code Points
   1152      * change from L to BN.
   1153      *
   1154      * UCD.html version 4.0.1 does not yet reflect these changes.
   1155      */
   1156     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
   1157         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
   1158         c=start;
   1159         for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
   1160             if((int32_t)c<defaultBidi[i][0]) {
   1161                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
   1162                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
   1163                         shouldBeDir=U_BOUNDARY_NEUTRAL;
   1164                     } else {
   1165                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
   1166                     }
   1167 
   1168                     if( u_charDirection(c)!=shouldBeDir ||
   1169                         u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
   1170                     ) {
   1171                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
   1172                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
   1173                     }
   1174                     ++c;
   1175                 }
   1176             }
   1177         }
   1178     }
   1179 
   1180     return TRUE;
   1181 }
   1182 
   1183 /* tests for several properties */
   1184 static void TestUnicodeData()
   1185 {
   1186     UVersionInfo expectVersionArray;
   1187     UVersionInfo versionArray;
   1188     char *fields[15][2];
   1189     UErrorCode errorCode;
   1190     UChar32 c;
   1191     int8_t type;
   1192 
   1193     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
   1194     u_getUnicodeVersion(versionArray);
   1195     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
   1196     {
   1197         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
   1198         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
   1199     }
   1200 
   1201 #if defined(ICU_UNICODE_VERSION)
   1202     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
   1203     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
   1204     {
   1205          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
   1206     }
   1207 #endif
   1208 
   1209     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
   1210         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
   1211     }
   1212 
   1213     errorCode=U_ZERO_ERROR;
   1214     parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, NULL, &errorCode);
   1215     if(U_FAILURE(errorCode)) {
   1216         return; /* if we couldn't parse UnicodeData.txt, we should return */
   1217     }
   1218 
   1219     /* sanity check on repeated properties */
   1220     for(c=0xfffe; c<=0x10ffff;) {
   1221         type=u_charType(c);
   1222         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1223             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1224         }
   1225         if(type!=U_UNASSIGNED) {
   1226             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
   1227         }
   1228         if((c&0xffff)==0xfffe) {
   1229             ++c;
   1230         } else {
   1231             c+=0xffff;
   1232         }
   1233     }
   1234 
   1235     /* test that PUA is not "unassigned" */
   1236     for(c=0xe000; c<=0x10fffd;) {
   1237         type=u_charType(c);
   1238         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
   1239             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
   1240         }
   1241         if(type==U_UNASSIGNED) {
   1242             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
   1243         } else if(type!=U_PRIVATE_USE_CHAR) {
   1244             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
   1245         }
   1246         if(c==0xf8ff) {
   1247             c=0xf0000;
   1248         } else if(c==0xffffd) {
   1249             c=0x100000;
   1250         } else {
   1251             ++c;
   1252         }
   1253     }
   1254 
   1255     /* test u_enumCharTypes() */
   1256     u_enumCharTypes(enumTypeRange, "a1");
   1257 
   1258     /* check default properties */
   1259     u_enumCharTypes(enumDefaultsRange, NULL);
   1260 }
   1261 
   1262 static void TestCodeUnit(){
   1263     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
   1264 
   1265     int32_t i;
   1266 
   1267     for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
   1268         UChar c=codeunit[i];
   1269         if(i<4){
   1270             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
   1271                 log_err("ERROR: U+%04x is a single", c);
   1272             }
   1273 
   1274         }
   1275         if(i >= 4 && i< 8){
   1276             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
   1277                 log_err("ERROR: U+%04x is a first surrogate", c);
   1278             }
   1279         }
   1280         if(i >= 8 && i< 12){
   1281             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
   1282                 log_err("ERROR: U+%04x is a second surrogate", c);
   1283             }
   1284         }
   1285     }
   1286 
   1287 }
   1288 
   1289 static void TestCodePoint(){
   1290     const UChar32 codePoint[]={
   1291         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
   1292         0xd800,
   1293         0xdbff,
   1294         0xdc00,
   1295         0xdfff,
   1296         0xdc04,
   1297         0xd821,
   1298         /*not a surrogate, valid, isUnicodeChar , not Error*/
   1299         0x20ac,
   1300         0xd7ff,
   1301         0xe000,
   1302         0xe123,
   1303         0x0061,
   1304         0xe065,
   1305         0x20402,
   1306         0x24506,
   1307         0x23456,
   1308         0x20402,
   1309         0x10402,
   1310         0x23456,
   1311         /*not a surrogate, not valid, isUnicodeChar, isError */
   1312         0x0015,
   1313         0x009f,
   1314         /*not a surrogate, not valid, not isUnicodeChar, isError */
   1315         0xffff,
   1316         0xfffe,
   1317     };
   1318     int32_t i;
   1319     for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
   1320         UChar32 c=codePoint[i];
   1321         if(i<6){
   1322             if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
   1323                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1324             }
   1325             if(UTF_IS_VALID(c)){
   1326                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1327             }
   1328             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
   1329                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1330             }
   1331             if(UTF_IS_ERROR(c)){
   1332                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1333             }
   1334         }else if(i >=6 && i<18){
   1335             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1336                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1337             }
   1338             if(!UTF_IS_VALID(c)){
   1339                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1340             }
   1341             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
   1342                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1343             }
   1344             if(UTF_IS_ERROR(c)){
   1345                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1346             }
   1347         }else if(i >=18 && i<20){
   1348             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1349                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1350             }
   1351             if(UTF_IS_VALID(c)){
   1352                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1353             }
   1354             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
   1355                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1356             }
   1357             if(!UTF_IS_ERROR(c)){
   1358                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1359             }
   1360         }
   1361         else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
   1362             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
   1363                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
   1364             }
   1365             if(UTF_IS_VALID(c)){
   1366                 log_err("ERROR: isValid() failed for U+%04x\n", c);
   1367             }
   1368             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
   1369                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
   1370             }
   1371             if(!UTF_IS_ERROR(c)){
   1372                 log_err("ERROR: isError() failed for U+%04x\n", c);
   1373             }
   1374         }
   1375     }
   1376 
   1377     if(
   1378         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
   1379         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
   1380         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
   1381         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
   1382     ) {
   1383         log_err("error with U_IS_BMP()\n");
   1384     }
   1385 
   1386     if(
   1387         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
   1388         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
   1389         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
   1390         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
   1391     ) {
   1392         log_err("error with U_IS_SUPPLEMENTARY()\n");
   1393     }
   1394 }
   1395 
   1396 static void TestCharLength()
   1397 {
   1398     const int32_t codepoint[]={
   1399         1, 0x0061,
   1400         1, 0xe065,
   1401         1, 0x20ac,
   1402         2, 0x20402,
   1403         2, 0x23456,
   1404         2, 0x24506,
   1405         2, 0x20402,
   1406         2, 0x10402,
   1407         1, 0xd7ff,
   1408         1, 0xe000
   1409     };
   1410 
   1411     int32_t i;
   1412     UBool multiple;
   1413     for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
   1414         UChar32 c=codepoint[i+1];
   1415         if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
   1416             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], UTF_CHAR_LENGTH(c));
   1417         }
   1418         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
   1419         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
   1420             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
   1421         }
   1422     }
   1423 }
   1424 
   1425 /*internal functions ----*/
   1426 static int32_t MakeProp(char* str)
   1427 {
   1428     int32_t result = 0;
   1429     char* matchPosition =0;
   1430 
   1431     matchPosition = strstr(tagStrings, str);
   1432     if (matchPosition == 0)
   1433     {
   1434         log_err("unrecognized type letter ");
   1435         log_err(str);
   1436     }
   1437     else
   1438         result = (int32_t)((matchPosition - tagStrings) / 2);
   1439     return result;
   1440 }
   1441 
   1442 static int32_t MakeDir(char* str)
   1443 {
   1444     int32_t pos = 0;
   1445     for (pos = 0; pos < 19; pos++) {
   1446         if (strcmp(str, dirStrings[pos]) == 0) {
   1447             return pos;
   1448         }
   1449     }
   1450     return -1;
   1451 }
   1452 
   1453 /* test u_charName() -------------------------------------------------------- */
   1454 
   1455 static const struct {
   1456     uint32_t code;
   1457     const char *name, *oldName, *extName, *alias;
   1458 } names[]={
   1459     {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},
   1460     {0x01a2, "LATIN CAPITAL LETTER OI",
   1461              "LATIN CAPITAL LETTER O I",
   1462              "LATIN CAPITAL LETTER OI",
   1463              "LATIN CAPITAL LETTER GHA"},
   1464     {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
   1465              "LATIN SMALL LETTER DOTLESS J BAR HOOK",
   1466              "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
   1467     {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
   1468              "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
   1469              "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
   1470     {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" },
   1471     {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" },
   1472     {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" },
   1473     {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" },
   1474     {0xd800, "", "", "<lead surrogate-D800>" },
   1475     {0xdc00, "", "", "<trail surrogate-DC00>" },
   1476     {0xff08, "FULLWIDTH LEFT PARENTHESIS", "FULLWIDTH OPENING PARENTHESIS", "FULLWIDTH LEFT PARENTHESIS" },
   1477     {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },
   1478     {0xffff, "", "", "<noncharacter-FFFF>" },
   1479     {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
   1480               "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
   1481               "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
   1482     {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" }
   1483 };
   1484 
   1485 static UBool
   1486 enumCharNamesFn(void *context,
   1487                 UChar32 code, UCharNameChoice nameChoice,
   1488                 const char *name, int32_t length) {
   1489     int32_t *pCount=(int32_t *)context;
   1490     const char *expected;
   1491     int i;
   1492 
   1493     if(length<=0 || length!=(int32_t)strlen(name)) {
   1494         /* should not be called with an empty string or invalid length */
   1495         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
   1496         return TRUE;
   1497     }
   1498 
   1499     ++*pCount;
   1500     for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
   1501         if(code==(UChar32)names[i].code) {
   1502             switch (nameChoice) {
   1503                 case U_EXTENDED_CHAR_NAME:
   1504                     if(0!=strcmp(name, names[i].extName)) {
   1505                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
   1506                     }
   1507                     break;
   1508                 case U_UNICODE_CHAR_NAME:
   1509                     if(0!=strcmp(name, names[i].name)) {
   1510                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
   1511                     }
   1512                     break;
   1513                 case U_UNICODE_10_CHAR_NAME:
   1514                     expected=names[i].oldName;
   1515                     if(expected[0]==0 || 0!=strcmp(name, expected)) {
   1516                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
   1517                     }
   1518                     break;
   1519                 case U_CHAR_NAME_ALIAS:
   1520                     expected=names[i].alias;
   1521                     if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
   1522                         log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
   1523                     }
   1524                     break;
   1525                 case U_CHAR_NAME_CHOICE_COUNT:
   1526                     break;
   1527             }
   1528             break;
   1529         }
   1530     }
   1531     return TRUE;
   1532 }
   1533 
   1534 struct enumExtCharNamesContext {
   1535     uint32_t length;
   1536     int32_t last;
   1537 };
   1538 
   1539 static UBool
   1540 enumExtCharNamesFn(void *context,
   1541                 UChar32 code, UCharNameChoice nameChoice,
   1542                 const char *name, int32_t length) {
   1543     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
   1544 
   1545     if (ecncp->last != (int32_t) code - 1) {
   1546         if (ecncp->last < 0) {
   1547             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
   1548         } else {
   1549             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
   1550         }
   1551     }
   1552     ecncp->last = (int32_t) code;
   1553 
   1554     if (!*name) {
   1555         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
   1556     }
   1557 
   1558     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
   1559 }
   1560 
   1561 /**
   1562  * This can be made more efficient by moving it into putil.c and having
   1563  * it directly access the ebcdic translation tables.
   1564  * TODO: If we get this method in putil.c, then delete it from here.
   1565  */
   1566 static UChar
   1567 u_charToUChar(char c) {
   1568     UChar uc;
   1569     u_charsToUChars(&c, &uc, 1);
   1570     return uc;
   1571 }
   1572 
   1573 static void
   1574 TestCharNames() {
   1575     static char name[80];
   1576     UErrorCode errorCode=U_ZERO_ERROR;
   1577     struct enumExtCharNamesContext extContext;
   1578     const char *expected;
   1579     int32_t length;
   1580     UChar32 c;
   1581     int32_t i;
   1582 
   1583     log_verbose("Testing uprv_getMaxCharNameLength()\n");
   1584     length=uprv_getMaxCharNameLength();
   1585     if(length==0) {
   1586         /* no names data available */
   1587         return;
   1588     }
   1589     if(length<83) { /* Unicode 3.2 max char name length */
   1590         log_err("uprv_getMaxCharNameLength()=%d is too short");
   1591     }
   1592     /* ### TODO same tests for max ISO comment length as for max name length */
   1593 
   1594     log_verbose("Testing u_charName()\n");
   1595     for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
   1596         /* modern Unicode character name */
   1597         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
   1598         if(U_FAILURE(errorCode)) {
   1599             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
   1600             return;
   1601         }
   1602         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
   1603             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
   1604         }
   1605 
   1606         /* find the modern name */
   1607         if (*names[i].name) {
   1608             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
   1609             if(U_FAILURE(errorCode)) {
   1610                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
   1611                 return;
   1612             }
   1613             if(c!=(UChar32)names[i].code) {
   1614                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
   1615             }
   1616         }
   1617 
   1618         /* Unicode 1.0 character name */
   1619         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
   1620         if(U_FAILURE(errorCode)) {
   1621             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
   1622             return;
   1623         }
   1624         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
   1625             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
   1626         }
   1627 
   1628         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
   1629         if(names[i].oldName[0]!=0 /* && length>0 */) {
   1630             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
   1631             if(U_FAILURE(errorCode)) {
   1632                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
   1633                 return;
   1634             }
   1635             if(c!=(UChar32)names[i].code) {
   1636                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
   1637             }
   1638         }
   1639 
   1640         /* Unicode character name alias */
   1641         length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
   1642         if(U_FAILURE(errorCode)) {
   1643             log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
   1644             return;
   1645         }
   1646         expected=names[i].alias;
   1647         if(expected==NULL) {
   1648             expected="";
   1649         }
   1650         if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
   1651             log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
   1652                     names[i].code, name, length, expected);
   1653         }
   1654 
   1655         /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
   1656         if(expected[0]!=0 /* && length>0 */) {
   1657             c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
   1658             if(U_FAILURE(errorCode)) {
   1659                 log_err("u_charFromName(%s - alias) error %s\n",
   1660                         expected, u_errorName(errorCode));
   1661                 return;
   1662             }
   1663             if(c!=(UChar32)names[i].code) {
   1664                 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
   1665                         expected, c, names[i].code);
   1666             }
   1667         }
   1668     }
   1669 
   1670     /* test u_enumCharNames() */
   1671     length=0;
   1672     errorCode=U_ZERO_ERROR;
   1673     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
   1674     if(U_FAILURE(errorCode) || length<94140) {
   1675         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
   1676     }
   1677 
   1678     extContext.length = 0;
   1679     extContext.last = -1;
   1680     errorCode=U_ZERO_ERROR;
   1681     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
   1682     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
   1683         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
   1684     }
   1685 
   1686     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
   1687     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
   1688         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
   1689     }
   1690 
   1691     /* Test getCharNameCharacters */
   1692     if(!QUICK) {
   1693         enum { BUFSIZE = 256 };
   1694         UErrorCode ec = U_ZERO_ERROR;
   1695         char buf[BUFSIZE];
   1696         int32_t maxLength;
   1697         UChar32 cp;
   1698         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
   1699         int32_t l1, l2;
   1700         UBool map[256];
   1701         UBool ok;
   1702 
   1703         USet* set = uset_open(1, 0); /* empty set */
   1704         USet* dumb = uset_open(1, 0); /* empty set */
   1705 
   1706         /*
   1707          * uprv_getCharNameCharacters() will likely return more lowercase
   1708          * letters than actual character names contain because
   1709          * it includes all the characters in lowercased names of
   1710          * general categories, for the full possible set of extended names.
   1711          */
   1712         {
   1713             USetAdder sa={
   1714                 NULL,
   1715                 uset_add,
   1716                 uset_addRange,
   1717                 uset_addString,
   1718                 NULL /* don't need remove() */
   1719             };
   1720             sa.set=set;
   1721             uprv_getCharNameCharacters(&sa);
   1722         }
   1723 
   1724         /* build set the dumb (but sure-fire) way */
   1725         for (i=0; i<256; ++i) {
   1726             map[i] = FALSE;
   1727         }
   1728 
   1729         maxLength=0;
   1730         for (cp=0; cp<0x110000; ++cp) {
   1731             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
   1732                                      buf, BUFSIZE, &ec);
   1733             if (U_FAILURE(ec)) {
   1734                 log_err("FAIL: u_charName failed when it shouldn't\n");
   1735                 uset_close(set);
   1736                 uset_close(dumb);
   1737                 return;
   1738             }
   1739             if(len>maxLength) {
   1740                 maxLength=len;
   1741             }
   1742 
   1743             for (i=0; i<len; ++i) {
   1744                 if (!map[(uint8_t) buf[i]]) {
   1745                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
   1746                     map[(uint8_t) buf[i]] = TRUE;
   1747                 }
   1748             }
   1749 
   1750             /* test for leading/trailing whitespace */
   1751             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
   1752                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
   1753             }
   1754         }
   1755 
   1756         if(map[(uint8_t)'\t']) {
   1757             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
   1758         }
   1759 
   1760         length=uprv_getMaxCharNameLength();
   1761         if(length!=maxLength) {
   1762             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
   1763                     length, maxLength);
   1764         }
   1765 
   1766         /* compare the sets.  Where is my uset_equals?!! */
   1767         ok=TRUE;
   1768         for(i=0; i<256; ++i) {
   1769             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
   1770                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
   1771                     /* ignore lowercase a-z that are in set but not in dumb */
   1772                     ok=TRUE;
   1773                 } else {
   1774                     ok=FALSE;
   1775                     break;
   1776                 }
   1777             }
   1778         }
   1779 
   1780         l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
   1781         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
   1782         if (U_FAILURE(ec)) {
   1783             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
   1784             uset_close(set);
   1785             uset_close(dumb);
   1786             return;
   1787         }
   1788 
   1789         if (l1 >= BUFSIZE) {
   1790             l1 = BUFSIZE-1;
   1791             pat[l1] = 0;
   1792         }
   1793         if (l2 >= BUFSIZE) {
   1794             l2 = BUFSIZE-1;
   1795             dumbPat[l2] = 0;
   1796         }
   1797 
   1798         if (!ok) {
   1799             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
   1800                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
   1801         } else if(VERBOSITY) {
   1802             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
   1803         }
   1804 
   1805         uset_close(set);
   1806         uset_close(dumb);
   1807     }
   1808 
   1809     /* ### TODO: test error cases and other interesting things */
   1810 }
   1811 
   1812 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
   1813 
   1814 static void
   1815 TestMirroring() {
   1816     USet *set;
   1817     UErrorCode errorCode;
   1818 
   1819     UChar32 start, end, c2, c3;
   1820     int32_t i;
   1821 
   1822     U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
   1823 
   1824     U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
   1825 
   1826     log_verbose("Testing u_isMirrored()\n");
   1827     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
   1828          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
   1829         )
   1830     ) {
   1831         log_err("u_isMirrored() does not work correctly\n");
   1832     }
   1833 
   1834     log_verbose("Testing u_charMirror()\n");
   1835     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
   1836          u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
   1837          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
   1838          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
   1839          u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
   1840          )
   1841     ) {
   1842         log_err("u_charMirror() does not work correctly\n");
   1843     }
   1844 
   1845     /* verify that Bidi_Mirroring_Glyph roundtrips */
   1846     errorCode=U_ZERO_ERROR;
   1847     set=uset_openPattern(mirroredPattern, 17, &errorCode);
   1848 
   1849     if (U_FAILURE(errorCode)) {
   1850         log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
   1851     } else {
   1852         for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
   1853             do {
   1854                 c2=u_charMirror(start);
   1855                 c3=u_charMirror(c2);
   1856                 if(c3!=start) {
   1857                     log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
   1858                 }
   1859             } while(++start<=end);
   1860         }
   1861     }
   1862 
   1863     uset_close(set);
   1864 }
   1865 
   1866 
   1867 struct RunTestData
   1868 {
   1869     const char *runText;
   1870     UScriptCode runCode;
   1871 };
   1872 
   1873 typedef struct RunTestData RunTestData;
   1874 
   1875 static void
   1876 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
   1877                 const char *prefix)
   1878 {
   1879     int32_t run, runStart, runLimit;
   1880     UScriptCode runCode;
   1881 
   1882     /* iterate over all the runs */
   1883     run = 0;
   1884     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
   1885         if (runStart != runStarts[run]) {
   1886             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
   1887                 prefix, run, runStarts[run], runStart);
   1888         }
   1889 
   1890         if (runLimit != runStarts[run + 1]) {
   1891             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
   1892                 prefix, run, runStarts[run + 1], runLimit);
   1893         }
   1894 
   1895         if (runCode != testData[run].runCode) {
   1896             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
   1897                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
   1898         }
   1899 
   1900         run += 1;
   1901 
   1902         /* stop when we've seen all the runs we expect to see */
   1903         if (run >= nRuns) {
   1904             break;
   1905         }
   1906     }
   1907 
   1908     /* Complain if we didn't see then number of runs we expected */
   1909     if (run != nRuns) {
   1910         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
   1911     }
   1912 }
   1913 
   1914 static void
   1915 TestUScriptRunAPI()
   1916 {
   1917     static const RunTestData testData1[] = {
   1918         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
   1919         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
   1920         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
   1921         {"English (", USCRIPT_LATIN},
   1922         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
   1923         {") ", USCRIPT_LATIN},
   1924         {"\\u6F22\\u5B75", USCRIPT_HAN},
   1925         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
   1926         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
   1927         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
   1928     };
   1929 
   1930     static const RunTestData testData2[] = {
   1931        {"((((((((((abc))))))))))", USCRIPT_LATIN}
   1932     };
   1933 
   1934     static const struct {
   1935       const RunTestData *testData;
   1936       int32_t nRuns;
   1937     } testDataEntries[] = {
   1938         {testData1, LENGTHOF(testData1)},
   1939         {testData2, LENGTHOF(testData2)}
   1940     };
   1941 
   1942     static const int32_t nTestEntries = LENGTHOF(testDataEntries);
   1943     int32_t testEntry;
   1944 
   1945     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
   1946         UChar testString[1024];
   1947         int32_t runStarts[256];
   1948         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
   1949         const RunTestData *testData = testDataEntries[testEntry].testData;
   1950 
   1951         int32_t run, stringLimit;
   1952         UScriptRun *scriptRun = NULL;
   1953         UErrorCode err;
   1954 
   1955         /*
   1956          * Fill in the test string and the runStarts array.
   1957          */
   1958         stringLimit = 0;
   1959         for (run = 0; run < nTestRuns; run += 1) {
   1960             runStarts[run] = stringLimit;
   1961             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
   1962             /*stringLimit -= 1;*/
   1963         }
   1964 
   1965         /* The limit of the last run */
   1966         runStarts[nTestRuns] = stringLimit;
   1967 
   1968         /*
   1969          * Make sure that calling uscript_OpenRun with a NULL text pointer
   1970          * and a non-zero text length returns the correct error.
   1971          */
   1972         err = U_ZERO_ERROR;
   1973         scriptRun = uscript_openRun(NULL, stringLimit, &err);
   1974 
   1975         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   1976             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   1977         }
   1978 
   1979         if (scriptRun != NULL) {
   1980             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
   1981             uscript_closeRun(scriptRun);
   1982         }
   1983 
   1984         /*
   1985          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
   1986          * and a zero text length returns the correct error.
   1987          */
   1988         err = U_ZERO_ERROR;
   1989         scriptRun = uscript_openRun(testString, 0, &err);
   1990 
   1991         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   1992             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   1993         }
   1994 
   1995         if (scriptRun != NULL) {
   1996             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
   1997             uscript_closeRun(scriptRun);
   1998         }
   1999 
   2000         /*
   2001          * Make sure that calling uscript_openRun with a NULL text pointer
   2002          * and a zero text length doesn't return an error.
   2003          */
   2004         err = U_ZERO_ERROR;
   2005         scriptRun = uscript_openRun(NULL, 0, &err);
   2006 
   2007         if (U_FAILURE(err)) {
   2008             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
   2009         }
   2010 
   2011         /* Make sure that the empty iterator doesn't find any runs */
   2012         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
   2013             log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
   2014         }
   2015 
   2016         /*
   2017          * Make sure that calling uscript_setRunText with a NULL text pointer
   2018          * and a non-zero text length returns the correct error.
   2019          */
   2020         err = U_ZERO_ERROR;
   2021         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
   2022 
   2023         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2024             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2025         }
   2026 
   2027         /*
   2028          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
   2029          * and a zero text length returns the correct error.
   2030          */
   2031         err = U_ZERO_ERROR;
   2032         uscript_setRunText(scriptRun, testString, 0, &err);
   2033 
   2034         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
   2035             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
   2036         }
   2037 
   2038         /*
   2039          * Now call uscript_setRunText on the empty iterator
   2040          * and make sure that it works.
   2041          */
   2042         err = U_ZERO_ERROR;
   2043         uscript_setRunText(scriptRun, testString, stringLimit, &err);
   2044 
   2045         if (U_FAILURE(err)) {
   2046             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
   2047         } else {
   2048             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
   2049         }
   2050 
   2051         uscript_closeRun(scriptRun);
   2052 
   2053         /*
   2054          * Now open an interator over the testString
   2055          * using uscript_openRun and make sure that it works
   2056          */
   2057         scriptRun = uscript_openRun(testString, stringLimit, &err);
   2058 
   2059         if (U_FAILURE(err)) {
   2060             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
   2061         } else {
   2062             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
   2063         }
   2064 
   2065         /* Now reset the iterator, and make sure
   2066          * that it still works.
   2067          */
   2068         uscript_resetRun(scriptRun);
   2069 
   2070         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
   2071 
   2072         /* Close the iterator */
   2073         uscript_closeRun(scriptRun);
   2074     }
   2075 }
   2076 
   2077 /* test additional, non-core properties */
   2078 static void
   2079 TestAdditionalProperties() {
   2080     /* test data for u_charAge() */
   2081     static const struct {
   2082         UChar32 c;
   2083         UVersionInfo version;
   2084     } charAges[]={
   2085         {0x41,    { 1, 1, 0, 0 }},
   2086         {0xffff,  { 1, 1, 0, 0 }},
   2087         {0x20ab,  { 2, 0, 0, 0 }},
   2088         {0x2fffe, { 2, 0, 0, 0 }},
   2089         {0x20ac,  { 2, 1, 0, 0 }},
   2090         {0xfb1d,  { 3, 0, 0, 0 }},
   2091         {0x3f4,   { 3, 1, 0, 0 }},
   2092         {0x10300, { 3, 1, 0, 0 }},
   2093         {0x220,   { 3, 2, 0, 0 }},
   2094         {0xff60,  { 3, 2, 0, 0 }}
   2095     };
   2096 
   2097     /* test data for u_hasBinaryProperty() */
   2098     static const int32_t
   2099     props[][3]={ /* code point, property, value */
   2100         { 0x0627, UCHAR_ALPHABETIC, TRUE },
   2101         { 0x1034a, UCHAR_ALPHABETIC, TRUE },
   2102         { 0x2028, UCHAR_ALPHABETIC, FALSE },
   2103 
   2104         { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
   2105         { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
   2106 
   2107         { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
   2108         { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
   2109 
   2110         { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
   2111         { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
   2112 
   2113         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
   2114         { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
   2115         { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
   2116         { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
   2117         { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
   2118 
   2119         { 0x058a, UCHAR_DASH, TRUE },
   2120         { 0x007e, UCHAR_DASH, FALSE },
   2121 
   2122         { 0x0c4d, UCHAR_DIACRITIC, TRUE },
   2123         { 0x3000, UCHAR_DIACRITIC, FALSE },
   2124 
   2125         { 0x0e46, UCHAR_EXTENDER, TRUE },
   2126         { 0x0020, UCHAR_EXTENDER, FALSE },
   2127 
   2128 #if !UCONFIG_NO_NORMALIZATION
   2129         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
   2130         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
   2131         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
   2132 
   2133         { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
   2134         { 0x0308, UCHAR_NFD_INERT, FALSE },
   2135 
   2136         { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
   2137         { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
   2138 
   2139         { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
   2140         { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
   2141         { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
   2142         { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
   2143         { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
   2144         { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
   2145 
   2146         { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
   2147         { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
   2148 
   2149         { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
   2150         { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
   2151         { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
   2152         { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
   2153         { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
   2154         { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
   2155 #endif
   2156 
   2157         { 0x0044, UCHAR_HEX_DIGIT, TRUE },
   2158         { 0xff46, UCHAR_HEX_DIGIT, TRUE },
   2159         { 0x0047, UCHAR_HEX_DIGIT, FALSE },
   2160 
   2161         { 0x30fb, UCHAR_HYPHEN, TRUE },
   2162         { 0xfe58, UCHAR_HYPHEN, FALSE },
   2163 
   2164         { 0x2172, UCHAR_ID_CONTINUE, TRUE },
   2165         { 0x0307, UCHAR_ID_CONTINUE, TRUE },
   2166         { 0x005c, UCHAR_ID_CONTINUE, FALSE },
   2167 
   2168         { 0x2172, UCHAR_ID_START, TRUE },
   2169         { 0x007a, UCHAR_ID_START, TRUE },
   2170         { 0x0039, UCHAR_ID_START, FALSE },
   2171 
   2172         { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
   2173         { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
   2174         { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
   2175 
   2176         { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
   2177         { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
   2178 
   2179         { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
   2180         { 0x0345, UCHAR_LOWERCASE, TRUE },
   2181         { 0x0030, UCHAR_LOWERCASE, FALSE },
   2182 
   2183         { 0x1d7a9, UCHAR_MATH, TRUE },
   2184         { 0x2135, UCHAR_MATH, TRUE },
   2185         { 0x0062, UCHAR_MATH, FALSE },
   2186 
   2187         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
   2188         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
   2189         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
   2190 
   2191         { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
   2192         { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
   2193         { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
   2194 
   2195         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
   2196         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
   2197 
   2198         { 0x1d44a, UCHAR_UPPERCASE, TRUE },
   2199         { 0x2162, UCHAR_UPPERCASE, TRUE },
   2200         { 0x0345, UCHAR_UPPERCASE, FALSE },
   2201 
   2202         { 0x0020, UCHAR_WHITE_SPACE, TRUE },
   2203         { 0x202f, UCHAR_WHITE_SPACE, TRUE },
   2204         { 0x3001, UCHAR_WHITE_SPACE, FALSE },
   2205 
   2206         { 0x0711, UCHAR_XID_CONTINUE, TRUE },
   2207         { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
   2208         { 0x007c, UCHAR_XID_CONTINUE, FALSE },
   2209 
   2210         { 0x16ee, UCHAR_XID_START, TRUE },
   2211         { 0x23456, UCHAR_XID_START, TRUE },
   2212         { 0x1d1aa, UCHAR_XID_START, FALSE },
   2213 
   2214         /*
   2215          * Version break:
   2216          * The following properties are only supported starting with the
   2217          * Unicode version indicated in the second field.
   2218          */
   2219         { -1, 0x320, 0 },
   2220 
   2221         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
   2222         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
   2223         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
   2224 
   2225         { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
   2226         { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
   2227         { 0xe0041, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
   2228         { 0xe0100, UCHAR_DEPRECATED, FALSE },
   2229 
   2230         { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
   2231         { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
   2232         { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
   2233         { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
   2234 
   2235         { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
   2236         { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
   2237         { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
   2238         { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
   2239 
   2240         { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
   2241         { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
   2242 
   2243         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
   2244         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
   2245 
   2246         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
   2247         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
   2248 
   2249         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
   2250         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
   2251 
   2252         { 0x2e9b, UCHAR_RADICAL, TRUE },
   2253         { 0x4e00, UCHAR_RADICAL, FALSE },
   2254 
   2255         { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
   2256         { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
   2257 
   2258         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
   2259         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
   2260 
   2261         { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
   2262 
   2263         { 0x002e, UCHAR_S_TERM, TRUE },
   2264         { 0x0061, UCHAR_S_TERM, FALSE },
   2265 
   2266         { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
   2267         { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
   2268         { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
   2269         { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
   2270 
   2271         /* enum/integer type properties */
   2272 
   2273         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
   2274         /* test default Bidi classes for unassigned code points */
   2275         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2276         { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2277         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2278         { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
   2279         { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
   2280         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2281         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2282         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2283         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2284         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2285         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
   2286 
   2287         { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2288         { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2289         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2290         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2291         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2292         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2293         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2294         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
   2295 
   2296         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
   2297         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
   2298         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
   2299         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
   2300         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
   2301         { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2302         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
   2303         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
   2304         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2305         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
   2306         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
   2307 
   2308         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
   2309         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
   2310 
   2311         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
   2312         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
   2313         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
   2314         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2315         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2316         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
   2317         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
   2318         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
   2319         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
   2320 
   2321         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2322         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
   2323         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2324         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
   2325         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2326         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
   2327         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2328         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2329         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2330         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2331         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2332         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2333         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
   2334         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
   2335         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
   2336         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2337         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
   2338 
   2339         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
   2340         { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
   2341         { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
   2342 
   2343         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
   2344         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
   2345         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
   2346         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
   2347         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
   2348         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_HAMZA_ON_HEH_GOAL },
   2349 
   2350         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
   2351         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
   2352         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
   2353         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
   2354         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
   2355         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2356         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2357         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
   2358 
   2359         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
   2360         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
   2361         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
   2362         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
   2363         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
   2364         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2365         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2366         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2367         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
   2368         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2369         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2370         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
   2371         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
   2372         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
   2373         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
   2374         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
   2375 
   2376         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
   2377 
   2378         /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
   2379 
   2380         { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2381         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2382         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2383         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2384         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2385         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2386         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
   2387 
   2388         { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2389         { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2390         { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
   2391         { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2392 
   2393         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2394         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2395         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2396         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
   2397         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2398         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2399 
   2400         { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2401         { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2402         { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
   2403         { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2404 
   2405         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2406         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2407         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2408         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
   2409         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2410         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2411         { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2412 
   2413         { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2414         { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2415         { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
   2416         { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2417 
   2418         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2419         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2420         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2421         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
   2422 
   2423         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2424         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2425         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2426         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2427         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
   2428 
   2429         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
   2430 
   2431         { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
   2432 
   2433         { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
   2434         { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
   2435         { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
   2436 
   2437         { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2438         { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2439         { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
   2440         { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
   2441         { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
   2442 
   2443         { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
   2444         { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
   2445         { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
   2446 
   2447         { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
   2448         { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
   2449         { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
   2450         { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
   2451 
   2452         { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
   2453         { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
   2454         { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
   2455         { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
   2456         { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
   2457         { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
   2458 
   2459         { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
   2460         { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
   2461         { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
   2462         { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
   2463 
   2464         { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
   2465         { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
   2466         { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
   2467         { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
   2468 
   2469         { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
   2470         { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
   2471         { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
   2472         { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
   2473 
   2474         { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
   2475 
   2476         /* test some script codes >127 */
   2477         { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
   2478         { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
   2479         { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
   2480 
   2481         /* undefined UProperty values */
   2482         { 0x61, 0x4a7, 0 },
   2483         { 0x234bc, 0x15ed, 0 }
   2484     };
   2485 
   2486     UVersionInfo version;
   2487     UChar32 c;
   2488     int32_t i, result, uVersion;
   2489     UProperty which;
   2490 
   2491     /* what is our Unicode version? */
   2492     u_getUnicodeVersion(version);
   2493     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
   2494 
   2495     u_charAge(0x20, version);
   2496     if(version[0]==0) {
   2497         /* no additional properties available */
   2498         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
   2499         return;
   2500     }
   2501 
   2502     /* test u_charAge() */
   2503     for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
   2504         u_charAge(charAges[i].c, version);
   2505         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
   2506             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
   2507                 charAges[i].c,
   2508                 version[0], version[1], version[2], version[3],
   2509                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
   2510         }
   2511     }
   2512 
   2513     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
   2514         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
   2515         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
   2516         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
   2517         u_getIntPropertyMinValue(0x2345)!=0
   2518     ) {
   2519         log_err("error: u_getIntPropertyMinValue() wrong\n");
   2520     }
   2521     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
   2522         log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
   2523     }
   2524     if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
   2525         log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
   2526     }
   2527     if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
   2528         log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
   2529     }
   2530     if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
   2531         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
   2532     }
   2533     if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
   2534         log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
   2535     }
   2536     if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
   2537         log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
   2538     }
   2539     if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
   2540         log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
   2541     }
   2542     if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
   2543         log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
   2544     }
   2545     if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
   2546         log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
   2547     }
   2548     if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
   2549         log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
   2550     }
   2551     if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
   2552         log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
   2553     }
   2554     if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
   2555         log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
   2556     }
   2557     if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
   2558         log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
   2559     }
   2560     /*JB#2410*/
   2561     if( u_getIntPropertyMaxValue(0x2345)!=-1) {
   2562         log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
   2563     }
   2564     if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
   2565         log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
   2566     }
   2567     if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
   2568         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
   2569     }
   2570     if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
   2571         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
   2572     }
   2573     if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
   2574         log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
   2575     }
   2576 
   2577     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
   2578     for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
   2579         const char *whichName;
   2580 
   2581         if(props[i][0]<0) {
   2582             /* Unicode version break */
   2583             if(uVersion<props[i][1]) {
   2584                 break; /* do not test properties that are not yet supported */
   2585             } else {
   2586                 continue; /* skip this row */
   2587             }
   2588         }
   2589 
   2590         c=(UChar32)props[i][0];
   2591         which=(UProperty)props[i][1];
   2592         whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
   2593 
   2594         if(which<UCHAR_INT_START) {
   2595             result=u_hasBinaryProperty(c, which);
   2596             if(result!=props[i][2]) {
   2597                 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
   2598                         c, whichName, result, i);
   2599             }
   2600         }
   2601 
   2602         result=u_getIntPropertyValue(c, which);
   2603         if(result!=props[i][2]) {
   2604             log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
   2605                     c, whichName, result, props[i][2], i);
   2606         }
   2607 
   2608         /* test separate functions, too */
   2609         switch((UProperty)props[i][1]) {
   2610         case UCHAR_ALPHABETIC:
   2611             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2612                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
   2613                         props[i][0], result, i);
   2614             }
   2615             break;
   2616         case UCHAR_LOWERCASE:
   2617             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2618                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
   2619                         props[i][0], result, i);
   2620             }
   2621             break;
   2622         case UCHAR_UPPERCASE:
   2623             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2624                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
   2625                         props[i][0], result, i);
   2626             }
   2627             break;
   2628         case UCHAR_WHITE_SPACE:
   2629             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
   2630                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
   2631                         props[i][0], result, i);
   2632             }
   2633             break;
   2634         default:
   2635             break;
   2636         }
   2637     }
   2638 }
   2639 
   2640 static void
   2641 TestNumericProperties(void) {
   2642     /* see UnicodeData.txt, DerivedNumericValues.txt */
   2643     static const struct {
   2644         UChar32 c;
   2645         int32_t type;
   2646         double numValue;
   2647     } values[]={
   2648         { 0x0F33, U_NT_NUMERIC, -1./2. },
   2649         { 0x0C66, U_NT_DECIMAL, 0 },
   2650         { 0x96f6, U_NT_NUMERIC, 0 },
   2651         { 0xa833, U_NT_NUMERIC, 1./16. },
   2652         { 0x2152, U_NT_NUMERIC, 1./10. },
   2653         { 0x2151, U_NT_NUMERIC, 1./9. },
   2654         { 0x1245f, U_NT_NUMERIC, 1./8. },
   2655         { 0x2150, U_NT_NUMERIC, 1./7. },
   2656         { 0x2159, U_NT_NUMERIC, 1./6. },
   2657         { 0x09f6, U_NT_NUMERIC, 3./16. },
   2658         { 0x2155, U_NT_NUMERIC, 1./5. },
   2659         { 0x00BD, U_NT_NUMERIC, 1./2. },
   2660         { 0x0031, U_NT_DECIMAL, 1. },
   2661         { 0x4e00, U_NT_NUMERIC, 1. },
   2662         { 0x58f1, U_NT_NUMERIC, 1. },
   2663         { 0x10320, U_NT_NUMERIC, 1. },
   2664         { 0x0F2B, U_NT_NUMERIC, 3./2. },
   2665         { 0x00B2, U_NT_DIGIT, 2. },
   2666         { 0x5f10, U_NT_NUMERIC, 2. },
   2667         { 0x1813, U_NT_DECIMAL, 3. },
   2668         { 0x5f0e, U_NT_NUMERIC, 3. },
   2669         { 0x2173, U_NT_NUMERIC, 4. },
   2670         { 0x8086, U_NT_NUMERIC, 4. },
   2671         { 0x278E, U_NT_DIGIT, 5. },
   2672         { 0x1D7F2, U_NT_DECIMAL, 6. },
   2673         { 0x247A, U_NT_DIGIT, 7. },
   2674         { 0x7396, U_NT_NUMERIC, 9. },
   2675         { 0x1372, U_NT_NUMERIC, 10. },
   2676         { 0x216B, U_NT_NUMERIC, 12. },
   2677         { 0x16EE, U_NT_NUMERIC, 17. },
   2678         { 0x249A, U_NT_NUMERIC, 19. },
   2679         { 0x303A, U_NT_NUMERIC, 30. },
   2680         { 0x5345, U_NT_NUMERIC, 30. },
   2681         { 0x32B2, U_NT_NUMERIC, 37. },
   2682         { 0x1375, U_NT_NUMERIC, 40. },
   2683         { 0x10323, U_NT_NUMERIC, 50. },
   2684         { 0x0BF1, U_NT_NUMERIC, 100. },
   2685         { 0x964c, U_NT_NUMERIC, 100. },
   2686         { 0x217E, U_NT_NUMERIC, 500. },
   2687         { 0x2180, U_NT_NUMERIC, 1000. },
   2688         { 0x4edf, U_NT_NUMERIC, 1000. },
   2689         { 0x2181, U_NT_NUMERIC, 5000. },
   2690         { 0x137C, U_NT_NUMERIC, 10000. },
   2691         { 0x4e07, U_NT_NUMERIC, 10000. },
   2692         { 0x4ebf, U_NT_NUMERIC, 100000000. },
   2693         { 0x5146, U_NT_NUMERIC, 1000000000000. },
   2694         { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2695         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2696         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2697         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2698         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2699         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2700         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
   2701         { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
   2702     };
   2703 
   2704     double nv;
   2705     UChar32 c;
   2706     int32_t i, type;
   2707 
   2708     for(i=0; i<LENGTHOF(values); ++i) {
   2709         c=values[i].c;
   2710         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
   2711         nv=u_getNumericValue(c);
   2712 
   2713         if(type!=values[i].type) {
   2714             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
   2715         }
   2716         if(0.000001 <= fabs(nv - values[i].numValue)) {
   2717             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
   2718         }
   2719     }
   2720 }
   2721 
   2722 /**
   2723  * Test the property names and property value names API.
   2724  */
   2725 static void
   2726 TestPropertyNames(void) {
   2727     int32_t p, v, choice=0, rev;
   2728     UBool atLeastSomething = FALSE;
   2729 
   2730     for (p=0; ; ++p) {
   2731         UProperty propEnum = (UProperty)p;
   2732         UBool sawProp = FALSE;
   2733         if(p > 10 && !atLeastSomething) {
   2734           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
   2735           return;
   2736         }
   2737 
   2738         for (choice=0; ; ++choice) {
   2739             const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
   2740             if (name) {
   2741                 if (!sawProp)
   2742                     log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
   2743                 log_verbose("%d=\"%s\"", choice, name);
   2744                 sawProp = TRUE;
   2745                 atLeastSomething = TRUE;
   2746 
   2747                 /* test reverse mapping */
   2748                 rev = u_getPropertyEnum(name);
   2749                 if (rev != p) {
   2750                     log_err("Property round-trip failure: %d -> %s -> %d\n",
   2751                             p, name, rev);
   2752                 }
   2753             }
   2754             if (!name && choice>0) break;
   2755         }
   2756         if (sawProp) {
   2757             /* looks like a valid property; check the values */
   2758             const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
   2759             int32_t max = 0;
   2760             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
   2761                 max = 255;
   2762             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
   2763                 /* it's far too slow to iterate all the way up to
   2764                    the real max, U_GC_P_MASK */
   2765                 max = U_GC_NL_MASK;
   2766             } else if (p == UCHAR_BLOCK) {
   2767                 /* UBlockCodes, unlike other values, start at 1 */
   2768                 max = 1;
   2769             }
   2770             log_verbose("\n");
   2771             for (v=-1; ; ++v) {
   2772                 UBool sawValue = FALSE;
   2773                 for (choice=0; ; ++choice) {
   2774                     const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
   2775                     if (vname) {
   2776                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
   2777                         log_verbose("%d=\"%s\"", choice, vname);
   2778                         sawValue = TRUE;
   2779 
   2780                         /* test reverse mapping */
   2781                         rev = u_getPropertyValueEnum(propEnum, vname);
   2782                         if (rev != v) {
   2783                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
   2784                                     pname, v, vname, rev);
   2785                         }
   2786                     }
   2787                     if (!vname && choice>0) break;
   2788                 }
   2789                 if (sawValue) {
   2790                     log_verbose("\n");
   2791                 }
   2792                 if (!sawValue && v>=max) break;
   2793             }
   2794         }
   2795         if (!sawProp) {
   2796             if (p>=UCHAR_STRING_LIMIT) {
   2797                 break;
   2798             } else if (p>=UCHAR_DOUBLE_LIMIT) {
   2799                 p = UCHAR_STRING_START - 1;
   2800             } else if (p>=UCHAR_MASK_LIMIT) {
   2801                 p = UCHAR_DOUBLE_START - 1;
   2802             } else if (p>=UCHAR_INT_LIMIT) {
   2803                 p = UCHAR_MASK_START - 1;
   2804             } else if (p>=UCHAR_BINARY_LIMIT) {
   2805                 p = UCHAR_INT_START - 1;
   2806             }
   2807         }
   2808     }
   2809 }
   2810 
   2811 /**
   2812  * Test the property values API.  See JB#2410.
   2813  */
   2814 static void
   2815 TestPropertyValues(void) {
   2816     int32_t i, p, min, max;
   2817     UErrorCode ec;
   2818 
   2819     /* Min should be 0 for everything. */
   2820     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
   2821     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
   2822         UProperty propEnum = (UProperty)p;
   2823         min = u_getIntPropertyMinValue(propEnum);
   2824         if (min != 0) {
   2825             if (p == UCHAR_BLOCK) {
   2826                 /* This is okay...for now.  See JB#2487.
   2827                    TODO Update this for JB#2487. */
   2828             } else {
   2829                 const char* name;
   2830                 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
   2831                 if (name == NULL)
   2832                     name = "<ERROR>";
   2833                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
   2834                         name, min);
   2835             }
   2836         }
   2837     }
   2838 
   2839     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
   2840         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
   2841         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
   2842     }
   2843 
   2844     /* Max should be -1 for invalid properties. */
   2845     max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
   2846     if (max != -1) {
   2847         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
   2848                 max);
   2849     }
   2850 
   2851     /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
   2852     for (i=0; i<2; ++i) {
   2853         int32_t script;
   2854         const char* desc;
   2855         ec = U_ZERO_ERROR;
   2856         switch (i) {
   2857         case 0:
   2858             script = uscript_getScript(-1, &ec);
   2859             desc = "uscript_getScript(-1)";
   2860             break;
   2861         case 1:
   2862             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
   2863             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
   2864             break;
   2865         default:
   2866             log_err("Internal test error. Too many scripts\n");
   2867             return;
   2868         }
   2869         /* We don't explicitly test ec.  It should be U_FAILURE but it
   2870            isn't documented as such. */
   2871         if (script != (int32_t)USCRIPT_INVALID_CODE) {
   2872             log_err("FAIL: %s = %d, exp. 0\n",
   2873                     desc, script);
   2874         }
   2875     }
   2876 }
   2877 
   2878 /* add characters from a serialized set to a normal one */
   2879 static void
   2880 _setAddSerialized(USet *set, const USerializedSet *sset) {
   2881     UChar32 start, end;
   2882     int32_t i, count;
   2883 
   2884     count=uset_getSerializedRangeCount(sset);
   2885     for(i=0; i<count; ++i) {
   2886         uset_getSerializedRange(sset, i, &start, &end);
   2887         uset_addRange(set, start, end);
   2888     }
   2889 }
   2890 
   2891 /* various tests for consistency of UCD data and API behavior */
   2892 static void
   2893 TestConsistency() {
   2894 #if !UCONFIG_NO_NORMALIZATION
   2895     UChar buffer16[300];
   2896 #endif
   2897     char buffer[300];
   2898     USet *set1, *set2, *set3, *set4;
   2899     UErrorCode errorCode;
   2900 
   2901 #if !UCONFIG_NO_NORMALIZATION
   2902     const UNormalizer2 *norm2;
   2903     USerializedSet sset;
   2904 #endif
   2905     UChar32 start, end;
   2906     int32_t i, length;
   2907 
   2908     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
   2909     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
   2910     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
   2911     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
   2912     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
   2913 
   2914     U_STRING_DECL(mathBlocksPattern,
   2915         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
   2916         1+32+46+46+45+43+1+1); /* +1 for NUL */
   2917     U_STRING_DECL(mathPattern, "[:Math:]", 8);
   2918     U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
   2919     U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
   2920     U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
   2921 
   2922     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
   2923     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
   2924     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
   2925     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
   2926     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
   2927 
   2928     U_STRING_INIT(mathBlocksPattern,
   2929         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
   2930         1+32+46+46+45+43+1+1); /* +1 for NUL */
   2931     U_STRING_INIT(mathPattern, "[:Math:]", 8);
   2932     U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
   2933     U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
   2934     U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
   2935 
   2936     /*
   2937      * It used to be that UCD.html and its precursors said
   2938      * "Those dashes used to mark connections between pieces of words,
   2939      *  plus the Katakana middle dot."
   2940      *
   2941      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
   2942      * but not from Hyphen.
   2943      * UTC 94 (2003mar) decided to leave it that way and to changed UCD.html.
   2944      * Therefore, do not show errors when testing the Hyphen property.
   2945      */
   2946     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
   2947                 "known to the UTC and not considered errors.\n");
   2948 
   2949     errorCode=U_ZERO_ERROR;
   2950     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
   2951     set2=uset_openPattern(dashPattern, 8, &errorCode);
   2952     if(U_SUCCESS(errorCode)) {
   2953         /* remove the Katakana middle dot(s) from set1 */
   2954         uset_remove(set1, 0x30fb);
   2955         uset_remove(set1, 0xff65); /* halfwidth variant */
   2956         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
   2957     } else {
   2958         log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   2959     }
   2960 
   2961     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
   2962     set3=uset_openPattern(formatPattern, 6, &errorCode);
   2963     set4=uset_openPattern(alphaPattern, 14, &errorCode);
   2964     if(U_SUCCESS(errorCode)) {
   2965         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
   2966         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
   2967         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
   2968     } else {
   2969         log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   2970     }
   2971 
   2972     uset_close(set1);
   2973     uset_close(set2);
   2974     uset_close(set3);
   2975     uset_close(set4);
   2976 
   2977     /*
   2978      * Check that each lowercase character has "small" in its name
   2979      * and not "capital".
   2980      * There are some such characters, some of which seem odd.
   2981      * Use the verbose flag to see these notices.
   2982      */
   2983     errorCode=U_ZERO_ERROR;
   2984     set1=uset_openPattern(lowerPattern, 13, &errorCode);
   2985     if(U_SUCCESS(errorCode)) {
   2986         for(i=0;; ++i) {
   2987             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
   2988             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
   2989                 break; /* done */
   2990             }
   2991             if(U_FAILURE(errorCode)) {
   2992                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
   2993                         i, u_errorName(errorCode));
   2994                 break;
   2995             }
   2996             if(length!=0) {
   2997                 break; /* done with code points, got a string or -1 */
   2998             }
   2999 
   3000             while(start<=end) {
   3001                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
   3002                 if(U_FAILURE(errorCode)) {
   3003                     log_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
   3004                     errorCode=U_ZERO_ERROR;
   3005                     continue;
   3006                 }
   3007                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
   3008                     strstr(buffer, "SMALL CAPITAL")==NULL
   3009                 ) {
   3010                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
   3011                 }
   3012                 ++start;
   3013             }
   3014         }
   3015     } else {
   3016         log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3017     }
   3018     uset_close(set1);
   3019 
   3020 #if !UCONFIG_NO_NORMALIZATION
   3021 
   3022     /*
   3023      * Test for an example that unorm_getCanonStartSet() delivers
   3024      * all characters that compose from the input one,
   3025      * even in multiple steps.
   3026      * For example, the set for "I" (0049) should contain both
   3027      * I-diaeresis (00CF) and I-diaeresis-acute (1E2E).
   3028      * In general, the set for the middle such character should be a subset
   3029      * of the set for the first.
   3030      */
   3031     errorCode=U_ZERO_ERROR;
   3032     norm2=unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, &errorCode);
   3033     if(U_FAILURE(errorCode)) {
   3034         log_data_err("unorm2_getInstance(NFD) failed - %s\n", u_errorName(errorCode));
   3035         return;
   3036     }
   3037 
   3038     set1=uset_open(1, 0);
   3039     set2=uset_open(1, 0);
   3040 
   3041     if (unorm_getCanonStartSet(0x49, &sset)) {
   3042         UChar source[1];
   3043 
   3044         _setAddSerialized(set1, &sset);
   3045 
   3046         /* enumerate all characters that are plausible to be latin letters */
   3047         for(start=0xa0; start<0x2000; ++start) {
   3048             source[0]=(UChar)start;
   3049             length=unorm2_normalize(norm2, source, 1, buffer16, LENGTHOF(buffer16), &errorCode);
   3050             if(length>1 && buffer16[0]==0x49) {
   3051                 uset_add(set2, start);
   3052             }
   3053         }
   3054 
   3055         compareUSets(set1, set2,
   3056                      "[canon start set of 0049]", "[all c with canon decomp with 0049]",
   3057                      TRUE);
   3058     } else {
   3059       log_err("error calling unorm_getCanonStartSet()\n");
   3060     }
   3061 
   3062     uset_close(set1);
   3063     uset_close(set2);
   3064 
   3065 #endif
   3066 
   3067     /* verify that all assigned characters in Math blocks are exactly Math characters */
   3068     errorCode=U_ZERO_ERROR;
   3069     set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
   3070     set2=uset_openPattern(mathPattern, 8, &errorCode);
   3071     set3=uset_openPattern(unassignedPattern, 6, &errorCode);
   3072     if(U_SUCCESS(errorCode)) {
   3073         uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
   3074         uset_complement(set3);      /* assigned characters */
   3075         uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
   3076         compareUSets(set1, set2,
   3077                      "[assigned Math block chars]", "[math blocks]&[:Math:]",
   3078                      TRUE);
   3079     } else {
   3080         log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3081     }
   3082     uset_close(set1);
   3083     uset_close(set2);
   3084     uset_close(set3);
   3085 
   3086     /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
   3087     errorCode=U_ZERO_ERROR;
   3088     set1=uset_openPattern(unknownPattern, 14, &errorCode);
   3089     set2=uset_openPattern(reservedPattern, 20, &errorCode);
   3090     if(U_SUCCESS(errorCode)) {
   3091         compareUSets(set1, set2,
   3092                      "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
   3093                      TRUE);
   3094     } else {
   3095         log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
   3096     }
   3097     uset_close(set1);
   3098     uset_close(set2);
   3099 }
   3100 
/*
 * Starting with ICU4C 3.4, the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and therefore not included
 * in the data package any more.
 * Tests requiring these files are disabled so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 */
   3111 #define HARDCODED_DATA_4497 1
   3112 
   3113 /* API coverage for ucase.c */
   3114 static void TestUCase() {
   3115 #if !HARDCODED_DATA_4497
   3116     UDataMemory *pData;
   3117     UCaseProps *csp;
   3118     const UCaseProps *ccsp;
   3119     UErrorCode errorCode;
   3120 
   3121     /* coverage for ucase_openBinary() */
   3122     errorCode=U_ZERO_ERROR;
   3123     pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
   3124     if(U_FAILURE(errorCode)) {
   3125         log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
   3126                     u_errorName(errorCode));
   3127         return;
   3128     }
   3129 
   3130     csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
   3131     if(U_FAILURE(errorCode)) {
   3132         log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
   3133                 u_errorName(errorCode));
   3134         udata_close(pData);
   3135         return;
   3136     }
   3137 
   3138     if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
   3139         log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
   3140     }
   3141 
   3142     ucase_close(csp);
   3143     udata_close(pData);
   3144 
   3145     /* coverage for ucase_getDummy() */
   3146     errorCode=U_ZERO_ERROR;
   3147     ccsp=ucase_getDummy(&errorCode);
   3148     if(ucase_tolower(ccsp, 0x41)!=0x41) {
   3149         log_err("ucase_tolower(dummy, A)!=A\n");
   3150     }
   3151 #endif
   3152 }
   3153 
   3154 /* API coverage for ubidi_props.c */
   3155 static void TestUBiDiProps() {
   3156 #if !HARDCODED_DATA_4497
   3157     UDataMemory *pData;
   3158     UBiDiProps *bdp;
   3159     const UBiDiProps *cbdp;
   3160     UErrorCode errorCode;
   3161 
   3162     /* coverage for ubidi_openBinary() */
   3163     errorCode=U_ZERO_ERROR;
   3164     pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
   3165     if(U_FAILURE(errorCode)) {
   3166         log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
   3167                     u_errorName(errorCode));
   3168         return;
   3169     }
   3170 
   3171     bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
   3172     if(U_FAILURE(errorCode)) {
   3173         log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
   3174                 u_errorName(errorCode));
   3175         udata_close(pData);
   3176         return;
   3177     }
   3178 
   3179     if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
   3180         log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
   3181     }
   3182 
   3183     ubidi_closeProps(bdp);
   3184     udata_close(pData);
   3185 
   3186     /* coverage for ubidi_getDummy() */
   3187     errorCode=U_ZERO_ERROR;
   3188     cbdp=ubidi_getDummy(&errorCode);
   3189     if(ubidi_getClass(cbdp, 0x20)!=0) {
   3190         log_err("ubidi_getClass(dummy, space)!=0\n");
   3191     }
   3192 #endif
   3193 }
   3194 
   3195 /* test case folding, compare return values with CaseFolding.txt ------------ */
   3196 
   3197 /* bit set for which case foldings for a character have been tested already */
   3198 enum {
   3199     CF_SIMPLE=1,
   3200     CF_FULL=2,
   3201     CF_TURKIC=4,
   3202     CF_ALL=7
   3203 };
   3204 
   3205 static void
   3206 testFold(UChar32 c, int which,
   3207          UChar32 simple, UChar32 turkic,
   3208          const UChar *full, int32_t fullLength,
   3209          const UChar *turkicFull, int32_t turkicFullLength) {
   3210     UChar s[2], t[32];
   3211     UChar32 c2;
   3212     int32_t length, length2;
   3213 
   3214     UErrorCode errorCode=U_ZERO_ERROR;
   3215 
   3216     length=0;
   3217     U16_APPEND_UNSAFE(s, length, c);
   3218 
   3219     if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
   3220         log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
   3221     }
   3222     if((which&CF_FULL)!=0) {
   3223         length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);
   3224         if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
   3225             log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
   3226         }
   3227     }
   3228     if((which&CF_TURKIC)!=0) {
   3229         if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
   3230             log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
   3231         }
   3232 
   3233         length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
   3234         if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
   3235             log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
   3236         }
   3237     }
   3238 }
   3239 
   3240 /* test that c case-folds to itself */
   3241 static void
   3242 testFoldToSelf(UChar32 c, int which) {
   3243     UChar s[2];
   3244     int32_t length;
   3245 
   3246     length=0;
   3247     U16_APPEND_UNSAFE(s, length, c);
   3248     testFold(c, which, c, c, s, length, s, length);
   3249 }
   3250 
   3251 struct CaseFoldingData {
   3252     USet *notSeen;
   3253     UChar32 prev, prevSimple;
   3254     UChar prevFull[32];
   3255     int32_t prevFullLength;
   3256     int which;
   3257 };
   3258 typedef struct CaseFoldingData CaseFoldingData;
   3259 
   3260 static void U_CALLCONV
   3261 caseFoldingLineFn(void *context,
   3262                   char *fields[][2], int32_t fieldCount,
   3263                   UErrorCode *pErrorCode) {
   3264     CaseFoldingData *pData=(CaseFoldingData *)context;
   3265     char *end;
   3266     UChar full[32];
   3267     UChar32 c, prev, simple;
   3268     int32_t count;
   3269     int which;
   3270     char status;
   3271 
   3272     /* get code point */
   3273     c=(UChar32)strtoul(u_skipWhitespace(fields[0][0]), &end, 16);
   3274     end=(char *)u_skipWhitespace(end);
   3275     if(end<=fields[0][0] || end!=fields[0][1]) {
   3276         log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
   3277         *pErrorCode=U_PARSE_ERROR;
   3278         return;
   3279     }
   3280 
   3281     /* get the status of this mapping */
   3282     status=*u_skipWhitespace(fields[1][0]);
   3283     if(status!='C' && status!='S' && status!='F' && status!='T') {
   3284         log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
   3285         *pErrorCode=U_PARSE_ERROR;
   3286         return;
   3287     }
   3288 
   3289     /* get the mapping */
   3290     count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
   3291     if(U_FAILURE(*pErrorCode)) {
   3292         log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
   3293         return;
   3294     }
   3295 
   3296     /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
   3297     if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
   3298         simple=c;
   3299     }
   3300 
   3301     if(c!=(prev=pData->prev)) {
   3302         /*
   3303          * Test remaining mappings for the previous code point.
   3304          * If a turkic folding was not mentioned, then it should fold the same
   3305          * as the regular simple case folding.
   3306          */
   3307         UChar s[2];
   3308         int32_t length;
   3309 
   3310         length=0;
   3311         U16_APPEND_UNSAFE(s, length, prev);
   3312         testFold(prev, (~pData->which)&CF_ALL,
   3313                  prev, pData->prevSimple,
   3314                  s, length,
   3315                  pData->prevFull, pData->prevFullLength);
   3316         pData->prev=pData->prevSimple=c;
   3317         length=0;
   3318         U16_APPEND_UNSAFE(pData->prevFull, length, c);
   3319         pData->prevFullLength=length;
   3320         pData->which=0;
   3321     }
   3322 
   3323     /*
   3324      * Turn the status into a bit set of case foldings to test.
   3325      * Remember non-Turkic case foldings as defaults for Turkic mode.
   3326      */
   3327     switch(status) {
   3328     case 'C':
   3329         which=CF_SIMPLE|CF_FULL;
   3330         pData->prevSimple=simple;
   3331         u_memcpy(pData->prevFull, full, count);
   3332         pData->prevFullLength=count;
   3333         break;
   3334     case 'S':
   3335         which=CF_SIMPLE;
   3336         pData->prevSimple=simple;
   3337         break;
   3338     case 'F':
   3339         which=CF_FULL;
   3340         u_memcpy(pData->prevFull, full, count);
   3341         pData->prevFullLength=count;
   3342         break;
   3343     case 'T':
   3344         which=CF_TURKIC;
   3345         break;
   3346     default:
   3347         which=0;
   3348         break; /* won't happen because of test above */
   3349     }
   3350 
   3351     testFold(c, which, simple, simple, full, count, full, count);
   3352 
   3353     /* remember which case foldings of c have been tested */
   3354     pData->which|=which;
   3355 
   3356     /* remove c from the set of ones not mentioned in CaseFolding.txt */
   3357     uset_remove(pData->notSeen, c);
   3358 }
   3359 
   3360 static void
   3361 TestCaseFolding() {
   3362     CaseFoldingData data={ NULL };
   3363     char *fields[3][2];
   3364     UErrorCode errorCode;
   3365 
   3366     static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
   3367 
   3368     errorCode=U_ZERO_ERROR;
   3369     /* test BMP & plane 1 - nothing interesting above */
   3370     data.notSeen=uset_open(0, 0x1ffff);
   3371     data.prevFullLength=1; /* length of full case folding of U+0000 */
   3372 
   3373     parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
   3374     if(U_SUCCESS(errorCode)) {
   3375         int32_t i, start, end;
   3376 
   3377         /* add a pseudo-last line to finish testing of the actual last one */
   3378         fields[0][0]=lastLine;
   3379         fields[0][1]=lastLine+6;
   3380         fields[1][0]=lastLine+7;
   3381         fields[1][1]=lastLine+9;
   3382         fields[2][0]=lastLine+10;
   3383         fields[2][1]=lastLine+17;
   3384         caseFoldingLineFn(&data, fields, 3, &errorCode);
   3385 
   3386         /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
   3387         for(i=0;
   3388             0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
   3389                 U_SUCCESS(errorCode);
   3390             ++i
   3391         ) {
   3392             do {
   3393                 testFoldToSelf(start, CF_ALL);
   3394             } while(++start<=end);
   3395         }
   3396     }
   3397 
   3398     uset_close(data.notSeen);
   3399 }
   3400