Home | History | Annotate | Download | only in intltest
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2002-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  strcase.cpp
     11 *   encoding:   US-ASCII
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2002mar12
     16 *   created by: Markus W. Scherer
     17 *
     18 *   Test file for string casing C++ API functions.
     19 */
     20 
     21 #include "unicode/std_string.h"
     22 #include "unicode/uchar.h"
     23 #include "unicode/ures.h"
     24 #include "unicode/uloc.h"
     25 #include "unicode/locid.h"
     26 #include "unicode/ubrk.h"
     27 #include "unicode/unistr.h"
     28 #include "unicode/ucasemap.h"
     29 #include "ucase.h"
     30 #include "ustrtest.h"
     31 #include "unicode/tstdtmod.h"
     32 #include "cmemory.h"
     33 
     34 StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {}
     35 
     36 StringCaseTest::~StringCaseTest() {}
     37 
     38 void
     39 StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
     40     if(exec) {
     41         logln("TestSuite StringCaseTest: ");
     42     }
     43     TESTCASE_AUTO_BEGIN;
     44     TESTCASE_AUTO(TestCaseConversion);
     45 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
     46     TESTCASE_AUTO(TestCasing);
     47 #endif
     48     TESTCASE_AUTO(TestFullCaseFoldingIterator);
     49     TESTCASE_AUTO(TestGreekUpper);
     50     TESTCASE_AUTO(TestLongUpper);
     51     TESTCASE_AUTO(TestMalformedUTF8);
     52     TESTCASE_AUTO(TestBufferOverflow);
     53     TESTCASE_AUTO_END;
     54 }
     55 
     56 void
     57 StringCaseTest::TestCaseConversion()
     58 {
     59     static const UChar uppercaseGreek[] =
     60         { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
     61         0x39f, 0x3a3, 0 };
     62         // "IESUS CHRISTOS"
     63 
     64     static const UChar lowercaseGreek[] =
     65         { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
     66         0x3bf, 0x3c2, 0 };
     67         // "iesus christos"
     68 
     69     static const UChar lowercaseTurkish[] =
     70         { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
     71         0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
     72 
     73     static const UChar uppercaseTurkish[] =
     74         { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
     75         0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
     76 
     77     UnicodeString expectedResult;
     78     UnicodeString   test3;
     79 
     80     test3 += (UChar32)0x0130;
     81     test3 += "STANBUL, NOT CONSTANTINOPLE!";
     82 
     83     UnicodeString   test4(test3);
     84     test4.toLower(Locale(""));
     85     expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
     86     if (test4 != expectedResult)
     87         errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
     88 
     89     test4 = test3;
     90     test4.toLower(Locale("tr", "TR"));
     91     expectedResult = lowercaseTurkish;
     92     if (test4 != expectedResult)
     93         errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
     94 
     95     test3 = "topkap";
     96     test3 += (UChar32)0x0131;
     97     test3 += " palace, istanbul";
     98     test4 = test3;
     99 
    100     test4.toUpper(Locale(""));
    101     expectedResult = "TOPKAPI PALACE, ISTANBUL";
    102     if (test4 != expectedResult)
    103         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
    104 
    105     test4 = test3;
    106     test4.toUpper(Locale("tr", "TR"));
    107     expectedResult = uppercaseTurkish;
    108     if (test4 != expectedResult)
    109         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
    110 
    111     test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
    112 
    113     test3.toUpper(Locale("de", "DE"));
    114     expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
    115     if (test3 != expectedResult)
    116         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\".");
    117 
    118     test4.replace(0, test4.length(), uppercaseGreek);
    119 
    120     test4.toLower(Locale("el", "GR"));
    121     expectedResult = lowercaseGreek;
    122     if (test4 != expectedResult)
    123         errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
    124 
    125     test4.replace(0, test4.length(), lowercaseGreek);
    126 
    127     test4.toUpper();
    128     expectedResult = uppercaseGreek;
    129     if (test4 != expectedResult)
    130         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
    131 
    132     // more string case mapping tests with the new implementation
    133     {
    134         static const UChar
    135 
    136         beforeLower[]= { 0x61, 0x42, 0x49,  0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
    137         lowerRoot[]=   { 0x61, 0x62, 0x69,  0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
    138         lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
    139 
    140         beforeUpper[]= { 0x61, 0x42, 0x69,  0x3c2, 0xdf,       0x3c3, 0x2f, 0xfb03,           0xfb03,           0xfb03,           0xd93f, 0xdfff },
    141         upperRoot[]=   { 0x41, 0x42, 0x49,  0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
    142         upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
    143 
    144         beforeMiniUpper[]=  { 0xdf, 0x61 },
    145         miniUpper[]=        { 0x53, 0x53, 0x41 };
    146 
    147         UnicodeString s;
    148 
    149         /* lowercase with root locale */
    150         s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower));
    151         s.toLower("");
    152         if( s.length()!=UPRV_LENGTHOF(lowerRoot) ||
    153             s!=UnicodeString(FALSE, lowerRoot, s.length())
    154         ) {
    155             errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, UPRV_LENGTHOF(lowerRoot)) + "\"");
    156         }
    157 
    158         /* lowercase with turkish locale */
    159         s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower));
    160         s.setCharAt(0, beforeLower[0]).toLower(Locale("tr"));
    161         if( s.length()!=UPRV_LENGTHOF(lowerTurkish) ||
    162             s!=UnicodeString(FALSE, lowerTurkish, s.length())
    163         ) {
    164             errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, UPRV_LENGTHOF(lowerTurkish)) + "\"");
    165         }
    166 
    167         /* uppercase with root locale */
    168         s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper));
    169         s.setCharAt(0, beforeUpper[0]).toUpper(Locale(""));
    170         if( s.length()!=UPRV_LENGTHOF(upperRoot) ||
    171             s!=UnicodeString(FALSE, upperRoot, s.length())
    172         ) {
    173             errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, UPRV_LENGTHOF(upperRoot)) + "\"");
    174         }
    175 
    176         /* uppercase with turkish locale */
    177         s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper));
    178         s.toUpper(Locale("tr"));
    179         if( s.length()!=UPRV_LENGTHOF(upperTurkish) ||
    180             s!=UnicodeString(FALSE, upperTurkish, s.length())
    181         ) {
    182             errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, UPRV_LENGTHOF(upperTurkish)) + "\"");
    183         }
    184 
    185         /* uppercase a short string with root locale */
    186         s=UnicodeString(FALSE, beforeMiniUpper, UPRV_LENGTHOF(beforeMiniUpper));
    187         s.setCharAt(0, beforeMiniUpper[0]).toUpper("");
    188         if( s.length()!=UPRV_LENGTHOF(miniUpper) ||
    189             s!=UnicodeString(FALSE, miniUpper, s.length())
    190         ) {
    191             errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, UPRV_LENGTHOF(miniUpper)) + "\"");
    192         }
    193     }
    194 
    195     // test some supplementary characters (>= Unicode 3.1)
    196     {
    197         UnicodeString t;
    198 
    199         UnicodeString
    200             deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
    201             deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
    202             deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape();
    203         (t=deseretInput).toLower();
    204         if(t!=deseretLower) {
    205             errln("error lowercasing Deseret (plane 1) characters");
    206         }
    207         (t=deseretInput).toUpper();
    208         if(t!=deseretUpper) {
    209             errln("error uppercasing Deseret (plane 1) characters");
    210         }
    211     }
    212 
    213     // test some more cases that looked like problems
    214     {
    215         UnicodeString t;
    216 
    217         UnicodeString
    218             ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
    219             ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
    220             ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
    221         (t=ljInput).toLower("en");
    222         if(t!=ljLower) {
    223             errln("error lowercasing LJ characters");
    224         }
    225         (t=ljInput).toUpper("en");
    226         if(t!=ljUpper) {
    227             errln("error uppercasing LJ characters");
    228         }
    229     }
    230 
    231 #if !UCONFIG_NO_NORMALIZATION
    232     // some context-sensitive casing depends on normalization data being present
    233 
    234     // Unicode 3.1.1 SpecialCasing tests
    235     {
    236         UnicodeString t;
    237 
    238         // sigmas preceded and/or followed by cased letters
    239         UnicodeString
    240             sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
    241             sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
    242             sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
    243 
    244         (t=sigmas).toLower();
    245         if(t!=sigmasLower) {
    246             errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\"");
    247         }
    248 
    249         (t=sigmas).toUpper(Locale(""));
    250         if(t!=sigmasUpper) {
    251             errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\"");
    252         }
    253 
    254         // turkish & azerbaijani dotless i & dotted I
    255         // remove dot above if there was a capital I before and there are no more accents above
    256         UnicodeString
    257             dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
    258             dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
    259             dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
    260 
    261         (t=dots).toLower("tr");
    262         if(t!=dotsTurkish) {
    263             errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\"");
    264         }
    265 
    266         (t=dots).toLower("de");
    267         if(t!=dotsDefault) {
    268             errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
    269         }
    270     }
    271 
    272     // more Unicode 3.1.1 tests
    273     {
    274         UnicodeString t;
    275 
    276         // lithuanian dot above in uppercasing
    277         UnicodeString
    278             dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
    279             dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
    280             dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
    281 
    282         (t=dots).toUpper("lt");
    283         if(t!=dotsLithuanian) {
    284             errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\"");
    285         }
    286 
    287         (t=dots).toUpper("de");
    288         if(t!=dotsDefault) {
    289             errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
    290         }
    291 
    292         // lithuanian adds dot above to i in lowercasing if there are more above accents
    293         UnicodeString
    294             i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
    295             iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
    296             iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
    297 
    298         (t=i).toLower("lt");
    299         if(t!=iLithuanian) {
    300             errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\"");
    301         }
    302 
    303         (t=i).toLower("de");
    304         if(t!=iDefault) {
    305             errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\"");
    306         }
    307     }
    308 
    309 #endif
    310 
    311     // test case folding
    312     {
    313         UnicodeString
    314             s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
    315             f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
    316             g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
    317             t;
    318 
    319         (t=s).foldCase();
    320         if(f!=t) {
    321             errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\"");
    322         }
    323 
    324         // alternate handling for dotted I/dotless i (U+0130, U+0131)
    325         (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I);
    326         if(g!=t) {
    327             errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\"");
    328         }
    329     }
    330 }
    331 
    332 // data-driven case mapping tests ------------------------------------------ ***
    333 
    334 enum {
    335     TEST_LOWER,
    336     TEST_UPPER,
    337     TEST_TITLE,
    338     TEST_FOLD,
    339     TEST_COUNT
    340 };
    341 
    342 // names of TestData children in casing.txt
    343 static const char *const dataNames[TEST_COUNT+1]={
    344     "lowercasing",
    345     "uppercasing",
    346     "titlecasing",
    347     "casefolding",
    348     ""
    349 };
    350 
    351 void
    352 StringCaseTest::TestCasingImpl(const UnicodeString &input,
    353                                const UnicodeString &output,
    354                                int32_t whichCase,
    355                                void *iter, const char *localeID, uint32_t options) {
    356     // UnicodeString
    357     UnicodeString result;
    358     const char *name;
    359     Locale locale(localeID);
    360 
    361     result=input;
    362     switch(whichCase) {
    363     case TEST_LOWER:
    364         name="toLower";
    365         result.toLower(locale);
    366         break;
    367     case TEST_UPPER:
    368         name="toUpper";
    369         result.toUpper(locale);
    370         break;
    371 #if !UCONFIG_NO_BREAK_ITERATION
    372     case TEST_TITLE:
    373         name="toTitle";
    374         result.toTitle((BreakIterator *)iter, locale, options);
    375         break;
    376 #endif
    377     case TEST_FOLD:
    378         name="foldCase";
    379         result.foldCase(options);
    380         break;
    381     default:
    382         name="";
    383         break; // won't happen
    384     }
    385     if(result!=output) {
    386         dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name);
    387     }
    388 #if !UCONFIG_NO_BREAK_ITERATION
    389     if(whichCase==TEST_TITLE && options==0) {
    390         result=input;
    391         result.toTitle((BreakIterator *)iter, locale);
    392         if(result!=output) {
    393             dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
    394         }
    395     }
    396 #endif
    397 
    398     // UTF-8
    399     char utf8In[100], utf8Out[100];
    400     int32_t utf8InLength, utf8OutLength, resultLength;
    401     UChar *buffer;
    402 
    403     IcuTestErrorCode errorCode(*this, "TestCasingImpl");
    404     LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode));
    405 #if !UCONFIG_NO_BREAK_ITERATION
    406     if(iter!=NULL) {
    407         // Clone the break iterator so that the UCaseMap can safely adopt it.
    408         UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode);
    409         ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode);
    410     }
    411 #endif
    412 
    413     u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode);
    414     switch(whichCase) {
    415     case TEST_LOWER:
    416         name="ucasemap_utf8ToLower";
    417         utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
    418                     utf8Out, (int32_t)sizeof(utf8Out),
    419                     utf8In, utf8InLength, errorCode);
    420         break;
    421     case TEST_UPPER:
    422         name="ucasemap_utf8ToUpper";
    423         utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(),
    424                     utf8Out, (int32_t)sizeof(utf8Out),
    425                     utf8In, utf8InLength, errorCode);
    426         break;
    427 #if !UCONFIG_NO_BREAK_ITERATION
    428     case TEST_TITLE:
    429         name="ucasemap_utf8ToTitle";
    430         utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(),
    431                     utf8Out, (int32_t)sizeof(utf8Out),
    432                     utf8In, utf8InLength, errorCode);
    433         break;
    434 #endif
    435     case TEST_FOLD:
    436         name="ucasemap_utf8FoldCase";
    437         utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(),
    438                     utf8Out, (int32_t)sizeof(utf8Out),
    439                     utf8In, utf8InLength, errorCode);
    440         break;
    441     default:
    442         name="";
    443         utf8OutLength=0;
    444         break; // won't happen
    445     }
    446     buffer=result.getBuffer(utf8OutLength);
    447     u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode);
    448     result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0);
    449 
    450     if(errorCode.isFailure()) {
    451         errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode));
    452         errorCode.reset();
    453     } else if(result!=output) {
    454         errln("error: %s() got a wrong result for a test case from casing.res", name);
    455         errln("expected \"" + output + "\" got \"" + result + "\"" );
    456     }
    457 }
    458 
    459 void
    460 StringCaseTest::TestCasing() {
    461     UErrorCode status = U_ZERO_ERROR;
    462 #if !UCONFIG_NO_BREAK_ITERATION
    463     LocalUBreakIteratorPointer iter;
    464 #endif
    465     char cLocaleID[100];
    466     UnicodeString locale, input, output, optionsString, result;
    467     uint32_t options;
    468     int32_t whichCase, type;
    469     LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status));
    470     if(U_SUCCESS(status)) {
    471         for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) {
    472 #if UCONFIG_NO_BREAK_ITERATION
    473             if(whichCase==TEST_TITLE) {
    474                 continue;
    475             }
    476 #endif
    477             LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status));
    478             if(U_FAILURE(status)) {
    479                 errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status));
    480                 break;
    481             }
    482             const DataMap *myCase = NULL;
    483             while(casingTest->nextCase(myCase, status)) {
    484                 input = myCase->getString("Input", status);
    485                 output = myCase->getString("Output", status);
    486 
    487                 if(whichCase!=TEST_FOLD) {
    488                     locale = myCase->getString("Locale", status);
    489                 }
    490                 locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), "");
    491 
    492 #if !UCONFIG_NO_BREAK_ITERATION
    493                 if(whichCase==TEST_TITLE) {
    494                     type = myCase->getInt("Type", status);
    495                     if(type>=0) {
    496                         iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status));
    497                     } else if(type==-2) {
    498                         // Open a trivial break iterator that only delivers { 0, length }
    499                         // or even just { 0 } as boundaries.
    500                         static const UChar rules[] = { 0x2e, 0x2a, 0x3b };  // ".*;"
    501                         UParseError parseError;
    502                         iter.adoptInstead(ubrk_openRules(rules, UPRV_LENGTHOF(rules), NULL, 0, &parseError, &status));
    503                     }
    504                 }
    505 #endif
    506                 options = 0;
    507                 if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) {
    508                     optionsString = myCase->getString("Options", status);
    509                     if(optionsString.indexOf((UChar)0x54)>=0) {  // T
    510                         options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I;
    511                     }
    512                     if(optionsString.indexOf((UChar)0x4c)>=0) {  // L
    513                         options|=U_TITLECASE_NO_LOWERCASE;
    514                     }
    515                     if(optionsString.indexOf((UChar)0x41)>=0) {  // A
    516                         options|=U_TITLECASE_NO_BREAK_ADJUSTMENT;
    517                     }
    518                 }
    519 
    520                 if(U_FAILURE(status)) {
    521                     dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase],  u_errorName(status));
    522                     status = U_ZERO_ERROR;
    523                 } else {
    524 #if UCONFIG_NO_BREAK_ITERATION
    525                     LocalPointer<UMemory> iter;
    526 #endif
    527                     TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options);
    528                 }
    529 
    530 #if !UCONFIG_NO_BREAK_ITERATION
    531                 iter.adoptInstead(NULL);
    532 #endif
    533             }
    534         }
    535     }
    536 
    537 #if !UCONFIG_NO_BREAK_ITERATION
    538     // more tests for API coverage
    539     status=U_ZERO_ERROR;
    540     input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
    541     (result=input).toTitle(NULL);
    542     if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
    543         dataerrln("UnicodeString::toTitle(NULL) failed.");
    544     }
    545 #endif
    546 }
    547 
    548 void
    549 StringCaseTest::TestFullCaseFoldingIterator() {
    550     UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi");
    551     UnicodeString ss=UNICODE_STRING_SIMPLE("ss");
    552     FullCaseFoldingIterator iter;
    553     int32_t count=0;
    554     int32_t countSpecific=0;
    555     UChar32 c;
    556     UnicodeString full;
    557     while((c=iter.next(full))>=0) {
    558         ++count;
    559         // Check that the full Case_Folding has more than 1 code point.
    560         if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) {
    561             errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c);
    562             continue;
    563         }
    564         // Check that full == Case_Folding(c).
    565         UnicodeString cf(c);
    566         cf.foldCase();
    567         if(full!=cf) {
    568             errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c);
    569             continue;
    570         }
    571         // Spot-check a couple of specific cases.
    572         if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) {
    573             ++countSpecific;
    574         }
    575     }
    576     if(countSpecific!=3) {
    577         errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases");
    578     }
    579     if(count<70) {
    580         errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count);
    581     }
    582 }
    583 
    584 void
    585 StringCaseTest::assertGreekUpper(const char *s, const char *expected) {
    586     UnicodeString s16 = UnicodeString(s).unescape();
    587     UnicodeString expected16 = UnicodeString(expected).unescape();
    588     UnicodeString msg = UnicodeString("UnicodeString::toUpper/Greek(\"") + s16 + "\")";
    589     UnicodeString result16(s16);
    590     result16.toUpper(GREEK_LOCALE_);
    591     assertEquals(msg, expected16, result16);
    592 
    593     msg = UnicodeString("u_strToUpper/Greek(\"") + s16 + "\") cap=";
    594     int32_t length = expected16.length();
    595     int32_t capacities[] = {
    596         // Keep in sync with the UTF-8 capacities near the bottom of this function.
    597         0, length / 2, length - 1, length, length + 1
    598     };
    599     for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
    600         int32_t cap = capacities[i];
    601         UChar *dest16 = result16.getBuffer(expected16.length() + 1);
    602         u_memset(dest16, 0x55AA, result16.getCapacity());
    603         UErrorCode errorCode = U_ZERO_ERROR;
    604         length = u_strToUpper(dest16, cap, s16.getBuffer(), s16.length(), "el", &errorCode);
    605         assertEquals(msg + cap, expected16.length(), length);
    606         UErrorCode expectedErrorCode;
    607         if (cap < expected16.length()) {
    608             expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
    609         } else if (cap == expected16.length()) {
    610             expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
    611         } else {
    612             expectedErrorCode = U_ZERO_ERROR;
    613             assertEquals(msg + cap + " NUL", 0, dest16[length]);
    614         }
    615         assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
    616         result16.releaseBuffer(length);
    617         if (cap >= expected16.length()) {
    618             assertEquals(msg + cap, expected16, result16);
    619         }
    620     }
    621 
    622 #if U_HAVE_STD_STRING
    623     UErrorCode errorCode = U_ZERO_ERROR;
    624     LocalUCaseMapPointer csm(ucasemap_open("el", 0, &errorCode));
    625     assertSuccess("ucasemap_open", errorCode);
    626     std::string s8;
    627     s16.toUTF8String(s8);
    628     msg = UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16 + "\")";
    629     char dest8[1000];
    630     length = ucasemap_utf8ToUpper(csm.getAlias(), dest8, UPRV_LENGTHOF(dest8),
    631                                   s8.data(), s8.length(), &errorCode);
    632     assertSuccess("ucasemap_utf8ToUpper", errorCode);
    633     StringPiece result8(dest8, length);
    634     UnicodeString result16From8 = UnicodeString::fromUTF8(result8);
    635     assertEquals(msg, expected16, result16From8);
    636 
    637     msg += " cap=";
    638     capacities[1] = length / 2;
    639     capacities[2] = length - 1;
    640     capacities[3] = length;
    641     capacities[4] = length + 1;
    642     char dest8b[1000];
    643     int32_t expected8Length = length;  // Assuming the previous call worked.
    644     for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
    645         int32_t cap = capacities[i];
    646         memset(dest8b, 0x5A, UPRV_LENGTHOF(dest8b));
    647         UErrorCode errorCode = U_ZERO_ERROR;
    648         length = ucasemap_utf8ToUpper(csm.getAlias(), dest8b, cap,
    649                                       s8.data(), s8.length(), &errorCode);
    650         assertEquals(msg + cap, expected8Length, length);
    651         UErrorCode expectedErrorCode;
    652         if (cap < expected8Length) {
    653             expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
    654         } else if (cap == expected8Length) {
    655             expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
    656         } else {
    657             expectedErrorCode = U_ZERO_ERROR;
    658             assertEquals(msg + cap + " NUL", 0, dest8b[length]);
    659         }
    660         assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
    661         if (cap >= expected8Length) {
    662             assertEquals(msg + cap + " (memcmp)", 0, memcmp(dest8, dest8b, expected8Length));
    663         }
    664     }
    665 #endif
    666 }
    667 
    668 void
    669 StringCaseTest::TestGreekUpper() {
    670     // See UCharacterCaseTest.java for human-readable strings.
    671 
    672     // http://bugs.icu-project.org/trac/ticket/5456
    673     assertGreekUpper("\\u03AC\\u03B4\\u03B9\\u03BA\\u03BF\\u03C2, "
    674                      "\\u03BA\\u03B5\\u03AF\\u03BC\\u03B5\\u03BD\\u03BF, "
    675                      "\\u03AF\\u03C1\\u03B9\\u03B4\\u03B1",
    676                      "\\u0391\\u0394\\u0399\\u039A\\u039F\\u03A3, "
    677                      "\\u039A\\u0395\\u0399\\u039C\\u0395\\u039D\\u039F, "
    678                      "\\u0399\\u03A1\\u0399\\u0394\\u0391");
    679     // https://bugzilla.mozilla.org/show_bug.cgi?id=307039
    680     // https://bug307039.bmoattachments.org/attachment.cgi?id=194893
    681     assertGreekUpper("\\u03A0\\u03B1\\u03C4\\u03AC\\u03C4\\u03B1",
    682                      "\\u03A0\\u0391\\u03A4\\u0391\\u03A4\\u0391");
    683     assertGreekUpper("\\u0391\\u03AD\\u03C1\\u03B1\\u03C2, "
    684                      "\\u039C\\u03C5\\u03C3\\u03C4\\u03AE\\u03C1\\u03B9\\u03BF, "
    685                      "\\u03A9\\u03C1\\u03B1\\u03AF\\u03BF",
    686                      "\\u0391\\u0395\\u03A1\\u0391\\u03A3, "
    687                      "\\u039C\\u03A5\\u03A3\\u03A4\\u0397\\u03A1\\u0399\\u039F, "
    688                      "\\u03A9\\u03A1\\u0391\\u0399\\u039F");
    689     assertGreekUpper("\\u039C\\u03B1\\u0390\\u03BF\\u03C5, \\u03A0\\u03CC\\u03C1\\u03BF\\u03C2, "
    690                      "\\u03A1\\u03CD\\u03B8\\u03BC\\u03B9\\u03C3\\u03B7",
    691                      "\\u039C\\u0391\\u03AA\\u039F\\u03A5, \\u03A0\\u039F\\u03A1\\u039F\\u03A3, "
    692                      "\\u03A1\\u03A5\\u0398\\u039C\\u0399\\u03A3\\u0397");
    693     assertGreekUpper("\\u03B0, \\u03A4\\u03B7\\u03C1\\u03CE, \\u039C\\u03AC\\u03B9\\u03BF\\u03C2",
    694                      "\\u03AB, \\u03A4\\u0397\\u03A1\\u03A9, \\u039C\\u0391\\u03AA\\u039F\\u03A3");
    695     assertGreekUpper("\\u03AC\\u03C5\\u03BB\\u03BF\\u03C2",
    696                      "\\u0391\\u03AB\\u039B\\u039F\\u03A3");
    697     assertGreekUpper("\\u0391\\u03AB\\u039B\\u039F\\u03A3",
    698                      "\\u0391\\u03AB\\u039B\\u039F\\u03A3");
    699     assertGreekUpper("\\u0386\\u03BA\\u03BB\\u03B9\\u03C4\\u03B1 "
    700                      "\\u03C1\\u03AE\\u03BC\\u03B1\\u03C4\\u03B1 \\u03AE "
    701                      "\\u03AC\\u03BA\\u03BB\\u03B9\\u03C4\\u03B5\\u03C2 "
    702                      "\\u03BC\\u03B5\\u03C4\\u03BF\\u03C7\\u03AD\\u03C2",
    703                      "\\u0391\\u039A\\u039B\\u0399\\u03A4\\u0391 "
    704                      "\\u03A1\\u0397\\u039C\\u0391\\u03A4\\u0391 \\u0397\\u0301 "
    705                      "\\u0391\\u039A\\u039B\\u0399\\u03A4\\u0395\\u03A3 "
    706                      "\\u039C\\u0395\\u03A4\\u039F\\u03A7\\u0395\\u03A3");
    707     // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html
    708     assertGreekUpper("\\u0395\\u03C0\\u03B5\\u03B9\\u03B4\\u03AE \\u03B7 "
    709                      "\\u03B1\\u03BD\\u03B1\\u03B3\\u03BD\\u03CE\\u03C1\\u03B9\\u03C3\\u03B7 "
    710                      "\\u03C4\\u03B7\\u03C2 \\u03B1\\u03BE\\u03B9\\u03BF\\u03C0\\u03C1\\u03AD"
    711                      "\\u03C0\\u03B5\\u03B9\\u03B1\\u03C2",
    712                      "\\u0395\\u03A0\\u0395\\u0399\\u0394\\u0397 \\u0397 "
    713                      "\\u0391\\u039D\\u0391\\u0393\\u039D\\u03A9\\u03A1\\u0399\\u03A3\\u0397 "
    714                      "\\u03A4\\u0397\\u03A3 \\u0391\\u039E\\u0399\\u039F\\u03A0\\u03A1\\u0395"
    715                      "\\u03A0\\u0395\\u0399\\u0391\\u03A3");
    716     assertGreekUpper("\\u03BD\\u03BF\\u03BC\\u03B9\\u03BA\\u03BF\\u03CD \\u03AE "
    717                      "\\u03B4\\u03B9\\u03B5\\u03B8\\u03BD\\u03BF\\u03CD\\u03C2",
    718                      "\\u039D\\u039F\\u039C\\u0399\\u039A\\u039F\\u03A5 \\u0397\\u0301 "
    719                      "\\u0394\\u0399\\u0395\\u0398\\u039D\\u039F\\u03A5\\u03A3");
    720     // http://unicode.org/udhr/d/udhr_ell_polytonic.html
    721     assertGreekUpper("\\u1F18\\u03C0\\u03B5\\u03B9\\u03B4\\u1F74 \\u1F21 "
    722                      "\\u1F00\\u03BD\\u03B1\\u03B3\\u03BD\\u1F7D\\u03C1\\u03B9\\u03C3\\u03B7",
    723                      "\\u0395\\u03A0\\u0395\\u0399\\u0394\\u0397 \\u0397 "
    724                      "\\u0391\\u039D\\u0391\\u0393\\u039D\\u03A9\\u03A1\\u0399\\u03A3\\u0397");
    725     assertGreekUpper("\\u03BD\\u03BF\\u03BC\\u03B9\\u03BA\\u03BF\\u1FE6 \\u1F22 "
    726                      "\\u03B4\\u03B9\\u03B5\\u03B8\\u03BD\\u03BF\\u1FE6\\u03C2",
    727                      "\\u039D\\u039F\\u039C\\u0399\\u039A\\u039F\\u03A5 \\u0397\\u0301 "
    728                      "\\u0394\\u0399\\u0395\\u0398\\u039D\\u039F\\u03A5\\u03A3");
    729     // From Google bug report
    730     assertGreekUpper("\\u039D\\u03AD\\u03BF, "
    731                      "\\u0394\\u03B7\\u03BC\\u03B9\\u03BF\\u03C5\\u03C1\\u03B3\\u03AF\\u03B1",
    732                      "\\u039D\\u0395\\u039F, "
    733                      "\\u0394\\u0397\\u039C\\u0399\\u039F\\u03A5\\u03A1\\u0393\\u0399\\u0391");
    734     // http://crbug.com/234797
    735     assertGreekUpper("\\u0395\\u03BB\\u03AC\\u03C4\\u03B5 \\u03BD\\u03B1 \\u03C6\\u03AC\\u03C4\\u03B5 "
    736                      "\\u03C4\\u03B1 \\u03BA\\u03B1\\u03BB\\u03CD\\u03C4\\u03B5\\u03C1\\u03B1 "
    737                      "\\u03C0\\u03B1\\u03CA\\u03B4\\u03AC\\u03BA\\u03B9\\u03B1!",
    738                      "\\u0395\\u039B\\u0391\\u03A4\\u0395 \\u039D\\u0391 \\u03A6\\u0391\\u03A4\\u0395 "
    739                      "\\u03A4\\u0391 \\u039A\\u0391\\u039B\\u03A5\\u03A4\\u0395\\u03A1\\u0391 "
    740                      "\\u03A0\\u0391\\u03AA\\u0394\\u0391\\u039A\\u0399\\u0391!");
    741     assertGreekUpper("\\u039C\\u03B1\\u0390\\u03BF\\u03C5, \\u03C4\\u03C1\\u03CC\\u03BB\\u03B5\\u03CA",
    742                      "\\u039C\\u0391\\u03AA\\u039F\\u03A5, \\u03A4\\u03A1\\u039F\\u039B\\u0395\\u03AA");
    743     assertGreekUpper("\\u03A4\\u03BF \\u03AD\\u03BD\\u03B1 \\u03AE \\u03C4\\u03BF "
    744                      "\\u03AC\\u03BB\\u03BB\\u03BF.",
    745                      "\\u03A4\\u039F \\u0395\\u039D\\u0391 \\u0397\\u0301 \\u03A4\\u039F "
    746                      "\\u0391\\u039B\\u039B\\u039F.");
    747     // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/
    748     assertGreekUpper("\\u03C1\\u03C9\\u03BC\\u03AD\\u03B9\\u03BA\\u03B1",
    749                      "\\u03A1\\u03A9\\u039C\\u0395\\u03AA\\u039A\\u0391");
    750     assertGreekUpper("\\u03AE.", "\\u0397\\u0301.");
    751 }
    752 
    753 void
    754 StringCaseTest::TestLongUpper() {
    755     if (quick) {
    756         logln("not exhaustive mode: skipping this test");
    757         return;
    758     }
    759     // Ticket #12663, crash with an extremely long string where
    760     // U+0390 maps to 0399 0308 0301 so that the result is three times as long
    761     // and overflows an int32_t.
    762     int32_t length = 0x40000004;  // more than 1G UChars
    763     UnicodeString s(length, (UChar32)0x390, length);
    764     UnicodeString result;
    765     UChar *dest = result.getBuffer(length + 1);
    766     if (s.isBogus() || dest == NULL) {
    767         logln("Out of memory, unable to run this test on this machine.");
    768         return;
    769     }
    770     IcuTestErrorCode errorCode(*this, "TestLongUpper");
    771     int32_t destLength = u_strToUpper(dest, result.getCapacity(),
    772                                       s.getBuffer(), s.length(), "", errorCode);
    773     result.releaseBuffer(destLength);
    774     if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) {
    775         errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)",
    776               errorCode.errorName(), (long)destLength);
    777     }
    778 }
    779 
    780 void StringCaseTest::TestMalformedUTF8() {
    781     // ticket #12639
    782     IcuTestErrorCode errorCode(*this, "TestMalformedUTF8");
    783     LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode));
    784     if (errorCode.isFailure()) {
    785         errln("ucasemap_open(English) failed - %s", errorCode.errorName());
    786         return;
    787     }
    788     char src[1] = { (char)0x85 };  // malformed UTF-8
    789     char dest[3] = { 0, 0, 0 };
    790     int32_t destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode);
    791     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
    792         errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
    793               errorCode.errorName(), (int)destLength, dest[0]);
    794     }
    795 
    796     errorCode.reset();
    797     dest[0] = 0;
    798     destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode);
    799     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
    800         errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
    801               errorCode.errorName(), (int)destLength, dest[0]);
    802     }
    803 
    804     errorCode.reset();
    805     dest[0] = 0;
    806     destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode);
    807     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
    808         errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
    809               errorCode.errorName(), (int)destLength, dest[0]);
    810     }
    811 
    812     errorCode.reset();
    813     dest[0] = 0;
    814     destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode);
    815     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
    816         errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
    817               errorCode.errorName(), (int)destLength, dest[0]);
    818     }
    819 }
    820 
    821 void StringCaseTest::TestBufferOverflow() {
    822     // Ticket #12849, incorrect result from Title Case preflight operation,
    823     // when buffer overflow error is expected.
    824     IcuTestErrorCode errorCode(*this, "TestBufferOverflow");
    825     LocalUCaseMapPointer csm(ucasemap_open("en", 0, errorCode));
    826     if (errorCode.isFailure()) {
    827         errln("ucasemap_open(English) failed - %s", errorCode.errorName());
    828         return;
    829     }
    830 
    831     UnicodeString data("hello world");
    832     int32_t result = ucasemap_toTitle(csm.getAlias(), NULL, 0, data.getBuffer(), data.length(), errorCode);
    833     if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != data.length()) {
    834         errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
    835               "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
    836               __FILE__, __LINE__, data.length(), errorCode.errorName(), result);
    837     }
    838     errorCode.reset();
    839 
    840 #if U_HAVE_STD_STRING
    841     std::string data_utf8;
    842     data.toUTF8String(data_utf8);
    843     result = ucasemap_utf8ToTitle(csm.getAlias(), NULL, 0, data_utf8.c_str(), data_utf8.length(), errorCode);
    844     if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != (int32_t)data_utf8.length()) {
    845         errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
    846               "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
    847               __FILE__, __LINE__, data_utf8.length(), errorCode.errorName(), result);
    848     }
    849     errorCode.reset();
    850 #endif  // U_HAVE_STD_STRING
    851 }
    852