Home | History | Annotate | Download | only in intltest
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /********************************************************************
      4  * COPYRIGHT:
      5  * Copyright (c) 2005-2016, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  ********************************************************************/
      8 /************************************************************************
*   Tests for the UText and UTextIterator text abstraction classes
     10 *
     11 ************************************************************************/
     12 
     13 #include <string.h>
     14 #include <stdio.h>
     15 #include <stdlib.h>
     16 #include "unicode/utypes.h"
     17 #include "unicode/utext.h"
     18 #include "unicode/utf8.h"
     19 #include "unicode/ustring.h"
     20 #include "unicode/uchriter.h"
     21 #include "cmemory.h"
     22 #include "cstr.h"
     23 #include "utxttest.h"
     24 
     25 static UBool  gFailed = FALSE;
     26 static int    gTestNum = 0;
     27 
     28 // Forward decl
     29 UText *openFragmentedUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status);
     30 
     31 #define TEST_ASSERT(x) \
     32 { if ((x)==FALSE) {errln("Test #%d failure in file %s at line %d\n", gTestNum, __FILE__, __LINE__);\
     33                      gFailed = TRUE;\
     34    }}
     35 
     36 
     37 #define TEST_SUCCESS(status) \
     38 { if (U_FAILURE(status)) {errln("Test #%d failure in file %s at line %d. Error = \"%s\"\n", \
     39        gTestNum, __FILE__, __LINE__, u_errorName(status)); \
     40        gFailed = TRUE;\
     41    }}
     42 
     43 UTextTest::UTextTest() {
     44 }
     45 
     46 UTextTest::~UTextTest() {
     47 }
     48 
     49 
     50 void
     51 UTextTest::runIndexedTest(int32_t index, UBool exec,
     52                           const char* &name, char* /*par*/) {
     53     switch (index) {
     54         case 0: name = "TextTest";
     55             if (exec) TextTest();    break;
     56         case 1: name = "ErrorTest";
     57             if (exec) ErrorTest();   break;
     58         case 2: name = "FreezeTest";
     59             if (exec) FreezeTest();  break;
     60         case 3: name = "Ticket5560";
     61             if (exec) Ticket5560();  break;
     62         case 4: name = "Ticket6847";
     63             if (exec) Ticket6847();  break;
     64         case 5: name = "Ticket10562";
     65             if (exec) Ticket10562();  break;
     66         case 6: name = "Ticket10983";
     67             if (exec) Ticket10983();  break;
     68         case 7: name = "Ticket12130";
     69             if (exec) Ticket12130(); break;
     70         default: name = "";          break;
     71     }
     72 }
     73 
     74 //
     75 // Quick and dirty random number generator.
     76 //   (don't use library so that results are portable.
     77 static uint32_t m_seed = 1;
     78 static uint32_t m_rand()
     79 {
     80     m_seed = m_seed * 1103515245 + 12345;
     81     return (uint32_t)(m_seed/65536) % 32768;
     82 }
     83 
     84 
     85 //
     86 //   TextTest()
     87 //
     88 //       Top Level function for UText testing.
     89 //       Specifies the strings to be tested, with the acutal testing itself
     90 //       being carried out in another function, TestString().
     91 //
     92 void  UTextTest::TextTest() {
     93     int32_t i, j;
     94 
     95     TestString("abcd\\U00010001xyz");
     96     TestString("");
     97 
     98     // Supplementary chars at start or end
     99     TestString("\\U00010001");
    100     TestString("abc\\U00010001");
    101     TestString("\\U00010001abc");
    102 
    103     // Test simple strings of lengths 1 to 60, looking for glitches at buffer boundaries
    104     UnicodeString s;
    105     for (i=1; i<60; i++) {
    106         s.truncate(0);
    107         for (j=0; j<i; j++) {
    108             if (j+0x30 == 0x5c) {
    109                 // backslash.  Needs to be escaped
    110                 s.append((UChar)0x5c);
    111             }
    112             s.append(UChar(j+0x30));
    113         }
    114         TestString(s);
    115     }
    116 
    117    // Test strings with odd-aligned supplementary chars,
    118    //    looking for glitches at buffer boundaries
    119     for (i=1; i<60; i++) {
    120         s.truncate(0);
    121         s.append((UChar)0x41);
    122         for (j=0; j<i; j++) {
    123             s.append(UChar32(j+0x11000));
    124         }
    125         TestString(s);
    126     }
    127 
    128     // String of chars of randomly varying size in utf-8 representation.
    129     //   Exercise the mapping, and the varying sized buffer.
    130     //
    131     s.truncate(0);
    132     UChar32  c1 = 0;
    133     UChar32  c2 = 0x100;
    134     UChar32  c3 = 0xa000;
    135     UChar32  c4 = 0x11000;
    136     for (i=0; i<1000; i++) {
    137         int len8 = m_rand()%4 + 1;
    138         switch (len8) {
    139             case 1:
    140                 c1 = (c1+1)%0x80;
    141                 // don't put 0 into string (0 terminated strings for some tests)
    142                 // don't put '\', will cause unescape() to fail.
    143                 if (c1==0x5c || c1==0) {
    144                     c1++;
    145                 }
    146                 s.append(c1);
    147                 break;
    148             case 2:
    149                 s.append(c2++);
    150                 break;
    151             case 3:
    152                 s.append(c3++);
    153                 break;
    154             case 4:
    155                 s.append(c4++);
    156                 break;
    157         }
    158     }
    159     TestString(s);
    160 }
    161 
    162 
    163 //
    164 //  TestString()     Run a suite of UText tests on a string.
    165 //                   The test string is unescaped before use.
    166 //
    167 void UTextTest::TestString(const UnicodeString &s) {
    168     int32_t       i;
    169     int32_t       j;
    170     UChar32       c;
    171     int32_t       cpCount = 0;
    172     UErrorCode    status  = U_ZERO_ERROR;
    173     UText        *ut      = NULL;
    174     int32_t       saLen;
    175 
    176     UnicodeString sa = s.unescape();
    177     saLen = sa.length();
    178 
    179     //
    180     // Build up a mapping between code points and UTF-16 code unit indexes.
    181     //
    182     m *cpMap = new m[sa.length() + 1];
    183     j = 0;
    184     for (i=0; i<sa.length(); i=sa.moveIndex32(i, 1)) {
    185         c = sa.char32At(i);
    186         cpMap[j].nativeIdx = i;
    187         cpMap[j].cp = c;
    188         j++;
    189         cpCount++;
    190     }
    191     cpMap[j].nativeIdx = i;   // position following the last char in utf-16 string.
    192 
    193 
    194     // UChar * test, null terminated
    195     status = U_ZERO_ERROR;
    196     UChar *buf = new UChar[saLen+1];
    197     sa.extract(buf, saLen+1, status);
    198     TEST_SUCCESS(status);
    199     ut = utext_openUChars(NULL, buf, -1, &status);
    200     TEST_SUCCESS(status);
    201     TestAccess(sa, ut, cpCount, cpMap);
    202     utext_close(ut);
    203     delete [] buf;
    204 
    205     // UChar * test, with length
    206     status = U_ZERO_ERROR;
    207     buf = new UChar[saLen+1];
    208     sa.extract(buf, saLen+1, status);
    209     TEST_SUCCESS(status);
    210     ut = utext_openUChars(NULL, buf, saLen, &status);
    211     TEST_SUCCESS(status);
    212     TestAccess(sa, ut, cpCount, cpMap);
    213     utext_close(ut);
    214     delete [] buf;
    215 
    216 
    217     // UnicodeString test
    218     status = U_ZERO_ERROR;
    219     ut = utext_openUnicodeString(NULL, &sa, &status);
    220     TEST_SUCCESS(status);
    221     TestAccess(sa, ut, cpCount, cpMap);
    222     TestCMR(sa, ut, cpCount, cpMap, cpMap);
    223     utext_close(ut);
    224 
    225 
    226     // Const UnicodeString test
    227     status = U_ZERO_ERROR;
    228     ut = utext_openConstUnicodeString(NULL, &sa, &status);
    229     TEST_SUCCESS(status);
    230     TestAccess(sa, ut, cpCount, cpMap);
    231     utext_close(ut);
    232 
    233 
    234     // Replaceable test.  (UnicodeString inherits Replaceable)
    235     status = U_ZERO_ERROR;
    236     ut = utext_openReplaceable(NULL, &sa, &status);
    237     TEST_SUCCESS(status);
    238     TestAccess(sa, ut, cpCount, cpMap);
    239     TestCMR(sa, ut, cpCount, cpMap, cpMap);
    240     utext_close(ut);
    241 
    242     // Character Iterator Tests
    243     status = U_ZERO_ERROR;
    244     const UChar *cbuf = sa.getBuffer();
    245     CharacterIterator *ci = new UCharCharacterIterator(cbuf, saLen, status);
    246     TEST_SUCCESS(status);
    247     ut = utext_openCharacterIterator(NULL, ci, &status);
    248     TEST_SUCCESS(status);
    249     TestAccess(sa, ut, cpCount, cpMap);
    250     utext_close(ut);
    251     delete ci;
    252 
    253 
    254     // Fragmented UnicodeString  (Chunk size of one)
    255     //
    256     status = U_ZERO_ERROR;
    257     ut = openFragmentedUnicodeString(NULL, &sa, &status);
    258     TEST_SUCCESS(status);
    259     TestAccess(sa, ut, cpCount, cpMap);
    260     utext_close(ut);
    261 
    262     //
    263     // UTF-8 test
    264     //
    265 
    266     // Convert the test string from UnicodeString to (char *) in utf-8 format
    267     int32_t u8Len = sa.extract(0, sa.length(), NULL, 0, "utf-8");
    268     char *u8String = new char[u8Len + 1];
    269     sa.extract(0, sa.length(), u8String, u8Len+1, "utf-8");
    270 
    271     // Build up the map of code point indices in the utf-8 string
    272     m * u8Map = new m[sa.length() + 1];
    273     i = 0;   // native utf-8 index
    274     for (j=0; j<cpCount ; j++) {  // code point number
    275         u8Map[j].nativeIdx = i;
    276         U8_NEXT(u8String, i, u8Len, c)
    277         u8Map[j].cp = c;
    278     }
    279     u8Map[cpCount].nativeIdx = u8Len;   // position following the last char in utf-8 string.
    280 
    281     // Do the test itself
    282     status = U_ZERO_ERROR;
    283     ut = utext_openUTF8(NULL, u8String, -1, &status);
    284     TEST_SUCCESS(status);
    285     TestAccess(sa, ut, cpCount, u8Map);
    286     utext_close(ut);
    287 
    288 
    289 
    290     delete []cpMap;
    291     delete []u8Map;
    292     delete []u8String;
    293 }
    294 
    295 //  TestCMR   test Copy, Move and Replace operations.
    296 //              us         UnicodeString containing the test text.
    297 //              ut         UText containing the same test text.
    298 //              cpCount    number of code points in the test text.
    299 //              nativeMap  Mapping from code points to native indexes for the UText.
    300 //              u16Map     Mapping from code points to UTF-16 indexes, for use with the UnicodeString.
    301 //
    302 //     This function runs a whole series of opertions on each incoming UText.
    303 //     The UText is deep-cloned prior to each operation, so that the original UText remains unchanged.
    304 //
    305 void UTextTest::TestCMR(const UnicodeString &us, UText *ut, int cpCount, m *nativeMap, m *u16Map) {
    306     TEST_ASSERT(utext_isWritable(ut) == TRUE);
    307 
    308     int  srcLengthType;       // Loop variables for selecting the postion and length
    309     int  srcPosType;          //   of the block to operate on within the source text.
    310     int  destPosType;
    311 
    312     int  srcIndex  = 0;       // Code Point indexes of the block to operate on for
    313     int  srcLength = 0;       //   a specific test.
    314 
    315     int  destIndex = 0;       // Code point index of the destination for a copy/move test.
    316 
    317     int32_t  nativeStart = 0; // Native unit indexes for a test.
    318     int32_t  nativeLimit = 0;
    319     int32_t  nativeDest  = 0;
    320 
    321     int32_t  u16Start    = 0; // UTF-16 indexes for a test.
    322     int32_t  u16Limit    = 0; //   used when performing the same operation in a Unicode String
    323     int32_t  u16Dest     = 0;
    324 
    325     // Iterate over a whole series of source index, length and a target indexes.
    326     // This is done with code point indexes; these will be later translated to native
    327     //   indexes using the cpMap.
    328     for (srcLengthType=1; srcLengthType<=3; srcLengthType++) {
    329         switch (srcLengthType) {
    330             case 1: srcLength = 1; break;
    331             case 2: srcLength = 5; break;
    332             case 3: srcLength = cpCount / 3;
    333         }
    334         for (srcPosType=1; srcPosType<=5; srcPosType++) {
    335             switch (srcPosType) {
    336                 case 1: srcIndex = 0; break;
    337                 case 2: srcIndex = 1; break;
    338                 case 3: srcIndex = cpCount - srcLength; break;
    339                 case 4: srcIndex = cpCount - srcLength - 1; break;
    340                 case 5: srcIndex = cpCount / 2; break;
    341             }
    342             if (srcIndex < 0 || srcIndex + srcLength > cpCount) {
    343                 // filter out bogus test cases -
    344                 //   those with a source range that falls of an edge of the string.
    345                 continue;
    346             }
    347 
    348             //
    349             // Copy and move tests.
    350             //   iterate over a variety of destination positions.
    351             //
    352             for (destPosType=1; destPosType<=4; destPosType++) {
    353                 switch (destPosType) {
    354                     case 1: destIndex = 0; break;
    355                     case 2: destIndex = 1; break;
    356                     case 3: destIndex = srcIndex - 1; break;
    357                     case 4: destIndex = srcIndex + srcLength + 1; break;
    358                     case 5: destIndex = cpCount-1; break;
    359                     case 6: destIndex = cpCount; break;
    360                 }
    361                 if (destIndex<0 || destIndex>cpCount) {
    362                     // filter out bogus test cases.
    363                     continue;
    364                 }
    365 
    366                 nativeStart = nativeMap[srcIndex].nativeIdx;
    367                 nativeLimit = nativeMap[srcIndex+srcLength].nativeIdx;
    368                 nativeDest  = nativeMap[destIndex].nativeIdx;
    369 
    370                 u16Start    = u16Map[srcIndex].nativeIdx;
    371                 u16Limit    = u16Map[srcIndex+srcLength].nativeIdx;
    372                 u16Dest     = u16Map[destIndex].nativeIdx;
    373 
    374                 gFailed = FALSE;
    375                 TestCopyMove(us, ut, FALSE,
    376                     nativeStart, nativeLimit, nativeDest,
    377                     u16Start, u16Limit, u16Dest);
    378 
    379                 TestCopyMove(us, ut, TRUE,
    380                     nativeStart, nativeLimit, nativeDest,
    381                     u16Start, u16Limit, u16Dest);
    382 
    383                 if (gFailed) {
    384                     return;
    385                 }
    386             }
    387 
    388             //
    389             //  Replace tests.
    390             //
    391             UnicodeString fullRepString("This is an arbitrary string that will be used as replacement text");
    392             for (int32_t replStrLen=0; replStrLen<20; replStrLen++) {
    393                 UnicodeString repStr(fullRepString, 0, replStrLen);
    394                 TestReplace(us, ut,
    395                     nativeStart, nativeLimit,
    396                     u16Start, u16Limit,
    397                     repStr);
    398                 if (gFailed) {
    399                     return;
    400                 }
    401             }
    402 
    403         }
    404     }
    405 
    406 }
    407 
    408 //
    409 //   TestCopyMove    run a single test case for utext_copy.
    410 //                   Test cases are created in TestCMR and dispatched here for execution.
    411 //
    412 void UTextTest::TestCopyMove(const UnicodeString &us, UText *ut, UBool move,
    413                     int32_t nativeStart, int32_t nativeLimit, int32_t nativeDest,
    414                     int32_t u16Start, int32_t u16Limit, int32_t u16Dest)
    415 {
    416     UErrorCode      status   = U_ZERO_ERROR;
    417     UText          *targetUT = NULL;
    418     gTestNum++;
    419     gFailed = FALSE;
    420 
    421     //
    422     //  clone the UText.  The test will be run in the cloned copy
    423     //  so that we don't alter the original.
    424     //
    425     targetUT = utext_clone(NULL, ut, TRUE, FALSE, &status);
    426     TEST_SUCCESS(status);
    427     UnicodeString targetUS(us);    // And copy the reference string.
    428 
    429     // do the test operation first in the reference
    430     targetUS.copy(u16Start, u16Limit, u16Dest);
    431     if (move) {
    432         // delete out the source range.
    433         if (u16Limit < u16Dest) {
    434             targetUS.removeBetween(u16Start, u16Limit);
    435         } else {
    436             int32_t amtCopied = u16Limit - u16Start;
    437             targetUS.removeBetween(u16Start+amtCopied, u16Limit+amtCopied);
    438         }
    439     }
    440 
    441     // Do the same operation in the UText under test
    442     utext_copy(targetUT, nativeStart, nativeLimit, nativeDest, move, &status);
    443     if (nativeDest > nativeStart && nativeDest < nativeLimit) {
    444         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
    445     } else {
    446         TEST_SUCCESS(status);
    447 
    448         // Compare the results of the two parallel tests
    449         int32_t  usi = 0;    // UnicodeString postion, utf-16 index.
    450         int64_t  uti = 0;    // UText position, native index.
    451         int32_t  cpi;        // char32 position (code point index)
    452         UChar32  usc;        // code point from Unicode String
    453         UChar32  utc;        // code point from UText
    454         utext_setNativeIndex(targetUT, 0);
    455         for (cpi=0; ; cpi++) {
    456             usc = targetUS.char32At(usi);
    457             utc = utext_next32(targetUT);
    458             if (utc < 0) {
    459                 break;
    460             }
    461             TEST_ASSERT(uti == usi);
    462             TEST_ASSERT(utc == usc);
    463             usi = targetUS.moveIndex32(usi, 1);
    464             uti = utext_getNativeIndex(targetUT);
    465             if (gFailed) {
    466                 goto cleanupAndReturn;
    467             }
    468         }
    469         int64_t expectedNativeLength = utext_nativeLength(ut);
    470         if (move == FALSE) {
    471             expectedNativeLength += nativeLimit - nativeStart;
    472         }
    473         uti = utext_getNativeIndex(targetUT);
    474         TEST_ASSERT(uti == expectedNativeLength);
    475     }
    476 
    477 cleanupAndReturn:
    478     utext_close(targetUT);
    479 }
    480 
    481 
    482 //
    483 //  TestReplace   Test a single Replace operation.
    484 //
    485 void UTextTest::TestReplace(
    486             const UnicodeString &us,     // reference UnicodeString in which to do the replace
    487             UText         *ut,                // UnicodeText object under test.
    488             int32_t       nativeStart,        // Range to be replaced, in UText native units.
    489             int32_t       nativeLimit,
    490             int32_t       u16Start,           // Range to be replaced, in UTF-16 units
    491             int32_t       u16Limit,           //    for use in the reference UnicodeString.
    492             const UnicodeString &repStr)      // The replacement string
    493 {
    494     UErrorCode      status   = U_ZERO_ERROR;
    495     UText          *targetUT = NULL;
    496     gTestNum++;
    497     gFailed = FALSE;
    498 
    499     //
    500     //  clone the target UText.  The test will be run in the cloned copy
    501     //  so that we don't alter the original.
    502     //
    503     targetUT = utext_clone(NULL, ut, TRUE, FALSE, &status);
    504     TEST_SUCCESS(status);
    505     UnicodeString targetUS(us);    // And copy the reference string.
    506 
    507     //
    508     // Do the replace operation in the Unicode String, to
    509     //   produce a reference result.
    510     //
    511     targetUS.replace(u16Start, u16Limit-u16Start, repStr);
    512 
    513     //
    514     // Do the replace on the UText under test
    515     //
    516     const UChar *rs = repStr.getBuffer();
    517     int32_t  rsLen = repStr.length();
    518     int32_t actualDelta = utext_replace(targetUT, nativeStart, nativeLimit, rs, rsLen, &status);
    519     int32_t expectedDelta = repStr.length() - (nativeLimit - nativeStart);
    520     TEST_ASSERT(actualDelta == expectedDelta);
    521 
    522     //
    523     // Compare the results
    524     //
    525     int32_t  usi = 0;    // UnicodeString postion, utf-16 index.
    526     int64_t  uti = 0;    // UText position, native index.
    527     int32_t  cpi;        // char32 position (code point index)
    528     UChar32  usc;        // code point from Unicode String
    529     UChar32  utc;        // code point from UText
    530     int64_t  expectedNativeLength = 0;
    531     utext_setNativeIndex(targetUT, 0);
    532     for (cpi=0; ; cpi++) {
    533         usc = targetUS.char32At(usi);
    534         utc = utext_next32(targetUT);
    535         if (utc < 0) {
    536             break;
    537         }
    538         TEST_ASSERT(uti == usi);
    539         TEST_ASSERT(utc == usc);
    540         usi = targetUS.moveIndex32(usi, 1);
    541         uti = utext_getNativeIndex(targetUT);
    542         if (gFailed) {
    543             goto cleanupAndReturn;
    544         }
    545     }
    546     expectedNativeLength = utext_nativeLength(ut) + expectedDelta;
    547     uti = utext_getNativeIndex(targetUT);
    548     TEST_ASSERT(uti == expectedNativeLength);
    549 
    550 cleanupAndReturn:
    551     utext_close(targetUT);
    552 }
    553 
    554 //
    555 //  TestAccess      Test the read only access functions on a UText, including cloning.
    556 //                  The text is accessed in a variety of ways, and compared with
    557 //                  the reference UnicodeString.
    558 //
    559 void UTextTest::TestAccess(const UnicodeString &us, UText *ut, int cpCount, m *cpMap) {
    560     // Run the standard tests on the caller-supplied UText.
    561     TestAccessNoClone(us, ut, cpCount, cpMap);
    562 
    563     // Re-run tests on a shallow clone.
    564     utext_setNativeIndex(ut, 0);
    565     UErrorCode status = U_ZERO_ERROR;
    566     UText *shallowClone = utext_clone(NULL, ut, FALSE /*deep*/, FALSE /*readOnly*/, &status);
    567     TEST_SUCCESS(status);
    568     TestAccessNoClone(us, shallowClone, cpCount, cpMap);
    569 
    570     //
    571     // Rerun again on a deep clone.
    572     // Note that text providers are not required to provide deep cloning,
    573     //   so unsupported errors are ignored.
    574     //
    575     status = U_ZERO_ERROR;
    576     utext_setNativeIndex(shallowClone, 0);
    577     UText *deepClone = utext_clone(NULL, shallowClone, TRUE, FALSE, &status);
    578     utext_close(shallowClone);
    579     if (status != U_UNSUPPORTED_ERROR) {
    580         TEST_SUCCESS(status);
    581         TestAccessNoClone(us, deepClone, cpCount, cpMap);
    582     }
    583     utext_close(deepClone);
    584 }
    585 
    586 
    587 //
    588 //  TestAccessNoClone()    Test the read only access functions on a UText.
    589 //                         The text is accessed in a variety of ways, and compared with
    590 //                         the reference UnicodeString.
    591 //
    592 void UTextTest::TestAccessNoClone(const UnicodeString &us, UText *ut, int cpCount, m *cpMap) {
    593     UErrorCode  status = U_ZERO_ERROR;
    594     gTestNum++;
    595 
    596     //
    597     //  Check the length from the UText
    598     //
    599     int64_t expectedLen = cpMap[cpCount].nativeIdx;
    600     int64_t utlen = utext_nativeLength(ut);
    601     TEST_ASSERT(expectedLen == utlen);
    602 
    603     //
    604     //  Iterate forwards, verify that we get the correct code points
    605     //   at the correct native offsets.
    606     //
    607     int         i = 0;
    608     int64_t     index;
    609     int64_t     expectedIndex = 0;
    610     int64_t     foundIndex = 0;
    611     UChar32     expectedC;
    612     UChar32     foundC;
    613     int64_t     len;
    614 
    615     for (i=0; i<cpCount; i++) {
    616         expectedIndex = cpMap[i].nativeIdx;
    617         foundIndex    = utext_getNativeIndex(ut);
    618         TEST_ASSERT(expectedIndex == foundIndex);
    619         expectedC     = cpMap[i].cp;
    620         foundC        = utext_next32(ut);
    621         TEST_ASSERT(expectedC == foundC);
    622         foundIndex    = utext_getPreviousNativeIndex(ut);
    623         TEST_ASSERT(expectedIndex == foundIndex);
    624         if (gFailed) {
    625             return;
    626         }
    627     }
    628     foundC = utext_next32(ut);
    629     TEST_ASSERT(foundC == U_SENTINEL);
    630 
    631     // Repeat above, using macros
    632     utext_setNativeIndex(ut, 0);
    633     for (i=0; i<cpCount; i++) {
    634         expectedIndex = cpMap[i].nativeIdx;
    635         foundIndex    = UTEXT_GETNATIVEINDEX(ut);
    636         TEST_ASSERT(expectedIndex == foundIndex);
    637         expectedC     = cpMap[i].cp;
    638         foundC        = UTEXT_NEXT32(ut);
    639         TEST_ASSERT(expectedC == foundC);
    640         if (gFailed) {
    641             return;
    642         }
    643     }
    644     foundC = UTEXT_NEXT32(ut);
    645     TEST_ASSERT(foundC == U_SENTINEL);
    646 
    647     //
    648     //  Forward iteration (above) should have left index at the
    649     //   end of the input, which should == length().
    650     //
    651     len = utext_nativeLength(ut);
    652     foundIndex  = utext_getNativeIndex(ut);
    653     TEST_ASSERT(len == foundIndex);
    654 
    655     //
    656     // Iterate backwards over entire test string
    657     //
    658     len = utext_getNativeIndex(ut);
    659     utext_setNativeIndex(ut, len);
    660     for (i=cpCount-1; i>=0; i--) {
    661         expectedC     = cpMap[i].cp;
    662         expectedIndex = cpMap[i].nativeIdx;
    663         int64_t prevIndex = utext_getPreviousNativeIndex(ut);
    664         foundC        = utext_previous32(ut);
    665         foundIndex    = utext_getNativeIndex(ut);
    666         TEST_ASSERT(expectedIndex == foundIndex);
    667         TEST_ASSERT(expectedC == foundC);
    668         TEST_ASSERT(prevIndex == foundIndex);
    669         if (gFailed) {
    670             return;
    671         }
    672     }
    673 
    674     //
    675     //  Backwards iteration, above, should have left our iterator
    676     //   position at zero, and continued backwards iterationshould fail.
    677     //
    678     foundIndex = utext_getNativeIndex(ut);
    679     TEST_ASSERT(foundIndex == 0);
    680     foundIndex = utext_getPreviousNativeIndex(ut);
    681     TEST_ASSERT(foundIndex == 0);
    682 
    683 
    684     foundC = utext_previous32(ut);
    685     TEST_ASSERT(foundC == U_SENTINEL);
    686     foundIndex = utext_getNativeIndex(ut);
    687     TEST_ASSERT(foundIndex == 0);
    688     foundIndex = utext_getPreviousNativeIndex(ut);
    689     TEST_ASSERT(foundIndex == 0);
    690 
    691 
    692     // And again, with the macros
    693     utext_setNativeIndex(ut, len);
    694     for (i=cpCount-1; i>=0; i--) {
    695         expectedC     = cpMap[i].cp;
    696         expectedIndex = cpMap[i].nativeIdx;
    697         foundC        = UTEXT_PREVIOUS32(ut);
    698         foundIndex    = UTEXT_GETNATIVEINDEX(ut);
    699         TEST_ASSERT(expectedIndex == foundIndex);
    700         TEST_ASSERT(expectedC == foundC);
    701         if (gFailed) {
    702             return;
    703         }
    704     }
    705 
    706     //
    707     //  Backwards iteration, above, should have left our iterator
    708     //   position at zero, and continued backwards iterationshould fail.
    709     //
    710     foundIndex = UTEXT_GETNATIVEINDEX(ut);
    711     TEST_ASSERT(foundIndex == 0);
    712 
    713     foundC = UTEXT_PREVIOUS32(ut);
    714     TEST_ASSERT(foundC == U_SENTINEL);
    715     foundIndex = UTEXT_GETNATIVEINDEX(ut);
    716     TEST_ASSERT(foundIndex == 0);
    717     if (gFailed) {
    718         return;
    719     }
    720 
    721     //
    722     //  next32From(), prevous32From(), Iterate in a somewhat random order.
    723     //
    724     int  cpIndex = 0;
    725     for (i=0; i<cpCount; i++) {
    726         cpIndex = (cpIndex + 9973) % cpCount;
    727         index         = cpMap[cpIndex].nativeIdx;
    728         expectedC     = cpMap[cpIndex].cp;
    729         foundC        = utext_next32From(ut, index);
    730         TEST_ASSERT(expectedC == foundC);
    731         if (gFailed) {
    732             return;
    733         }
    734     }
    735 
    736     cpIndex = 0;
    737     for (i=0; i<cpCount; i++) {
    738         cpIndex = (cpIndex + 9973) % cpCount;
    739         index         = cpMap[cpIndex+1].nativeIdx;
    740         expectedC     = cpMap[cpIndex].cp;
    741         foundC        = utext_previous32From(ut, index);
    742         TEST_ASSERT(expectedC == foundC);
    743         if (gFailed) {
    744             return;
    745         }
    746     }
    747 
    748 
    749     //
    750     // moveIndex(int32_t delta);
    751     //
    752 
    753     // Walk through frontwards, incrementing by one
    754     utext_setNativeIndex(ut, 0);
    755     for (i=1; i<=cpCount; i++) {
    756         utext_moveIndex32(ut, 1);
    757         index = utext_getNativeIndex(ut);
    758         expectedIndex = cpMap[i].nativeIdx;
    759         TEST_ASSERT(expectedIndex == index);
    760         index = UTEXT_GETNATIVEINDEX(ut);
    761         TEST_ASSERT(expectedIndex == index);
    762     }
    763 
    764     // Walk through frontwards, incrementing by two
    765     utext_setNativeIndex(ut, 0);
    766     for (i=2; i<cpCount; i+=2) {
    767         utext_moveIndex32(ut, 2);
    768         index = utext_getNativeIndex(ut);
    769         expectedIndex = cpMap[i].nativeIdx;
    770         TEST_ASSERT(expectedIndex == index);
    771         index = UTEXT_GETNATIVEINDEX(ut);
    772         TEST_ASSERT(expectedIndex == index);
    773     }
    774 
    775     // walk through the string backwards, decrementing by one.
    776     i = cpMap[cpCount].nativeIdx;
    777     utext_setNativeIndex(ut, i);
    778     for (i=cpCount; i>=0; i--) {
    779         expectedIndex = cpMap[i].nativeIdx;
    780         index = utext_getNativeIndex(ut);
    781         TEST_ASSERT(expectedIndex == index);
    782         index = UTEXT_GETNATIVEINDEX(ut);
    783         TEST_ASSERT(expectedIndex == index);
    784         utext_moveIndex32(ut, -1);
    785     }
    786 
    787 
    788     // walk through backwards, decrementing by three
    789     i = cpMap[cpCount].nativeIdx;
    790     utext_setNativeIndex(ut, i);
    791     for (i=cpCount; i>=0; i-=3) {
    792         expectedIndex = cpMap[i].nativeIdx;
    793         index = utext_getNativeIndex(ut);
    794         TEST_ASSERT(expectedIndex == index);
    795         index = UTEXT_GETNATIVEINDEX(ut);
    796         TEST_ASSERT(expectedIndex == index);
    797         utext_moveIndex32(ut, -3);
    798     }
    799 
    800 
    801     //
    802     // Extract
    803     //
    804     int bufSize = us.length() + 10;
    805     UChar *buf = new UChar[bufSize];
    806     status = U_ZERO_ERROR;
    807     expectedLen = us.length();
    808     len = utext_extract(ut, 0, utlen, buf, bufSize, &status);
    809     TEST_SUCCESS(status);
    810     TEST_ASSERT(len == expectedLen);
    811     int compareResult = us.compare(buf, -1);
    812     TEST_ASSERT(compareResult == 0);
    813 
    814     status = U_ZERO_ERROR;
    815     len = utext_extract(ut, 0, utlen, NULL, 0, &status);
    816     if (utlen == 0) {
    817         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    818     } else {
    819         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    820     }
    821     TEST_ASSERT(len == expectedLen);
    822 
    823     status = U_ZERO_ERROR;
    824     u_memset(buf, 0x5555, bufSize);
    825     len = utext_extract(ut, 0, utlen, buf, 1, &status);
    826     if (us.length() == 0) {
    827         TEST_SUCCESS(status);
    828         TEST_ASSERT(buf[0] == 0);
    829     } else {
    830         // Buf len == 1, extracting a single 16 bit value.
    831         // If the data char is supplementary, it doesn't matter whether the buffer remains unchanged,
    832         //   or whether the lead surrogate of the pair is extracted.
    833         //   It's a buffer overflow error in either case.
    834         TEST_ASSERT(buf[0] == us.charAt(0) ||
    835                     (buf[0] == 0x5555 && U_IS_SUPPLEMENTARY(us.char32At(0))));
    836         TEST_ASSERT(buf[1] == 0x5555);
    837         if (us.length() == 1) {
    838             TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    839         } else {
    840             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    841         }
    842     }
    843 
    844     delete []buf;
    845 }
    846 
    847 //
    848 //  ErrorTest()    Check various error and edge cases.
    849 //
    850 void UTextTest::ErrorTest()
    851 {
    852     // Close of an unitialized UText.  Shouldn't blow up.
    853     {
    854         UText  ut;
    855         memset(&ut, 0, sizeof(UText));
    856         utext_close(&ut);
    857         utext_close(NULL);
    858     }
    859 
    860     // Double-close of a UText.  Shouldn't blow up.  UText should still be usable.
    861     {
    862         UErrorCode status = U_ZERO_ERROR;
    863         UText ut = UTEXT_INITIALIZER;
    864         UnicodeString s("Hello, World");
    865         UText *ut2 = utext_openUnicodeString(&ut, &s, &status);
    866         TEST_SUCCESS(status);
    867         TEST_ASSERT(ut2 == &ut);
    868 
    869         UText *ut3 = utext_close(&ut);
    870         TEST_ASSERT(ut3 == &ut);
    871 
    872         UText *ut4 = utext_close(&ut);
    873         TEST_ASSERT(ut4 == &ut);
    874 
    875         utext_openUnicodeString(&ut, &s, &status);
    876         TEST_SUCCESS(status);
    877         utext_close(&ut);
    878     }
    879 
    880     // Re-use of a UText, chaining through each of the types of UText
    881     //   (If it doesn't blow up, and doesn't leak, it's probably working fine)
    882     {
    883         UErrorCode status = U_ZERO_ERROR;
    884         UText ut = UTEXT_INITIALIZER;
    885         UText  *utp;
    886         UnicodeString s1("Hello, World");
    887         UChar s2[] = {(UChar)0x41, (UChar)0x42, (UChar)0};
    888         const char  *s3 = "\x66\x67\x68";
    889 
    890         utp = utext_openUnicodeString(&ut, &s1, &status);
    891         TEST_SUCCESS(status);
    892         TEST_ASSERT(utp == &ut);
    893 
    894         utp = utext_openConstUnicodeString(&ut, &s1, &status);
    895         TEST_SUCCESS(status);
    896         TEST_ASSERT(utp == &ut);
    897 
    898         utp = utext_openUTF8(&ut, s3, -1, &status);
    899         TEST_SUCCESS(status);
    900         TEST_ASSERT(utp == &ut);
    901 
    902         utp = utext_openUChars(&ut, s2, -1, &status);
    903         TEST_SUCCESS(status);
    904         TEST_ASSERT(utp == &ut);
    905 
    906         utp = utext_close(&ut);
    907         TEST_ASSERT(utp == &ut);
    908 
    909         utp = utext_openUnicodeString(&ut, &s1, &status);
    910         TEST_SUCCESS(status);
    911         TEST_ASSERT(utp == &ut);
    912     }
    913 
    914     // Invalid parameters on open
    915     //
    916     {
    917         UErrorCode status = U_ZERO_ERROR;
    918         UText ut = UTEXT_INITIALIZER;
    919 
    920         utext_openUChars(&ut, NULL, 5, &status);
    921         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
    922 
    923         status = U_ZERO_ERROR;
    924         utext_openUChars(&ut, NULL, -1, &status);
    925         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
    926 
    927         status = U_ZERO_ERROR;
    928         utext_openUTF8(&ut, NULL, 4, &status);
    929         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
    930 
    931         status = U_ZERO_ERROR;
    932         utext_openUTF8(&ut, NULL, -1, &status);
    933         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
    934     }
    935 
    936     //
    937     //  UTF-8 with malformed sequences.
    938     //    These should come through as the Unicode replacement char, \ufffd
    939     //
    940     {
    941         UErrorCode status = U_ZERO_ERROR;
    942         UText *ut = NULL;
    943         const char *badUTF8 = "\x41\x81\x42\xf0\x81\x81\x43";
    944         UChar32  c;
    945 
    946         ut = utext_openUTF8(NULL, badUTF8, -1, &status);
    947         TEST_SUCCESS(status);
    948         c = utext_char32At(ut, 1);
    949         TEST_ASSERT(c == 0xfffd);
    950         c = utext_char32At(ut, 3);
    951         TEST_ASSERT(c == 0xfffd);
    952         c = utext_char32At(ut, 5);
    953         TEST_ASSERT(c == 0xfffd);
    954         c = utext_char32At(ut, 6);
    955         TEST_ASSERT(c == 0x43);
    956 
    957         UChar buf[10];
    958         int n = utext_extract(ut, 0, 9, buf, 10, &status);
    959         TEST_SUCCESS(status);
    960         TEST_ASSERT(n==5);
    961         TEST_ASSERT(buf[1] == 0xfffd);
    962         TEST_ASSERT(buf[3] == 0xfffd);
    963         TEST_ASSERT(buf[2] == 0x42);
    964         utext_close(ut);
    965     }
    966 
    967 
    968     //
    969     //  isLengthExpensive - does it make the exptected transitions after
    970     //                      getting the length of a nul terminated string?
    971     //
    972     {
    973         UErrorCode status = U_ZERO_ERROR;
    974         UnicodeString sa("Hello, this is a string");
    975         UBool  isExpensive;
    976 
    977         UChar sb[100];
    978         memset(sb, 0x20, sizeof(sb));
    979         sb[99] = 0;
    980 
    981         UText *uta = utext_openUnicodeString(NULL, &sa, &status);
    982         TEST_SUCCESS(status);
    983         isExpensive = utext_isLengthExpensive(uta);
    984         TEST_ASSERT(isExpensive == FALSE);
    985         utext_close(uta);
    986 
    987         UText *utb = utext_openUChars(NULL, sb, -1, &status);
    988         TEST_SUCCESS(status);
    989         isExpensive = utext_isLengthExpensive(utb);
    990         TEST_ASSERT(isExpensive == TRUE);
    991         int64_t  len = utext_nativeLength(utb);
    992         TEST_ASSERT(len == 99);
    993         isExpensive = utext_isLengthExpensive(utb);
    994         TEST_ASSERT(isExpensive == FALSE);
    995         utext_close(utb);
    996     }
    997 
    998     //
    999     // Index to positions not on code point boundaries.
   1000     //
   1001     {
   1002         const char *u8str =         "\xc8\x81\xe1\x82\x83\xf1\x84\x85\x86";
   1003         int32_t startMap[] =        {   0,  0,  2,  2,  2,  5,  5,  5,  5,  9,  9};
   1004         int32_t nextMap[]  =        {   2,  2,  5,  5,  5,  9,  9,  9,  9,  9,  9};
   1005         int32_t prevMap[]  =        {   0,  0,  0,  0,  0,  2,  2,  2,  2,  5,  5};
   1006         UChar32  c32Map[] =    {0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146, 0x044146, 0x044146, -1, -1};
   1007         UChar32  pr32Map[] =   {    -1,   -1,  0x201,  0x201,  0x201,   0x1083,   0x1083,   0x1083,   0x1083, 0x044146, 0x044146};
   1008 
   1009         // extractLen is the size, in UChars, of what will be extracted between index and index+1.
   1010         //  is zero when both index positions lie within the same code point.
   1011         int32_t  exLen[] =          {   0,  1,   0,  0,  1,  0,  0,  0,  2,  0,  0};
   1012 
   1013 
   1014         UErrorCode status = U_ZERO_ERROR;
   1015         UText *ut = utext_openUTF8(NULL, u8str, -1, &status);
   1016         TEST_SUCCESS(status);
   1017 
   1018         // Check setIndex
   1019         int32_t i;
   1020         int32_t startMapLimit = UPRV_LENGTHOF(startMap);
   1021         for (i=0; i<startMapLimit; i++) {
   1022             utext_setNativeIndex(ut, i);
   1023             int64_t cpIndex = utext_getNativeIndex(ut);
   1024             TEST_ASSERT(cpIndex == startMap[i]);
   1025             cpIndex = UTEXT_GETNATIVEINDEX(ut);
   1026             TEST_ASSERT(cpIndex == startMap[i]);
   1027         }
   1028 
   1029         // Check char32At
   1030         for (i=0; i<startMapLimit; i++) {
   1031             UChar32 c32 = utext_char32At(ut, i);
   1032             TEST_ASSERT(c32 == c32Map[i]);
   1033             int64_t cpIndex = utext_getNativeIndex(ut);
   1034             TEST_ASSERT(cpIndex == startMap[i]);
   1035         }
   1036 
   1037         // Check utext_next32From
   1038         for (i=0; i<startMapLimit; i++) {
   1039             UChar32 c32 = utext_next32From(ut, i);
   1040             TEST_ASSERT(c32 == c32Map[i]);
   1041             int64_t cpIndex = utext_getNativeIndex(ut);
   1042             TEST_ASSERT(cpIndex == nextMap[i]);
   1043         }
   1044 
   1045         // check utext_previous32From
   1046         for (i=0; i<startMapLimit; i++) {
   1047             gTestNum++;
   1048             UChar32 c32 = utext_previous32From(ut, i);
   1049             TEST_ASSERT(c32 == pr32Map[i]);
   1050             int64_t cpIndex = utext_getNativeIndex(ut);
   1051             TEST_ASSERT(cpIndex == prevMap[i]);
   1052         }
   1053 
   1054         // check Extract
   1055         //   Extract from i to i+1, which may be zero or one code points,
   1056         //     depending on whether the indices straddle a cp boundary.
   1057         for (i=0; i<startMapLimit; i++) {
   1058             UChar buf[3];
   1059             status = U_ZERO_ERROR;
   1060             int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
   1061             TEST_SUCCESS(status);
   1062             TEST_ASSERT(extractedLen == exLen[i]);
   1063             if (extractedLen > 0) {
   1064                 UChar32  c32;
   1065                 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
   1066                 U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
   1067                 TEST_ASSERT(c32 == c32Map[i]);
   1068             }
   1069         }
   1070 
   1071         utext_close(ut);
   1072     }
   1073 
   1074 
   1075     {    //  Similar test, with utf16 instead of utf8
   1076          //  TODO:  merge the common parts of these tests.
   1077 
   1078         UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
   1079         int32_t startMap[]  ={ 0,     1,   1,    3,     4,  4,     6,  6};
   1080         int32_t nextMap[]  = { 1,     3,   3,    4,     6,  6,     6,  6};
   1081         int32_t prevMap[]  = { 0,     0,   0,    1,     3,  3,     4,  4};
   1082         UChar32  c32Map[] =  {0x1000, 0x11000, 0x11000, 0x2000,  0x22000, 0x22000, -1, -1};
   1083         UChar32  pr32Map[] = {    -1, 0x1000,  0x1000,  0x11000, 0x2000,  0x2000,   0x22000,   0x22000};
   1084         int32_t  exLen[] =   {   1,  0,   2,  1,  0,  2,  0,  0,};
   1085 
   1086         u16str = u16str.unescape();
   1087         UErrorCode status = U_ZERO_ERROR;
   1088         UText *ut = utext_openUnicodeString(NULL, &u16str, &status);
   1089         TEST_SUCCESS(status);
   1090 
   1091         int32_t startMapLimit = UPRV_LENGTHOF(startMap);
   1092         int i;
   1093         for (i=0; i<startMapLimit; i++) {
   1094             utext_setNativeIndex(ut, i);
   1095             int64_t cpIndex = utext_getNativeIndex(ut);
   1096             TEST_ASSERT(cpIndex == startMap[i]);
   1097         }
   1098 
   1099         // Check char32At
   1100         for (i=0; i<startMapLimit; i++) {
   1101             UChar32 c32 = utext_char32At(ut, i);
   1102             TEST_ASSERT(c32 == c32Map[i]);
   1103             int64_t cpIndex = utext_getNativeIndex(ut);
   1104             TEST_ASSERT(cpIndex == startMap[i]);
   1105         }
   1106 
   1107         // Check utext_next32From
   1108         for (i=0; i<startMapLimit; i++) {
   1109             UChar32 c32 = utext_next32From(ut, i);
   1110             TEST_ASSERT(c32 == c32Map[i]);
   1111             int64_t cpIndex = utext_getNativeIndex(ut);
   1112             TEST_ASSERT(cpIndex == nextMap[i]);
   1113         }
   1114 
   1115         // check utext_previous32From
   1116         for (i=0; i<startMapLimit; i++) {
   1117             UChar32 c32 = utext_previous32From(ut, i);
   1118             TEST_ASSERT(c32 == pr32Map[i]);
   1119             int64_t cpIndex = utext_getNativeIndex(ut);
   1120             TEST_ASSERT(cpIndex == prevMap[i]);
   1121         }
   1122 
   1123         // check Extract
   1124         //   Extract from i to i+1, which may be zero or one code points,
   1125         //     depending on whether the indices straddle a cp boundary.
   1126         for (i=0; i<startMapLimit; i++) {
   1127             UChar buf[3];
   1128             status = U_ZERO_ERROR;
   1129             int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
   1130             TEST_SUCCESS(status);
   1131             TEST_ASSERT(extractedLen == exLen[i]);
   1132             if (extractedLen > 0) {
   1133                 UChar32  c32;
   1134                 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
   1135                 U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
   1136                 TEST_ASSERT(c32 == c32Map[i]);
   1137             }
   1138         }
   1139 
   1140         utext_close(ut);
   1141     }
   1142 
   1143     {    //  Similar test, with UText over Replaceable
   1144          //  TODO:  merge the common parts of these tests.
   1145 
   1146         UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
   1147         int32_t startMap[]  ={ 0,     1,   1,    3,     4,  4,     6,  6};
   1148         int32_t nextMap[]  = { 1,     3,   3,    4,     6,  6,     6,  6};
   1149         int32_t prevMap[]  = { 0,     0,   0,    1,     3,  3,     4,  4};
   1150         UChar32  c32Map[] =  {0x1000, 0x11000, 0x11000, 0x2000,  0x22000, 0x22000, -1, -1};
   1151         UChar32  pr32Map[] = {    -1, 0x1000,  0x1000,  0x11000, 0x2000,  0x2000,   0x22000,   0x22000};
   1152         int32_t  exLen[] =   {   1,  0,   2,  1,  0,  2,  0,  0,};
   1153 
   1154         u16str = u16str.unescape();
   1155         UErrorCode status = U_ZERO_ERROR;
   1156         UText *ut = utext_openReplaceable(NULL, &u16str, &status);
   1157         TEST_SUCCESS(status);
   1158 
   1159         int32_t startMapLimit = UPRV_LENGTHOF(startMap);
   1160         int i;
   1161         for (i=0; i<startMapLimit; i++) {
   1162             utext_setNativeIndex(ut, i);
   1163             int64_t cpIndex = utext_getNativeIndex(ut);
   1164             TEST_ASSERT(cpIndex == startMap[i]);
   1165         }
   1166 
   1167         // Check char32At
   1168         for (i=0; i<startMapLimit; i++) {
   1169             UChar32 c32 = utext_char32At(ut, i);
   1170             TEST_ASSERT(c32 == c32Map[i]);
   1171             int64_t cpIndex = utext_getNativeIndex(ut);
   1172             TEST_ASSERT(cpIndex == startMap[i]);
   1173         }
   1174 
   1175         // Check utext_next32From
   1176         for (i=0; i<startMapLimit; i++) {
   1177             UChar32 c32 = utext_next32From(ut, i);
   1178             TEST_ASSERT(c32 == c32Map[i]);
   1179             int64_t cpIndex = utext_getNativeIndex(ut);
   1180             TEST_ASSERT(cpIndex == nextMap[i]);
   1181         }
   1182 
   1183         // check utext_previous32From
   1184         for (i=0; i<startMapLimit; i++) {
   1185             UChar32 c32 = utext_previous32From(ut, i);
   1186             TEST_ASSERT(c32 == pr32Map[i]);
   1187             int64_t cpIndex = utext_getNativeIndex(ut);
   1188             TEST_ASSERT(cpIndex == prevMap[i]);
   1189         }
   1190 
   1191         // check Extract
   1192         //   Extract from i to i+1, which may be zero or one code points,
   1193         //     depending on whether the indices straddle a cp boundary.
   1194         for (i=0; i<startMapLimit; i++) {
   1195             UChar buf[3];
   1196             status = U_ZERO_ERROR;
   1197             int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
   1198             TEST_SUCCESS(status);
   1199             TEST_ASSERT(extractedLen == exLen[i]);
   1200             if (extractedLen > 0) {
   1201                 UChar32  c32;
   1202                 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
   1203                 U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
   1204                 TEST_ASSERT(c32 == c32Map[i]);
   1205             }
   1206         }
   1207 
   1208         utext_close(ut);
   1209     }
   1210 }
   1211 
   1212 
   1213 void UTextTest::FreezeTest() {
   1214     // Check isWritable() and freeze() behavior.
   1215     //
   1216 
   1217     UnicodeString  ustr("Hello, World.");
   1218     const char u8str[] = {char(0x31), (char)0x32, (char)0x33, 0};
   1219     const UChar u16str[] = {(UChar)0x31, (UChar)0x32, (UChar)0x44, 0};
   1220 
   1221     UErrorCode status = U_ZERO_ERROR;
   1222     UText  *ut        = NULL;
   1223     UText  *ut2       = NULL;
   1224 
   1225     ut = utext_openUTF8(ut, u8str, -1, &status);
   1226     TEST_SUCCESS(status);
   1227     UBool writable = utext_isWritable(ut);
   1228     TEST_ASSERT(writable == FALSE);
   1229     utext_copy(ut, 1, 2, 0, TRUE, &status);
   1230     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
   1231 
   1232     status = U_ZERO_ERROR;
   1233     ut = utext_openUChars(ut, u16str, -1, &status);
   1234     TEST_SUCCESS(status);
   1235     writable = utext_isWritable(ut);
   1236     TEST_ASSERT(writable == FALSE);
   1237     utext_copy(ut, 1, 2, 0, TRUE, &status);
   1238     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
   1239 
   1240     status = U_ZERO_ERROR;
   1241     ut = utext_openUnicodeString(ut, &ustr, &status);
   1242     TEST_SUCCESS(status);
   1243     writable = utext_isWritable(ut);
   1244     TEST_ASSERT(writable == TRUE);
   1245     utext_freeze(ut);
   1246     writable = utext_isWritable(ut);
   1247     TEST_ASSERT(writable == FALSE);
   1248     utext_copy(ut, 1, 2, 0, TRUE, &status);
   1249     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
   1250 
   1251     status = U_ZERO_ERROR;
   1252     ut = utext_openUnicodeString(ut, &ustr, &status);
   1253     TEST_SUCCESS(status);
   1254     ut2 = utext_clone(ut2, ut, FALSE, FALSE, &status);  // clone with readonly = false
   1255     TEST_SUCCESS(status);
   1256     writable = utext_isWritable(ut2);
   1257     TEST_ASSERT(writable == TRUE);
   1258     ut2 = utext_clone(ut2, ut, FALSE, TRUE, &status);  // clone with readonly = true
   1259     TEST_SUCCESS(status);
   1260     writable = utext_isWritable(ut2);
   1261     TEST_ASSERT(writable == FALSE);
   1262     utext_copy(ut2, 1, 2, 0, TRUE, &status);
   1263     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
   1264 
   1265     status = U_ZERO_ERROR;
   1266     ut = utext_openConstUnicodeString(ut, (const UnicodeString *)&ustr, &status);
   1267     TEST_SUCCESS(status);
   1268     writable = utext_isWritable(ut);
   1269     TEST_ASSERT(writable == FALSE);
   1270     utext_copy(ut, 1, 2, 0, TRUE, &status);
   1271     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
   1272 
   1273     // Deep Clone of a frozen UText should re-enable writing in the copy.
   1274     status = U_ZERO_ERROR;
   1275     ut = utext_openUnicodeString(ut, &ustr, &status);
   1276     TEST_SUCCESS(status);
   1277     utext_freeze(ut);
   1278     ut2 = utext_clone(ut2, ut, TRUE, FALSE, &status);   // deep clone
   1279     TEST_SUCCESS(status);
   1280     writable = utext_isWritable(ut2);
   1281     TEST_ASSERT(writable == TRUE);
   1282 
   1283 
   1284     // Deep clone of a frozen UText, where the base type is intrinsically non-writable,
   1285     //  should NOT enable writing in the copy.
   1286     status = U_ZERO_ERROR;
   1287     ut = utext_openUChars(ut, u16str, -1, &status);
   1288     TEST_SUCCESS(status);
   1289     utext_freeze(ut);
   1290     ut2 = utext_clone(ut2, ut, TRUE, FALSE, &status);   // deep clone
   1291     TEST_SUCCESS(status);
   1292     writable = utext_isWritable(ut2);
   1293     TEST_ASSERT(writable == FALSE);
   1294 
   1295     // cleanup
   1296     utext_close(ut);
   1297     utext_close(ut2);
   1298 }
   1299 
   1300 
   1301 //
   1302 //  Fragmented UText
   1303 //      A UText type that works with a chunk size of 1.
   1304 //      Intended to test for edge cases.
   1305 //      Input comes from a UnicodeString.
   1306 //
   1307 //       ut.b    the character.  Put into both halves.
   1308 //
   1309 
   1310 U_CDECL_BEGIN
   1311 static UBool U_CALLCONV
   1312 fragTextAccess(UText *ut, int64_t index, UBool forward) {
   1313     const UnicodeString *us = (const UnicodeString *)ut->context;
   1314     UChar  c;
   1315     int32_t length = us->length();
   1316     if (forward && index>=0 && index<length) {
   1317         c = us->charAt((int32_t)index);
   1318         ut->b = c | c<<16;
   1319         ut->chunkOffset = 0;
   1320         ut->chunkLength = 1;
   1321         ut->chunkNativeStart = index;
   1322         ut->chunkNativeLimit = index+1;
   1323         return true;
   1324     }
   1325     if (!forward && index>0 && index <=length) {
   1326         c = us->charAt((int32_t)index-1);
   1327         ut->b = c | c<<16;
   1328         ut->chunkOffset = 1;
   1329         ut->chunkLength = 1;
   1330         ut->chunkNativeStart = index-1;
   1331         ut->chunkNativeLimit = index;
   1332         return true;
   1333     }
   1334     ut->b = 0;
   1335     ut->chunkOffset = 0;
   1336     ut->chunkLength = 0;
   1337     if (index <= 0) {
   1338         ut->chunkNativeStart = 0;
   1339         ut->chunkNativeLimit = 0;
   1340     } else {
   1341         ut->chunkNativeStart = length;
   1342         ut->chunkNativeLimit = length;
   1343     }
   1344     return false;
   1345 }
   1346 
// Function table to be used with this fragmented text provider.
//   Filled in by openFragmentedUnicodeString(): a copy of the function table of the
//   UText it has just opened, with the access and clone entries replaced.
static UTextFuncs  fragmentFuncs;
   1350 
   1351 // Clone function for fragmented text provider.
   1352 //   Didn't really want to provide this, but it's easier to provide it than to keep it
   1353 //   out of the tests.
   1354 //
   1355 UText *
   1356 cloneFragmentedUnicodeString(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
   1357     if (U_FAILURE(*status)) {
   1358         return NULL;
   1359     }
   1360     if (deep) {
   1361         *status = U_UNSUPPORTED_ERROR;
   1362         return NULL;
   1363     }
   1364     dest = utext_openUnicodeString(dest, (UnicodeString *)src->context, status);
   1365     utext_setNativeIndex(dest, utext_getNativeIndex(src));
   1366     return dest;
   1367 }
   1368 
   1369 U_CDECL_END
   1370 
   1371 // Open function for the fragmented text provider.
   1372 UText *
   1373 openFragmentedUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) {
   1374     ut = utext_openUnicodeString(ut, s, status);
   1375     if (U_FAILURE(*status)) {
   1376         return ut;
   1377     }
   1378 
   1379     // Copy of the function table from the stock UnicodeString UText,
   1380     //   and replace the entry for the access function.
   1381     memcpy(&fragmentFuncs, ut->pFuncs, sizeof(fragmentFuncs));
   1382     fragmentFuncs.access = fragTextAccess;
   1383     fragmentFuncs.clone  = cloneFragmentedUnicodeString;
   1384     ut->pFuncs = &fragmentFuncs;
   1385 
   1386     ut->chunkContents = (UChar *)&ut->b;
   1387     ut->pFuncs->access(ut, 0, TRUE);
   1388     return ut;
   1389 }
   1390 
   1391 // Regression test for Ticket 5560
   1392 //   Clone fails to update chunkContentPointer in the cloned copy.
   1393 //   This is only an issue for UText types that work in a local buffer,
   1394 //      (UTF-8 wrapper, for example)
   1395 //
   1396 //   The test:
   1397 //     1.  Create an inital UText
   1398 //     2.  Deep clone it.  Contents should match original.
   1399 //     3.  Reset original to something different.
   1400 //     4.  Check that clone contents did not change.
   1401 //
   1402 void UTextTest::Ticket5560() {
   1403     /* The following two strings are in UTF-8 even on EBCDIC platforms. */
   1404     static const char s1[] = {0x41,0x42,0x43,0x44,0x45,0x46,0}; /* "ABCDEF" */
   1405     static const char s2[] = {0x31,0x32,0x33,0x34,0x35,0x36,0}; /* "123456" */
   1406 	UErrorCode status = U_ZERO_ERROR;
   1407 
   1408 	UText ut1 = UTEXT_INITIALIZER;
   1409 	UText ut2 = UTEXT_INITIALIZER;
   1410 
   1411 	utext_openUTF8(&ut1, s1, -1, &status);
   1412 	UChar c = utext_next32(&ut1);
   1413 	TEST_ASSERT(c == 0x41);  // c == 'A'
   1414 
   1415 	utext_clone(&ut2, &ut1, TRUE, FALSE, &status);
   1416 	TEST_SUCCESS(status);
   1417     c = utext_next32(&ut2);
   1418 	TEST_ASSERT(c == 0x42);  // c == 'B'
   1419     c = utext_next32(&ut1);
   1420 	TEST_ASSERT(c == 0x42);  // c == 'B'
   1421 
   1422 	utext_openUTF8(&ut1, s2, -1, &status);
   1423 	c = utext_next32(&ut1);
   1424 	TEST_ASSERT(c == 0x31);  // c == '1'
   1425     c = utext_next32(&ut2);
   1426 	TEST_ASSERT(c == 0x43);  // c == 'C'
   1427 
   1428     utext_close(&ut1);
   1429     utext_close(&ut2);
   1430 }
   1431 
   1432 
   1433 // Test for Ticket 6847
   1434 //
   1435 void UTextTest::Ticket6847() {
   1436     const int STRLEN = 90;
   1437     UChar s[STRLEN+1];
   1438     u_memset(s, 0x41, STRLEN);
   1439     s[STRLEN] = 0;
   1440 
   1441     UErrorCode status = U_ZERO_ERROR;
   1442     UText *ut = utext_openUChars(NULL, s, -1, &status);
   1443 
   1444     utext_setNativeIndex(ut, 0);
   1445     int32_t count = 0;
   1446     UChar32 c = 0;
   1447     int64_t nativeIndex = UTEXT_GETNATIVEINDEX(ut);
   1448     TEST_ASSERT(nativeIndex == 0);
   1449     while ((c = utext_next32(ut)) != U_SENTINEL) {
   1450         TEST_ASSERT(c == 0x41);
   1451         TEST_ASSERT(count < STRLEN);
   1452         if (count >= STRLEN) {
   1453             break;
   1454         }
   1455         count++;
   1456         nativeIndex = UTEXT_GETNATIVEINDEX(ut);
   1457         TEST_ASSERT(nativeIndex == count);
   1458     }
   1459     TEST_ASSERT(count == STRLEN);
   1460     nativeIndex = UTEXT_GETNATIVEINDEX(ut);
   1461     TEST_ASSERT(nativeIndex == STRLEN);
   1462     utext_close(ut);
   1463 }
   1464 
   1465 
   1466 void UTextTest::Ticket10562() {
   1467     // Note: failures show as a heap error when the test is run under valgrind.
   1468     UErrorCode status = U_ZERO_ERROR;
   1469 
   1470     const char *utf8_string = "\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41";
   1471     UText *utf8Text = utext_openUTF8(NULL, utf8_string, -1, &status);
   1472     TEST_SUCCESS(status);
   1473     UText *deepClone = utext_clone(NULL, utf8Text, TRUE, FALSE, &status);
   1474     TEST_SUCCESS(status);
   1475     UText *shallowClone = utext_clone(NULL, deepClone, FALSE, FALSE, &status);
   1476     TEST_SUCCESS(status);
   1477     utext_close(shallowClone);
   1478     utext_close(deepClone);
   1479     utext_close(utf8Text);
   1480 
   1481     status = U_ZERO_ERROR;
   1482     UnicodeString usString("Hello, World.");
   1483     UText *usText = utext_openUnicodeString(NULL, &usString, &status);
   1484     TEST_SUCCESS(status);
   1485     UText *usDeepClone = utext_clone(NULL, usText, TRUE, FALSE, &status);
   1486     TEST_SUCCESS(status);
   1487     UText *usShallowClone = utext_clone(NULL, usDeepClone, FALSE, FALSE, &status);
   1488     TEST_SUCCESS(status);
   1489     utext_close(usShallowClone);
   1490     utext_close(usDeepClone);
   1491     utext_close(usText);
   1492 }
   1493 
   1494 
   1495 void UTextTest::Ticket10983() {
   1496     // Note: failure shows as a seg fault when the defect is present.
   1497 
   1498     UErrorCode status = U_ZERO_ERROR;
   1499     UnicodeString s("Hello, World");
   1500     UText *ut = utext_openConstUnicodeString(NULL, &s, &status);
   1501     TEST_SUCCESS(status);
   1502 
   1503     status = U_INVALID_STATE_ERROR;
   1504     UText *cloned = utext_clone(NULL, ut, TRUE, TRUE, &status);
   1505     TEST_ASSERT(cloned == NULL);
   1506     TEST_ASSERT(status == U_INVALID_STATE_ERROR);
   1507 
   1508     utext_close(ut);
   1509 }
   1510 
   1511 // Ticket 12130 - extract on a UText wrapping a null terminated UChar * string
   1512 //                leaves the iteration position set incorrectly when the
   1513 //                actual string length is not yet known.
   1514 //
   1515 //                The test text needs to be long enough that UText defers getting the length.
   1516 
   1517 void UTextTest::Ticket12130() {
   1518     UErrorCode status = U_ZERO_ERROR;
   1519 
   1520     const char *text8 =
   1521         "Fundamentally, computers just deal with numbers. They store letters and other characters "
   1522         "by assigning a number for each one. Before Unicode was invented, there were hundreds "
   1523         "of different encoding systems for assigning these numbers. No single encoding could "
   1524         "contain enough characters: for example, the European Union alone requires several "
   1525         "different encodings to cover all its languages. Even for a single language like "
   1526         "English no single encoding was adequate for all the letters, punctuation, and technical "
   1527         "symbols in common use.";
   1528 
   1529     UnicodeString str(text8);
   1530     const UChar *ustr = str.getTerminatedBuffer();
   1531     UText ut = UTEXT_INITIALIZER;
   1532     utext_openUChars(&ut, ustr, -1, &status);
   1533     UChar extractBuffer[50];
   1534 
   1535     for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) {
   1536         int32_t endIdx = startIdx + 20;
   1537 
   1538         u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer));
   1539         utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status);
   1540         if (U_FAILURE(status)) {
   1541             errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
   1542             return;
   1543         }
   1544         int64_t ni  = utext_getNativeIndex(&ut);
   1545         int64_t expectedni = startIdx + 20;
   1546         if (expectedni > str.length()) {
   1547             expectedni = str.length();
   1548         }
   1549         if (expectedni != ni) {
   1550             errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__, expectedni, ni);
   1551         }
   1552         if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) {
   1553             errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
   1554                     __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))(), CStr(UnicodeString(extractBuffer))());
   1555         }
   1556     }
   1557     utext_close(&ut);
   1558 
   1559     // Similar utext extract, this time with the string length provided to the UText in advance,
   1560     // and a buffer of larger than required capacity.
   1561 
   1562     utext_openUChars(&ut, ustr, str.length(), &status);
   1563     for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) {
   1564         int32_t endIdx = startIdx + 20;
   1565         u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer));
   1566         utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status);
   1567         if (U_FAILURE(status)) {
   1568             errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
   1569             return;
   1570         }
   1571         int64_t ni  = utext_getNativeIndex(&ut);
   1572         int64_t expectedni = startIdx + 20;
   1573         if (expectedni > str.length()) {
   1574             expectedni = str.length();
   1575         }
   1576         if (expectedni != ni) {
   1577             errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__, expectedni, ni);
   1578         }
   1579         if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) {
   1580             errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
   1581                     __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))(), CStr(UnicodeString(extractBuffer))());
   1582         }
   1583     }
   1584     utext_close(&ut);
   1585 }
   1586