Home | History | Annotate | Download | only in intltest
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 2005-2012, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /************************************************************************
      7 *   Tests for the UText and UTextIterator text abstraction classes
      8 *
      9 ************************************************************************/
     10 
     11 #include <string.h>
     12 #include <stdio.h>
     13 #include <stdlib.h>
     14 #include "unicode/utypes.h"
     15 #include "unicode/utext.h"
     16 #include "unicode/utf8.h"
     17 #include "unicode/ustring.h"
     18 #include "unicode/uchriter.h"
     19 #include "utxttest.h"
     20 
     21 static UBool  gFailed = FALSE;
     22 static int    gTestNum = 0;
     23 
     24 // Forward decl
     25 UText *openFragmentedUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status);
     26 
     27 #define TEST_ASSERT(x) \
     28 { if ((x)==FALSE) {errln("Test #%d failure in file %s at line %d\n", gTestNum, __FILE__, __LINE__);\
     29                      gFailed = TRUE;\
     30    }}
     31 
     32 
     33 #define TEST_SUCCESS(status) \
     34 { if (U_FAILURE(status)) {errln("Test #%d failure in file %s at line %d. Error = \"%s\"\n", \
     35        gTestNum, __FILE__, __LINE__, u_errorName(status)); \
     36        gFailed = TRUE;\
     37    }}
     38 
     39 UTextTest::UTextTest() {
     40 }
     41 
     42 UTextTest::~UTextTest() {
     43 }
     44 
     45 
     46 void
     47 UTextTest::runIndexedTest(int32_t index, UBool exec,
     48                           const char* &name, char* /*par*/) {
     49     switch (index) {
     50         case 0: name = "TextTest";
     51             if (exec) TextTest();    break;
     52         case 1: name = "ErrorTest";
     53             if (exec) ErrorTest();   break;
     54         case 2: name = "FreezeTest";
     55             if (exec) FreezeTest();  break;
     56         case 3: name = "Ticket5560";
     57             if (exec) Ticket5560();  break;
     58         case 4: name = "Ticket6847";
     59             if (exec) Ticket6847();  break;
     60         default: name = "";          break;
     61     }
     62 }
     63 
     //
     // Quick and dirty random number generator.
     //   (The library generator is not used, so that results are portable.)
     //   Every call advances m_seed by one linear-congruential step and
     //   returns bits 16..30 of the new seed, i.e. a value in 0..32767.
     //
     static uint32_t m_seed = 1;
     static uint32_t m_rand()
     {
         const uint32_t advanced = m_seed * 1103515245 + 12345;
         m_seed = advanced;
         // For an unsigned value, >>16 is /65536 and &0x7fff is %32768.
         return (uint32_t)(advanced >> 16) & 0x7fff;
     }
     73 
     74 
     75 //
     76 //   TextTest()
     77 //
     78 //       Top Level function for UText testing.
     79 //       Specifies the strings to be tested, with the acutal testing itself
     80 //       being carried out in another function, TestString().
     81 //
     82 void  UTextTest::TextTest() {
     83     int32_t i, j;
     84 
     85     TestString("abcd\\U00010001xyz");
     86     TestString("");
     87 
     88     // Supplementary chars at start or end
     89     TestString("\\U00010001");
     90     TestString("abc\\U00010001");
     91     TestString("\\U00010001abc");
     92 
     93     // Test simple strings of lengths 1 to 60, looking for glitches at buffer boundaries
     94     UnicodeString s;
     95     for (i=1; i<60; i++) {
     96         s.truncate(0);
     97         for (j=0; j<i; j++) {
     98             if (j+0x30 == 0x5c) {
     99                 // backslash.  Needs to be escaped
    100                 s.append((UChar)0x5c);
    101             }
    102             s.append(UChar(j+0x30));
    103         }
    104         TestString(s);
    105     }
    106 
    107    // Test strings with odd-aligned supplementary chars,
    108    //    looking for glitches at buffer boundaries
    109     for (i=1; i<60; i++) {
    110         s.truncate(0);
    111         s.append((UChar)0x41);
    112         for (j=0; j<i; j++) {
    113             s.append(UChar32(j+0x11000));
    114         }
    115         TestString(s);
    116     }
    117 
    118     // String of chars of randomly varying size in utf-8 representation.
    119     //   Exercise the mapping, and the varying sized buffer.
    120     //
    121     s.truncate(0);
    122     UChar32  c1 = 0;
    123     UChar32  c2 = 0x100;
    124     UChar32  c3 = 0xa000;
    125     UChar32  c4 = 0x11000;
    126     for (i=0; i<1000; i++) {
    127         int len8 = m_rand()%4 + 1;
    128         switch (len8) {
    129             case 1:
    130                 c1 = (c1+1)%0x80;
    131                 // don't put 0 into string (0 terminated strings for some tests)
    132                 // don't put '\', will cause unescape() to fail.
    133                 if (c1==0x5c || c1==0) {
    134                     c1++;
    135                 }
    136                 s.append(c1);
    137                 break;
    138             case 2:
    139                 s.append(c2++);
    140                 break;
    141             case 3:
    142                 s.append(c3++);
    143                 break;
    144             case 4:
    145                 s.append(c4++);
    146                 break;
    147         }
    148     }
    149     TestString(s);
    150 }
    151 
    152 
    153 //
    154 //  TestString()     Run a suite of UText tests on a string.
    155 //                   The test string is unescaped before use.
    156 //
    157 void UTextTest::TestString(const UnicodeString &s) {
    158     int32_t       i;
    159     int32_t       j;
    160     UChar32       c;
    161     int32_t       cpCount = 0;
    162     UErrorCode    status  = U_ZERO_ERROR;
    163     UText        *ut      = NULL;
    164     int32_t       saLen;
    165 
    166     UnicodeString sa = s.unescape();
    167     saLen = sa.length();
    168 
    169     //
    170     // Build up a mapping between code points and UTF-16 code unit indexes.
    171     //
    172     m *cpMap = new m[sa.length() + 1];
    173     j = 0;
    174     for (i=0; i<sa.length(); i=sa.moveIndex32(i, 1)) {
    175         c = sa.char32At(i);
    176         cpMap[j].nativeIdx = i;
    177         cpMap[j].cp = c;
    178         j++;
    179         cpCount++;
    180     }
    181     cpMap[j].nativeIdx = i;   // position following the last char in utf-16 string.
    182 
    183 
    184     // UChar * test, null terminated
    185     status = U_ZERO_ERROR;
    186     UChar *buf = new UChar[saLen+1];
    187     sa.extract(buf, saLen+1, status);
    188     TEST_SUCCESS(status);
    189     ut = utext_openUChars(NULL, buf, -1, &status);
    190     TEST_SUCCESS(status);
    191     TestAccess(sa, ut, cpCount, cpMap);
    192     utext_close(ut);
    193     delete [] buf;
    194 
    195     // UChar * test, with length
    196     status = U_ZERO_ERROR;
    197     buf = new UChar[saLen+1];
    198     sa.extract(buf, saLen+1, status);
    199     TEST_SUCCESS(status);
    200     ut = utext_openUChars(NULL, buf, saLen, &status);
    201     TEST_SUCCESS(status);
    202     TestAccess(sa, ut, cpCount, cpMap);
    203     utext_close(ut);
    204     delete [] buf;
    205 
    206 
    207     // UnicodeString test
    208     status = U_ZERO_ERROR;
    209     ut = utext_openUnicodeString(NULL, &sa, &status);
    210     TEST_SUCCESS(status);
    211     TestAccess(sa, ut, cpCount, cpMap);
    212     TestCMR(sa, ut, cpCount, cpMap, cpMap);
    213     utext_close(ut);
    214 
    215 
    216     // Const UnicodeString test
    217     status = U_ZERO_ERROR;
    218     ut = utext_openConstUnicodeString(NULL, &sa, &status);
    219     TEST_SUCCESS(status);
    220     TestAccess(sa, ut, cpCount, cpMap);
    221     utext_close(ut);
    222 
    223 
    224     // Replaceable test.  (UnicodeString inherits Replaceable)
    225     status = U_ZERO_ERROR;
    226     ut = utext_openReplaceable(NULL, &sa, &status);
    227     TEST_SUCCESS(status);
    228     TestAccess(sa, ut, cpCount, cpMap);
    229     TestCMR(sa, ut, cpCount, cpMap, cpMap);
    230     utext_close(ut);
    231 
    232     // Character Iterator Tests
    233     status = U_ZERO_ERROR;
    234     const UChar *cbuf = sa.getBuffer();
    235     CharacterIterator *ci = new UCharCharacterIterator(cbuf, saLen, status);
    236     TEST_SUCCESS(status);
    237     ut = utext_openCharacterIterator(NULL, ci, &status);
    238     TEST_SUCCESS(status);
    239     TestAccess(sa, ut, cpCount, cpMap);
    240     utext_close(ut);
    241     delete ci;
    242 
    243 
    244     // Fragmented UnicodeString  (Chunk size of one)
    245     //
    246     status = U_ZERO_ERROR;
    247     ut = openFragmentedUnicodeString(NULL, &sa, &status);
    248     TEST_SUCCESS(status);
    249     TestAccess(sa, ut, cpCount, cpMap);
    250     utext_close(ut);
    251 
    252     //
    253     // UTF-8 test
    254     //
    255 
    256     // Convert the test string from UnicodeString to (char *) in utf-8 format
    257     int32_t u8Len = sa.extract(0, sa.length(), NULL, 0, "utf-8");
    258     char *u8String = new char[u8Len + 1];
    259     sa.extract(0, sa.length(), u8String, u8Len+1, "utf-8");
    260 
    261     // Build up the map of code point indices in the utf-8 string
    262     m * u8Map = new m[sa.length() + 1];
    263     i = 0;   // native utf-8 index
    264     for (j=0; j<cpCount ; j++) {  // code point number
    265         u8Map[j].nativeIdx = i;
    266         U8_NEXT(u8String, i, u8Len, c)
    267         u8Map[j].cp = c;
    268     }
    269     u8Map[cpCount].nativeIdx = u8Len;   // position following the last char in utf-8 string.
    270 
    271     // Do the test itself
    272     status = U_ZERO_ERROR;
    273     ut = utext_openUTF8(NULL, u8String, -1, &status);
    274     TEST_SUCCESS(status);
    275     TestAccess(sa, ut, cpCount, u8Map);
    276     utext_close(ut);
    277 
    278 
    279 
    280     delete []cpMap;
    281     delete []u8Map;
    282     delete []u8String;
    283 }
    284 
    285 //  TestCMR   test Copy, Move and Replace operations.
    286 //              us         UnicodeString containing the test text.
    287 //              ut         UText containing the same test text.
    288 //              cpCount    number of code points in the test text.
    289 //              nativeMap  Mapping from code points to native indexes for the UText.
    290 //              u16Map     Mapping from code points to UTF-16 indexes, for use with the UnicodeString.
    291 //
    292 //     This function runs a whole series of opertions on each incoming UText.
    293 //     The UText is deep-cloned prior to each operation, so that the original UText remains unchanged.
    294 //
    295 void UTextTest::TestCMR(const UnicodeString &us, UText *ut, int cpCount, m *nativeMap, m *u16Map) {
    296     TEST_ASSERT(utext_isWritable(ut) == TRUE);
    297 
    298     int  srcLengthType;       // Loop variables for selecting the postion and length
    299     int  srcPosType;          //   of the block to operate on within the source text.
    300     int  destPosType;
    301 
    302     int  srcIndex  = 0;       // Code Point indexes of the block to operate on for
    303     int  srcLength = 0;       //   a specific test.
    304 
    305     int  destIndex = 0;       // Code point index of the destination for a copy/move test.
    306 
    307     int32_t  nativeStart = 0; // Native unit indexes for a test.
    308     int32_t  nativeLimit = 0;
    309     int32_t  nativeDest  = 0;
    310 
    311     int32_t  u16Start    = 0; // UTF-16 indexes for a test.
    312     int32_t  u16Limit    = 0; //   used when performing the same operation in a Unicode String
    313     int32_t  u16Dest     = 0;
    314 
    315     // Iterate over a whole series of source index, length and a target indexes.
    316     // This is done with code point indexes; these will be later translated to native
    317     //   indexes using the cpMap.
    318     for (srcLengthType=1; srcLengthType<=3; srcLengthType++) {
    319         switch (srcLengthType) {
    320             case 1: srcLength = 1; break;
    321             case 2: srcLength = 5; break;
    322             case 3: srcLength = cpCount / 3;
    323         }
    324         for (srcPosType=1; srcPosType<=5; srcPosType++) {
    325             switch (srcPosType) {
    326                 case 1: srcIndex = 0; break;
    327                 case 2: srcIndex = 1; break;
    328                 case 3: srcIndex = cpCount - srcLength; break;
    329                 case 4: srcIndex = cpCount - srcLength - 1; break;
    330                 case 5: srcIndex = cpCount / 2; break;
    331             }
    332             if (srcIndex < 0 || srcIndex + srcLength > cpCount) {
    333                 // filter out bogus test cases -
    334                 //   those with a source range that falls of an edge of the string.
    335                 continue;
    336             }
    337 
    338             //
    339             // Copy and move tests.
    340             //   iterate over a variety of destination positions.
    341             //
    342             for (destPosType=1; destPosType<=4; destPosType++) {
    343                 switch (destPosType) {
    344                     case 1: destIndex = 0; break;
    345                     case 2: destIndex = 1; break;
    346                     case 3: destIndex = srcIndex - 1; break;
    347                     case 4: destIndex = srcIndex + srcLength + 1; break;
    348                     case 5: destIndex = cpCount-1; break;
    349                     case 6: destIndex = cpCount; break;
    350                 }
    351                 if (destIndex<0 || destIndex>cpCount) {
    352                     // filter out bogus test cases.
    353                     continue;
    354                 }
    355 
    356                 nativeStart = nativeMap[srcIndex].nativeIdx;
    357                 nativeLimit = nativeMap[srcIndex+srcLength].nativeIdx;
    358                 nativeDest  = nativeMap[destIndex].nativeIdx;
    359 
    360                 u16Start    = u16Map[srcIndex].nativeIdx;
    361                 u16Limit    = u16Map[srcIndex+srcLength].nativeIdx;
    362                 u16Dest     = u16Map[destIndex].nativeIdx;
    363 
    364                 gFailed = FALSE;
    365                 TestCopyMove(us, ut, FALSE,
    366                     nativeStart, nativeLimit, nativeDest,
    367                     u16Start, u16Limit, u16Dest);
    368 
    369                 TestCopyMove(us, ut, TRUE,
    370                     nativeStart, nativeLimit, nativeDest,
    371                     u16Start, u16Limit, u16Dest);
    372 
    373                 if (gFailed) {
    374                     return;
    375                 }
    376             }
    377 
    378             //
    379             //  Replace tests.
    380             //
    381             UnicodeString fullRepString("This is an arbitrary string that will be used as replacement text");
    382             for (int32_t replStrLen=0; replStrLen<20; replStrLen++) {
    383                 UnicodeString repStr(fullRepString, 0, replStrLen);
    384                 TestReplace(us, ut,
    385                     nativeStart, nativeLimit,
    386                     u16Start, u16Limit,
    387                     repStr);
    388                 if (gFailed) {
    389                     return;
    390                 }
    391             }
    392 
    393         }
    394     }
    395 
    396 }
    397 
    398 //
    399 //   TestCopyMove    run a single test case for utext_copy.
    400 //                   Test cases are created in TestCMR and dispatched here for execution.
    401 //
    402 void UTextTest::TestCopyMove(const UnicodeString &us, UText *ut, UBool move,
    403                     int32_t nativeStart, int32_t nativeLimit, int32_t nativeDest,
    404                     int32_t u16Start, int32_t u16Limit, int32_t u16Dest)
    405 {
    406     UErrorCode      status   = U_ZERO_ERROR;
    407     UText          *targetUT = NULL;
    408     gTestNum++;
    409     gFailed = FALSE;
    410 
    411     //
    412     //  clone the UText.  The test will be run in the cloned copy
    413     //  so that we don't alter the original.
    414     //
    415     targetUT = utext_clone(NULL, ut, TRUE, FALSE, &status);
    416     TEST_SUCCESS(status);
    417     UnicodeString targetUS(us);    // And copy the reference string.
    418 
    419     // do the test operation first in the reference
    420     targetUS.copy(u16Start, u16Limit, u16Dest);
    421     if (move) {
    422         // delete out the source range.
    423         if (u16Limit < u16Dest) {
    424             targetUS.removeBetween(u16Start, u16Limit);
    425         } else {
    426             int32_t amtCopied = u16Limit - u16Start;
    427             targetUS.removeBetween(u16Start+amtCopied, u16Limit+amtCopied);
    428         }
    429     }
    430 
    431     // Do the same operation in the UText under test
    432     utext_copy(targetUT, nativeStart, nativeLimit, nativeDest, move, &status);
    433     if (nativeDest > nativeStart && nativeDest < nativeLimit) {
    434         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
    435     } else {
    436         TEST_SUCCESS(status);
    437 
    438         // Compare the results of the two parallel tests
    439         int32_t  usi = 0;    // UnicodeString postion, utf-16 index.
    440         int64_t  uti = 0;    // UText position, native index.
    441         int32_t  cpi;        // char32 position (code point index)
    442         UChar32  usc;        // code point from Unicode String
    443         UChar32  utc;        // code point from UText
    444         utext_setNativeIndex(targetUT, 0);
    445         for (cpi=0; ; cpi++) {
    446             usc = targetUS.char32At(usi);
    447             utc = utext_next32(targetUT);
    448             if (utc < 0) {
    449                 break;
    450             }
    451             TEST_ASSERT(uti == usi);
    452             TEST_ASSERT(utc == usc);
    453             usi = targetUS.moveIndex32(usi, 1);
    454             uti = utext_getNativeIndex(targetUT);
    455             if (gFailed) {
    456                 goto cleanupAndReturn;
    457             }
    458         }
    459         int64_t expectedNativeLength = utext_nativeLength(ut);
    460         if (move == FALSE) {
    461             expectedNativeLength += nativeLimit - nativeStart;
    462         }
    463         uti = utext_getNativeIndex(targetUT);
    464         TEST_ASSERT(uti == expectedNativeLength);
    465     }
    466 
    467 cleanupAndReturn:
    468     utext_close(targetUT);
    469 }
    470 
    471 
    472 //
    473 //  TestReplace   Test a single Replace operation.
    474 //
    475 void UTextTest::TestReplace(
    476             const UnicodeString &us,     // reference UnicodeString in which to do the replace
    477             UText         *ut,                // UnicodeText object under test.
    478             int32_t       nativeStart,        // Range to be replaced, in UText native units.
    479             int32_t       nativeLimit,
    480             int32_t       u16Start,           // Range to be replaced, in UTF-16 units
    481             int32_t       u16Limit,           //    for use in the reference UnicodeString.
    482             const UnicodeString &repStr)      // The replacement string
    483 {
    484     UErrorCode      status   = U_ZERO_ERROR;
    485     UText          *targetUT = NULL;
    486     gTestNum++;
    487     gFailed = FALSE;
    488 
    489     //
    490     //  clone the target UText.  The test will be run in the cloned copy
    491     //  so that we don't alter the original.
    492     //
    493     targetUT = utext_clone(NULL, ut, TRUE, FALSE, &status);
    494     TEST_SUCCESS(status);
    495     UnicodeString targetUS(us);    // And copy the reference string.
    496 
    497     //
    498     // Do the replace operation in the Unicode String, to
    499     //   produce a reference result.
    500     //
    501     targetUS.replace(u16Start, u16Limit-u16Start, repStr);
    502 
    503     //
    504     // Do the replace on the UText under test
    505     //
    506     const UChar *rs = repStr.getBuffer();
    507     int32_t  rsLen = repStr.length();
    508     int32_t actualDelta = utext_replace(targetUT, nativeStart, nativeLimit, rs, rsLen, &status);
    509     int32_t expectedDelta = repStr.length() - (nativeLimit - nativeStart);
    510     TEST_ASSERT(actualDelta == expectedDelta);
    511 
    512     //
    513     // Compare the results
    514     //
    515     int32_t  usi = 0;    // UnicodeString postion, utf-16 index.
    516     int64_t  uti = 0;    // UText position, native index.
    517     int32_t  cpi;        // char32 position (code point index)
    518     UChar32  usc;        // code point from Unicode String
    519     UChar32  utc;        // code point from UText
    520     int64_t  expectedNativeLength = 0;
    521     utext_setNativeIndex(targetUT, 0);
    522     for (cpi=0; ; cpi++) {
    523         usc = targetUS.char32At(usi);
    524         utc = utext_next32(targetUT);
    525         if (utc < 0) {
    526             break;
    527         }
    528         TEST_ASSERT(uti == usi);
    529         TEST_ASSERT(utc == usc);
    530         usi = targetUS.moveIndex32(usi, 1);
    531         uti = utext_getNativeIndex(targetUT);
    532         if (gFailed) {
    533             goto cleanupAndReturn;
    534         }
    535     }
    536     expectedNativeLength = utext_nativeLength(ut) + expectedDelta;
    537     uti = utext_getNativeIndex(targetUT);
    538     TEST_ASSERT(uti == expectedNativeLength);
    539 
    540 cleanupAndReturn:
    541     utext_close(targetUT);
    542 }
    543 
    544 //
    545 //  TestAccess      Test the read only access functions on a UText, including cloning.
    546 //                  The text is accessed in a variety of ways, and compared with
    547 //                  the reference UnicodeString.
    548 //
    549 void UTextTest::TestAccess(const UnicodeString &us, UText *ut, int cpCount, m *cpMap) {
    550     // Run the standard tests on the caller-supplied UText.
    551     TestAccessNoClone(us, ut, cpCount, cpMap);
    552 
    553     // Re-run tests on a shallow clone.
    554     utext_setNativeIndex(ut, 0);
    555     UErrorCode status = U_ZERO_ERROR;
    556     UText *shallowClone = utext_clone(NULL, ut, FALSE /*deep*/, FALSE /*readOnly*/, &status);
    557     TEST_SUCCESS(status);
    558     TestAccessNoClone(us, shallowClone, cpCount, cpMap);
    559 
    560     //
    561     // Rerun again on a deep clone.
    562     // Note that text providers are not required to provide deep cloning,
    563     //   so unsupported errors are ignored.
    564     //
    565     status = U_ZERO_ERROR;
    566     utext_setNativeIndex(shallowClone, 0);
    567     UText *deepClone = utext_clone(NULL, shallowClone, TRUE, FALSE, &status);
    568     utext_close(shallowClone);
    569     if (status != U_UNSUPPORTED_ERROR) {
    570         TEST_SUCCESS(status);
    571         TestAccessNoClone(us, deepClone, cpCount, cpMap);
    572     }
    573     utext_close(deepClone);
    574 }
    575 
    576 
    577 //
    578 //  TestAccessNoClone()    Test the read only access functions on a UText.
    579 //                         The text is accessed in a variety of ways, and compared with
    580 //                         the reference UnicodeString.
    581 //
    582 void UTextTest::TestAccessNoClone(const UnicodeString &us, UText *ut, int cpCount, m *cpMap) {
    583     UErrorCode  status = U_ZERO_ERROR;
    584     gTestNum++;
    585 
    586     //
    587     //  Check the length from the UText
    588     //
    589     int64_t expectedLen = cpMap[cpCount].nativeIdx;
    590     int64_t utlen = utext_nativeLength(ut);
    591     TEST_ASSERT(expectedLen == utlen);
    592 
    593     //
    594     //  Iterate forwards, verify that we get the correct code points
    595     //   at the correct native offsets.
    596     //
    597     int         i = 0;
    598     int64_t     index;
    599     int64_t     expectedIndex = 0;
    600     int64_t     foundIndex = 0;
    601     UChar32     expectedC;
    602     UChar32     foundC;
    603     int64_t     len;
    604 
    605     for (i=0; i<cpCount; i++) {
    606         expectedIndex = cpMap[i].nativeIdx;
    607         foundIndex    = utext_getNativeIndex(ut);
    608         TEST_ASSERT(expectedIndex == foundIndex);
    609         expectedC     = cpMap[i].cp;
    610         foundC        = utext_next32(ut);
    611         TEST_ASSERT(expectedC == foundC);
    612         foundIndex    = utext_getPreviousNativeIndex(ut);
    613         TEST_ASSERT(expectedIndex == foundIndex);
    614         if (gFailed) {
    615             return;
    616         }
    617     }
    618     foundC = utext_next32(ut);
    619     TEST_ASSERT(foundC == U_SENTINEL);
    620 
    621     // Repeat above, using macros
    622     utext_setNativeIndex(ut, 0);
    623     for (i=0; i<cpCount; i++) {
    624         expectedIndex = cpMap[i].nativeIdx;
    625         foundIndex    = UTEXT_GETNATIVEINDEX(ut);
    626         TEST_ASSERT(expectedIndex == foundIndex);
    627         expectedC     = cpMap[i].cp;
    628         foundC        = UTEXT_NEXT32(ut);
    629         TEST_ASSERT(expectedC == foundC);
    630         if (gFailed) {
    631             return;
    632         }
    633     }
    634     foundC = UTEXT_NEXT32(ut);
    635     TEST_ASSERT(foundC == U_SENTINEL);
    636 
    637     //
    638     //  Forward iteration (above) should have left index at the
    639     //   end of the input, which should == length().
    640     //
    641     len = utext_nativeLength(ut);
    642     foundIndex  = utext_getNativeIndex(ut);
    643     TEST_ASSERT(len == foundIndex);
    644 
    645     //
    646     // Iterate backwards over entire test string
    647     //
    648     len = utext_getNativeIndex(ut);
    649     utext_setNativeIndex(ut, len);
    650     for (i=cpCount-1; i>=0; i--) {
    651         expectedC     = cpMap[i].cp;
    652         expectedIndex = cpMap[i].nativeIdx;
    653         int64_t prevIndex = utext_getPreviousNativeIndex(ut);
    654         foundC        = utext_previous32(ut);
    655         foundIndex    = utext_getNativeIndex(ut);
    656         TEST_ASSERT(expectedIndex == foundIndex);
    657         TEST_ASSERT(expectedC == foundC);
    658         TEST_ASSERT(prevIndex == foundIndex);
    659         if (gFailed) {
    660             return;
    661         }
    662     }
    663 
    664     //
    665     //  Backwards iteration, above, should have left our iterator
    666     //   position at zero, and continued backwards iterationshould fail.
    667     //
    668     foundIndex = utext_getNativeIndex(ut);
    669     TEST_ASSERT(foundIndex == 0);
    670     foundIndex = utext_getPreviousNativeIndex(ut);
    671     TEST_ASSERT(foundIndex == 0);
    672 
    673 
    674     foundC = utext_previous32(ut);
    675     TEST_ASSERT(foundC == U_SENTINEL);
    676     foundIndex = utext_getNativeIndex(ut);
    677     TEST_ASSERT(foundIndex == 0);
    678     foundIndex = utext_getPreviousNativeIndex(ut);
    679     TEST_ASSERT(foundIndex == 0);
    680 
    681 
    682     // And again, with the macros
    683     utext_setNativeIndex(ut, len);
    684     for (i=cpCount-1; i>=0; i--) {
    685         expectedC     = cpMap[i].cp;
    686         expectedIndex = cpMap[i].nativeIdx;
    687         foundC        = UTEXT_PREVIOUS32(ut);
    688         foundIndex    = UTEXT_GETNATIVEINDEX(ut);
    689         TEST_ASSERT(expectedIndex == foundIndex);
    690         TEST_ASSERT(expectedC == foundC);
    691         if (gFailed) {
    692             return;
    693         }
    694     }
    695 
    696     //
    697     //  Backwards iteration, above, should have left our iterator
    698     //   position at zero, and continued backwards iterationshould fail.
    699     //
    700     foundIndex = UTEXT_GETNATIVEINDEX(ut);
    701     TEST_ASSERT(foundIndex == 0);
    702 
    703     foundC = UTEXT_PREVIOUS32(ut);
    704     TEST_ASSERT(foundC == U_SENTINEL);
    705     foundIndex = UTEXT_GETNATIVEINDEX(ut);
    706     TEST_ASSERT(foundIndex == 0);
    707     if (gFailed) {
    708         return;
    709     }
    710 
    711     //
    712     //  next32From(), prevous32From(), Iterate in a somewhat random order.
    713     //
    714     int  cpIndex = 0;
    715     for (i=0; i<cpCount; i++) {
    716         cpIndex = (cpIndex + 9973) % cpCount;
    717         index         = cpMap[cpIndex].nativeIdx;
    718         expectedC     = cpMap[cpIndex].cp;
    719         foundC        = utext_next32From(ut, index);
    720         TEST_ASSERT(expectedC == foundC);
    721         if (gFailed) {
    722             return;
    723         }
    724     }
    725 
    726     cpIndex = 0;
    727     for (i=0; i<cpCount; i++) {
    728         cpIndex = (cpIndex + 9973) % cpCount;
    729         index         = cpMap[cpIndex+1].nativeIdx;
    730         expectedC     = cpMap[cpIndex].cp;
    731         foundC        = utext_previous32From(ut, index);
    732         TEST_ASSERT(expectedC == foundC);
    733         if (gFailed) {
    734             return;
    735         }
    736     }
    737 
    738 
    739     //
    740     // moveIndex(int32_t delta);
    741     //
    742 
    743     // Walk through frontwards, incrementing by one
    744     utext_setNativeIndex(ut, 0);
    745     for (i=1; i<=cpCount; i++) {
    746         utext_moveIndex32(ut, 1);
    747         index = utext_getNativeIndex(ut);
    748         expectedIndex = cpMap[i].nativeIdx;
    749         TEST_ASSERT(expectedIndex == index);
    750         index = UTEXT_GETNATIVEINDEX(ut);
    751         TEST_ASSERT(expectedIndex == index);
    752     }
    753 
    754     // Walk through frontwards, incrementing by two
    755     utext_setNativeIndex(ut, 0);
    756     for (i=2; i<cpCount; i+=2) {
    757         utext_moveIndex32(ut, 2);
    758         index = utext_getNativeIndex(ut);
    759         expectedIndex = cpMap[i].nativeIdx;
    760         TEST_ASSERT(expectedIndex == index);
    761         index = UTEXT_GETNATIVEINDEX(ut);
    762         TEST_ASSERT(expectedIndex == index);
    763     }
    764 
    765     // walk through the string backwards, decrementing by one.
    766     i = cpMap[cpCount].nativeIdx;
    767     utext_setNativeIndex(ut, i);
    768     for (i=cpCount; i>=0; i--) {
    769         expectedIndex = cpMap[i].nativeIdx;
    770         index = utext_getNativeIndex(ut);
    771         TEST_ASSERT(expectedIndex == index);
    772         index = UTEXT_GETNATIVEINDEX(ut);
    773         TEST_ASSERT(expectedIndex == index);
    774         utext_moveIndex32(ut, -1);
    775     }
    776 
    777 
    778     // walk through backwards, decrementing by three
    779     i = cpMap[cpCount].nativeIdx;
    780     utext_setNativeIndex(ut, i);
    781     for (i=cpCount; i>=0; i-=3) {
    782         expectedIndex = cpMap[i].nativeIdx;
    783         index = utext_getNativeIndex(ut);
    784         TEST_ASSERT(expectedIndex == index);
    785         index = UTEXT_GETNATIVEINDEX(ut);
    786         TEST_ASSERT(expectedIndex == index);
    787         utext_moveIndex32(ut, -3);
    788     }
    789 
    790 
    791     //
    792     // Extract
    793     //
    794     int bufSize = us.length() + 10;
    795     UChar *buf = new UChar[bufSize];
    796     status = U_ZERO_ERROR;
    797     expectedLen = us.length();
    798     len = utext_extract(ut, 0, utlen, buf, bufSize, &status);
    799     TEST_SUCCESS(status);
    800     TEST_ASSERT(len == expectedLen);
    801     int compareResult = us.compare(buf, -1);
    802     TEST_ASSERT(compareResult == 0);
    803 
    804     status = U_ZERO_ERROR;
    805     len = utext_extract(ut, 0, utlen, NULL, 0, &status);
    806     if (utlen == 0) {
    807         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    808     } else {
    809         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    810     }
    811     TEST_ASSERT(len == expectedLen);
    812 
    813     status = U_ZERO_ERROR;
    814     u_memset(buf, 0x5555, bufSize);
    815     len = utext_extract(ut, 0, utlen, buf, 1, &status);
    816     if (us.length() == 0) {
    817         TEST_SUCCESS(status);
    818         TEST_ASSERT(buf[0] == 0);
    819     } else {
    820         // Buf len == 1, extracting a single 16 bit value.
    821         // If the data char is supplementary, it doesn't matter whether the buffer remains unchanged,
    822         //   or whether the lead surrogate of the pair is extracted.
    823         //   It's a buffer overflow error in either case.
    824         TEST_ASSERT(buf[0] == us.charAt(0) ||
    825                     (buf[0] == 0x5555 && U_IS_SUPPLEMENTARY(us.char32At(0))));
    826         TEST_ASSERT(buf[1] == 0x5555);
    827         if (us.length() == 1) {
    828             TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    829         } else {
    830             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    831         }
    832     }
    833 
    834     delete []buf;
    835 }
    836 
    837 //
    838 //  ErrorTest()    Check various error and edge cases.
    839 //
    840 void UTextTest::ErrorTest()
    841 {
    842     // Close of an unitialized UText.  Shouldn't blow up.
    843     {
    844         UText  ut;
    845         memset(&ut, 0, sizeof(UText));
    846         utext_close(&ut);
    847         utext_close(NULL);
    848     }
    849 
    850     // Double-close of a UText.  Shouldn't blow up.  UText should still be usable.
    851     {
    852         UErrorCode status = U_ZERO_ERROR;
    853         UText ut = UTEXT_INITIALIZER;
    854         UnicodeString s("Hello, World");
    855         UText *ut2 = utext_openUnicodeString(&ut, &s, &status);
    856         TEST_SUCCESS(status);
    857         TEST_ASSERT(ut2 == &ut);
    858 
    859         UText *ut3 = utext_close(&ut);
    860         TEST_ASSERT(ut3 == &ut);
    861 
    862         UText *ut4 = utext_close(&ut);
    863         TEST_ASSERT(ut4 == &ut);
    864 
    865         utext_openUnicodeString(&ut, &s, &status);
    866         TEST_SUCCESS(status);
    867         utext_close(&ut);
    868     }
    869 
    870     // Re-use of a UText, chaining through each of the types of UText
    871     //   (If it doesn't blow up, and doesn't leak, it's probably working fine)
    872     {
    873         UErrorCode status = U_ZERO_ERROR;
    874         UText ut = UTEXT_INITIALIZER;
    875         UText  *utp;
    876         UnicodeString s1("Hello, World");
    877         UChar s2[] = {(UChar)0x41, (UChar)0x42, (UChar)0};
    878         const char  *s3 = "\x66\x67\x68";
    879 
    880         utp = utext_openUnicodeString(&ut, &s1, &status);
    881         TEST_SUCCESS(status);
    882         TEST_ASSERT(utp == &ut);
    883 
    884         utp = utext_openConstUnicodeString(&ut, &s1, &status);
    885         TEST_SUCCESS(status);
    886         TEST_ASSERT(utp == &ut);
    887 
    888         utp = utext_openUTF8(&ut, s3, -1, &status);
    889         TEST_SUCCESS(status);
    890         TEST_ASSERT(utp == &ut);
    891 
    892         utp = utext_openUChars(&ut, s2, -1, &status);
    893         TEST_SUCCESS(status);
    894         TEST_ASSERT(utp == &ut);
    895 
    896         utp = utext_close(&ut);
    897         TEST_ASSERT(utp == &ut);
    898 
    899         utp = utext_openUnicodeString(&ut, &s1, &status);
    900         TEST_SUCCESS(status);
    901         TEST_ASSERT(utp == &ut);
    902     }
    903 
    904     // Invalid parameters on open
    905     //
    906     {
    907         UErrorCode status = U_ZERO_ERROR;
    908         UText ut = UTEXT_INITIALIZER;
    909 
    910         utext_openUChars(&ut, NULL, 5, &status);
    911         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
    912 
    913         status = U_ZERO_ERROR;
    914         utext_openUChars(&ut, NULL, -1, &status);
    915         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
    916 
    917         status = U_ZERO_ERROR;
    918         utext_openUTF8(&ut, NULL, 4, &status);
    919         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
    920 
    921         status = U_ZERO_ERROR;
    922         utext_openUTF8(&ut, NULL, -1, &status);
    923         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
    924     }
    925 
    926     //
    927     //  UTF-8 with malformed sequences.
    928     //    These should come through as the Unicode replacement char, \ufffd
    929     //
    930     {
    931         UErrorCode status = U_ZERO_ERROR;
    932         UText *ut = NULL;
    933         const char *badUTF8 = "\x41\x81\x42\xf0\x81\x81\x43";
    934         UChar32  c;
    935 
    936         ut = utext_openUTF8(NULL, badUTF8, -1, &status);
    937         TEST_SUCCESS(status);
    938         c = utext_char32At(ut, 1);
    939         TEST_ASSERT(c == 0xfffd);
    940         c = utext_char32At(ut, 3);
    941         TEST_ASSERT(c == 0xfffd);
    942         c = utext_char32At(ut, 5);
    943         TEST_ASSERT(c == 0xfffd);
    944         c = utext_char32At(ut, 6);
    945         TEST_ASSERT(c == 0x43);
    946 
    947         UChar buf[10];
    948         int n = utext_extract(ut, 0, 9, buf, 10, &status);
    949         TEST_SUCCESS(status);
    950         TEST_ASSERT(n==5);
    951         TEST_ASSERT(buf[1] == 0xfffd);
    952         TEST_ASSERT(buf[3] == 0xfffd);
    953         TEST_ASSERT(buf[2] == 0x42);
    954         utext_close(ut);
    955     }
    956 
    957 
    958     //
    959     //  isLengthExpensive - does it make the exptected transitions after
    960     //                      getting the length of a nul terminated string?
    961     //
    962     {
    963         UErrorCode status = U_ZERO_ERROR;
    964         UnicodeString sa("Hello, this is a string");
    965         UBool  isExpensive;
    966 
    967         UChar sb[100];
    968         memset(sb, 0x20, sizeof(sb));
    969         sb[99] = 0;
    970 
    971         UText *uta = utext_openUnicodeString(NULL, &sa, &status);
    972         TEST_SUCCESS(status);
    973         isExpensive = utext_isLengthExpensive(uta);
    974         TEST_ASSERT(isExpensive == FALSE);
    975         utext_close(uta);
    976 
    977         UText *utb = utext_openUChars(NULL, sb, -1, &status);
    978         TEST_SUCCESS(status);
    979         isExpensive = utext_isLengthExpensive(utb);
    980         TEST_ASSERT(isExpensive == TRUE);
    981         int64_t  len = utext_nativeLength(utb);
    982         TEST_ASSERT(len == 99);
    983         isExpensive = utext_isLengthExpensive(utb);
    984         TEST_ASSERT(isExpensive == FALSE);
    985         utext_close(utb);
    986     }
    987 
    988     //
    989     // Index to positions not on code point boundaries.
    990     //
    991     {
    992         const char *u8str =         "\xc8\x81\xe1\x82\x83\xf1\x84\x85\x86";
    993         int32_t startMap[] =        {   0,  0,  2,  2,  2,  5,  5,  5,  5,  9,  9};
    994         int32_t nextMap[]  =        {   2,  2,  5,  5,  5,  9,  9,  9,  9,  9,  9};
    995         int32_t prevMap[]  =        {   0,  0,  0,  0,  0,  2,  2,  2,  2,  5,  5};
    996         UChar32  c32Map[] =    {0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146, 0x044146, 0x044146, -1, -1};
    997         UChar32  pr32Map[] =   {    -1,   -1,  0x201,  0x201,  0x201,   0x1083,   0x1083,   0x1083,   0x1083, 0x044146, 0x044146};
    998 
    999         // extractLen is the size, in UChars, of what will be extracted between index and index+1.
   1000         //  is zero when both index positions lie within the same code point.
   1001         int32_t  exLen[] =          {   0,  1,   0,  0,  1,  0,  0,  0,  2,  0,  0};
   1002 
   1003 
   1004         UErrorCode status = U_ZERO_ERROR;
   1005         UText *ut = utext_openUTF8(NULL, u8str, -1, &status);
   1006         TEST_SUCCESS(status);
   1007 
   1008         // Check setIndex
   1009         int32_t i;
   1010         int32_t startMapLimit = sizeof(startMap) / sizeof(int32_t);
   1011         for (i=0; i<startMapLimit; i++) {
   1012             utext_setNativeIndex(ut, i);
   1013             int64_t cpIndex = utext_getNativeIndex(ut);
   1014             TEST_ASSERT(cpIndex == startMap[i]);
   1015             cpIndex = UTEXT_GETNATIVEINDEX(ut);
   1016             TEST_ASSERT(cpIndex == startMap[i]);
   1017         }
   1018 
   1019         // Check char32At
   1020         for (i=0; i<startMapLimit; i++) {
   1021             UChar32 c32 = utext_char32At(ut, i);
   1022             TEST_ASSERT(c32 == c32Map[i]);
   1023             int64_t cpIndex = utext_getNativeIndex(ut);
   1024             TEST_ASSERT(cpIndex == startMap[i]);
   1025         }
   1026 
   1027         // Check utext_next32From
   1028         for (i=0; i<startMapLimit; i++) {
   1029             UChar32 c32 = utext_next32From(ut, i);
   1030             TEST_ASSERT(c32 == c32Map[i]);
   1031             int64_t cpIndex = utext_getNativeIndex(ut);
   1032             TEST_ASSERT(cpIndex == nextMap[i]);
   1033         }
   1034 
   1035         // check utext_previous32From
   1036         for (i=0; i<startMapLimit; i++) {
   1037             gTestNum++;
   1038             UChar32 c32 = utext_previous32From(ut, i);
   1039             TEST_ASSERT(c32 == pr32Map[i]);
   1040             int64_t cpIndex = utext_getNativeIndex(ut);
   1041             TEST_ASSERT(cpIndex == prevMap[i]);
   1042         }
   1043 
   1044         // check Extract
   1045         //   Extract from i to i+1, which may be zero or one code points,
   1046         //     depending on whether the indices straddle a cp boundary.
   1047         for (i=0; i<startMapLimit; i++) {
   1048             UChar buf[3];
   1049             status = U_ZERO_ERROR;
   1050             int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
   1051             TEST_SUCCESS(status);
   1052             TEST_ASSERT(extractedLen == exLen[i]);
   1053             if (extractedLen > 0) {
   1054                 UChar32  c32;
   1055                 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
   1056                 U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
   1057                 TEST_ASSERT(c32 == c32Map[i]);
   1058             }
   1059         }
   1060 
   1061         utext_close(ut);
   1062     }
   1063 
   1064 
   1065     {    //  Similar test, with utf16 instead of utf8
   1066          //  TODO:  merge the common parts of these tests.
   1067 
   1068         UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
   1069         int32_t startMap[]  ={ 0,     1,   1,    3,     4,  4,     6,  6};
   1070         int32_t nextMap[]  = { 1,     3,   3,    4,     6,  6,     6,  6};
   1071         int32_t prevMap[]  = { 0,     0,   0,    1,     3,  3,     4,  4};
   1072         UChar32  c32Map[] =  {0x1000, 0x11000, 0x11000, 0x2000,  0x22000, 0x22000, -1, -1};
   1073         UChar32  pr32Map[] = {    -1, 0x1000,  0x1000,  0x11000, 0x2000,  0x2000,   0x22000,   0x22000};
   1074         int32_t  exLen[] =   {   1,  0,   2,  1,  0,  2,  0,  0,};
   1075 
   1076         u16str = u16str.unescape();
   1077         UErrorCode status = U_ZERO_ERROR;
   1078         UText *ut = utext_openUnicodeString(NULL, &u16str, &status);
   1079         TEST_SUCCESS(status);
   1080 
   1081         int32_t startMapLimit = sizeof(startMap) / sizeof(int32_t);
   1082         int i;
   1083         for (i=0; i<startMapLimit; i++) {
   1084             utext_setNativeIndex(ut, i);
   1085             int64_t cpIndex = utext_getNativeIndex(ut);
   1086             TEST_ASSERT(cpIndex == startMap[i]);
   1087         }
   1088 
   1089         // Check char32At
   1090         for (i=0; i<startMapLimit; i++) {
   1091             UChar32 c32 = utext_char32At(ut, i);
   1092             TEST_ASSERT(c32 == c32Map[i]);
   1093             int64_t cpIndex = utext_getNativeIndex(ut);
   1094             TEST_ASSERT(cpIndex == startMap[i]);
   1095         }
   1096 
   1097         // Check utext_next32From
   1098         for (i=0; i<startMapLimit; i++) {
   1099             UChar32 c32 = utext_next32From(ut, i);
   1100             TEST_ASSERT(c32 == c32Map[i]);
   1101             int64_t cpIndex = utext_getNativeIndex(ut);
   1102             TEST_ASSERT(cpIndex == nextMap[i]);
   1103         }
   1104 
   1105         // check utext_previous32From
   1106         for (i=0; i<startMapLimit; i++) {
   1107             UChar32 c32 = utext_previous32From(ut, i);
   1108             TEST_ASSERT(c32 == pr32Map[i]);
   1109             int64_t cpIndex = utext_getNativeIndex(ut);
   1110             TEST_ASSERT(cpIndex == prevMap[i]);
   1111         }
   1112 
   1113         // check Extract
   1114         //   Extract from i to i+1, which may be zero or one code points,
   1115         //     depending on whether the indices straddle a cp boundary.
   1116         for (i=0; i<startMapLimit; i++) {
   1117             UChar buf[3];
   1118             status = U_ZERO_ERROR;
   1119             int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
   1120             TEST_SUCCESS(status);
   1121             TEST_ASSERT(extractedLen == exLen[i]);
   1122             if (extractedLen > 0) {
   1123                 UChar32  c32;
   1124                 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
   1125                 U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
   1126                 TEST_ASSERT(c32 == c32Map[i]);
   1127             }
   1128         }
   1129 
   1130         utext_close(ut);
   1131     }
   1132 
   1133     {    //  Similar test, with UText over Replaceable
   1134          //  TODO:  merge the common parts of these tests.
   1135 
   1136         UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
   1137         int32_t startMap[]  ={ 0,     1,   1,    3,     4,  4,     6,  6};
   1138         int32_t nextMap[]  = { 1,     3,   3,    4,     6,  6,     6,  6};
   1139         int32_t prevMap[]  = { 0,     0,   0,    1,     3,  3,     4,  4};
   1140         UChar32  c32Map[] =  {0x1000, 0x11000, 0x11000, 0x2000,  0x22000, 0x22000, -1, -1};
   1141         UChar32  pr32Map[] = {    -1, 0x1000,  0x1000,  0x11000, 0x2000,  0x2000,   0x22000,   0x22000};
   1142         int32_t  exLen[] =   {   1,  0,   2,  1,  0,  2,  0,  0,};
   1143 
   1144         u16str = u16str.unescape();
   1145         UErrorCode status = U_ZERO_ERROR;
   1146         UText *ut = utext_openReplaceable(NULL, &u16str, &status);
   1147         TEST_SUCCESS(status);
   1148 
   1149         int32_t startMapLimit = sizeof(startMap) / sizeof(int32_t);
   1150         int i;
   1151         for (i=0; i<startMapLimit; i++) {
   1152             utext_setNativeIndex(ut, i);
   1153             int64_t cpIndex = utext_getNativeIndex(ut);
   1154             TEST_ASSERT(cpIndex == startMap[i]);
   1155         }
   1156 
   1157         // Check char32At
   1158         for (i=0; i<startMapLimit; i++) {
   1159             UChar32 c32 = utext_char32At(ut, i);
   1160             TEST_ASSERT(c32 == c32Map[i]);
   1161             int64_t cpIndex = utext_getNativeIndex(ut);
   1162             TEST_ASSERT(cpIndex == startMap[i]);
   1163         }
   1164 
   1165         // Check utext_next32From
   1166         for (i=0; i<startMapLimit; i++) {
   1167             UChar32 c32 = utext_next32From(ut, i);
   1168             TEST_ASSERT(c32 == c32Map[i]);
   1169             int64_t cpIndex = utext_getNativeIndex(ut);
   1170             TEST_ASSERT(cpIndex == nextMap[i]);
   1171         }
   1172 
   1173         // check utext_previous32From
   1174         for (i=0; i<startMapLimit; i++) {
   1175             UChar32 c32 = utext_previous32From(ut, i);
   1176             TEST_ASSERT(c32 == pr32Map[i]);
   1177             int64_t cpIndex = utext_getNativeIndex(ut);
   1178             TEST_ASSERT(cpIndex == prevMap[i]);
   1179         }
   1180 
   1181         // check Extract
   1182         //   Extract from i to i+1, which may be zero or one code points,
   1183         //     depending on whether the indices straddle a cp boundary.
   1184         for (i=0; i<startMapLimit; i++) {
   1185             UChar buf[3];
   1186             status = U_ZERO_ERROR;
   1187             int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
   1188             TEST_SUCCESS(status);
   1189             TEST_ASSERT(extractedLen == exLen[i]);
   1190             if (extractedLen > 0) {
   1191                 UChar32  c32;
   1192                 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
   1193                 U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
   1194                 TEST_ASSERT(c32 == c32Map[i]);
   1195             }
   1196         }
   1197 
   1198         utext_close(ut);
   1199     }
   1200 }
   1201 
   1202 
   1203 void UTextTest::FreezeTest() {
   1204     // Check isWritable() and freeze() behavior.
   1205     //
   1206 
   1207     UnicodeString  ustr("Hello, World.");
   1208     const char u8str[] = {char(0x31), (char)0x32, (char)0x33, 0};
   1209     const UChar u16str[] = {(UChar)0x31, (UChar)0x32, (UChar)0x44, 0};
   1210 
   1211     UErrorCode status = U_ZERO_ERROR;
   1212     UText  *ut        = NULL;
   1213     UText  *ut2       = NULL;
   1214 
   1215     ut = utext_openUTF8(ut, u8str, -1, &status);
   1216     TEST_SUCCESS(status);
   1217     UBool writable = utext_isWritable(ut);
   1218     TEST_ASSERT(writable == FALSE);
   1219     utext_copy(ut, 1, 2, 0, TRUE, &status);
   1220     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
   1221 
   1222     status = U_ZERO_ERROR;
   1223     ut = utext_openUChars(ut, u16str, -1, &status);
   1224     TEST_SUCCESS(status);
   1225     writable = utext_isWritable(ut);
   1226     TEST_ASSERT(writable == FALSE);
   1227     utext_copy(ut, 1, 2, 0, TRUE, &status);
   1228     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
   1229 
   1230     status = U_ZERO_ERROR;
   1231     ut = utext_openUnicodeString(ut, &ustr, &status);
   1232     TEST_SUCCESS(status);
   1233     writable = utext_isWritable(ut);
   1234     TEST_ASSERT(writable == TRUE);
   1235     utext_freeze(ut);
   1236     writable = utext_isWritable(ut);
   1237     TEST_ASSERT(writable == FALSE);
   1238     utext_copy(ut, 1, 2, 0, TRUE, &status);
   1239     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
   1240 
   1241     status = U_ZERO_ERROR;
   1242     ut = utext_openUnicodeString(ut, &ustr, &status);
   1243     TEST_SUCCESS(status);
   1244     ut2 = utext_clone(ut2, ut, FALSE, FALSE, &status);  // clone with readonly = false
   1245     TEST_SUCCESS(status);
   1246     writable = utext_isWritable(ut2);
   1247     TEST_ASSERT(writable == TRUE);
   1248     ut2 = utext_clone(ut2, ut, FALSE, TRUE, &status);  // clone with readonly = true
   1249     TEST_SUCCESS(status);
   1250     writable = utext_isWritable(ut2);
   1251     TEST_ASSERT(writable == FALSE);
   1252     utext_copy(ut2, 1, 2, 0, TRUE, &status);
   1253     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
   1254 
   1255     status = U_ZERO_ERROR;
   1256     ut = utext_openConstUnicodeString(ut, (const UnicodeString *)&ustr, &status);
   1257     TEST_SUCCESS(status);
   1258     writable = utext_isWritable(ut);
   1259     TEST_ASSERT(writable == FALSE);
   1260     utext_copy(ut, 1, 2, 0, TRUE, &status);
   1261     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
   1262 
   1263     // Deep Clone of a frozen UText should re-enable writing in the copy.
   1264     status = U_ZERO_ERROR;
   1265     ut = utext_openUnicodeString(ut, &ustr, &status);
   1266     TEST_SUCCESS(status);
   1267     utext_freeze(ut);
   1268     ut2 = utext_clone(ut2, ut, TRUE, FALSE, &status);   // deep clone
   1269     TEST_SUCCESS(status);
   1270     writable = utext_isWritable(ut2);
   1271     TEST_ASSERT(writable == TRUE);
   1272 
   1273 
   1274     // Deep clone of a frozen UText, where the base type is intrinsically non-writable,
   1275     //  should NOT enable writing in the copy.
   1276     status = U_ZERO_ERROR;
   1277     ut = utext_openUChars(ut, u16str, -1, &status);
   1278     TEST_SUCCESS(status);
   1279     utext_freeze(ut);
   1280     ut2 = utext_clone(ut2, ut, TRUE, FALSE, &status);   // deep clone
   1281     TEST_SUCCESS(status);
   1282     writable = utext_isWritable(ut2);
   1283     TEST_ASSERT(writable == FALSE);
   1284 
   1285     // cleanup
   1286     utext_close(ut);
   1287     utext_close(ut2);
   1288 }
   1289 
   1290 
   1291 //
   1292 //  Fragmented UText
   1293 //      A UText type that works with a chunk size of 1.
   1294 //      Intended to test for edge cases.
   1295 //      Input comes from a UnicodeString.
   1296 //
   1297 //       ut.b    the character.  Put into both halves.
   1298 //
   1299 
   1300 U_CDECL_BEGIN
   1301 static UBool U_CALLCONV
   1302 fragTextAccess(UText *ut, int64_t index, UBool forward) {
   1303     const UnicodeString *us = (const UnicodeString *)ut->context;
   1304     UChar  c;
   1305     int32_t length = us->length();
   1306     if (forward && index>=0 && index<length) {
   1307         c = us->charAt((int32_t)index);
   1308         ut->b = c | c<<16;
   1309         ut->chunkOffset = 0;
   1310         ut->chunkLength = 1;
   1311         ut->chunkNativeStart = index;
   1312         ut->chunkNativeLimit = index+1;
   1313         return true;
   1314     }
   1315     if (!forward && index>0 && index <=length) {
   1316         c = us->charAt((int32_t)index-1);
   1317         ut->b = c | c<<16;
   1318         ut->chunkOffset = 1;
   1319         ut->chunkLength = 1;
   1320         ut->chunkNativeStart = index-1;
   1321         ut->chunkNativeLimit = index;
   1322         return true;
   1323     }
   1324     ut->b = 0;
   1325     ut->chunkOffset = 0;
   1326     ut->chunkLength = 0;
   1327     if (index <= 0) {
   1328         ut->chunkNativeStart = 0;
   1329         ut->chunkNativeLimit = 0;
   1330     } else {
   1331         ut->chunkNativeStart = length;
   1332         ut->chunkNativeLimit = length;
   1333     }
   1334     return false;
   1335 }
   1336 
   1337 // Function table to be used with this fragmented text provider.
   1338 //   Initialized in the open function.
   1339 static UTextFuncs  fragmentFuncs;
   1340 
   1341 // Clone function for fragmented text provider.
   1342 //   Didn't really want to provide this, but it's easier to provide it than to keep it
   1343 //   out of the tests.
   1344 //
   1345 UText *
   1346 cloneFragmentedUnicodeString(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
   1347     if (U_FAILURE(*status)) {
   1348         return NULL;
   1349     }
   1350     if (deep) {
   1351         *status = U_UNSUPPORTED_ERROR;
   1352         return NULL;
   1353     }
   1354     dest = utext_openUnicodeString(dest, (UnicodeString *)src->context, status);
   1355     utext_setNativeIndex(dest, utext_getNativeIndex(src));
   1356     return dest;
   1357 }
   1358 
   1359 U_CDECL_END
   1360 
   1361 // Open function for the fragmented text provider.
   1362 UText *
   1363 openFragmentedUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) {
   1364     ut = utext_openUnicodeString(ut, s, status);
   1365     if (U_FAILURE(*status)) {
   1366         return ut;
   1367     }
   1368 
   1369     // Copy of the function table from the stock UnicodeString UText,
   1370     //   and replace the entry for the access function.
   1371     memcpy(&fragmentFuncs, ut->pFuncs, sizeof(fragmentFuncs));
   1372     fragmentFuncs.access = fragTextAccess;
   1373     fragmentFuncs.clone  = cloneFragmentedUnicodeString;
   1374     ut->pFuncs = &fragmentFuncs;
   1375 
   1376     ut->chunkContents = (UChar *)&ut->b;
   1377     ut->pFuncs->access(ut, 0, TRUE);
   1378     return ut;
   1379 }
   1380 
   1381 // Regression test for Ticket 5560
   1382 //   Clone fails to update chunkContentPointer in the cloned copy.
   1383 //   This is only an issue for UText types that work in a local buffer,
   1384 //      (UTF-8 wrapper, for example)
   1385 //
   1386 //   The test:
   1387 //     1.  Create an inital UText
   1388 //     2.  Deep clone it.  Contents should match original.
   1389 //     3.  Reset original to something different.
   1390 //     4.  Check that clone contents did not change.
   1391 //
   1392 void UTextTest::Ticket5560() {
   1393     /* The following two strings are in UTF-8 even on EBCDIC platforms. */
   1394     static const char s1[] = {0x41,0x42,0x43,0x44,0x45,0x46,0}; /* "ABCDEF" */
   1395     static const char s2[] = {0x31,0x32,0x33,0x34,0x35,0x36,0}; /* "123456" */
   1396 	UErrorCode status = U_ZERO_ERROR;
   1397 
   1398 	UText ut1 = UTEXT_INITIALIZER;
   1399 	UText ut2 = UTEXT_INITIALIZER;
   1400 
   1401 	utext_openUTF8(&ut1, s1, -1, &status);
   1402 	UChar c = utext_next32(&ut1);
   1403 	TEST_ASSERT(c == 0x41);  // c == 'A'
   1404 
   1405 	utext_clone(&ut2, &ut1, TRUE, FALSE, &status);
   1406 	TEST_SUCCESS(status);
   1407     c = utext_next32(&ut2);
   1408 	TEST_ASSERT(c == 0x42);  // c == 'B'
   1409     c = utext_next32(&ut1);
   1410 	TEST_ASSERT(c == 0x42);  // c == 'B'
   1411 
   1412 	utext_openUTF8(&ut1, s2, -1, &status);
   1413 	c = utext_next32(&ut1);
   1414 	TEST_ASSERT(c == 0x31);  // c == '1'
   1415     c = utext_next32(&ut2);
   1416 	TEST_ASSERT(c == 0x43);  // c == 'C'
   1417 
   1418     utext_close(&ut1);
   1419     utext_close(&ut2);
   1420 }
   1421 
   1422 
   1423 // Test for Ticket 6847
   1424 //
   1425 void UTextTest::Ticket6847() {
   1426     const int STRLEN = 90;
   1427     UChar s[STRLEN+1];
   1428     u_memset(s, 0x41, STRLEN);
   1429     s[STRLEN] = 0;
   1430 
   1431     UErrorCode status = U_ZERO_ERROR;
   1432     UText *ut = utext_openUChars(NULL, s, -1, &status);
   1433 
   1434     utext_setNativeIndex(ut, 0);
   1435     int32_t count = 0;
   1436     UChar32 c = 0;
   1437     int64_t nativeIndex = UTEXT_GETNATIVEINDEX(ut);
   1438     TEST_ASSERT(nativeIndex == 0);
   1439     while ((c = utext_next32(ut)) != U_SENTINEL) {
   1440         TEST_ASSERT(c == 0x41);
   1441         TEST_ASSERT(count < STRLEN);
   1442         if (count >= STRLEN) {
   1443             break;
   1444         }
   1445         count++;
   1446         nativeIndex = UTEXT_GETNATIVEINDEX(ut);
   1447         TEST_ASSERT(nativeIndex == count);
   1448     }
   1449     TEST_ASSERT(count == STRLEN);
   1450     nativeIndex = UTEXT_GETNATIVEINDEX(ut);
   1451     TEST_ASSERT(nativeIndex == STRLEN);
   1452     utext_close(ut);
   1453 }
   1454 
   1455