Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 2004-2010, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /********************************************************************************
      7 *
      8 * File reapits.c
      9 *
     10 *********************************************************************************/
     11 /*C API TEST FOR Regular Expressions */
     12 /**
     13 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
     14 *   try to test the full functionality.  It just calls each function and verifies that it
     15 *   works on a basic level.
     16 *
     17 *   More complete testing of regular expression functionality is done with the C++ tests.
     18 **/
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     23 
     24 #include <stdlib.h>
     25 #include <string.h>
     26 #include "unicode/uloc.h"
     27 #include "unicode/uregex.h"
     28 #include "unicode/ustring.h"
     29 #include "unicode/utext.h"
     30 #include "cintltst.h"
     31 
     32 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
     33 log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
     34 
     35 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
     36 log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", __FILE__, __LINE__);}}
     37 
     38 /*
     39  *   TEST_SETUP and TEST_TEARDOWN
     40  *         macros to handle the boilerplate around setting up regex test cases.
     41  *         parameteres to setup:
     42  *              pattern:     The regex pattern, a (char *) null terminated C string.
     43  *              testString:  The string data, also a (char *) C string.
     44  *              flags:       Regex flags to set when compiling the pattern
     45  *
     46  *         Put arbitrary test code between SETUP and TEARDOWN.
     47  *         're" is the compiled, ready-to-go  regular expression.
     48  */
     49 #define TEST_SETUP(pattern, testString, flags) {  \
     50     UChar   *srcString = NULL;  \
     51     status = U_ZERO_ERROR; \
     52     re = uregex_openC(pattern, flags, NULL, &status);  \
     53     TEST_ASSERT_SUCCESS(status);   \
     54     srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
     55     u_uastrncpy(srcString, testString,  strlen(testString)+1); \
     56     uregex_setText(re, srcString, -1, &status); \
     57     TEST_ASSERT_SUCCESS(status);  \
     58     if (U_SUCCESS(status)) {
     59 
     60 #define TEST_TEARDOWN  \
     61     }  \
     62     TEST_ASSERT_SUCCESS(status);  \
     63     uregex_close(re);  \
     64     free(srcString);   \
     65     }
     66 
     67 
     68 /**
     69  * @param expected utf-8 array of bytes to be expected
     70  */
     71 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
     72      char     buf_inside_macro[120];
     73      int32_t  len = (int32_t)strlen(expected);
     74      UBool    success;
     75      if (nulTerm) {
     76          u_austrncpy(buf_inside_macro, (actual), len+1);
     77          buf_inside_macro[len+2] = 0;
     78          success = (strcmp((expected), buf_inside_macro) == 0);
     79      } else {
     80          u_austrncpy(buf_inside_macro, (actual), len);
     81          buf_inside_macro[len+1] = 0;
     82          success = (strncmp((expected), buf_inside_macro, len) == 0);
     83      }
     84      if (success == FALSE) {
     85          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
     86              file, line, (expected), buf_inside_macro);
     87      }
     88 }
     89 
     90 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
     91 
     92 
     93 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
     94     UErrorCode status = U_ZERO_ERROR;
     95     UText expectedText = UTEXT_INITIALIZER;
     96     utext_openUTF8(&expectedText, expected, -1, &status);
     97     utext_setNativeIndex(actual, 0);
     98     if (utext_compare(&expectedText, -1, actual, -1) != 0) {
     99         UChar32 c;
    100         log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
    101         c = utext_next32From(actual, 0);
    102         while (c != U_SENTINEL) {
    103             if (0x20<c && c <0x7e) {
    104                 log_err("%c", c);
    105             } else {
    106                 log_err("%#x", c);
    107             }
    108             c = UTEXT_NEXT32(actual);
    109         }
    110         log_err("\"\n");
    111     }
    112     utext_close(&expectedText);
    113 }
    114 
    115 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
    116 
    117 
    118 
    119 static void TestRegexCAPI(void);
    120 static void TestBug4315(void);
    121 static void TestUTextAPI(void);
    122 
    123 void addURegexTest(TestNode** root);
    124 
    125 void addURegexTest(TestNode** root)
    126 {
    127     addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
    128     addTest(root, &TestBug4315,   "regex/TestBug4315");
    129     addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
    130 }
    131 
    132 /*
    133  * Call back function and context struct used for testing
    134  *    regular expression user callbacks.  This test is mostly the same as
    135  *   the corresponding C++ test in intltest.
    136  */
    137 typedef struct callBackContext {
    138     int32_t          maxCalls;
    139     int32_t          numCalls;
    140     int32_t          lastSteps;
    141 } callBackContext;
    142 
    143 static UBool U_EXPORT2 U_CALLCONV
    144 TestCallbackFn(const void *context, int32_t steps) {
    145   callBackContext  *info = (callBackContext *)context;
    146   if (info->lastSteps+1 != steps) {
    147       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
    148   }
    149   info->lastSteps = steps;
    150   info->numCalls++;
    151   return (info->numCalls < info->maxCalls);
    152 }
    153 
    154 /*
    155  *   Regular Expression C API Tests
    156  */
    157 static void TestRegexCAPI(void) {
    158     UErrorCode           status = U_ZERO_ERROR;
    159     URegularExpression  *re;
    160     UChar                pat[200];
    161     UChar               *minus1;
    162 
    163     memset(&minus1, -1, sizeof(minus1));
    164 
    /* Minimalist open/close */
    166     u_uastrncpy(pat, "abc*", sizeof(pat)/2);
    167     re = uregex_open(pat, -1, 0, 0, &status);
    168     if (U_FAILURE(status)) {
    169          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
    170          return;
    171     }
    172     uregex_close(re);
    173 
    174     /* Open with all flag values set */
    175     status = U_ZERO_ERROR;
    176     re = uregex_open(pat, -1,
    177         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
    178         0, &status);
    179     TEST_ASSERT_SUCCESS(status);
    180     uregex_close(re);
    181 
    182     /* Open with an invalid flag */
    183     status = U_ZERO_ERROR;
    184     re = uregex_open(pat, -1, 0x40000000, 0, &status);
    185     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
    186     uregex_close(re);
    187 
    188     /* Open with an unimplemented flag */
    189     status = U_ZERO_ERROR;
    190     re = uregex_open(pat, -1, UREGEX_LITERAL, 0, &status);
    191     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
    192     uregex_close(re);
    193 
    194     /* openC with an invalid parameter */
    195     status = U_ZERO_ERROR;
    196     re = uregex_openC(NULL,
    197         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
    198     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
    199 
    /* openC with a failure already in status: must return NULL and leave status unchanged */
    201     status = U_USELESS_COLLATOR_ERROR;
    202     re = uregex_openC(NULL,
    203         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
    204     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
    205 
    206     /* openC   open from a C string */
    207     {
    208         const UChar   *p;
    209         int32_t  len;
    210         status = U_ZERO_ERROR;
    211         re = uregex_openC("abc*", 0, 0, &status);
    212         TEST_ASSERT_SUCCESS(status);
    213         p = uregex_pattern(re, &len, &status);
    214         TEST_ASSERT_SUCCESS(status);
    215 
    216         /* The TEST_ASSERT_SUCCESS above should change too... */
    217         if(U_SUCCESS(status)) {
    218             u_uastrncpy(pat, "abc*", sizeof(pat)/2);
    219             TEST_ASSERT(u_strcmp(pat, p) == 0);
    220             TEST_ASSERT(len==(int32_t)strlen("abc*"));
    221         }
    222 
    223         uregex_close(re);
    224 
    225         /*  TODO:  Open with ParseError parameter */
    226     }
    227 
    228     /*
    229      *  clone
    230      */
    231     {
    232         URegularExpression *clone1;
    233         URegularExpression *clone2;
    234         URegularExpression *clone3;
    235         UChar  testString1[30];
    236         UChar  testString2[30];
    237         UBool  result;
    238 
    239 
    240         status = U_ZERO_ERROR;
    241         re = uregex_openC("abc*", 0, 0, &status);
    242         TEST_ASSERT_SUCCESS(status);
    243         clone1 = uregex_clone(re, &status);
    244         TEST_ASSERT_SUCCESS(status);
    245         TEST_ASSERT(clone1 != NULL);
    246 
    247         status = U_ZERO_ERROR;
    248         clone2 = uregex_clone(re, &status);
    249         TEST_ASSERT_SUCCESS(status);
    250         TEST_ASSERT(clone2 != NULL);
    251         uregex_close(re);
    252 
    253         status = U_ZERO_ERROR;
    254         clone3 = uregex_clone(clone2, &status);
    255         TEST_ASSERT_SUCCESS(status);
    256         TEST_ASSERT(clone3 != NULL);
    257 
    258         u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
    259         u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
    260 
    261         status = U_ZERO_ERROR;
    262         uregex_setText(clone1, testString1, -1, &status);
    263         TEST_ASSERT_SUCCESS(status);
    264         result = uregex_lookingAt(clone1, 0, &status);
    265         TEST_ASSERT_SUCCESS(status);
    266         TEST_ASSERT(result==TRUE);
    267 
    268         status = U_ZERO_ERROR;
    269         uregex_setText(clone2, testString2, -1, &status);
    270         TEST_ASSERT_SUCCESS(status);
    271         result = uregex_lookingAt(clone2, 0, &status);
    272         TEST_ASSERT_SUCCESS(status);
    273         TEST_ASSERT(result==FALSE);
    274         result = uregex_find(clone2, 0, &status);
    275         TEST_ASSERT_SUCCESS(status);
    276         TEST_ASSERT(result==TRUE);
    277 
    278         uregex_close(clone1);
    279         uregex_close(clone2);
    280         uregex_close(clone3);
    281 
    282     }
    283 
    284     /*
    285      *  pattern()
    286     */
    287     {
    288         const UChar  *resultPat;
    289         int32_t       resultLen;
    290         u_uastrncpy(pat, "hello", sizeof(pat)/2);
    291         status = U_ZERO_ERROR;
    292         re = uregex_open(pat, -1, 0, NULL, &status);
    293         resultPat = uregex_pattern(re, &resultLen, &status);
    294         TEST_ASSERT_SUCCESS(status);
    295 
    296         /* The TEST_ASSERT_SUCCESS above should change too... */
    297         if (U_SUCCESS(status)) {
    298             TEST_ASSERT(resultLen == -1);
    299             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
    300         }
    301 
    302         uregex_close(re);
    303 
    304         status = U_ZERO_ERROR;
    305         re = uregex_open(pat, 3, 0, NULL, &status);
    306         resultPat = uregex_pattern(re, &resultLen, &status);
    307         TEST_ASSERT_SUCCESS(status);
    308         TEST_ASSERT_SUCCESS(status);
    309 
    310         /* The TEST_ASSERT_SUCCESS above should change too... */
    311         if (U_SUCCESS(status)) {
    312             TEST_ASSERT(resultLen == 3);
    313             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
    314             TEST_ASSERT(u_strlen(resultPat) == 3);
    315         }
    316 
    317         uregex_close(re);
    318     }
    319 
    320     /*
    321      *  flags()
    322      */
    323     {
    324         int32_t  t;
    325 
    326         status = U_ZERO_ERROR;
    327         re = uregex_open(pat, -1, 0, NULL, &status);
    328         t  = uregex_flags(re, &status);
    329         TEST_ASSERT_SUCCESS(status);
    330         TEST_ASSERT(t == 0);
    331         uregex_close(re);
    332 
    333         status = U_ZERO_ERROR;
    334         re = uregex_open(pat, -1, 0, NULL, &status);
    335         t  = uregex_flags(re, &status);
    336         TEST_ASSERT_SUCCESS(status);
    337         TEST_ASSERT(t == 0);
    338         uregex_close(re);
    339 
    340         status = U_ZERO_ERROR;
    341         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
    342         t  = uregex_flags(re, &status);
    343         TEST_ASSERT_SUCCESS(status);
    344         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
    345         uregex_close(re);
    346     }
    347 
    348     /*
    349      *  setText() and lookingAt()
    350      */
    351     {
    352         UChar  text1[50];
    353         UChar  text2[50];
    354         UBool  result;
    355 
    356         u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
    357         u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
    358         status = U_ZERO_ERROR;
    359         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
    360         re = uregex_open(pat, -1, 0, NULL, &status);
    361         TEST_ASSERT_SUCCESS(status);
    362 
    363         /* Operation before doing a setText should fail... */
    364         status = U_ZERO_ERROR;
    365         uregex_lookingAt(re, 0, &status);
    366         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
    367 
    368         status = U_ZERO_ERROR;
    369         uregex_setText(re, text1, -1, &status);
    370         result = uregex_lookingAt(re, 0, &status);
    371         TEST_ASSERT(result == TRUE);
    372         TEST_ASSERT_SUCCESS(status);
    373 
    374         status = U_ZERO_ERROR;
    375         uregex_setText(re, text2, -1, &status);
    376         result = uregex_lookingAt(re, 0, &status);
    377         TEST_ASSERT(result == FALSE);
    378         TEST_ASSERT_SUCCESS(status);
    379 
    380         status = U_ZERO_ERROR;
    381         uregex_setText(re, text1, -1, &status);
    382         result = uregex_lookingAt(re, 0, &status);
    383         TEST_ASSERT(result == TRUE);
    384         TEST_ASSERT_SUCCESS(status);
    385 
    386         status = U_ZERO_ERROR;
    387         uregex_setText(re, text1, 5, &status);
    388         result = uregex_lookingAt(re, 0, &status);
    389         TEST_ASSERT(result == FALSE);
    390         TEST_ASSERT_SUCCESS(status);
    391 
    392         status = U_ZERO_ERROR;
    393         uregex_setText(re, text1, 6, &status);
    394         result = uregex_lookingAt(re, 0, &status);
    395         TEST_ASSERT(result == TRUE);
    396         TEST_ASSERT_SUCCESS(status);
    397 
    398         uregex_close(re);
    399     }
    400 
    401 
    402     /*
    403      *  getText()
    404      */
    405     {
    406         UChar    text1[50];
    407         UChar    text2[50];
    408         const UChar   *result;
    409         int32_t  textLength;
    410 
    411         u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
    412         u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
    413         status = U_ZERO_ERROR;
    414         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
    415         re = uregex_open(pat, -1, 0, NULL, &status);
    416 
    417         uregex_setText(re, text1, -1, &status);
    418         result = uregex_getText(re, &textLength, &status);
    419         TEST_ASSERT(result == text1);
    420         TEST_ASSERT(textLength == -1);
    421         TEST_ASSERT_SUCCESS(status);
    422 
    423         status = U_ZERO_ERROR;
    424         uregex_setText(re, text2, 7, &status);
    425         result = uregex_getText(re, &textLength, &status);
    426         TEST_ASSERT(result == text2);
    427         TEST_ASSERT(textLength == 7);
    428         TEST_ASSERT_SUCCESS(status);
    429 
    430         status = U_ZERO_ERROR;
    431         uregex_setText(re, text2, 4, &status);
    432         result = uregex_getText(re, &textLength, &status);
    433         TEST_ASSERT(result == text2);
    434         TEST_ASSERT(textLength == 4);
    435         TEST_ASSERT_SUCCESS(status);
    436         uregex_close(re);
    437     }
    438 
    439     /*
    440      *  matches()
    441      */
    442     {
    443         UChar   text1[50];
    444         UBool   result;
    445         int     len;
    446         UChar   nullString[] = {0,0,0};
    447 
    448         u_uastrncpy(text1, "abcccde",  sizeof(text1)/2);
    449         status = U_ZERO_ERROR;
    450         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
    451         re = uregex_open(pat, -1, 0, NULL, &status);
    452 
    453         uregex_setText(re, text1, -1, &status);
    454         result = uregex_matches(re, 0, &status);
    455         TEST_ASSERT(result == FALSE);
    456         TEST_ASSERT_SUCCESS(status);
    457 
    458         status = U_ZERO_ERROR;
    459         uregex_setText(re, text1, 6, &status);
    460         result = uregex_matches(re, 0, &status);
    461         TEST_ASSERT(result == TRUE);
    462         TEST_ASSERT_SUCCESS(status);
    463 
    464         status = U_ZERO_ERROR;
    465         uregex_setText(re, text1, 6, &status);
    466         result = uregex_matches(re, 1, &status);
    467         TEST_ASSERT(result == FALSE);
    468         TEST_ASSERT_SUCCESS(status);
    469         uregex_close(re);
    470 
    471         status = U_ZERO_ERROR;
    472         re = uregex_openC(".?", 0, NULL, &status);
    473         uregex_setText(re, text1, -1, &status);
    474         len = u_strlen(text1);
    475         result = uregex_matches(re, len, &status);
    476         TEST_ASSERT(result == TRUE);
    477         TEST_ASSERT_SUCCESS(status);
    478 
    479         status = U_ZERO_ERROR;
    480         uregex_setText(re, nullString, -1, &status);
    481         TEST_ASSERT_SUCCESS(status);
    482         result = uregex_matches(re, 0, &status);
    483         TEST_ASSERT(result == TRUE);
    484         TEST_ASSERT_SUCCESS(status);
    485         uregex_close(re);
    486     }
    487 
    488 
    489     /*
    490      *  lookingAt()    Used in setText test.
    491      */
    492 
    493 
    494     /*
    495      *  find(), findNext, start, end, reset
    496      */
    497     {
    498         UChar    text1[50];
    499         UBool    result;
    500         u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
    501         status = U_ZERO_ERROR;
    502         re = uregex_openC("rx", 0, NULL, &status);
    503 
    504         uregex_setText(re, text1, -1, &status);
    505         result = uregex_find(re, 0, &status);
    506         TEST_ASSERT(result == TRUE);
    507         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
    508         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
    509         TEST_ASSERT_SUCCESS(status);
    510 
    511         result = uregex_find(re, 9, &status);
    512         TEST_ASSERT(result == TRUE);
    513         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
    514         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
    515         TEST_ASSERT_SUCCESS(status);
    516 
    517         result = uregex_find(re, 14, &status);
    518         TEST_ASSERT(result == FALSE);
    519         TEST_ASSERT_SUCCESS(status);
    520 
    521         status = U_ZERO_ERROR;
    522         uregex_reset(re, 0, &status);
    523 
    524         result = uregex_findNext(re, &status);
    525         TEST_ASSERT(result == TRUE);
    526         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
    527         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
    528         TEST_ASSERT_SUCCESS(status);
    529 
    530         result = uregex_findNext(re, &status);
    531         TEST_ASSERT(result == TRUE);
    532         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
    533         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
    534         TEST_ASSERT_SUCCESS(status);
    535 
    536         status = U_ZERO_ERROR;
    537         uregex_reset(re, 12, &status);
    538 
    539         result = uregex_findNext(re, &status);
    540         TEST_ASSERT(result == TRUE);
    541         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
    542         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
    543         TEST_ASSERT_SUCCESS(status);
    544 
    545         result = uregex_findNext(re, &status);
    546         TEST_ASSERT(result == FALSE);
    547         TEST_ASSERT_SUCCESS(status);
    548 
    549         uregex_close(re);
    550     }
    551 
    552     /*
    553      *  groupCount
    554      */
    555     {
    556         int32_t result;
    557 
    558         status = U_ZERO_ERROR;
    559         re = uregex_openC("abc", 0, NULL, &status);
    560         result = uregex_groupCount(re, &status);
    561         TEST_ASSERT_SUCCESS(status);
    562         TEST_ASSERT(result == 0);
    563         uregex_close(re);
    564 
    565         status = U_ZERO_ERROR;
    566         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
    567         result = uregex_groupCount(re, &status);
    568         TEST_ASSERT_SUCCESS(status);
    569         TEST_ASSERT(result == 3);
    570         uregex_close(re);
    571 
    572     }
    573 
    574 
    575     /*
    576      *  group()
    577      */
    578     {
    579         UChar    text1[80];
    580         UChar    buf[80];
    581         UBool    result;
    582         int32_t  resultSz;
    583         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
    584 
    585         status = U_ZERO_ERROR;
    586         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
    587         TEST_ASSERT_SUCCESS(status);
    588 
    589 
    590         uregex_setText(re, text1, -1, &status);
    591         result = uregex_find(re, 0, &status);
    592         TEST_ASSERT(result==TRUE);
    593 
    594         /*  Capture Group 0, the full match.  Should succeed.  */
    595         status = U_ZERO_ERROR;
    596         resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status);
    597         TEST_ASSERT_SUCCESS(status);
    598         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
    599         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    600 
    601         /*  Capture group #1.  Should succeed. */
    602         status = U_ZERO_ERROR;
    603         resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status);
    604         TEST_ASSERT_SUCCESS(status);
    605         TEST_ASSERT_STRING(" interior ", buf, TRUE);
    606         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
    607 
    608         /*  Capture group out of range.  Error. */
    609         status = U_ZERO_ERROR;
    610         uregex_group(re, 2, buf, sizeof(buf)/2, &status);
    611         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
    612 
    613         /* NULL buffer, pure pre-flight */
    614         status = U_ZERO_ERROR;
    615         resultSz = uregex_group(re, 0, NULL, 0, &status);
    616         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    617         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    618 
    619         /* Too small buffer, truncated string */
    620         status = U_ZERO_ERROR;
    621         memset(buf, -1, sizeof(buf));
    622         resultSz = uregex_group(re, 0, buf, 5, &status);
    623         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    624         TEST_ASSERT_STRING("abc i", buf, FALSE);
    625         TEST_ASSERT(buf[5] == (UChar)0xffff);
    626         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    627 
    628         /* Output string just fits buffer, no NUL term. */
    629         status = U_ZERO_ERROR;
    630         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
    631         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    632         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
    633         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    634         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
    635 
    636         uregex_close(re);
    637 
    638     }
    639 
    640     /*
    641      *  Regions
    642      */
    643 
    644 
    645         /* SetRegion(), getRegion() do something  */
    646         TEST_SETUP(".*", "0123456789ABCDEF", 0)
    647         UChar resultString[40];
    648         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
    649         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
    650         uregex_setRegion(re, 3, 6, &status);
    651         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
    652         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
    653         TEST_ASSERT(uregex_findNext(re, &status));
    654         TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3)
    655         TEST_ASSERT_STRING("345", resultString, TRUE);
    656         TEST_TEARDOWN;
    657 
    658         /* find(start=-1) uses regions   */
    659         TEST_SETUP(".*", "0123456789ABCDEF", 0);
    660         uregex_setRegion(re, 4, 6, &status);
    661         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
    662         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    663         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
    664         TEST_TEARDOWN;
    665 
    666         /* find (start >=0) does not use regions   */
    667         TEST_SETUP(".*", "0123456789ABCDEF", 0);
    668         uregex_setRegion(re, 4, 6, &status);
    669         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    670         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    671         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
    672         TEST_TEARDOWN;
    673 
    674         /* findNext() obeys regions    */
    675         TEST_SETUP(".", "0123456789ABCDEF", 0);
    676         uregex_setRegion(re, 4, 6, &status);
    677         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
    678         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    679         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
    680         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
    681         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
    682         TEST_TEARDOWN;
    683 
    684         /* matches(start=-1) uses regions                                           */
    685         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
    686         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    687         uregex_setRegion(re, 4, 6, &status);
    688         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
    689         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    690         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
    691         TEST_TEARDOWN;
    692 
    693         /* matches (start >=0) does not use regions       */
    694         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    695         uregex_setRegion(re, 4, 6, &status);
    696         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
    697         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    698         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
    699         TEST_TEARDOWN;
    700 
    701         /* lookingAt(start=-1) uses regions                                         */
    702         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
    703         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    704         uregex_setRegion(re, 4, 6, &status);
    705         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
    706         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    707         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
    708         TEST_TEARDOWN;
    709 
    710         /* lookingAt (start >=0) does not use regions  */
    711         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    712         uregex_setRegion(re, 4, 6, &status);
    713         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
    714         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    715         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
    716         TEST_TEARDOWN;
    717 
    718         /* hitEnd()       */
    719         TEST_SETUP("[a-f]*", "abcdefghij", 0);
    720         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    721         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
    722         TEST_TEARDOWN;
    723 
    724         TEST_SETUP("[a-f]*", "abcdef", 0);
    725         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    726         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
    727         TEST_TEARDOWN;
    728 
    729         /* requireEnd   */
    730         TEST_SETUP("abcd", "abcd", 0);
    731         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    732         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
    733         TEST_TEARDOWN;
    734 
    735         TEST_SETUP("abcd$", "abcd", 0);
    736         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    737         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
    738         TEST_TEARDOWN;
    739 
    740         /* anchoringBounds        */
    741         TEST_SETUP("abc$", "abcdef", 0);
    742         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
    743         uregex_useAnchoringBounds(re, FALSE, &status);
    744         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
    745 
    746         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
    747         uregex_useAnchoringBounds(re, TRUE, &status);
    748         uregex_setRegion(re, 0, 3, &status);
    749         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
    750         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
    751         TEST_TEARDOWN;
    752 
    753         /* Transparent Bounds      */
    754         TEST_SETUP("abc(?=def)", "abcdef", 0);
    755         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
    756         uregex_useTransparentBounds(re, TRUE, &status);
    757         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
    758 
    759         uregex_useTransparentBounds(re, FALSE, &status);
    760         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
    761         uregex_setRegion(re, 0, 3, &status);
    762         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
    763         uregex_useTransparentBounds(re, TRUE, &status);
    764         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
    765         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
    766         TEST_TEARDOWN;
    767 
    768 
    769     /*
    770      *  replaceFirst()
    771      */
    772     {
    773         UChar    text1[80];
    774         UChar    text2[80];
    775         UChar    replText[80];
    776         UChar    buf[80];
    777         int32_t  resultSz;
    778         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
    779         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
    780         u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
    781 
    782         status = U_ZERO_ERROR;
    783         re = uregex_openC("x(.*?)x", 0, NULL, &status);
    784         TEST_ASSERT_SUCCESS(status);
    785 
    786         /*  Normal case, with match */
    787         uregex_setText(re, text1, -1, &status);
    788         resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
    789         TEST_ASSERT_SUCCESS(status);
    790         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
    791         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    792 
    793         /* No match.  Text should copy to output with no changes.  */
    794         status = U_ZERO_ERROR;
    795         uregex_setText(re, text2, -1, &status);
    796         resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
    797         TEST_ASSERT_SUCCESS(status);
    798         TEST_ASSERT_STRING("No match here.", buf, TRUE);
    799         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
    800 
    801         /*  Match, output just fills buffer, no termination warning. */
    802         status = U_ZERO_ERROR;
    803         uregex_setText(re, text1, -1, &status);
    804         memset(buf, -1, sizeof(buf));
    805         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
    806         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    807         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
    808         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    809         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    810 
    811         /* Do the replaceFirst again, without first resetting anything.
    812          *  Should give the same results.
    813          */
    814         status = U_ZERO_ERROR;
    815         memset(buf, -1, sizeof(buf));
    816         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
    817         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    818         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
    819         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    820         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    821 
    822         /* NULL buffer, zero buffer length */
    823         status = U_ZERO_ERROR;
    824         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
    825         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    826         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    827 
    828         /* Buffer too small by one */
    829         status = U_ZERO_ERROR;
    830         memset(buf, -1, sizeof(buf));
    831         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
    832         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    833         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
    834         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    835         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    836 
    837         uregex_close(re);
    838     }
    839 
    840 
    841     /*
    842      *  replaceAll()
    843      */
    844     {
    845         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
    846         UChar    text2[80];          /*  "No match Here"           */
    847         UChar    replText[80];       /*  "<$1>"                    */
    848         UChar    replText2[80];      /*  "<<$1>>"                  */
    849         const char * pattern = "x(.*?)x";
    850         const char * expectedResult = "Replace <aa> <1> <...>.";
    851         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
    852         UChar    buf[80];
    853         int32_t  resultSize;
    854         int32_t  expectedResultSize;
    855         int32_t  expectedResultSize2;
    856         int32_t  i;
    857 
    858         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
    859         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
    860         u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
    861         u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2);
    862         expectedResultSize = strlen(expectedResult);
    863         expectedResultSize2 = strlen(expectedResult2);
    864 
    865         status = U_ZERO_ERROR;
    866         re = uregex_openC(pattern, 0, NULL, &status);
    867         TEST_ASSERT_SUCCESS(status);
    868 
    869         /*  Normal case, with match */
    870         uregex_setText(re, text1, -1, &status);
    871         resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
    872         TEST_ASSERT_SUCCESS(status);
    873         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
    874         TEST_ASSERT(resultSize == expectedResultSize);
    875 
    876         /* No match.  Text should copy to output with no changes.  */
    877         status = U_ZERO_ERROR;
    878         uregex_setText(re, text2, -1, &status);
    879         resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
    880         TEST_ASSERT_SUCCESS(status);
    881         TEST_ASSERT_STRING("No match here.", buf, TRUE);
    882         TEST_ASSERT(resultSize == u_strlen(text2));
    883 
    884         /*  Match, output just fills buffer, no termination warning. */
    885         status = U_ZERO_ERROR;
    886         uregex_setText(re, text1, -1, &status);
    887         memset(buf, -1, sizeof(buf));
    888         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
    889         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    890         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
    891         TEST_ASSERT(resultSize == expectedResultSize);
    892         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
    893 
    894         /* Do the replaceAll again, without first resetting anything.
    895          *  Should give the same results.
    896          */
    897         status = U_ZERO_ERROR;
    898         memset(buf, -1, sizeof(buf));
    899         resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
    900         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    901         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
    902         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
    903         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
    904 
    905         /* NULL buffer, zero buffer length */
    906         status = U_ZERO_ERROR;
    907         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
    908         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    909         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
    910 
    911         /* Buffer too small.  Try every size, which will tickle edge cases
    912          * in uregex_appendReplacement (used by replaceAll)   */
    913         for (i=0; i<expectedResultSize; i++) {
    914             char  expected[80];
    915             status = U_ZERO_ERROR;
    916             memset(buf, -1, sizeof(buf));
    917             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
    918             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    919             strcpy(expected, expectedResult);
    920             expected[i] = 0;
    921             TEST_ASSERT_STRING(expected, buf, FALSE);
    922             TEST_ASSERT(resultSize == expectedResultSize);
    923             TEST_ASSERT(buf[i] == (UChar)0xffff);
    924         }
    925 
    926         /* Buffer too small.  Same as previous test, except this time the replacement
    927          * text is longer than the match capture group, making the length of the complete
    928          * replacement longer than the original string.
    929          */
    930         for (i=0; i<expectedResultSize2; i++) {
    931             char  expected[80];
    932             status = U_ZERO_ERROR;
    933             memset(buf, -1, sizeof(buf));
    934             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
    935             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    936             strcpy(expected, expectedResult2);
    937             expected[i] = 0;
    938             TEST_ASSERT_STRING(expected, buf, FALSE);
    939             TEST_ASSERT(resultSize == expectedResultSize2);
    940             TEST_ASSERT(buf[i] == (UChar)0xffff);
    941         }
    942 
    943 
    944         uregex_close(re);
    945     }
    946 
    947 
    948     /*
    949      *  appendReplacement()
    950      */
    951     {
    952         UChar    text[100];
    953         UChar    repl[100];
    954         UChar    buf[100];
    955         UChar   *bufPtr;
    956         int32_t  bufCap;
    957 
    958 
    959         status = U_ZERO_ERROR;
    960         re = uregex_openC(".*", 0, 0, &status);
    961         TEST_ASSERT_SUCCESS(status);
    962 
    963         u_uastrncpy(text, "whatever",  sizeof(text)/2);
    964         u_uastrncpy(repl, "some other", sizeof(repl)/2);
    965         uregex_setText(re, text, -1, &status);
    966 
    967         /* match covers whole target string */
    968         uregex_find(re, 0, &status);
    969         TEST_ASSERT_SUCCESS(status);
    970         bufPtr = buf;
    971         bufCap = sizeof(buf) / 2;
    972         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
    973         TEST_ASSERT_SUCCESS(status);
    974         TEST_ASSERT_STRING("some other", buf, TRUE);
    975 
    976         /* Match has \u \U escapes */
    977         uregex_find(re, 0, &status);
    978         TEST_ASSERT_SUCCESS(status);
    979         bufPtr = buf;
    980         bufCap = sizeof(buf) / 2;
    981         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
    982         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
    983         TEST_ASSERT_SUCCESS(status);
    984         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
    985 
    986         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
    987         status = U_ZERO_ERROR;
    988         uregex_find(re, 0, &status);
    989         TEST_ASSERT_SUCCESS(status);
    990         bufPtr = buf;
    991         status = U_BUFFER_OVERFLOW_ERROR;
    992         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
    993         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    994 
    995         uregex_close(re);
    996     }
    997 
    998 
    999     /*
   1000      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
   1001      */
   1002 
   1003     /*
   1004      *  split()
   1005      */
   1006     {
   1007         UChar    textToSplit[80];
   1008         UChar    text2[80];
   1009         UChar    buf[200];
   1010         UChar    *fields[10];
   1011         int32_t  numFields;
   1012         int32_t  requiredCapacity;
   1013         int32_t  spaceNeeded;
   1014         int32_t  sz;
   1015 
   1016         u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
   1017         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
   1018 
   1019         status = U_ZERO_ERROR;
   1020         re = uregex_openC(":", 0, NULL, &status);
   1021 
   1022 
   1023         /*  Simple split */
   1024 
   1025         uregex_setText(re, textToSplit, -1, &status);
   1026         TEST_ASSERT_SUCCESS(status);
   1027 
   1028         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1029         if (U_SUCCESS(status)) {
   1030             memset(fields, -1, sizeof(fields));
   1031             numFields =
   1032                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
   1033             TEST_ASSERT_SUCCESS(status);
   1034 
   1035             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1036             if(U_SUCCESS(status)) {
   1037                 TEST_ASSERT(numFields == 3);
   1038                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1039                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
   1040                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
   1041                 TEST_ASSERT(fields[3] == NULL);
   1042 
   1043                 spaceNeeded = u_strlen(textToSplit) -
   1044                             (numFields - 1)  +  /* Field delimiters do not appear in output */
   1045                             numFields;          /* Each field gets a NUL terminator */
   1046 
   1047                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1048             }
   1049         }
   1050 
   1051         uregex_close(re);
   1052 
   1053 
   1054         /*  Split with too few output strings available */
   1055         status = U_ZERO_ERROR;
   1056         re = uregex_openC(":", 0, NULL, &status);
   1057         uregex_setText(re, textToSplit, -1, &status);
   1058         TEST_ASSERT_SUCCESS(status);
   1059 
   1060         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1061         if(U_SUCCESS(status)) {
   1062             memset(fields, -1, sizeof(fields));
   1063             numFields =
   1064                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
   1065             TEST_ASSERT_SUCCESS(status);
   1066 
   1067             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1068             if(U_SUCCESS(status)) {
   1069                 TEST_ASSERT(numFields == 2);
   1070                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1071                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
   1072                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
   1073 
   1074                 spaceNeeded = u_strlen(textToSplit) -
   1075                             (numFields - 1)  +  /* Field delimiters do not appear in output */
   1076                             numFields;          /* Each field gets a NUL terminator */
   1077 
   1078                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1079 
   1080                 /* Split with a range of output buffer sizes.  */
   1081                 spaceNeeded = u_strlen(textToSplit) -
   1082                     (numFields - 1)  +  /* Field delimiters do not appear in output */
   1083                     numFields;          /* Each field gets a NUL terminator */
   1084 
   1085                 for (sz=0; sz < spaceNeeded+1; sz++) {
   1086                     memset(fields, -1, sizeof(fields));
   1087                     status = U_ZERO_ERROR;
   1088                     numFields =
   1089                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
   1090                     if (sz >= spaceNeeded) {
   1091                         TEST_ASSERT_SUCCESS(status);
   1092                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1093                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
   1094                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
   1095                     } else {
   1096                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
   1097                     }
   1098                     TEST_ASSERT(numFields == 3);
   1099                     TEST_ASSERT(fields[3] == NULL);
   1100                     TEST_ASSERT(spaceNeeded == requiredCapacity);
   1101                 }
   1102             }
   1103         }
   1104 
   1105         uregex_close(re);
   1106     }
   1107 
   1108 
   1109 
   1110 
   1111     /* Split(), part 2.  Patterns with capture groups.  The capture group text
   1112      *                   comes out as additional fields.  */
   1113     {
   1114         UChar    textToSplit[80];
   1115         UChar    buf[200];
   1116         UChar    *fields[10];
   1117         int32_t  numFields;
   1118         int32_t  requiredCapacity;
   1119         int32_t  spaceNeeded;
   1120         int32_t  sz;
   1121 
   1122         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
   1123 
   1124         status = U_ZERO_ERROR;
   1125         re = uregex_openC("<(.*?)>", 0, NULL, &status);
   1126 
   1127         uregex_setText(re, textToSplit, -1, &status);
   1128         TEST_ASSERT_SUCCESS(status);
   1129 
   1130         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1131         if(U_SUCCESS(status)) {
   1132             memset(fields, -1, sizeof(fields));
   1133             numFields =
   1134                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
   1135             TEST_ASSERT_SUCCESS(status);
   1136 
   1137             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1138             if(U_SUCCESS(status)) {
   1139                 TEST_ASSERT(numFields == 5);
   1140                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1141                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1142                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1143                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1144                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
   1145                 TEST_ASSERT(fields[5] == NULL);
   1146                 spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
   1147                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1148             }
   1149         }
   1150 
   1151         /*  Split with too few output strings available (2) */
   1152         status = U_ZERO_ERROR;
   1153         memset(fields, -1, sizeof(fields));
   1154         numFields =
   1155             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
   1156         TEST_ASSERT_SUCCESS(status);
   1157 
   1158         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1159         if(U_SUCCESS(status)) {
   1160             TEST_ASSERT(numFields == 2);
   1161             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1162             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
   1163             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
   1164 
   1165             spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
   1166             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1167         }
   1168 
   1169         /*  Split with too few output strings available (3) */
   1170         status = U_ZERO_ERROR;
   1171         memset(fields, -1, sizeof(fields));
   1172         numFields =
   1173             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status);
   1174         TEST_ASSERT_SUCCESS(status);
   1175 
   1176         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1177         if(U_SUCCESS(status)) {
   1178             TEST_ASSERT(numFields == 3);
   1179             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1180             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1181             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
   1182             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
   1183 
   1184             spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
   1185             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1186         }
   1187 
   1188         /*  Split with just enough output strings available (5) */
   1189         status = U_ZERO_ERROR;
   1190         memset(fields, -1, sizeof(fields));
   1191         numFields =
   1192             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status);
   1193         TEST_ASSERT_SUCCESS(status);
   1194 
   1195         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1196         if(U_SUCCESS(status)) {
   1197             TEST_ASSERT(numFields == 5);
   1198             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1199             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1200             TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1201             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1202             TEST_ASSERT_STRING("  third", fields[4], TRUE);
   1203             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
   1204 
   1205             spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
   1206             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1207         }
   1208 
   1209         /* Split, end of text is a field delimiter.   */
   1210         status = U_ZERO_ERROR;
   1211         sz = strlen("first <tag-a> second<tag-b>");
   1212         uregex_setText(re, textToSplit, sz, &status);
   1213         TEST_ASSERT_SUCCESS(status);
   1214 
   1215         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1216         if(U_SUCCESS(status)) {
   1217             memset(fields, -1, sizeof(fields));
   1218             numFields =
   1219                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status);
   1220             TEST_ASSERT_SUCCESS(status);
   1221 
   1222             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1223             if(U_SUCCESS(status)) {
   1224                 TEST_ASSERT(numFields == 4);
   1225                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1226                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1227                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1228                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1229                 TEST_ASSERT(fields[4] == NULL);
   1230                 TEST_ASSERT(fields[8] == NULL);
   1231                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
   1232                 spaceNeeded = strlen("first .tag-a. second.tag-b.");  /* "." at NUL positions */
   1233                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1234             }
   1235         }
   1236 
   1237         uregex_close(re);
   1238     }
   1239 
   1240     /*
   1241      * set/getTimeLimit
   1242      */
   1243      TEST_SETUP("abc$", "abcdef", 0);
   1244      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
   1245      uregex_setTimeLimit(re, 1000, &status);
   1246      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
   1247      TEST_ASSERT_SUCCESS(status);
   1248      uregex_setTimeLimit(re, -1, &status);
   1249      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
   1250      status = U_ZERO_ERROR;
   1251      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
   1252      TEST_TEARDOWN;
   1253 
   1254      /*
   1255       * set/get Stack Limit
   1256       */
   1257      TEST_SETUP("abc$", "abcdef", 0);
   1258      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
   1259      uregex_setStackLimit(re, 40000, &status);
   1260      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
   1261      TEST_ASSERT_SUCCESS(status);
   1262      uregex_setStackLimit(re, -1, &status);
   1263      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
   1264      status = U_ZERO_ERROR;
   1265      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
   1266      TEST_TEARDOWN;
   1267 
   1268 
   1269      /*
   1270       * Get/Set callback functions
   1271       *     This test is copied from intltest regex/Callbacks
   1272       *     The pattern and test data will run long enough to cause the callback
   1273       *       to be invoked.  The nested '+' operators give exponential time
   1274       *       behavior with increasing string length.
   1275       */
   1276      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
   1277      callBackContext cbInfo = {4, 0, 0};
   1278      const void     *pContext   = &cbInfo;
   1279      URegexMatchCallback    *returnedFn = &TestCallbackFn;
   1280 
   1281      /*  Getting the callback fn when it hasn't been set must return NULL  */
   1282      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
   1283      TEST_ASSERT_SUCCESS(status);
   1284      TEST_ASSERT(returnedFn == NULL);
   1285      TEST_ASSERT(pContext == NULL);
   1286 
   1287      /* Set the callback and do a match.                                  */
   1288      /* The callback function should record that it has been called.      */
   1289      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
   1290      TEST_ASSERT_SUCCESS(status);
   1291      TEST_ASSERT(cbInfo.numCalls == 0);
   1292      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
   1293      TEST_ASSERT_SUCCESS(status);
   1294      TEST_ASSERT(cbInfo.numCalls > 0);
   1295 
   1296      /* Getting the callback should return the values that were set above.  */
   1297      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
   1298      TEST_ASSERT(returnedFn == &TestCallbackFn);
   1299      TEST_ASSERT(pContext == &cbInfo);
   1300 
   1301      TEST_TEARDOWN;
   1302 }
   1303 
   1304 
   1305 
   1306 static void TestBug4315(void) {
   1307     UErrorCode      theICUError = U_ZERO_ERROR;
   1308     URegularExpression *theRegEx;
   1309     UChar           *textBuff;
   1310     const char      *thePattern;
   1311     UChar            theString[100];
   1312     UChar           *destFields[24];
   1313     int32_t         neededLength1;
   1314     int32_t         neededLength2;
   1315 
   1316     int32_t         wordCount = 0;
   1317     int32_t         destFieldsSize = 24;
   1318 
   1319     thePattern  = "ck ";
   1320     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
   1321 
   1322     /* open a regex */
   1323     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
   1324     TEST_ASSERT_SUCCESS(theICUError);
   1325 
   1326     /* set the input string */
   1327     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
   1328     TEST_ASSERT_SUCCESS(theICUError);
   1329 
   1330     /* split */
   1331     /*explicitly pass NULL and 0 to force the overflow error -> this is where the
   1332      *  error occurs! */
   1333     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
   1334         destFieldsSize, &theICUError);
   1335 
   1336     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
   1337     TEST_ASSERT(wordCount==3);
   1338 
   1339     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
   1340     {
   1341         theICUError = U_ZERO_ERROR;
   1342         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
   1343         wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
   1344             destFields, destFieldsSize, &theICUError);
   1345         TEST_ASSERT(wordCount==3);
   1346         TEST_ASSERT_SUCCESS(theICUError);
   1347         TEST_ASSERT(neededLength1 == neededLength2);
   1348         TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
   1349         TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
   1350         TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
   1351         TEST_ASSERT(destFields[3] == NULL);
   1352         free(textBuff);
   1353     }
   1354     uregex_close(theRegEx);
   1355 }
   1356 
   1357 /* Based on TestRegexCAPI() */
   1358 static void TestUTextAPI(void) {
   1359     UErrorCode           status = U_ZERO_ERROR;
   1360     URegularExpression  *re;
   1361     UText                patternText = UTEXT_INITIALIZER;
   1362     UChar                pat[200];
   1363     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
   1364 
   1365     /* Minimalist open/close */
   1366     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
   1367     re = uregex_openUText(&patternText, 0, 0, &status);
   1368     if (U_FAILURE(status)) {
   1369          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
   1370          utext_close(&patternText);
   1371          return;
   1372     }
   1373     uregex_close(re);
   1374 
   1375     /* Open with all flag values set */
   1376     status = U_ZERO_ERROR;
   1377     re = uregex_openUText(&patternText,
   1378         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
   1379         0, &status);
   1380     TEST_ASSERT_SUCCESS(status);
   1381     uregex_close(re);
   1382 
   1383     /* Open with an invalid flag */
   1384     status = U_ZERO_ERROR;
   1385     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
   1386     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
   1387     uregex_close(re);
   1388 
   1389     /* open with an invalid parameter */
   1390     status = U_ZERO_ERROR;
   1391     re = uregex_openUText(NULL,
   1392         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
   1393     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
   1394 
   1395     /*
   1396      *  clone
   1397      */
   1398     {
   1399         URegularExpression *clone1;
   1400         URegularExpression *clone2;
   1401         URegularExpression *clone3;
   1402         UChar  testString1[30];
   1403         UChar  testString2[30];
   1404         UBool  result;
   1405 
   1406 
   1407         status = U_ZERO_ERROR;
   1408         re = uregex_openUText(&patternText, 0, 0, &status);
   1409         TEST_ASSERT_SUCCESS(status);
   1410         clone1 = uregex_clone(re, &status);
   1411         TEST_ASSERT_SUCCESS(status);
   1412         TEST_ASSERT(clone1 != NULL);
   1413 
   1414         status = U_ZERO_ERROR;
   1415         clone2 = uregex_clone(re, &status);
   1416         TEST_ASSERT_SUCCESS(status);
   1417         TEST_ASSERT(clone2 != NULL);
   1418         uregex_close(re);
   1419 
   1420         status = U_ZERO_ERROR;
   1421         clone3 = uregex_clone(clone2, &status);
   1422         TEST_ASSERT_SUCCESS(status);
   1423         TEST_ASSERT(clone3 != NULL);
   1424 
   1425         u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
   1426         u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
   1427 
   1428         status = U_ZERO_ERROR;
   1429         uregex_setText(clone1, testString1, -1, &status);
   1430         TEST_ASSERT_SUCCESS(status);
   1431         result = uregex_lookingAt(clone1, 0, &status);
   1432         TEST_ASSERT_SUCCESS(status);
   1433         TEST_ASSERT(result==TRUE);
   1434 
   1435         status = U_ZERO_ERROR;
   1436         uregex_setText(clone2, testString2, -1, &status);
   1437         TEST_ASSERT_SUCCESS(status);
   1438         result = uregex_lookingAt(clone2, 0, &status);
   1439         TEST_ASSERT_SUCCESS(status);
   1440         TEST_ASSERT(result==FALSE);
   1441         result = uregex_find(clone2, 0, &status);
   1442         TEST_ASSERT_SUCCESS(status);
   1443         TEST_ASSERT(result==TRUE);
   1444 
   1445         uregex_close(clone1);
   1446         uregex_close(clone2);
   1447         uregex_close(clone3);
   1448 
   1449     }
   1450 
   1451     /*
   1452      *  pattern() and patternUText()
   1453      */
   1454     {
   1455         const UChar  *resultPat;
   1456         int32_t       resultLen;
   1457         UText        *resultText;
   1458         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
   1459         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
   1460         u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */
   1461         status = U_ZERO_ERROR;
   1462 
   1463         utext_openUTF8(&patternText, str_hello, -1, &status);
   1464         re = uregex_open(pat, -1, 0, NULL, &status);
   1465         resultPat = uregex_pattern(re, &resultLen, &status);
   1466         TEST_ASSERT_SUCCESS(status);
   1467 
   1468         /* The TEST_ASSERT_SUCCESS above should change too... */
   1469         if (U_SUCCESS(status)) {
   1470             TEST_ASSERT(resultLen == -1);
   1471             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
   1472         }
   1473 
   1474         resultText = uregex_patternUText(re, &status);
   1475         TEST_ASSERT_SUCCESS(status);
   1476         TEST_ASSERT_UTEXT(str_hello, resultText);
   1477 
   1478         uregex_close(re);
   1479 
   1480         status = U_ZERO_ERROR;
   1481         re = uregex_open(pat, 3, 0, NULL, &status);
   1482         resultPat = uregex_pattern(re, &resultLen, &status);
   1483         TEST_ASSERT_SUCCESS(status);
   1484 
   1485         /* The TEST_ASSERT_SUCCESS above should change too... */
   1486         if (U_SUCCESS(status)) {
   1487             TEST_ASSERT(resultLen == 3);
   1488             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
   1489             TEST_ASSERT(u_strlen(resultPat) == 3);
   1490         }
   1491 
   1492         resultText = uregex_patternUText(re, &status);
   1493         TEST_ASSERT_SUCCESS(status);
   1494         TEST_ASSERT_UTEXT(str_hel, resultText);
   1495 
   1496         uregex_close(re);
   1497     }
   1498 
   1499     /*
   1500      *  setUText() and lookingAt()
   1501      */
   1502     {
   1503         UText  text1 = UTEXT_INITIALIZER;
   1504         UText  text2 = UTEXT_INITIALIZER;
   1505         UBool  result;
   1506         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
   1507         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
   1508         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
   1509         status = U_ZERO_ERROR;
   1510         utext_openUTF8(&text1, str_abcccd, -1, &status);
   1511         utext_openUTF8(&text2, str_abcccxd, -1, &status);
   1512 
   1513         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1514         re = uregex_openUText(&patternText, 0, NULL, &status);
   1515         TEST_ASSERT_SUCCESS(status);
   1516 
   1517         /* Operation before doing a setText should fail... */
   1518         status = U_ZERO_ERROR;
   1519         uregex_lookingAt(re, 0, &status);
   1520         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
   1521 
   1522         status = U_ZERO_ERROR;
   1523         uregex_setUText(re, &text1, &status);
   1524         result = uregex_lookingAt(re, 0, &status);
   1525         TEST_ASSERT(result == TRUE);
   1526         TEST_ASSERT_SUCCESS(status);
   1527 
   1528         status = U_ZERO_ERROR;
   1529         uregex_setUText(re, &text2, &status);
   1530         result = uregex_lookingAt(re, 0, &status);
   1531         TEST_ASSERT(result == FALSE);
   1532         TEST_ASSERT_SUCCESS(status);
   1533 
   1534         status = U_ZERO_ERROR;
   1535         uregex_setUText(re, &text1, &status);
   1536         result = uregex_lookingAt(re, 0, &status);
   1537         TEST_ASSERT(result == TRUE);
   1538         TEST_ASSERT_SUCCESS(status);
   1539 
   1540         uregex_close(re);
   1541         utext_close(&text1);
   1542         utext_close(&text2);
   1543     }
   1544 
   1545 
   1546     /*
   1547      *  getText() and getUText()
   1548      */
   1549     {
   1550         UText  text1 = UTEXT_INITIALIZER;
   1551         UText  text2 = UTEXT_INITIALIZER;
   1552         UChar  text2Chars[20];
   1553         UText  *resultText;
   1554         const UChar   *result;
   1555         int32_t  textLength;
   1556         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
   1557         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
   1558         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
   1559 
   1560 
   1561         status = U_ZERO_ERROR;
   1562         utext_openUTF8(&text1, str_abcccd, -1, &status);
   1563         u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2);
   1564         utext_openUChars(&text2, text2Chars, -1, &status);
   1565 
   1566         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1567         re = uregex_openUText(&patternText, 0, NULL, &status);
   1568 
   1569         /* First set a UText */
   1570         uregex_setUText(re, &text1, &status);
   1571         resultText = uregex_getUText(re, NULL, &status);
   1572         TEST_ASSERT_SUCCESS(status);
   1573         TEST_ASSERT(resultText != &text1);
   1574         utext_setNativeIndex(resultText, 0);
   1575         utext_setNativeIndex(&text1, 0);
   1576         TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0);
   1577         utext_close(resultText);
   1578 
   1579         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
   1580         TEST_ASSERT(textLength == -1 || textLength == 6);
   1581         resultText = uregex_getUText(re, NULL, &status);
   1582         TEST_ASSERT_SUCCESS(status);
   1583         TEST_ASSERT(resultText != &text1);
   1584         utext_setNativeIndex(resultText, 0);
   1585         utext_setNativeIndex(&text1, 0);
   1586         TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0);
   1587         utext_close(resultText);
   1588 
   1589         /* Then set a UChar * */
   1590         uregex_setText(re, text2Chars, 7, &status);
   1591         resultText = uregex_getUText(re, NULL, &status);
   1592         TEST_ASSERT_SUCCESS(status);
   1593         utext_setNativeIndex(resultText, 0);
   1594         utext_setNativeIndex(&text2, 0);
   1595         TEST_ASSERT(utext_compare(resultText, -1, &text2, -1) == 0);
   1596         utext_close(resultText);
   1597         result = uregex_getText(re, &textLength, &status);
   1598         TEST_ASSERT(textLength == 7);
   1599 
   1600         uregex_close(re);
   1601         utext_close(&text1);
   1602         utext_close(&text2);
   1603     }
   1604 
   1605     /*
   1606      *  matches()
   1607      */
   1608     {
   1609         UText   text1 = UTEXT_INITIALIZER;
   1610         UBool   result;
   1611         UText   nullText = UTEXT_INITIALIZER;
   1612         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
   1613         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
   1614 
   1615         status = U_ZERO_ERROR;
   1616         utext_openUTF8(&text1, str_abcccde, -1, &status);
   1617         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1618         re = uregex_openUText(&patternText, 0, NULL, &status);
   1619 
   1620         uregex_setUText(re, &text1, &status);
   1621         result = uregex_matches(re, 0, &status);
   1622         TEST_ASSERT(result == FALSE);
   1623         TEST_ASSERT_SUCCESS(status);
   1624         uregex_close(re);
   1625 
   1626         status = U_ZERO_ERROR;
   1627         re = uregex_openC(".?", 0, NULL, &status);
   1628         uregex_setUText(re, &text1, &status);
   1629         result = uregex_matches(re, 7, &status);
   1630         TEST_ASSERT(result == TRUE);
   1631         TEST_ASSERT_SUCCESS(status);
   1632 
   1633         status = U_ZERO_ERROR;
   1634         utext_openUTF8(&nullText, "", -1, &status);
   1635         uregex_setUText(re, &nullText, &status);
   1636         TEST_ASSERT_SUCCESS(status);
   1637         result = uregex_matches(re, 0, &status);
   1638         TEST_ASSERT(result == TRUE);
   1639         TEST_ASSERT_SUCCESS(status);
   1640 
   1641         uregex_close(re);
   1642         utext_close(&text1);
   1643         utext_close(&nullText);
   1644     }
   1645 
   1646 
   1647     /*
   1648      *  lookingAt()    Used in setUText test.
   1649      */
   1650 
   1651 
   1652     /*
   1653      *  find(), findNext, start, end, reset
   1654      */
   1655     {
   1656         UChar    text1[50];
   1657         UBool    result;
   1658         u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
   1659         status = U_ZERO_ERROR;
   1660         re = uregex_openC("rx", 0, NULL, &status);
   1661 
   1662         uregex_setText(re, text1, -1, &status);
   1663         result = uregex_find(re, 0, &status);
   1664         TEST_ASSERT(result == TRUE);
   1665         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
   1666         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
   1667         TEST_ASSERT_SUCCESS(status);
   1668 
   1669         result = uregex_find(re, 9, &status);
   1670         TEST_ASSERT(result == TRUE);
   1671         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
   1672         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
   1673         TEST_ASSERT_SUCCESS(status);
   1674 
   1675         result = uregex_find(re, 14, &status);
   1676         TEST_ASSERT(result == FALSE);
   1677         TEST_ASSERT_SUCCESS(status);
   1678 
   1679         status = U_ZERO_ERROR;
   1680         uregex_reset(re, 0, &status);
   1681 
   1682         result = uregex_findNext(re, &status);
   1683         TEST_ASSERT(result == TRUE);
   1684         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
   1685         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
   1686         TEST_ASSERT_SUCCESS(status);
   1687 
   1688         result = uregex_findNext(re, &status);
   1689         TEST_ASSERT(result == TRUE);
   1690         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
   1691         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
   1692         TEST_ASSERT_SUCCESS(status);
   1693 
   1694         status = U_ZERO_ERROR;
   1695         uregex_reset(re, 12, &status);
   1696 
   1697         result = uregex_findNext(re, &status);
   1698         TEST_ASSERT(result == TRUE);
   1699         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
   1700         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
   1701         TEST_ASSERT_SUCCESS(status);
   1702 
   1703         result = uregex_findNext(re, &status);
   1704         TEST_ASSERT(result == FALSE);
   1705         TEST_ASSERT_SUCCESS(status);
   1706 
   1707         uregex_close(re);
   1708     }
   1709 
   1710     /*
   1711      *  group()
   1712      */
   1713     {
   1714         UChar    text1[80];
   1715         UText   *actual;
   1716         UBool    result;
   1717 
   1718         const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */
   1719         const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
   1720 
   1721 
   1722         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
   1723 
   1724         status = U_ZERO_ERROR;
   1725         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
   1726         TEST_ASSERT_SUCCESS(status);
   1727 
   1728         uregex_setText(re, text1, -1, &status);
   1729         result = uregex_find(re, 0, &status);
   1730         TEST_ASSERT(result==TRUE);
   1731 
   1732         /*  Capture Group 0, the full match.  Should succeed.  */
   1733         status = U_ZERO_ERROR;
   1734         actual = uregex_groupUTextDeep(re, 0, NULL, &status);
   1735         TEST_ASSERT_SUCCESS(status);
   1736         TEST_ASSERT_UTEXT(str_abcinteriordef, actual);
   1737         utext_close(actual);
   1738 
   1739         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
   1740         status = U_ZERO_ERROR;
   1741         {
   1742             int64_t      group_len;
   1743             int32_t      len16;
   1744             UErrorCode   shallowStatus = U_ZERO_ERROR;
   1745             int64_t      nativeIndex;
   1746             UChar *groupChars;
   1747             UText groupText = UTEXT_INITIALIZER;
   1748 
   1749             actual = uregex_groupUText(re, 0, NULL, &group_len, &status);
   1750             TEST_ASSERT_SUCCESS(status);
   1751 
   1752             nativeIndex = utext_getNativeIndex(actual);
   1753             /*  Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug in ucstrFuncs UTextFuncs [utext.cpp]  */
   1754             /*  len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus);  */
   1755             len16 = group_len;
   1756 
   1757             groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1));
   1758             utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus);
   1759 
   1760             utext_openUChars(&groupText, groupChars, len16, &shallowStatus);
   1761 
   1762             TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText);
   1763             utext_close(&groupText);
   1764             free(groupChars);
   1765         }
   1766         utext_close(actual);
   1767 
   1768         /*  Capture group #1.  Should succeed. */
   1769         status = U_ZERO_ERROR;
   1770         actual = uregex_groupUTextDeep(re, 1, NULL, &status);
   1771         TEST_ASSERT_SUCCESS(status);
   1772         TEST_ASSERT_UTEXT(str_interior, actual);
   1773         utext_close(actual);
   1774 
   1775         /*  Capture group out of range.  Error. */
   1776         status = U_ZERO_ERROR;
   1777         actual = uregex_groupUTextDeep(re, 2, NULL, &status);
   1778         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
   1779         TEST_ASSERT(utext_nativeLength(actual) == 0);
   1780         utext_close(actual);
   1781 
   1782         uregex_close(re);
   1783 
   1784     }
   1785 
   1786     /*
   1787      *  replaceFirst()
   1788      */
   1789     {
   1790         UChar    text1[80];
   1791         UChar    text2[80];
   1792         UText    replText = UTEXT_INITIALIZER;
   1793         UText   *result;
   1794         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
   1795         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
   1796         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */
   1797         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
   1798         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
   1799         status = U_ZERO_ERROR;
   1800         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
   1801         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
   1802         utext_openUTF8(&replText, str_1x, -1, &status);
   1803 
   1804         re = uregex_openC("x(.*?)x", 0, NULL, &status);
   1805         TEST_ASSERT_SUCCESS(status);
   1806 
   1807         /*  Normal case, with match */
   1808         uregex_setText(re, text1, -1, &status);
   1809         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1810         TEST_ASSERT_SUCCESS(status);
   1811         TEST_ASSERT_UTEXT(str_Replxxx, result);
   1812         utext_close(result);
   1813 
   1814         /* No match.  Text should copy to output with no changes.  */
   1815         uregex_setText(re, text2, -1, &status);
   1816         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1817         TEST_ASSERT_SUCCESS(status);
   1818         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
   1819         utext_close(result);
   1820 
   1821         /* Unicode escapes */
   1822         uregex_setText(re, text1, -1, &status);
   1823         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
   1824         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1825         TEST_ASSERT_SUCCESS(status);
   1826         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
   1827         utext_close(result);
   1828 
   1829         uregex_close(re);
   1830         utext_close(&replText);
   1831     }
   1832 
   1833 
   1834     /*
   1835      *  replaceAll()
   1836      */
   1837     {
   1838         UChar    text1[80];
   1839         UChar    text2[80];
   1840         UText    replText = UTEXT_INITIALIZER;
   1841         UText   *result;
   1842         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
   1843         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
   1844         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
   1845         status = U_ZERO_ERROR;
   1846         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
   1847         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
   1848         utext_openUTF8(&replText, str_1, -1, &status);
   1849 
   1850         re = uregex_openC("x(.*?)x", 0, NULL, &status);
   1851         TEST_ASSERT_SUCCESS(status);
   1852 
   1853         /*  Normal case, with match */
   1854         uregex_setText(re, text1, -1, &status);
   1855         result = uregex_replaceAllUText(re, &replText, NULL, &status);
   1856         TEST_ASSERT_SUCCESS(status);
   1857         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
   1858         utext_close(result);
   1859 
   1860         /* No match.  Text should copy to output with no changes.  */
   1861         uregex_setText(re, text2, -1, &status);
   1862         result = uregex_replaceAllUText(re, &replText, NULL, &status);
   1863         TEST_ASSERT_SUCCESS(status);
   1864         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
   1865         utext_close(result);
   1866 
   1867         uregex_close(re);
   1868         utext_close(&replText);
   1869     }
   1870 
   1871 
   1872     /*
   1873      *  appendReplacement()
   1874      */
   1875     {
   1876         UChar    text[100];
   1877         UChar    repl[100];
   1878         UChar    buf[100];
   1879         UChar   *bufPtr;
   1880         int32_t  bufCap;
   1881 
   1882         status = U_ZERO_ERROR;
   1883         re = uregex_openC(".*", 0, 0, &status);
   1884         TEST_ASSERT_SUCCESS(status);
   1885 
   1886         u_uastrncpy(text, "whatever",  sizeof(text)/2);
   1887         u_uastrncpy(repl, "some other", sizeof(repl)/2);
   1888         uregex_setText(re, text, -1, &status);
   1889 
   1890         /* match covers whole target string */
   1891         uregex_find(re, 0, &status);
   1892         TEST_ASSERT_SUCCESS(status);
   1893         bufPtr = buf;
   1894         bufCap = sizeof(buf) / 2;
   1895         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1896         TEST_ASSERT_SUCCESS(status);
   1897         TEST_ASSERT_STRING("some other", buf, TRUE);
   1898 
   1899         /* Replacement has \u \U escapes */
   1900         uregex_find(re, 0, &status);
   1901         TEST_ASSERT_SUCCESS(status);
   1902         bufPtr = buf;
   1903         bufCap = sizeof(buf) / 2;
   1904         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
   1905         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1906         TEST_ASSERT_SUCCESS(status);
   1907         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
   1908 
   1909         uregex_close(re);
   1910     }
   1911 
   1912 
   1913     /*
   1914      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
   1915      */
   1916 
   1917     /*
   1918      *  splitUText()
   1919      */
   1920     {
   1921         UChar    textToSplit[80];
   1922         UChar    text2[80];
   1923         UText    *fields[10];
   1924         int32_t  numFields;
   1925         int32_t i;
   1926 
   1927         u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
   1928         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
   1929 
   1930         status = U_ZERO_ERROR;
   1931         re = uregex_openC(":", 0, NULL, &status);
   1932 
   1933 
   1934         /*  Simple split */
   1935 
   1936         uregex_setText(re, textToSplit, -1, &status);
   1937         TEST_ASSERT_SUCCESS(status);
   1938 
   1939         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1940         if (U_SUCCESS(status)) {
   1941             memset(fields, 0, sizeof(fields));
   1942             numFields = uregex_splitUText(re, fields, 10, &status);
   1943             TEST_ASSERT_SUCCESS(status);
   1944 
   1945             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1946             if(U_SUCCESS(status)) {
   1947               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
   1948               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
   1949               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
   1950                 TEST_ASSERT(numFields == 3);
   1951                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   1952                 TEST_ASSERT_UTEXT(str_second, fields[1]);
   1953                 TEST_ASSERT_UTEXT(str_third, fields[2]);
   1954                 TEST_ASSERT(fields[3] == NULL);
   1955             }
   1956             for(i = 0; i < numFields; i++) {
   1957                 utext_close(fields[i]);
   1958             }
   1959         }
   1960 
   1961         uregex_close(re);
   1962 
   1963 
   1964         /*  Split with too few output strings available */
   1965         status = U_ZERO_ERROR;
   1966         re = uregex_openC(":", 0, NULL, &status);
   1967         uregex_setText(re, textToSplit, -1, &status);
   1968         TEST_ASSERT_SUCCESS(status);
   1969 
   1970         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1971         if(U_SUCCESS(status)) {
   1972             fields[0] = NULL;
   1973             fields[1] = NULL;
   1974             fields[2] = &patternText;
   1975             numFields = uregex_splitUText(re, fields, 2, &status);
   1976             TEST_ASSERT_SUCCESS(status);
   1977 
   1978             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1979             if(U_SUCCESS(status)) {
   1980                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   1981                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
   1982                 TEST_ASSERT(numFields == 2);
   1983                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   1984                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
   1985                 TEST_ASSERT(fields[2] == &patternText);
   1986             }
   1987             for(i = 0; i < numFields; i++) {
   1988                 utext_close(fields[i]);
   1989             }
   1990         }
   1991 
   1992         uregex_close(re);
   1993     }
   1994 
   1995     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
   1996      *                   comes out as additional fields.  */
   1997     {
   1998         UChar    textToSplit[80];
   1999         UText    *fields[10];
   2000         int32_t  numFields;
   2001         int32_t i;
   2002 
   2003         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
   2004 
   2005         status = U_ZERO_ERROR;
   2006         re = uregex_openC("<(.*?)>", 0, NULL, &status);
   2007 
   2008         uregex_setText(re, textToSplit, -1, &status);
   2009         TEST_ASSERT_SUCCESS(status);
   2010 
   2011         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2012         if(U_SUCCESS(status)) {
   2013             memset(fields, 0, sizeof(fields));
   2014             numFields = uregex_splitUText(re, fields, 10, &status);
   2015             TEST_ASSERT_SUCCESS(status);
   2016 
   2017             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2018             if(U_SUCCESS(status)) {
   2019                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2020                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2021                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2022                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2023                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
   2024 
   2025                 TEST_ASSERT(numFields == 5);
   2026                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2027                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2028                 TEST_ASSERT_UTEXT(str_second, fields[2]);
   2029                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2030                 TEST_ASSERT_UTEXT(str_third, fields[4]);
   2031                 TEST_ASSERT(fields[5] == NULL);
   2032             }
   2033             for(i = 0; i < numFields; i++) {
   2034                 utext_close(fields[i]);
   2035             }
   2036         }
   2037 
   2038         /*  Split with too few output strings available (2) */
   2039         status = U_ZERO_ERROR;
   2040         fields[0] = NULL;
   2041         fields[1] = NULL;
   2042         fields[2] = &patternText;
   2043         numFields = uregex_splitUText(re, fields, 2, &status);
   2044         TEST_ASSERT_SUCCESS(status);
   2045 
   2046         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2047         if(U_SUCCESS(status)) {
   2048             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2049             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
   2050             TEST_ASSERT(numFields == 2);
   2051             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2052             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
   2053             TEST_ASSERT(fields[2] == &patternText);
   2054         }
   2055         for(i = 0; i < numFields; i++) {
   2056             utext_close(fields[i]);
   2057         }
   2058 
   2059 
   2060         /*  Split with too few output strings available (3) */
   2061         status = U_ZERO_ERROR;
   2062         fields[0] = NULL;
   2063         fields[1] = NULL;
   2064         fields[2] = NULL;
   2065         fields[3] = &patternText;
   2066         numFields = uregex_splitUText(re, fields, 3, &status);
   2067         TEST_ASSERT_SUCCESS(status);
   2068 
   2069         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2070         if(U_SUCCESS(status)) {
   2071             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2072             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2073             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
   2074             TEST_ASSERT(numFields == 3);
   2075             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2076             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2077             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
   2078             TEST_ASSERT(fields[3] == &patternText);
   2079         }
   2080         for(i = 0; i < numFields; i++) {
   2081             utext_close(fields[i]);
   2082         }
   2083 
   2084         /*  Split with just enough output strings available (5) */
   2085         status = U_ZERO_ERROR;
   2086         fields[0] = NULL;
   2087         fields[1] = NULL;
   2088         fields[2] = NULL;
   2089         fields[3] = NULL;
   2090         fields[4] = NULL;
   2091         fields[5] = &patternText;
   2092         numFields = uregex_splitUText(re, fields, 5, &status);
   2093         TEST_ASSERT_SUCCESS(status);
   2094 
   2095         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2096         if(U_SUCCESS(status)) {
   2097             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2098             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2099             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2100             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2101             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
   2102 
   2103             TEST_ASSERT(numFields == 5);
   2104             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2105             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2106             TEST_ASSERT_UTEXT(str_second, fields[2]);
   2107             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2108             TEST_ASSERT_UTEXT(str_third, fields[4]);
   2109             TEST_ASSERT(fields[5] == &patternText);
   2110         }
   2111         for(i = 0; i < numFields; i++) {
   2112             utext_close(fields[i]);
   2113         }
   2114 
   2115         /* Split, end of text is a field delimiter.   */
   2116         status = U_ZERO_ERROR;
   2117         uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
   2118         TEST_ASSERT_SUCCESS(status);
   2119 
   2120         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2121         if(U_SUCCESS(status)) {
   2122             memset(fields, 0, sizeof(fields));
   2123             fields[9] = &patternText;
   2124             numFields = uregex_splitUText(re, fields, 9, &status);
   2125             TEST_ASSERT_SUCCESS(status);
   2126 
   2127             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2128             if(U_SUCCESS(status)) {
   2129                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2130                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2131                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2132                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2133 
   2134                 TEST_ASSERT(numFields == 4);
   2135                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2136                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2137                 TEST_ASSERT_UTEXT(str_second, fields[2]);
   2138                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2139                 TEST_ASSERT(fields[4] == NULL);
   2140                 TEST_ASSERT(fields[8] == NULL);
   2141                 TEST_ASSERT(fields[9] == &patternText);
   2142             }
   2143             for(i = 0; i < numFields; i++) {
   2144                 utext_close(fields[i]);
   2145             }
   2146         }
   2147 
   2148         uregex_close(re);
   2149     }
   2150     utext_close(&patternText);
   2151 }
   2152 
   2153 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
   2154