Home | History | Annotate | Download | only in cintltst
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /********************************************************************
      4  * COPYRIGHT:
      5  * Copyright (c) 2004-2015, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  ********************************************************************/
      8 /********************************************************************************
      9 *
     10 * File reapits.c
     11 *
     12 *********************************************************************************/
     13 /*C API TEST FOR Regular Expressions */
     14 /**
     15 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
     16 *   try to test the full functionality.  It just calls each function and verifies that it
     17 *   works on a basic level.
     18 *
     19 *   More complete testing of regular expression functionality is done with the C++ tests.
     20 **/
     21 
     22 #include "unicode/utypes.h"
     23 
     24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     25 
     26 #include <stdlib.h>
     27 #include <string.h>
     28 #include "unicode/uloc.h"
     29 #include "unicode/uregex.h"
     30 #include "unicode/ustring.h"
     31 #include "unicode/utext.h"
     32 #include "unicode/utf8.h"
     33 #include "cintltst.h"
     34 #include "cmemory.h"
     35 
     36 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
     37 log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
     38 
     39 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
     40 log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr);}}
     41 
     42 /*
     43  *   TEST_SETUP and TEST_TEARDOWN
     44  *         macros to handle the boilerplate around setting up regex test cases.
     45  *         parameteres to setup:
     46  *              pattern:     The regex pattern, a (char *) null terminated C string.
     47  *              testString:  The string data, also a (char *) C string.
     48  *              flags:       Regex flags to set when compiling the pattern
     49  *
     50  *         Put arbitrary test code between SETUP and TEARDOWN.
     51  *         're" is the compiled, ready-to-go  regular expression.
     52  */
     53 #define TEST_SETUP(pattern, testString, flags) {  \
     54     UChar   *srcString = NULL;  \
     55     status = U_ZERO_ERROR; \
     56     re = uregex_openC(pattern, flags, NULL, &status);  \
     57     TEST_ASSERT_SUCCESS(status);   \
     58     srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
     59     u_uastrncpy(srcString, testString,  strlen(testString)+1); \
     60     uregex_setText(re, srcString, -1, &status); \
     61     TEST_ASSERT_SUCCESS(status);  \
     62     if (U_SUCCESS(status)) {
     63 
     64 #define TEST_TEARDOWN  \
     65     }  \
     66     TEST_ASSERT_SUCCESS(status);  \
     67     uregex_close(re);  \
     68     free(srcString);   \
     69     }
     70 
     71 
     72 /**
     73  * @param expected utf-8 array of bytes to be expected
     74  */
     75 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
     76      char     buf_inside_macro[120];
     77      int32_t  len = (int32_t)strlen(expected);
     78      UBool    success;
     79      if (nulTerm) {
     80          u_austrncpy(buf_inside_macro, (actual), len+1);
     81          buf_inside_macro[len+2] = 0;
     82          success = (strcmp((expected), buf_inside_macro) == 0);
     83      } else {
     84          u_austrncpy(buf_inside_macro, (actual), len);
     85          buf_inside_macro[len+1] = 0;
     86          success = (strncmp((expected), buf_inside_macro, len) == 0);
     87      }
     88      if (success == FALSE) {
     89          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
     90              file, line, (expected), buf_inside_macro);
     91      }
     92 }
     93 
     94 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
     95 
     96 
     97 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
     98     int32_t u8i = 0;
     99     UChar32 u8c = 0;
    100     UChar32 utc = 0;
    101     UBool   stringsEqual = TRUE;
    102     utext_setNativeIndex(utext, 0);
    103     for (;;) {
    104         U8_NEXT_UNSAFE(utf8, u8i, u8c);
    105         utc = utext_next32(utext);
    106         if (u8c == 0 && utc == U_SENTINEL) {
    107             break;
    108         }
    109         if (u8c != utc || u8c == 0) {
    110             stringsEqual = FALSE;
    111             break;
    112         }
    113     }
    114     return stringsEqual;
    115 }
    116 
    117 
    118 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
    119     utext_setNativeIndex(actual, 0);
    120     if (!equals_utf8_utext(expected, actual)) {
    121         UChar32 c;
    122         log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
    123         c = utext_next32From(actual, 0);
    124         while (c != U_SENTINEL) {
    125             if (0x20<c && c <0x7e) {
    126                 log_err("%c", c);
    127             } else {
    128                 log_err("%#x", c);
    129             }
    130             c = UTEXT_NEXT32(actual);
    131         }
    132         log_err("\"\n");
    133     }
    134 }
    135 
    136 /*
    137  * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
    138  *     Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
    139  */
    140 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
    141 
    142 static UBool testUTextEqual(UText *uta, UText *utb) {
    143     UChar32 ca = 0;
    144     UChar32 cb = 0;
    145     utext_setNativeIndex(uta, 0);
    146     utext_setNativeIndex(utb, 0);
    147     do {
    148         ca = utext_next32(uta);
    149         cb = utext_next32(utb);
    150         if (ca != cb) {
    151             break;
    152         }
    153     } while (ca != U_SENTINEL);
    154     return ca == cb;
    155 }
    156 
    157 
    158 
    159 
    160 static void TestRegexCAPI(void);
    161 static void TestBug4315(void);
    162 static void TestUTextAPI(void);
    163 static void TestRefreshInput(void);
    164 static void TestBug8421(void);
    165 static void TestBug10815(void);
    166 
    167 void addURegexTest(TestNode** root);
    168 
    169 void addURegexTest(TestNode** root)
    170 {
    171     addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
    172     addTest(root, &TestBug4315,   "regex/TestBug4315");
    173     addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
    174     addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
    175     addTest(root, &TestBug8421,   "regex/TestBug8421");
    176     addTest(root, &TestBug10815,   "regex/TestBug10815");
    177 }
    178 
    179 /*
    180  * Call back function and context struct used for testing
    181  *    regular expression user callbacks.  This test is mostly the same as
    182  *   the corresponding C++ test in intltest.
    183  */
    184 typedef struct callBackContext {
    185     int32_t          maxCalls;
    186     int32_t          numCalls;
    187     int32_t          lastSteps;
    188 } callBackContext;
    189 
    190 static UBool U_EXPORT2 U_CALLCONV
    191 TestCallbackFn(const void *context, int32_t steps) {
    192   callBackContext  *info = (callBackContext *)context;
    193   if (info->lastSteps+1 != steps) {
    194       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
    195   }
    196   info->lastSteps = steps;
    197   info->numCalls++;
    198   return (info->numCalls < info->maxCalls);
    199 }
    200 
    201 /*
    202  *   Regular Expression C API Tests
    203  */
    204 static void TestRegexCAPI(void) {
    205     UErrorCode           status = U_ZERO_ERROR;
    206     URegularExpression  *re;
    207     UChar                pat[200];
    208     UChar               *minus1;
    209 
    210     memset(&minus1, -1, sizeof(minus1));
    211 
    212     /* Mimimalist open/close */
    213     u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
    214     re = uregex_open(pat, -1, 0, 0, &status);
    215     if (U_FAILURE(status)) {
    216          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
    217          return;
    218     }
    219     uregex_close(re);
    220 
    221     /* Open with all flag values set */
    222     status = U_ZERO_ERROR;
    223     re = uregex_open(pat, -1,
    224         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
    225         0, &status);
    226     TEST_ASSERT_SUCCESS(status);
    227     uregex_close(re);
    228 
    229     /* Open with an invalid flag */
    230     status = U_ZERO_ERROR;
    231     re = uregex_open(pat, -1, 0x40000000, 0, &status);
    232     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
    233     uregex_close(re);
    234 
    235     /* Open with an unimplemented flag */
    236     status = U_ZERO_ERROR;
    237     re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
    238     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
    239     uregex_close(re);
    240 
    241     /* openC with an invalid parameter */
    242     status = U_ZERO_ERROR;
    243     re = uregex_openC(NULL,
    244         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
    245     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
    246 
    247     /* openC with an invalid parameter */
    248     status = U_USELESS_COLLATOR_ERROR;
    249     re = uregex_openC(NULL,
    250         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
    251     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
    252 
    253     /* openC   open from a C string */
    254     {
    255         const UChar   *p;
    256         int32_t  len;
    257         status = U_ZERO_ERROR;
    258         re = uregex_openC("abc*", 0, 0, &status);
    259         TEST_ASSERT_SUCCESS(status);
    260         p = uregex_pattern(re, &len, &status);
    261         TEST_ASSERT_SUCCESS(status);
    262 
    263         /* The TEST_ASSERT_SUCCESS above should change too... */
    264         if(U_SUCCESS(status)) {
    265             u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
    266             TEST_ASSERT(u_strcmp(pat, p) == 0);
    267             TEST_ASSERT(len==(int32_t)strlen("abc*"));
    268         }
    269 
    270         uregex_close(re);
    271 
    272         /*  TODO:  Open with ParseError parameter */
    273     }
    274 
    275     /*
    276      *  clone
    277      */
    278     {
    279         URegularExpression *clone1;
    280         URegularExpression *clone2;
    281         URegularExpression *clone3;
    282         UChar  testString1[30];
    283         UChar  testString2[30];
    284         UBool  result;
    285 
    286 
    287         status = U_ZERO_ERROR;
    288         re = uregex_openC("abc*", 0, 0, &status);
    289         TEST_ASSERT_SUCCESS(status);
    290         clone1 = uregex_clone(re, &status);
    291         TEST_ASSERT_SUCCESS(status);
    292         TEST_ASSERT(clone1 != NULL);
    293 
    294         status = U_ZERO_ERROR;
    295         clone2 = uregex_clone(re, &status);
    296         TEST_ASSERT_SUCCESS(status);
    297         TEST_ASSERT(clone2 != NULL);
    298         uregex_close(re);
    299 
    300         status = U_ZERO_ERROR;
    301         clone3 = uregex_clone(clone2, &status);
    302         TEST_ASSERT_SUCCESS(status);
    303         TEST_ASSERT(clone3 != NULL);
    304 
    305         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
    306         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
    307 
    308         status = U_ZERO_ERROR;
    309         uregex_setText(clone1, testString1, -1, &status);
    310         TEST_ASSERT_SUCCESS(status);
    311         result = uregex_lookingAt(clone1, 0, &status);
    312         TEST_ASSERT_SUCCESS(status);
    313         TEST_ASSERT(result==TRUE);
    314 
    315         status = U_ZERO_ERROR;
    316         uregex_setText(clone2, testString2, -1, &status);
    317         TEST_ASSERT_SUCCESS(status);
    318         result = uregex_lookingAt(clone2, 0, &status);
    319         TEST_ASSERT_SUCCESS(status);
    320         TEST_ASSERT(result==FALSE);
    321         result = uregex_find(clone2, 0, &status);
    322         TEST_ASSERT_SUCCESS(status);
    323         TEST_ASSERT(result==TRUE);
    324 
    325         uregex_close(clone1);
    326         uregex_close(clone2);
    327         uregex_close(clone3);
    328 
    329     }
    330 
    331     /*
    332      *  pattern()
    333     */
    334     {
    335         const UChar  *resultPat;
    336         int32_t       resultLen;
    337         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
    338         status = U_ZERO_ERROR;
    339         re = uregex_open(pat, -1, 0, NULL, &status);
    340         resultPat = uregex_pattern(re, &resultLen, &status);
    341         TEST_ASSERT_SUCCESS(status);
    342 
    343         /* The TEST_ASSERT_SUCCESS above should change too... */
    344         if (U_SUCCESS(status)) {
    345             TEST_ASSERT(resultLen == -1);
    346             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
    347         }
    348 
    349         uregex_close(re);
    350 
    351         status = U_ZERO_ERROR;
    352         re = uregex_open(pat, 3, 0, NULL, &status);
    353         resultPat = uregex_pattern(re, &resultLen, &status);
    354         TEST_ASSERT_SUCCESS(status);
    355         TEST_ASSERT_SUCCESS(status);
    356 
    357         /* The TEST_ASSERT_SUCCESS above should change too... */
    358         if (U_SUCCESS(status)) {
    359             TEST_ASSERT(resultLen == 3);
    360             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
    361             TEST_ASSERT(u_strlen(resultPat) == 3);
    362         }
    363 
    364         uregex_close(re);
    365     }
    366 
    367     /*
    368      *  flags()
    369      */
    370     {
    371         int32_t  t;
    372 
    373         status = U_ZERO_ERROR;
    374         re = uregex_open(pat, -1, 0, NULL, &status);
    375         t  = uregex_flags(re, &status);
    376         TEST_ASSERT_SUCCESS(status);
    377         TEST_ASSERT(t == 0);
    378         uregex_close(re);
    379 
    380         status = U_ZERO_ERROR;
    381         re = uregex_open(pat, -1, 0, NULL, &status);
    382         t  = uregex_flags(re, &status);
    383         TEST_ASSERT_SUCCESS(status);
    384         TEST_ASSERT(t == 0);
    385         uregex_close(re);
    386 
    387         status = U_ZERO_ERROR;
    388         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
    389         t  = uregex_flags(re, &status);
    390         TEST_ASSERT_SUCCESS(status);
    391         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
    392         uregex_close(re);
    393     }
    394 
    395     /*
    396      *  setText() and lookingAt()
    397      */
    398     {
    399         UChar  text1[50];
    400         UChar  text2[50];
    401         UBool  result;
    402 
    403         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
    404         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
    405         status = U_ZERO_ERROR;
    406         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
    407         re = uregex_open(pat, -1, 0, NULL, &status);
    408         TEST_ASSERT_SUCCESS(status);
    409 
    410         /* Operation before doing a setText should fail... */
    411         status = U_ZERO_ERROR;
    412         uregex_lookingAt(re, 0, &status);
    413         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
    414 
    415         status = U_ZERO_ERROR;
    416         uregex_setText(re, text1, -1, &status);
    417         result = uregex_lookingAt(re, 0, &status);
    418         TEST_ASSERT(result == TRUE);
    419         TEST_ASSERT_SUCCESS(status);
    420 
    421         status = U_ZERO_ERROR;
    422         uregex_setText(re, text2, -1, &status);
    423         result = uregex_lookingAt(re, 0, &status);
    424         TEST_ASSERT(result == FALSE);
    425         TEST_ASSERT_SUCCESS(status);
    426 
    427         status = U_ZERO_ERROR;
    428         uregex_setText(re, text1, -1, &status);
    429         result = uregex_lookingAt(re, 0, &status);
    430         TEST_ASSERT(result == TRUE);
    431         TEST_ASSERT_SUCCESS(status);
    432 
    433         status = U_ZERO_ERROR;
    434         uregex_setText(re, text1, 5, &status);
    435         result = uregex_lookingAt(re, 0, &status);
    436         TEST_ASSERT(result == FALSE);
    437         TEST_ASSERT_SUCCESS(status);
    438 
    439         status = U_ZERO_ERROR;
    440         uregex_setText(re, text1, 6, &status);
    441         result = uregex_lookingAt(re, 0, &status);
    442         TEST_ASSERT(result == TRUE);
    443         TEST_ASSERT_SUCCESS(status);
    444 
    445         uregex_close(re);
    446     }
    447 
    448 
    449     /*
    450      *  getText()
    451      */
    452     {
    453         UChar    text1[50];
    454         UChar    text2[50];
    455         const UChar   *result;
    456         int32_t  textLength;
    457 
    458         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
    459         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
    460         status = U_ZERO_ERROR;
    461         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
    462         re = uregex_open(pat, -1, 0, NULL, &status);
    463 
    464         uregex_setText(re, text1, -1, &status);
    465         result = uregex_getText(re, &textLength, &status);
    466         TEST_ASSERT(result == text1);
    467         TEST_ASSERT(textLength == -1);
    468         TEST_ASSERT_SUCCESS(status);
    469 
    470         status = U_ZERO_ERROR;
    471         uregex_setText(re, text2, 7, &status);
    472         result = uregex_getText(re, &textLength, &status);
    473         TEST_ASSERT(result == text2);
    474         TEST_ASSERT(textLength == 7);
    475         TEST_ASSERT_SUCCESS(status);
    476 
    477         status = U_ZERO_ERROR;
    478         uregex_setText(re, text2, 4, &status);
    479         result = uregex_getText(re, &textLength, &status);
    480         TEST_ASSERT(result == text2);
    481         TEST_ASSERT(textLength == 4);
    482         TEST_ASSERT_SUCCESS(status);
    483         uregex_close(re);
    484     }
    485 
    486     /*
    487      *  matches()
    488      */
    489     {
    490         UChar   text1[50];
    491         UBool   result;
    492         int     len;
    493         UChar   nullString[] = {0,0,0};
    494 
    495         u_uastrncpy(text1, "abcccde",  UPRV_LENGTHOF(text1));
    496         status = U_ZERO_ERROR;
    497         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
    498         re = uregex_open(pat, -1, 0, NULL, &status);
    499 
    500         uregex_setText(re, text1, -1, &status);
    501         result = uregex_matches(re, 0, &status);
    502         TEST_ASSERT(result == FALSE);
    503         TEST_ASSERT_SUCCESS(status);
    504 
    505         status = U_ZERO_ERROR;
    506         uregex_setText(re, text1, 6, &status);
    507         result = uregex_matches(re, 0, &status);
    508         TEST_ASSERT(result == TRUE);
    509         TEST_ASSERT_SUCCESS(status);
    510 
    511         status = U_ZERO_ERROR;
    512         uregex_setText(re, text1, 6, &status);
    513         result = uregex_matches(re, 1, &status);
    514         TEST_ASSERT(result == FALSE);
    515         TEST_ASSERT_SUCCESS(status);
    516         uregex_close(re);
    517 
    518         status = U_ZERO_ERROR;
    519         re = uregex_openC(".?", 0, NULL, &status);
    520         uregex_setText(re, text1, -1, &status);
    521         len = u_strlen(text1);
    522         result = uregex_matches(re, len, &status);
    523         TEST_ASSERT(result == TRUE);
    524         TEST_ASSERT_SUCCESS(status);
    525 
    526         status = U_ZERO_ERROR;
    527         uregex_setText(re, nullString, -1, &status);
    528         TEST_ASSERT_SUCCESS(status);
    529         result = uregex_matches(re, 0, &status);
    530         TEST_ASSERT(result == TRUE);
    531         TEST_ASSERT_SUCCESS(status);
    532         uregex_close(re);
    533     }
    534 
    535 
    536     /*
    537      *  lookingAt()    Used in setText test.
    538      */
    539 
    540 
    541     /*
    542      *  find(), findNext, start, end, reset
    543      */
    544     {
    545         UChar    text1[50];
    546         UBool    result;
    547         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
    548         status = U_ZERO_ERROR;
    549         re = uregex_openC("rx", 0, NULL, &status);
    550 
    551         uregex_setText(re, text1, -1, &status);
    552         result = uregex_find(re, 0, &status);
    553         TEST_ASSERT(result == TRUE);
    554         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
    555         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
    556         TEST_ASSERT_SUCCESS(status);
    557 
    558         result = uregex_find(re, 9, &status);
    559         TEST_ASSERT(result == TRUE);
    560         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
    561         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
    562         TEST_ASSERT_SUCCESS(status);
    563 
    564         result = uregex_find(re, 14, &status);
    565         TEST_ASSERT(result == FALSE);
    566         TEST_ASSERT_SUCCESS(status);
    567 
    568         status = U_ZERO_ERROR;
    569         uregex_reset(re, 0, &status);
    570 
    571         result = uregex_findNext(re, &status);
    572         TEST_ASSERT(result == TRUE);
    573         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
    574         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
    575         TEST_ASSERT_SUCCESS(status);
    576 
    577         result = uregex_findNext(re, &status);
    578         TEST_ASSERT(result == TRUE);
    579         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
    580         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
    581         TEST_ASSERT_SUCCESS(status);
    582 
    583         status = U_ZERO_ERROR;
    584         uregex_reset(re, 12, &status);
    585 
    586         result = uregex_findNext(re, &status);
    587         TEST_ASSERT(result == TRUE);
    588         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
    589         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
    590         TEST_ASSERT_SUCCESS(status);
    591 
    592         result = uregex_findNext(re, &status);
    593         TEST_ASSERT(result == FALSE);
    594         TEST_ASSERT_SUCCESS(status);
    595 
    596         uregex_close(re);
    597     }
    598 
    599     /*
    600      *  groupCount
    601      */
    602     {
    603         int32_t result;
    604 
    605         status = U_ZERO_ERROR;
    606         re = uregex_openC("abc", 0, NULL, &status);
    607         result = uregex_groupCount(re, &status);
    608         TEST_ASSERT_SUCCESS(status);
    609         TEST_ASSERT(result == 0);
    610         uregex_close(re);
    611 
    612         status = U_ZERO_ERROR;
    613         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
    614         result = uregex_groupCount(re, &status);
    615         TEST_ASSERT_SUCCESS(status);
    616         TEST_ASSERT(result == 3);
    617         uregex_close(re);
    618 
    619     }
    620 
    621 
    622     /*
    623      *  group()
    624      */
    625     {
    626         UChar    text1[80];
    627         UChar    buf[80];
    628         UBool    result;
    629         int32_t  resultSz;
    630         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
    631 
    632         status = U_ZERO_ERROR;
    633         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
    634         TEST_ASSERT_SUCCESS(status);
    635 
    636 
    637         uregex_setText(re, text1, -1, &status);
    638         result = uregex_find(re, 0, &status);
    639         TEST_ASSERT(result==TRUE);
    640 
    641         /*  Capture Group 0, the full match.  Should succeed.  */
    642         status = U_ZERO_ERROR;
    643         resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
    644         TEST_ASSERT_SUCCESS(status);
    645         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
    646         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    647 
    648         /*  Capture group #1.  Should succeed. */
    649         status = U_ZERO_ERROR;
    650         resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
    651         TEST_ASSERT_SUCCESS(status);
    652         TEST_ASSERT_STRING(" interior ", buf, TRUE);
    653         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
    654 
    655         /*  Capture group out of range.  Error. */
    656         status = U_ZERO_ERROR;
    657         uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
    658         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
    659 
    660         /* NULL buffer, pure pre-flight */
    661         status = U_ZERO_ERROR;
    662         resultSz = uregex_group(re, 0, NULL, 0, &status);
    663         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    664         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    665 
    666         /* Too small buffer, truncated string */
    667         status = U_ZERO_ERROR;
    668         memset(buf, -1, sizeof(buf));
    669         resultSz = uregex_group(re, 0, buf, 5, &status);
    670         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    671         TEST_ASSERT_STRING("abc i", buf, FALSE);
    672         TEST_ASSERT(buf[5] == (UChar)0xffff);
    673         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    674 
    675         /* Output string just fits buffer, no NUL term. */
    676         status = U_ZERO_ERROR;
    677         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
    678         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    679         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
    680         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    681         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
    682 
    683         uregex_close(re);
    684 
    685     }
    686 
    687     /*
    688      *  Regions
    689      */
    690 
    691 
    692         /* SetRegion(), getRegion() do something  */
    693         TEST_SETUP(".*", "0123456789ABCDEF", 0)
    694         UChar resultString[40];
    695         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
    696         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
    697         uregex_setRegion(re, 3, 6, &status);
    698         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
    699         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
    700         TEST_ASSERT(uregex_findNext(re, &status));
    701         TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
    702         TEST_ASSERT_STRING("345", resultString, TRUE);
    703         TEST_TEARDOWN;
    704 
    705         /* find(start=-1) uses regions   */
    706         TEST_SETUP(".*", "0123456789ABCDEF", 0);
    707         uregex_setRegion(re, 4, 6, &status);
    708         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
    709         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    710         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
    711         TEST_TEARDOWN;
    712 
    713         /* find (start >=0) does not use regions   */
    714         TEST_SETUP(".*", "0123456789ABCDEF", 0);
    715         uregex_setRegion(re, 4, 6, &status);
    716         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    717         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    718         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
    719         TEST_TEARDOWN;
    720 
    721         /* findNext() obeys regions    */
    722         TEST_SETUP(".", "0123456789ABCDEF", 0);
    723         uregex_setRegion(re, 4, 6, &status);
    724         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
    725         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    726         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
    727         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
    728         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
    729         TEST_TEARDOWN;
    730 
    731         /* matches(start=-1) uses regions                                           */
    732         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
    733         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    734         uregex_setRegion(re, 4, 6, &status);
    735         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
    736         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    737         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
    738         TEST_TEARDOWN;
    739 
    740         /* matches (start >=0) does not use regions       */
    741         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    742         uregex_setRegion(re, 4, 6, &status);
    743         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
    744         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    745         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
    746         TEST_TEARDOWN;
    747 
    748         /* lookingAt(start=-1) uses regions                                         */
    749         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
    750         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    751         uregex_setRegion(re, 4, 6, &status);
    752         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
    753         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    754         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
    755         TEST_TEARDOWN;
    756 
    757         /* lookingAt (start >=0) does not use regions  */
    758         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    759         uregex_setRegion(re, 4, 6, &status);
    760         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
    761         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    762         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
    763         TEST_TEARDOWN;
    764 
    765         /* hitEnd()       */
    766         TEST_SETUP("[a-f]*", "abcdefghij", 0);
    767         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    768         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
    769         TEST_TEARDOWN;
    770 
    771         TEST_SETUP("[a-f]*", "abcdef", 0);
    772         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    773         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
    774         TEST_TEARDOWN;
    775 
    776         /* requireEnd   */
    777         TEST_SETUP("abcd", "abcd", 0);
    778         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    779         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
    780         TEST_TEARDOWN;
    781 
    782         TEST_SETUP("abcd$", "abcd", 0);
    783         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    784         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
    785         TEST_TEARDOWN;
    786 
    787         /* anchoringBounds        */
    788         TEST_SETUP("abc$", "abcdef", 0);
    789         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
    790         uregex_useAnchoringBounds(re, FALSE, &status);
    791         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
    792 
    793         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
    794         uregex_useAnchoringBounds(re, TRUE, &status);
    795         uregex_setRegion(re, 0, 3, &status);
    796         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
    797         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
    798         TEST_TEARDOWN;
    799 
    800         /* Transparent Bounds      */
    801         TEST_SETUP("abc(?=def)", "abcdef", 0);
    802         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
    803         uregex_useTransparentBounds(re, TRUE, &status);
    804         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
    805 
    806         uregex_useTransparentBounds(re, FALSE, &status);
    807         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
    808         uregex_setRegion(re, 0, 3, &status);
    809         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
    810         uregex_useTransparentBounds(re, TRUE, &status);
    811         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
    812         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
    813         TEST_TEARDOWN;
    814 
    815 
    816     /*
    817      *  replaceFirst()
    818      */
    819     {
    820         UChar    text1[80];
    821         UChar    text2[80];
    822         UChar    replText[80];
    823         UChar    buf[80];
    824         int32_t  resultSz;
    825         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
    826         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
    827         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
    828 
    829         status = U_ZERO_ERROR;
    830         re = uregex_openC("x(.*?)x", 0, NULL, &status);
    831         TEST_ASSERT_SUCCESS(status);
    832 
    833         /*  Normal case, with match */
    834         uregex_setText(re, text1, -1, &status);
    835         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
    836         TEST_ASSERT_SUCCESS(status);
    837         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
    838         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    839 
    840         /* No match.  Text should copy to output with no changes.  */
    841         status = U_ZERO_ERROR;
    842         uregex_setText(re, text2, -1, &status);
    843         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
    844         TEST_ASSERT_SUCCESS(status);
    845         TEST_ASSERT_STRING("No match here.", buf, TRUE);
    846         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
    847 
    848         /*  Match, output just fills buffer, no termination warning. */
    849         status = U_ZERO_ERROR;
    850         uregex_setText(re, text1, -1, &status);
    851         memset(buf, -1, sizeof(buf));
    852         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
    853         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    854         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
    855         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    856         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    857 
    858         /* Do the replaceFirst again, without first resetting anything.
    859          *  Should give the same results.
    860          */
    861         status = U_ZERO_ERROR;
    862         memset(buf, -1, sizeof(buf));
    863         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
    864         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    865         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
    866         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    867         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    868 
    869         /* NULL buffer, zero buffer length */
    870         status = U_ZERO_ERROR;
    871         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
    872         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    873         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    874 
    875         /* Buffer too small by one */
    876         status = U_ZERO_ERROR;
    877         memset(buf, -1, sizeof(buf));
    878         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
    879         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    880         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
    881         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    882         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    883 
    884         uregex_close(re);
    885     }
    886 
    887 
    888     /*
    889      *  replaceAll()
    890      */
    891     {
    892         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
    893         UChar    text2[80];          /*  "No match Here"           */
    894         UChar    replText[80];       /*  "<$1>"                    */
    895         UChar    replText2[80];      /*  "<<$1>>"                  */
    896         const char * pattern = "x(.*?)x";
    897         const char * expectedResult = "Replace <aa> <1> <...>.";
    898         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
    899         UChar    buf[80];
    900         int32_t  resultSize;
    901         int32_t  expectedResultSize;
    902         int32_t  expectedResultSize2;
    903         int32_t  i;
    904 
    905         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
    906         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
    907         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
    908         u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
    909         expectedResultSize = strlen(expectedResult);
    910         expectedResultSize2 = strlen(expectedResult2);
    911 
    912         status = U_ZERO_ERROR;
    913         re = uregex_openC(pattern, 0, NULL, &status);
    914         TEST_ASSERT_SUCCESS(status);
    915 
    916         /*  Normal case, with match */
    917         uregex_setText(re, text1, -1, &status);
    918         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
    919         TEST_ASSERT_SUCCESS(status);
    920         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
    921         TEST_ASSERT(resultSize == expectedResultSize);
    922 
    923         /* No match.  Text should copy to output with no changes.  */
    924         status = U_ZERO_ERROR;
    925         uregex_setText(re, text2, -1, &status);
    926         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
    927         TEST_ASSERT_SUCCESS(status);
    928         TEST_ASSERT_STRING("No match here.", buf, TRUE);
    929         TEST_ASSERT(resultSize == u_strlen(text2));
    930 
    931         /*  Match, output just fills buffer, no termination warning. */
    932         status = U_ZERO_ERROR;
    933         uregex_setText(re, text1, -1, &status);
    934         memset(buf, -1, sizeof(buf));
    935         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
    936         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    937         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
    938         TEST_ASSERT(resultSize == expectedResultSize);
    939         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
    940 
    941         /* Do the replaceFirst again, without first resetting anything.
    942          *  Should give the same results.
    943          */
    944         status = U_ZERO_ERROR;
    945         memset(buf, -1, sizeof(buf));
    946         resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
    947         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    948         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
    949         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
    950         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
    951 
    952         /* NULL buffer, zero buffer length */
    953         status = U_ZERO_ERROR;
    954         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
    955         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    956         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
    957 
    958         /* Buffer too small.  Try every size, which will tickle edge cases
    959          * in uregex_appendReplacement (used by replaceAll)   */
    960         for (i=0; i<expectedResultSize; i++) {
    961             char  expected[80];
    962             status = U_ZERO_ERROR;
    963             memset(buf, -1, sizeof(buf));
    964             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
    965             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    966             strcpy(expected, expectedResult);
    967             expected[i] = 0;
    968             TEST_ASSERT_STRING(expected, buf, FALSE);
    969             TEST_ASSERT(resultSize == expectedResultSize);
    970             TEST_ASSERT(buf[i] == (UChar)0xffff);
    971         }
    972 
    973         /* Buffer too small.  Same as previous test, except this time the replacement
    974          * text is longer than the match capture group, making the length of the complete
    975          * replacement longer than the original string.
    976          */
    977         for (i=0; i<expectedResultSize2; i++) {
    978             char  expected[80];
    979             status = U_ZERO_ERROR;
    980             memset(buf, -1, sizeof(buf));
    981             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
    982             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    983             strcpy(expected, expectedResult2);
    984             expected[i] = 0;
    985             TEST_ASSERT_STRING(expected, buf, FALSE);
    986             TEST_ASSERT(resultSize == expectedResultSize2);
    987             TEST_ASSERT(buf[i] == (UChar)0xffff);
    988         }
    989 
    990 
    991         uregex_close(re);
    992     }
    993 
    994 
    995     /*
    996      *  appendReplacement()
    997      */
    998     {
    999         UChar    text[100];
   1000         UChar    repl[100];
   1001         UChar    buf[100];
   1002         UChar   *bufPtr;
   1003         int32_t  bufCap;
   1004 
   1005 
   1006         status = U_ZERO_ERROR;
   1007         re = uregex_openC(".*", 0, 0, &status);
   1008         TEST_ASSERT_SUCCESS(status);
   1009 
   1010         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
   1011         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
   1012         uregex_setText(re, text, -1, &status);
   1013 
   1014         /* match covers whole target string */
   1015         uregex_find(re, 0, &status);
   1016         TEST_ASSERT_SUCCESS(status);
   1017         bufPtr = buf;
   1018         bufCap = UPRV_LENGTHOF(buf);
   1019         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1020         TEST_ASSERT_SUCCESS(status);
   1021         TEST_ASSERT_STRING("some other", buf, TRUE);
   1022 
   1023         /* Match has \u \U escapes */
   1024         uregex_find(re, 0, &status);
   1025         TEST_ASSERT_SUCCESS(status);
   1026         bufPtr = buf;
   1027         bufCap = UPRV_LENGTHOF(buf);
   1028         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
   1029         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1030         TEST_ASSERT_SUCCESS(status);
   1031         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
   1032 
   1033         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
   1034         status = U_ZERO_ERROR;
   1035         uregex_find(re, 0, &status);
   1036         TEST_ASSERT_SUCCESS(status);
   1037         bufPtr = buf;
   1038         status = U_BUFFER_OVERFLOW_ERROR;
   1039         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
   1040         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
   1041 
   1042         uregex_close(re);
   1043     }
   1044 
   1045 
   1046     /*
   1047      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
   1048      */
   1049 
   1050     /*
   1051      *  split()
   1052      */
   1053     {
   1054         UChar    textToSplit[80];
   1055         UChar    text2[80];
   1056         UChar    buf[200];
   1057         UChar    *fields[10];
   1058         int32_t  numFields;
   1059         int32_t  requiredCapacity;
   1060         int32_t  spaceNeeded;
   1061         int32_t  sz;
   1062 
   1063         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
   1064         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
   1065 
   1066         status = U_ZERO_ERROR;
   1067         re = uregex_openC(":", 0, NULL, &status);
   1068 
   1069 
   1070         /*  Simple split */
   1071 
   1072         uregex_setText(re, textToSplit, -1, &status);
   1073         TEST_ASSERT_SUCCESS(status);
   1074 
   1075         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1076         if (U_SUCCESS(status)) {
   1077             memset(fields, -1, sizeof(fields));
   1078             numFields =
   1079                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
   1080             TEST_ASSERT_SUCCESS(status);
   1081 
   1082             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1083             if(U_SUCCESS(status)) {
   1084                 TEST_ASSERT(numFields == 3);
   1085                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1086                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
   1087                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
   1088                 TEST_ASSERT(fields[3] == NULL);
   1089 
   1090                 spaceNeeded = u_strlen(textToSplit) -
   1091                             (numFields - 1)  +  /* Field delimiters do not appear in output */
   1092                             numFields;          /* Each field gets a NUL terminator */
   1093 
   1094                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1095             }
   1096         }
   1097 
   1098         uregex_close(re);
   1099 
   1100 
   1101         /*  Split with too few output strings available */
   1102         status = U_ZERO_ERROR;
   1103         re = uregex_openC(":", 0, NULL, &status);
   1104         uregex_setText(re, textToSplit, -1, &status);
   1105         TEST_ASSERT_SUCCESS(status);
   1106 
   1107         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1108         if(U_SUCCESS(status)) {
   1109             memset(fields, -1, sizeof(fields));
   1110             numFields =
   1111                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
   1112             TEST_ASSERT_SUCCESS(status);
   1113 
   1114             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1115             if(U_SUCCESS(status)) {
   1116                 TEST_ASSERT(numFields == 2);
   1117                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1118                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
   1119                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
   1120 
   1121                 spaceNeeded = u_strlen(textToSplit) -
   1122                             (numFields - 1)  +  /* Field delimiters do not appear in output */
   1123                             numFields;          /* Each field gets a NUL terminator */
   1124 
   1125                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1126 
   1127                 /* Split with a range of output buffer sizes.  */
   1128                 spaceNeeded = u_strlen(textToSplit) -
   1129                     (numFields - 1)  +  /* Field delimiters do not appear in output */
   1130                     numFields;          /* Each field gets a NUL terminator */
   1131 
   1132                 for (sz=0; sz < spaceNeeded+1; sz++) {
   1133                     memset(fields, -1, sizeof(fields));
   1134                     status = U_ZERO_ERROR;
   1135                     numFields =
   1136                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
   1137                     if (sz >= spaceNeeded) {
   1138                         TEST_ASSERT_SUCCESS(status);
   1139                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1140                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
   1141                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
   1142                     } else {
   1143                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
   1144                     }
   1145                     TEST_ASSERT(numFields == 3);
   1146                     TEST_ASSERT(fields[3] == NULL);
   1147                     TEST_ASSERT(spaceNeeded == requiredCapacity);
   1148                 }
   1149             }
   1150         }
   1151 
   1152         uregex_close(re);
   1153     }
   1154 
   1155 
   1156 
   1157 
   1158     /* Split(), part 2.  Patterns with capture groups.  The capture group text
   1159      *                   comes out as additional fields.  */
   1160     {
   1161         UChar    textToSplit[80];
   1162         UChar    buf[200];
   1163         UChar    *fields[10];
   1164         int32_t  numFields;
   1165         int32_t  requiredCapacity;
   1166         int32_t  spaceNeeded;
   1167         int32_t  sz;
   1168 
   1169         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
   1170 
   1171         status = U_ZERO_ERROR;
   1172         re = uregex_openC("<(.*?)>", 0, NULL, &status);
   1173 
   1174         uregex_setText(re, textToSplit, -1, &status);
   1175         TEST_ASSERT_SUCCESS(status);
   1176 
   1177         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1178         if(U_SUCCESS(status)) {
   1179             memset(fields, -1, sizeof(fields));
   1180             numFields =
   1181                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
   1182             TEST_ASSERT_SUCCESS(status);
   1183 
   1184             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1185             if(U_SUCCESS(status)) {
   1186                 TEST_ASSERT(numFields == 5);
   1187                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1188                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1189                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1190                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1191                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
   1192                 TEST_ASSERT(fields[5] == NULL);
   1193                 spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
   1194                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1195             }
   1196         }
   1197 
   1198         /*  Split with too few output strings available (2) */
   1199         status = U_ZERO_ERROR;
   1200         memset(fields, -1, sizeof(fields));
   1201         numFields =
   1202             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
   1203         TEST_ASSERT_SUCCESS(status);
   1204 
   1205         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1206         if(U_SUCCESS(status)) {
   1207             TEST_ASSERT(numFields == 2);
   1208             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1209             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
   1210             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
   1211 
   1212             spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
   1213             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1214         }
   1215 
   1216         /*  Split with too few output strings available (3) */
   1217         status = U_ZERO_ERROR;
   1218         memset(fields, -1, sizeof(fields));
   1219         numFields =
   1220             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
   1221         TEST_ASSERT_SUCCESS(status);
   1222 
   1223         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1224         if(U_SUCCESS(status)) {
   1225             TEST_ASSERT(numFields == 3);
   1226             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1227             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1228             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
   1229             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
   1230 
   1231             spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
   1232             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1233         }
   1234 
   1235         /*  Split with just enough output strings available (5) */
   1236         status = U_ZERO_ERROR;
   1237         memset(fields, -1, sizeof(fields));
   1238         numFields =
   1239             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
   1240         TEST_ASSERT_SUCCESS(status);
   1241 
   1242         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1243         if(U_SUCCESS(status)) {
   1244             TEST_ASSERT(numFields == 5);
   1245             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1246             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1247             TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1248             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1249             TEST_ASSERT_STRING("  third", fields[4], TRUE);
   1250             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
   1251 
   1252             spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
   1253             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1254         }
   1255 
   1256         /* Split, end of text is a field delimiter.   */
   1257         status = U_ZERO_ERROR;
   1258         sz = strlen("first <tag-a> second<tag-b>");
   1259         uregex_setText(re, textToSplit, sz, &status);
   1260         TEST_ASSERT_SUCCESS(status);
   1261 
   1262         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1263         if(U_SUCCESS(status)) {
   1264             memset(fields, -1, sizeof(fields));
   1265             numFields =
   1266                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
   1267             TEST_ASSERT_SUCCESS(status);
   1268 
   1269             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1270             if(U_SUCCESS(status)) {
   1271                 TEST_ASSERT(numFields == 5);
   1272                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1273                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1274                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1275                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1276                 TEST_ASSERT_STRING("",        fields[4], TRUE);
   1277                 TEST_ASSERT(fields[5] == NULL);
   1278                 TEST_ASSERT(fields[8] == NULL);
   1279                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
   1280                 spaceNeeded = strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
   1281                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1282             }
   1283         }
   1284 
   1285         uregex_close(re);
   1286     }
   1287 
   1288     /*
   1289      * set/getTimeLimit
   1290      */
   1291      TEST_SETUP("abc$", "abcdef", 0);
   1292      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
   1293      uregex_setTimeLimit(re, 1000, &status);
   1294      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
   1295      TEST_ASSERT_SUCCESS(status);
   1296      uregex_setTimeLimit(re, -1, &status);
   1297      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
   1298      status = U_ZERO_ERROR;
   1299      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
   1300      TEST_TEARDOWN;
   1301 
   1302      /*
   1303       * set/get Stack Limit
   1304       */
   1305      TEST_SETUP("abc$", "abcdef", 0);
   1306      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
   1307      uregex_setStackLimit(re, 40000, &status);
   1308      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
   1309      TEST_ASSERT_SUCCESS(status);
   1310      uregex_setStackLimit(re, -1, &status);
   1311      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
   1312      status = U_ZERO_ERROR;
   1313      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
   1314      TEST_TEARDOWN;
   1315 
   1316 
   1317      /*
   1318       * Get/Set callback functions
   1319       *     This test is copied from intltest regex/Callbacks
   1320       *     The pattern and test data will run long enough to cause the callback
   1321       *       to be invoked.  The nested '+' operators give exponential time
   1322       *       behavior with increasing string length.
   1323       */
   1324      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
   1325      callBackContext cbInfo = {4, 0, 0};
   1326      const void     *pContext   = &cbInfo;
   1327      URegexMatchCallback    *returnedFn = &TestCallbackFn;
   1328 
   1329      /*  Getting the callback fn when it hasn't been set must return NULL  */
   1330      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
   1331      TEST_ASSERT_SUCCESS(status);
   1332      TEST_ASSERT(returnedFn == NULL);
   1333      TEST_ASSERT(pContext == NULL);
   1334 
   1335      /* Set thecallback and do a match.                                   */
   1336      /* The callback function should record that it has been called.      */
   1337      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
   1338      TEST_ASSERT_SUCCESS(status);
   1339      TEST_ASSERT(cbInfo.numCalls == 0);
   1340      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
   1341      TEST_ASSERT_SUCCESS(status);
   1342      TEST_ASSERT(cbInfo.numCalls > 0);
   1343 
   1344      /* Getting the callback should return the values that were set above.  */
   1345      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
   1346      TEST_ASSERT(returnedFn == &TestCallbackFn);
   1347      TEST_ASSERT(pContext == &cbInfo);
   1348 
   1349      TEST_TEARDOWN;
   1350 }
   1351 
   1352 
   1353 
   1354 static void TestBug4315(void) {
            /*
             * Regression test named for bug 4315.
             *
             * Splits a fixed sentence on the pattern "ck " in two passes:
             *   1. a preflight uregex_split() with a NULL buffer of capacity 0, which is
             *      expected to report U_BUFFER_OVERFLOW_ERROR, a field count of 3 and
             *      the capacity needed;
             *   2. a real split into a heap buffer of that capacity plus one, whose
             *      three fields and reported capacity are then checked.
             *
             * Takes no parameters and returns nothing; problems are reported through
             * the TEST_ASSERT* macros.
             */
   1355     UErrorCode      theICUError = U_ZERO_ERROR;
   1356     URegularExpression *theRegEx;
   1357     UChar           *textBuff;
   1358     const char      *thePattern;
   1359     UChar            theString[100];
   1360     UChar           *destFields[24];
   1361     int32_t         neededLength1 = 0;   /* capacity reported by the preflight split */
   1362     int32_t         neededLength2 = 0;   /* capacity reported by the real split      */
   1363
   1364     int32_t         wordCount = 0;
   1365     int32_t         destFieldsSize = 24;
   1366
   1367     thePattern  = "ck ";
   1368     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
   1369
   1370     /* open a regex */
   1371     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
   1372     TEST_ASSERT_SUCCESS(theICUError);
            if (U_FAILURE(theICUError)) {
                /* The failure was already reported just above.  Stop here so the checks
                 * below do not pile misleading assertion failures on top of it. */
                uregex_close(theRegEx);
                return;
            }
   1373
   1374     /* set the input string */
   1375     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
   1376     TEST_ASSERT_SUCCESS(theICUError);
   1377
   1378     /* split */
   1379     /* explicitly pass NULL and 0 to force the overflow error -> this is where the
   1380      *  error occurs! */
   1381     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
   1382         destFieldsSize, &theICUError);
   1383
   1384     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
   1385     TEST_ASSERT(wordCount==3);
   1386
   1387     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
   1388     {
   1389         /* Clear the expected overflow so the second split starts from a clean status. */
                theICUError = U_ZERO_ERROR;
   1390         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
                /* malloc can fail: report it and skip the second split rather than
                 * passing a NULL buffer with a non-zero capacity and then reading
                 * destFields entries that were never set for a real buffer. */
                TEST_ASSERT(textBuff != NULL);
                if (textBuff != NULL) {
   1391             wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
   1392                 destFields, destFieldsSize, &theICUError);
   1393             TEST_ASSERT(wordCount==3);
   1394             TEST_ASSERT_SUCCESS(theICUError);
   1395             /* Preflight and real split must agree on the capacity required. */
                    TEST_ASSERT(neededLength1 == neededLength2);
   1396             TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
   1397             TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
   1398             TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
   1399             TEST_ASSERT(destFields[3] == NULL);
   1400             free(textBuff);
                }
   1401     }
   1402     uregex_close(theRegEx);
   1403 }
   1404 
   1405 /* Based on TestRegexCAPI() */
   1406 static void TestUTextAPI(void) {
   1407     UErrorCode           status = U_ZERO_ERROR;
   1408     URegularExpression  *re;
   1409     UText                patternText = UTEXT_INITIALIZER;
   1410     UChar                pat[200];
   1411     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
   1412 
   1413     /* Mimimalist open/close */
   1414     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
   1415     re = uregex_openUText(&patternText, 0, 0, &status);
   1416     if (U_FAILURE(status)) {
   1417          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
   1418          utext_close(&patternText);
   1419          return;
   1420     }
   1421     uregex_close(re);
   1422 
   1423     /* Open with all flag values set */
   1424     status = U_ZERO_ERROR;
   1425     re = uregex_openUText(&patternText,
   1426         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
   1427         0, &status);
   1428     TEST_ASSERT_SUCCESS(status);
   1429     uregex_close(re);
   1430 
   1431     /* Open with an invalid flag */
   1432     status = U_ZERO_ERROR;
   1433     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
   1434     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
   1435     uregex_close(re);
   1436 
   1437     /* open with an invalid parameter */
   1438     status = U_ZERO_ERROR;
   1439     re = uregex_openUText(NULL,
   1440         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
   1441     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
   1442 
   1443     /*
   1444      *  clone
   1445      */
   1446     {
   1447         URegularExpression *clone1;
   1448         URegularExpression *clone2;
   1449         URegularExpression *clone3;
   1450         UChar  testString1[30];
   1451         UChar  testString2[30];
   1452         UBool  result;
   1453 
   1454 
   1455         status = U_ZERO_ERROR;
   1456         re = uregex_openUText(&patternText, 0, 0, &status);
   1457         TEST_ASSERT_SUCCESS(status);
   1458         clone1 = uregex_clone(re, &status);
   1459         TEST_ASSERT_SUCCESS(status);
   1460         TEST_ASSERT(clone1 != NULL);
   1461 
   1462         status = U_ZERO_ERROR;
   1463         clone2 = uregex_clone(re, &status);
   1464         TEST_ASSERT_SUCCESS(status);
   1465         TEST_ASSERT(clone2 != NULL);
   1466         uregex_close(re);
   1467 
   1468         status = U_ZERO_ERROR;
   1469         clone3 = uregex_clone(clone2, &status);
   1470         TEST_ASSERT_SUCCESS(status);
   1471         TEST_ASSERT(clone3 != NULL);
   1472 
   1473         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
   1474         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
   1475 
   1476         status = U_ZERO_ERROR;
   1477         uregex_setText(clone1, testString1, -1, &status);
   1478         TEST_ASSERT_SUCCESS(status);
   1479         result = uregex_lookingAt(clone1, 0, &status);
   1480         TEST_ASSERT_SUCCESS(status);
   1481         TEST_ASSERT(result==TRUE);
   1482 
   1483         status = U_ZERO_ERROR;
   1484         uregex_setText(clone2, testString2, -1, &status);
   1485         TEST_ASSERT_SUCCESS(status);
   1486         result = uregex_lookingAt(clone2, 0, &status);
   1487         TEST_ASSERT_SUCCESS(status);
   1488         TEST_ASSERT(result==FALSE);
   1489         result = uregex_find(clone2, 0, &status);
   1490         TEST_ASSERT_SUCCESS(status);
   1491         TEST_ASSERT(result==TRUE);
   1492 
   1493         uregex_close(clone1);
   1494         uregex_close(clone2);
   1495         uregex_close(clone3);
   1496 
   1497     }
   1498 
   1499     /*
   1500      *  pattern() and patternText()
   1501      */
   1502     {
   1503         const UChar  *resultPat;
   1504         int32_t       resultLen;
   1505         UText        *resultText;
   1506         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
   1507         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
   1508         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
   1509         status = U_ZERO_ERROR;
   1510 
   1511         utext_openUTF8(&patternText, str_hello, -1, &status);
   1512         re = uregex_open(pat, -1, 0, NULL, &status);
   1513         resultPat = uregex_pattern(re, &resultLen, &status);
   1514         TEST_ASSERT_SUCCESS(status);
   1515 
   1516         /* The TEST_ASSERT_SUCCESS above should change too... */
   1517         if (U_SUCCESS(status)) {
   1518             TEST_ASSERT(resultLen == -1);
   1519             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
   1520         }
   1521 
   1522         resultText = uregex_patternUText(re, &status);
   1523         TEST_ASSERT_SUCCESS(status);
   1524         TEST_ASSERT_UTEXT(str_hello, resultText);
   1525 
   1526         uregex_close(re);
   1527 
   1528         status = U_ZERO_ERROR;
   1529         re = uregex_open(pat, 3, 0, NULL, &status);
   1530         resultPat = uregex_pattern(re, &resultLen, &status);
   1531         TEST_ASSERT_SUCCESS(status);
   1532 
   1533         /* The TEST_ASSERT_SUCCESS above should change too... */
   1534         if (U_SUCCESS(status)) {
   1535             TEST_ASSERT(resultLen == 3);
   1536             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
   1537             TEST_ASSERT(u_strlen(resultPat) == 3);
   1538         }
   1539 
   1540         resultText = uregex_patternUText(re, &status);
   1541         TEST_ASSERT_SUCCESS(status);
   1542         TEST_ASSERT_UTEXT(str_hel, resultText);
   1543 
   1544         uregex_close(re);
   1545     }
   1546 
   1547     /*
   1548      *  setUText() and lookingAt()
   1549      */
   1550     {
   1551         UText  text1 = UTEXT_INITIALIZER;
   1552         UText  text2 = UTEXT_INITIALIZER;
   1553         UBool  result;
   1554         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* bcddde  (bytes do not spell the "abcccd" in the name) */
   1555         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* bcdddye */
   1556         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* bcd+e  (the pattern) */
   1557         status = U_ZERO_ERROR;
   1558         utext_openUTF8(&text1, str_abcccd, -1, &status);
   1559         utext_openUTF8(&text2, str_abcccxd, -1, &status);
   1560 
   1561         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1562         re = uregex_openUText(&patternText, 0, NULL, &status);
   1563         TEST_ASSERT_SUCCESS(status);
   1564 
   1565         /* Operation before doing a setText should fail... */
   1566         status = U_ZERO_ERROR;
   1567         uregex_lookingAt(re, 0, &status);
   1568         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
   1569 
   1570         status = U_ZERO_ERROR;
   1571         uregex_setUText(re, &text1, &status);
   1572         result = uregex_lookingAt(re, 0, &status);
   1573         TEST_ASSERT(result == TRUE);
   1574         TEST_ASSERT_SUCCESS(status);
   1575 
   1576         status = U_ZERO_ERROR;
   1577         uregex_setUText(re, &text2, &status);
   1578         result = uregex_lookingAt(re, 0, &status);
   1579         TEST_ASSERT(result == FALSE);
   1580         TEST_ASSERT_SUCCESS(status);
   1581 
   1582         status = U_ZERO_ERROR;
   1583         uregex_setUText(re, &text1, &status);
   1584         result = uregex_lookingAt(re, 0, &status);
   1585         TEST_ASSERT(result == TRUE);
   1586         TEST_ASSERT_SUCCESS(status);
   1587 
   1588         uregex_close(re);
   1589         utext_close(&text1);
   1590         utext_close(&text2);
   1591     }
   1592 
   1593 
   1594     /*
   1595      *  getText() and getUText()
   1596      */
   1597     {
   1598         UText  text1 = UTEXT_INITIALIZER;
   1599         UText  text2 = UTEXT_INITIALIZER;
   1600         UChar  text2Chars[20];
   1601         UText  *resultText;
   1602         const UChar   *result;
   1603         int32_t  textLength;
   1604         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* bcddde  (bytes do not spell the "abcccd" in the name) */
   1605         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* bcdddye */
   1606         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* bcd+e  (the pattern) */
   1607 
   1608 
   1609         status = U_ZERO_ERROR;
   1610         utext_openUTF8(&text1, str_abcccd, -1, &status);
   1611         u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
   1612         utext_openUChars(&text2, text2Chars, -1, &status);
   1613 
   1614         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1615         re = uregex_openUText(&patternText, 0, NULL, &status);
   1616 
   1617         /* First set a UText */
   1618         uregex_setUText(re, &text1, &status);
   1619         resultText = uregex_getUText(re, NULL, &status);
   1620         TEST_ASSERT_SUCCESS(status);
   1621         TEST_ASSERT(resultText != &text1);
   1622         utext_setNativeIndex(resultText, 0);
   1623         utext_setNativeIndex(&text1, 0);
   1624         TEST_ASSERT(testUTextEqual(resultText, &text1));
   1625         utext_close(resultText);
   1626 
   1627         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
   1628         (void)result;    /* Suppress set but not used warning. */
   1629         TEST_ASSERT(textLength == -1 || textLength == 6);
   1630         resultText = uregex_getUText(re, NULL, &status);
   1631         TEST_ASSERT_SUCCESS(status);
   1632         TEST_ASSERT(resultText != &text1);
   1633         utext_setNativeIndex(resultText, 0);
   1634         utext_setNativeIndex(&text1, 0);
   1635         TEST_ASSERT(testUTextEqual(resultText, &text1));
   1636         utext_close(resultText);
   1637 
   1638         /* Then set a UChar * */
   1639         uregex_setText(re, text2Chars, 7, &status);
   1640         resultText = uregex_getUText(re, NULL, &status);
   1641         TEST_ASSERT_SUCCESS(status);
   1642         utext_setNativeIndex(resultText, 0);
   1643         utext_setNativeIndex(&text2, 0);
   1644         TEST_ASSERT(testUTextEqual(resultText, &text2));
   1645         utext_close(resultText);
   1646         result = uregex_getText(re, &textLength, &status);
   1647         TEST_ASSERT(textLength == 7);
   1648 
   1649         uregex_close(re);
   1650         utext_close(&text1);
   1651         utext_close(&text2);
   1652     }
   1653 
   1654     /*
   1655      *  matches()
   1656      */
   1657     {
   1658         UText   text1 = UTEXT_INITIALIZER;
   1659         UBool   result;
   1660         UText   nullText = UTEXT_INITIALIZER;
   1661         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
   1662         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
   1663 
   1664         status = U_ZERO_ERROR;
   1665         utext_openUTF8(&text1, str_abcccde, -1, &status);
   1666         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1667         re = uregex_openUText(&patternText, 0, NULL, &status);
   1668 
   1669         uregex_setUText(re, &text1, &status);
   1670         result = uregex_matches(re, 0, &status);
   1671         TEST_ASSERT(result == FALSE);
   1672         TEST_ASSERT_SUCCESS(status);
   1673         uregex_close(re);
   1674 
   1675         status = U_ZERO_ERROR;
   1676         re = uregex_openC(".?", 0, NULL, &status);
   1677         uregex_setUText(re, &text1, &status);
   1678         result = uregex_matches(re, 7, &status);
   1679         TEST_ASSERT(result == TRUE);
   1680         TEST_ASSERT_SUCCESS(status);
   1681 
   1682         status = U_ZERO_ERROR;
   1683         utext_openUTF8(&nullText, "", -1, &status);
   1684         uregex_setUText(re, &nullText, &status);
   1685         TEST_ASSERT_SUCCESS(status);
   1686         result = uregex_matches(re, 0, &status);
   1687         TEST_ASSERT(result == TRUE);
   1688         TEST_ASSERT_SUCCESS(status);
   1689 
   1690         uregex_close(re);
   1691         utext_close(&text1);
   1692         utext_close(&nullText);
   1693     }
   1694 
   1695 
   1696     /*
   1697      *  lookingAt()    Used in setText test.
   1698      */
   1699 
   1700 
   1701     /*
   1702      *  find(), findNext, start, end, reset
   1703      */
   1704     {
   1705         UChar    text1[50];
   1706         UBool    result;
   1707         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
   1708         status = U_ZERO_ERROR;
   1709         re = uregex_openC("rx", 0, NULL, &status);
   1710 
   1711         uregex_setText(re, text1, -1, &status);
   1712         result = uregex_find(re, 0, &status);
   1713         TEST_ASSERT(result == TRUE);
   1714         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
   1715         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
   1716         TEST_ASSERT_SUCCESS(status);
   1717 
   1718         result = uregex_find(re, 9, &status);
   1719         TEST_ASSERT(result == TRUE);
   1720         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
   1721         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
   1722         TEST_ASSERT_SUCCESS(status);
   1723 
   1724         result = uregex_find(re, 14, &status);
   1725         TEST_ASSERT(result == FALSE);
   1726         TEST_ASSERT_SUCCESS(status);
   1727 
   1728         status = U_ZERO_ERROR;
   1729         uregex_reset(re, 0, &status);
   1730 
   1731         result = uregex_findNext(re, &status);
   1732         TEST_ASSERT(result == TRUE);
   1733         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
   1734         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
   1735         TEST_ASSERT_SUCCESS(status);
   1736 
   1737         result = uregex_findNext(re, &status);
   1738         TEST_ASSERT(result == TRUE);
   1739         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
   1740         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
   1741         TEST_ASSERT_SUCCESS(status);
   1742 
   1743         status = U_ZERO_ERROR;
   1744         uregex_reset(re, 12, &status);
   1745 
   1746         result = uregex_findNext(re, &status);
   1747         TEST_ASSERT(result == TRUE);
   1748         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
   1749         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
   1750         TEST_ASSERT_SUCCESS(status);
   1751 
   1752         result = uregex_findNext(re, &status);
   1753         TEST_ASSERT(result == FALSE);
   1754         TEST_ASSERT_SUCCESS(status);
   1755 
   1756         uregex_close(re);
   1757     }
   1758 
   1759     /*
   1760      *  groupUText()
   1761      */
   1762     {
   1763         UChar    text1[80];
   1764         UText   *actual;
   1765         UBool    result;
   1766         int64_t  groupLen = 0;
   1767         UChar    groupBuf[20];
   1768 
   1769         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
   1770 
   1771         status = U_ZERO_ERROR;
   1772         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
   1773         TEST_ASSERT_SUCCESS(status);
   1774 
   1775         uregex_setText(re, text1, -1, &status);
   1776         result = uregex_find(re, 0, &status);
   1777         TEST_ASSERT(result==TRUE);
   1778 
   1779         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
   1780         status = U_ZERO_ERROR;
   1781         actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
   1782         TEST_ASSERT_SUCCESS(status);
   1783 
   1784         TEST_ASSERT(utext_getNativeIndex(actual) == 6);  /* index of "abc " within "noise abc ..." */
   1785         TEST_ASSERT(groupLen == 16);   /* length of "abc interior def"  */
   1786         utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
   1787 
   1788         TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
   1789         utext_close(actual);
   1790 
   1791         /*  Capture group #1.  Should succeed. */
   1792         status = U_ZERO_ERROR;
   1793 
   1794         actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
   1795         TEST_ASSERT_SUCCESS(status);
   1796         TEST_ASSERT(9 == utext_getNativeIndex(actual));    /* index of " interior " within "noise abc interior def ... " */
   1797                                                            /*    (within the string text1)           */
   1798         TEST_ASSERT(10 == groupLen);                       /* length of " interior " */
   1799         utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
   1800         TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
   1801 
   1802         utext_close(actual);
   1803 
   1804         /*  Capture group out of range.  Error. */
   1805         status = U_ZERO_ERROR;
   1806         actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
   1807         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
   1808         utext_close(actual);
   1809 
   1810         uregex_close(re);
   1811     }
   1812 
   1813     /*
   1814      *  replaceFirst()
   1815      */
   1816     {
   1817         UChar    text1[80];
   1818         UChar    text2[80];
   1819         UText    replText = UTEXT_INITIALIZER;
   1820         UText   *result;
   1821         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
   1822         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
   1823         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
   1824                0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
   1825         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
   1826         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
   1827         status = U_ZERO_ERROR;
   1828         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
   1829         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
   1830         utext_openUTF8(&replText, str_1x, -1, &status);
   1831 
   1832         re = uregex_openC("x(.*?)x", 0, NULL, &status);
   1833         TEST_ASSERT_SUCCESS(status);
   1834 
   1835         /*  Normal case, with match */
   1836         uregex_setText(re, text1, -1, &status);
   1837         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1838         TEST_ASSERT_SUCCESS(status);
   1839         TEST_ASSERT_UTEXT(str_Replxxx, result);
   1840         utext_close(result);
   1841 
   1842         /* No match.  Text should copy to output with no changes.  */
   1843         uregex_setText(re, text2, -1, &status);
   1844         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1845         TEST_ASSERT_SUCCESS(status);
   1846         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
   1847         utext_close(result);
   1848 
   1849         /* Unicode escapes */
   1850         uregex_setText(re, text1, -1, &status);
   1851         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
   1852         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1853         TEST_ASSERT_SUCCESS(status);
   1854         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
   1855         utext_close(result);
   1856 
   1857         uregex_close(re);
   1858         utext_close(&replText);
   1859     }
   1860 
   1861 
   1862     /*
   1863      *  replaceAll()
   1864      */
   1865     {
   1866         UChar    text1[80];
   1867         UChar    text2[80];
   1868         UText    replText = UTEXT_INITIALIZER;
   1869         UText   *result;
   1870         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
   1871         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
   1872         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
   1873         status = U_ZERO_ERROR;
   1874         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
   1875         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
   1876         utext_openUTF8(&replText, str_1, -1, &status);
   1877 
   1878         re = uregex_openC("x(.*?)x", 0, NULL, &status);
   1879         TEST_ASSERT_SUCCESS(status);
   1880 
   1881         /*  Normal case, with match */
   1882         uregex_setText(re, text1, -1, &status);
   1883         result = uregex_replaceAllUText(re, &replText, NULL, &status);
   1884         TEST_ASSERT_SUCCESS(status);
   1885         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
   1886         utext_close(result);
   1887 
   1888         /* No match.  Text should copy to output with no changes.  */
   1889         uregex_setText(re, text2, -1, &status);
   1890         result = uregex_replaceAllUText(re, &replText, NULL, &status);
   1891         TEST_ASSERT_SUCCESS(status);
   1892         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
   1893         utext_close(result);
   1894 
   1895         uregex_close(re);
   1896         utext_close(&replText);
   1897     }
   1898 
   1899 
   1900     /*
   1901      *  appendReplacement()
   1902      */
   1903     {
   1904         UChar    text[100];
   1905         UChar    repl[100];
   1906         UChar    buf[100];
   1907         UChar   *bufPtr;
   1908         int32_t  bufCap;
   1909 
   1910         status = U_ZERO_ERROR;
   1911         re = uregex_openC(".*", 0, 0, &status);
   1912         TEST_ASSERT_SUCCESS(status);
   1913 
   1914         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
   1915         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
   1916         uregex_setText(re, text, -1, &status);
   1917 
   1918         /* match covers whole target string */
   1919         uregex_find(re, 0, &status);
   1920         TEST_ASSERT_SUCCESS(status);
   1921         bufPtr = buf;
   1922         bufCap = UPRV_LENGTHOF(buf);
   1923         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1924         TEST_ASSERT_SUCCESS(status);
   1925         TEST_ASSERT_STRING("some other", buf, TRUE);
   1926 
   1927         /* Match has \u \U escapes */
   1928         uregex_find(re, 0, &status);
   1929         TEST_ASSERT_SUCCESS(status);
   1930         bufPtr = buf;
   1931         bufCap = UPRV_LENGTHOF(buf);
   1932         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
   1933         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1934         TEST_ASSERT_SUCCESS(status);
   1935         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
   1936 
   1937         uregex_close(re);
   1938     }
   1939 
   1940 
   1941     /*
   1942      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
   1943      */
   1944 
   1945     /*
   1946      *  splitUText()
   1947      */
   1948     {
   1949         UChar    textToSplit[80];
   1950         UChar    text2[80];
   1951         UText    *fields[10];
   1952         int32_t  numFields;
   1953         int32_t i;
   1954 
   1955         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
   1956         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
   1957 
   1958         status = U_ZERO_ERROR;
   1959         re = uregex_openC(":", 0, NULL, &status);
   1960 
   1961 
   1962         /*  Simple split */
   1963 
   1964         uregex_setText(re, textToSplit, -1, &status);
   1965         TEST_ASSERT_SUCCESS(status);
   1966 
   1967         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1968         if (U_SUCCESS(status)) {
   1969             memset(fields, 0, sizeof(fields));
   1970             numFields = uregex_splitUText(re, fields, 10, &status);
   1971             TEST_ASSERT_SUCCESS(status);
   1972 
   1973             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1974             if(U_SUCCESS(status)) {
   1975               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
   1976               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
   1977               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
   1978                 TEST_ASSERT(numFields == 3);
   1979                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   1980                 TEST_ASSERT_UTEXT(str_second, fields[1]);
   1981                 TEST_ASSERT_UTEXT(str_third, fields[2]);
   1982                 TEST_ASSERT(fields[3] == NULL);
   1983             }
   1984             for(i = 0; i < numFields; i++) {
   1985                 utext_close(fields[i]);
   1986             }
   1987         }
   1988 
   1989         uregex_close(re);
   1990 
   1991 
   1992         /*  Split with too few output strings available */
   1993         status = U_ZERO_ERROR;
   1994         re = uregex_openC(":", 0, NULL, &status);
   1995         uregex_setText(re, textToSplit, -1, &status);
   1996         TEST_ASSERT_SUCCESS(status);
   1997 
   1998         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1999         if(U_SUCCESS(status)) {
   2000             fields[0] = NULL;
   2001             fields[1] = NULL;
   2002             fields[2] = &patternText;
   2003             numFields = uregex_splitUText(re, fields, 2, &status);
   2004             TEST_ASSERT_SUCCESS(status);
   2005 
   2006             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2007             if(U_SUCCESS(status)) {
   2008                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2009                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
   2010                 TEST_ASSERT(numFields == 2);
   2011                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2012                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
   2013                 TEST_ASSERT(fields[2] == &patternText);
   2014             }
   2015             for(i = 0; i < numFields; i++) {
   2016                 utext_close(fields[i]);
   2017             }
   2018         }
   2019 
   2020         uregex_close(re);
   2021     }
   2022 
   2023     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
   2024      *                   comes out as additional fields.  */
   2025     {
   2026         UChar    textToSplit[80];
   2027         UText    *fields[10];
   2028         int32_t  numFields;
   2029         int32_t i;
   2030 
   2031         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
   2032 
   2033         status = U_ZERO_ERROR;
   2034         re = uregex_openC("<(.*?)>", 0, NULL, &status);
   2035 
   2036         uregex_setText(re, textToSplit, -1, &status);
   2037         TEST_ASSERT_SUCCESS(status);
   2038 
   2039         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2040         if(U_SUCCESS(status)) {
   2041             memset(fields, 0, sizeof(fields));
   2042             numFields = uregex_splitUText(re, fields, 10, &status);
   2043             TEST_ASSERT_SUCCESS(status);
   2044 
   2045             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2046             if(U_SUCCESS(status)) {
   2047                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2048                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2049                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2050                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2051                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
   2052 
   2053                 TEST_ASSERT(numFields == 5);
   2054                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2055                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2056                 TEST_ASSERT_UTEXT(str_second, fields[2]);
   2057                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2058                 TEST_ASSERT_UTEXT(str_third, fields[4]);
   2059                 TEST_ASSERT(fields[5] == NULL);
   2060             }
   2061             for(i = 0; i < numFields; i++) {
   2062                 utext_close(fields[i]);
   2063             }
   2064         }
   2065 
   2066         /*  Split with too few output strings available (2) */
   2067         status = U_ZERO_ERROR;
   2068         fields[0] = NULL;
   2069         fields[1] = NULL;
   2070         fields[2] = &patternText;
   2071         numFields = uregex_splitUText(re, fields, 2, &status);
   2072         TEST_ASSERT_SUCCESS(status);
   2073 
   2074         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2075         if(U_SUCCESS(status)) {
   2076             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2077             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
   2078             TEST_ASSERT(numFields == 2);
   2079             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2080             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
   2081             TEST_ASSERT(fields[2] == &patternText);
   2082         }
   2083         for(i = 0; i < numFields; i++) {
   2084             utext_close(fields[i]);
   2085         }
   2086 
   2087 
   2088         /*  Split with too few output strings available (3) */
   2089         status = U_ZERO_ERROR;
   2090         fields[0] = NULL;
   2091         fields[1] = NULL;
   2092         fields[2] = NULL;
   2093         fields[3] = &patternText;
   2094         numFields = uregex_splitUText(re, fields, 3, &status);
   2095         TEST_ASSERT_SUCCESS(status);
   2096 
   2097         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2098         if(U_SUCCESS(status)) {
   2099             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2100             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2101             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
   2102             TEST_ASSERT(numFields == 3);
   2103             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2104             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2105             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
   2106             TEST_ASSERT(fields[3] == &patternText);
   2107         }
   2108         for(i = 0; i < numFields; i++) {
   2109             utext_close(fields[i]);
   2110         }
   2111 
   2112         /*  Split with just enough output strings available (5) */
   2113         status = U_ZERO_ERROR;
   2114         fields[0] = NULL;
   2115         fields[1] = NULL;
   2116         fields[2] = NULL;
   2117         fields[3] = NULL;
   2118         fields[4] = NULL;
   2119         fields[5] = &patternText;
   2120         numFields = uregex_splitUText(re, fields, 5, &status);
   2121         TEST_ASSERT_SUCCESS(status);
   2122 
   2123         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2124         if(U_SUCCESS(status)) {
   2125             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2126             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2127             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2128             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2129             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
   2130 
   2131             TEST_ASSERT(numFields == 5);
   2132             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2133             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2134             TEST_ASSERT_UTEXT(str_second, fields[2]);
   2135             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2136             TEST_ASSERT_UTEXT(str_third, fields[4]);
   2137             TEST_ASSERT(fields[5] == &patternText);
   2138         }
   2139         for(i = 0; i < numFields; i++) {
   2140             utext_close(fields[i]);
   2141         }
   2142 
   2143         /* Split, end of text is a field delimiter.   */
   2144         status = U_ZERO_ERROR;
   2145         uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
   2146         TEST_ASSERT_SUCCESS(status);
   2147 
   2148         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2149         if(U_SUCCESS(status)) {
   2150             memset(fields, 0, sizeof(fields));
   2151             fields[9] = &patternText;
   2152             numFields = uregex_splitUText(re, fields, 9, &status);
   2153             TEST_ASSERT_SUCCESS(status);
   2154 
   2155             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2156             if(U_SUCCESS(status)) {
   2157                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2158                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2159                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2160                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2161                 const char str_empty[] = { 0x00 };
   2162 
   2163                 TEST_ASSERT(numFields == 5);
   2164                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2165                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2166                 TEST_ASSERT_UTEXT(str_second, fields[2]);
   2167                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2168                 TEST_ASSERT_UTEXT(str_empty,  fields[4]);
   2169                 TEST_ASSERT(fields[5] == NULL);
   2170                 TEST_ASSERT(fields[8] == NULL);
   2171                 TEST_ASSERT(fields[9] == &patternText);
   2172             }
   2173             for(i = 0; i < numFields; i++) {
   2174                 utext_close(fields[i]);
   2175             }
   2176         }
   2177 
   2178         uregex_close(re);
   2179     }
   2180     utext_close(&patternText);
   2181 }
   2182 
   2183 
   2184 static void TestRefreshInput(void) {
   2185     /*
   2186      *  RefreshInput changes out the input of a URegularExpression without
   2187      *    changing anything else in the match state.  Used with Java JNI,
   2188      *    when Java moves the underlying string storage.   This test
   2189      *    runs a find() loop, moving the text after the first match.
   2190      *    The right number of matches should still be found.
   2191      */
   2192     UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
   2193     UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
   2194     UErrorCode status = U_ZERO_ERROR;
   2195     URegularExpression *re;
   2196     UText ut1 = UTEXT_INITIALIZER;
   2197     UText ut2 = UTEXT_INITIALIZER;
   2198 
   2199     re = uregex_openC("[ABC]", 0, 0, &status);
   2200     TEST_ASSERT_SUCCESS(status);
   2201 
   2202     utext_openUChars(&ut1, testStr, -1, &status);
   2203     TEST_ASSERT_SUCCESS(status);
   2204     uregex_setUText(re, &ut1, &status);
   2205     TEST_ASSERT_SUCCESS(status);
   2206 
   2207     /* Find the first match "A" in the original string */
   2208     TEST_ASSERT(uregex_findNext(re, &status));
   2209     TEST_ASSERT(uregex_start(re, 0, &status) == 0);
   2210 
   2211     /* Move the string, kill the original string.  */
   2212     u_strcpy(movedStr, testStr);
   2213     u_memset(testStr, 0, u_strlen(testStr));
   2214     utext_openUChars(&ut2, movedStr, -1, &status);
   2215     TEST_ASSERT_SUCCESS(status);
   2216     uregex_refreshUText(re, &ut2, &status);
   2217     TEST_ASSERT_SUCCESS(status);
   2218 
   2219     /* Find the following two matches, now working in the moved string. */
   2220     TEST_ASSERT(uregex_findNext(re, &status));
   2221     TEST_ASSERT(uregex_start(re, 0, &status) == 2);
   2222     TEST_ASSERT(uregex_findNext(re, &status));
   2223     TEST_ASSERT(uregex_start(re, 0, &status) == 4);
   2224     TEST_ASSERT(FALSE == uregex_findNext(re, &status));
   2225 
   2226     uregex_close(re);
   2227 }
   2228 
   2229 
   2230 static void TestBug8421(void) {
   2231     /* Bug 8421:  setTimeLimit on a regular expresssion before setting text to be matched
   2232      *             was failing.
   2233      */
   2234     URegularExpression *re;
   2235     UErrorCode status = U_ZERO_ERROR;
   2236     int32_t  limit = -1;
   2237 
   2238     re = uregex_openC("abc", 0, 0, &status);
   2239     TEST_ASSERT_SUCCESS(status);
   2240 
   2241     limit = uregex_getTimeLimit(re, &status);
   2242     TEST_ASSERT_SUCCESS(status);
   2243     TEST_ASSERT(limit == 0);
   2244 
   2245     uregex_setTimeLimit(re, 100, &status);
   2246     TEST_ASSERT_SUCCESS(status);
   2247     limit = uregex_getTimeLimit(re, &status);
   2248     TEST_ASSERT_SUCCESS(status);
   2249     TEST_ASSERT(limit == 100);
   2250 
   2251     uregex_close(re);
   2252 }
   2253 
   2254 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
   2255     return FALSE;
   2256 }
   2257 
   2258 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
   2259     return FALSE;
   2260 }
   2261 
   2262 static void TestBug10815() {
   2263   /* Bug 10815:   uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
   2264    *              when the callback function specified by uregex_setMatchCallback() returns FALSE
   2265    */
   2266     URegularExpression *re;
   2267     UErrorCode status = U_ZERO_ERROR;
   2268     UChar    text[100];
   2269 
   2270 
   2271     // findNext() with a find progress callback function.
   2272 
   2273     re = uregex_openC(".z", 0, 0, &status);
   2274     TEST_ASSERT_SUCCESS(status);
   2275 
   2276     u_uastrncpy(text, "Hello, World.",  UPRV_LENGTHOF(text));
   2277     uregex_setText(re, text, -1, &status);
   2278     TEST_ASSERT_SUCCESS(status);
   2279 
   2280     uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
   2281     TEST_ASSERT_SUCCESS(status);
   2282 
   2283     uregex_findNext(re, &status);
   2284     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
   2285 
   2286     uregex_close(re);
   2287 
   2288     // findNext() with a match progress callback function.
   2289 
   2290     status = U_ZERO_ERROR;
   2291     re = uregex_openC("((xxx)*)*y", 0, 0, &status);
   2292     TEST_ASSERT_SUCCESS(status);
   2293 
   2294     // Pattern + this text gives an exponential time match. Without the callback to stop the match,
   2295     // it will appear to be stuck in a (near) infinite loop.
   2296     u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  UPRV_LENGTHOF(text));
   2297     uregex_setText(re, text, -1, &status);
   2298     TEST_ASSERT_SUCCESS(status);
   2299 
   2300     uregex_setMatchCallback(re, MatchCallback, NULL, &status);
   2301     TEST_ASSERT_SUCCESS(status);
   2302 
   2303     uregex_findNext(re, &status);
   2304     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
   2305 
   2306     uregex_close(re);
   2307 }
   2308 
   2309 
   2310 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
   2311