Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 2004-2015, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /********************************************************************************
      7 *
      8 * File reapits.c
      9 *
     10 *********************************************************************************/
     11 /*C API TEST FOR Regular Expressions */
     12 /**
     13 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
     14 *   try to test the full functionality.  It just calls each function and verifies that it
     15 *   works on a basic level.
     16 *
     17 *   More complete testing of regular expression functionality is done with the C++ tests.
     18 **/
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     23 
     24 #include <stdlib.h>
     25 #include <string.h>
     26 #include "unicode/uloc.h"
     27 #include "unicode/uregex.h"
     28 #include "unicode/ustring.h"
     29 #include "unicode/utext.h"
     30 #include "cintltst.h"
     31 #include "cmemory.h"
     32 
     33 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
     34 log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
     35 
     36 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
     37 log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr);}}
     38 
     39 /*
     40  *   TEST_SETUP and TEST_TEARDOWN
     41  *         macros to handle the boilerplate around setting up regex test cases.
     42  *         parameteres to setup:
     43  *              pattern:     The regex pattern, a (char *) null terminated C string.
     44  *              testString:  The string data, also a (char *) C string.
     45  *              flags:       Regex flags to set when compiling the pattern
     46  *
     47  *         Put arbitrary test code between SETUP and TEARDOWN.
     48  *         're" is the compiled, ready-to-go  regular expression.
     49  */
     50 #define TEST_SETUP(pattern, testString, flags) {  \
     51     UChar   *srcString = NULL;  \
     52     status = U_ZERO_ERROR; \
     53     re = uregex_openC(pattern, flags, NULL, &status);  \
     54     TEST_ASSERT_SUCCESS(status);   \
     55     srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
     56     u_uastrncpy(srcString, testString,  strlen(testString)+1); \
     57     uregex_setText(re, srcString, -1, &status); \
     58     TEST_ASSERT_SUCCESS(status);  \
     59     if (U_SUCCESS(status)) {
     60 
     61 #define TEST_TEARDOWN  \
     62     }  \
     63     TEST_ASSERT_SUCCESS(status);  \
     64     uregex_close(re);  \
     65     free(srcString);   \
     66     }
     67 
     68 
     69 /**
     70  * @param expected utf-8 array of bytes to be expected
     71  */
     72 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
     73      char     buf_inside_macro[120];
     74      int32_t  len = (int32_t)strlen(expected);
     75      UBool    success;
     76      if (nulTerm) {
     77          u_austrncpy(buf_inside_macro, (actual), len+1);
     78          buf_inside_macro[len+2] = 0;
     79          success = (strcmp((expected), buf_inside_macro) == 0);
     80      } else {
     81          u_austrncpy(buf_inside_macro, (actual), len);
     82          buf_inside_macro[len+1] = 0;
     83          success = (strncmp((expected), buf_inside_macro, len) == 0);
     84      }
     85      if (success == FALSE) {
     86          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
     87              file, line, (expected), buf_inside_macro);
     88      }
     89 }
     90 
     91 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
     92 
     93 
     94 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
     95     int32_t u8i = 0;
     96     UChar32 u8c = 0;
     97     UChar32 utc = 0;
     98     UBool   stringsEqual = TRUE;
     99     utext_setNativeIndex(utext, 0);
    100     for (;;) {
    101         U8_NEXT_UNSAFE(utf8, u8i, u8c);
    102         utc = utext_next32(utext);
    103         if (u8c == 0 && utc == U_SENTINEL) {
    104             break;
    105         }
    106         if (u8c != utc || u8c == 0) {
    107             stringsEqual = FALSE;
    108             break;
    109         }
    110     }
    111     return stringsEqual;
    112 }
    113 
    114 
    115 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
    116     utext_setNativeIndex(actual, 0);
    117     if (!equals_utf8_utext(expected, actual)) {
    118         UChar32 c;
    119         log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
    120         c = utext_next32From(actual, 0);
    121         while (c != U_SENTINEL) {
    122             if (0x20<c && c <0x7e) {
    123                 log_err("%c", c);
    124             } else {
    125                 log_err("%#x", c);
    126             }
    127             c = UTEXT_NEXT32(actual);
    128         }
    129         log_err("\"\n");
    130     }
    131 }
    132 
    133 /*
    134  * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
    135  *     Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
    136  */
    137 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
    138 
    139 static UBool testUTextEqual(UText *uta, UText *utb) {
    140     UChar32 ca = 0;
    141     UChar32 cb = 0;
    142     utext_setNativeIndex(uta, 0);
    143     utext_setNativeIndex(utb, 0);
    144     do {
    145         ca = utext_next32(uta);
    146         cb = utext_next32(utb);
    147         if (ca != cb) {
    148             break;
    149         }
    150     } while (ca != U_SENTINEL);
    151     return ca == cb;
    152 }
    153 
    154 
    155 
    156 
    157 static void TestRegexCAPI(void);
    158 static void TestBug4315(void);
    159 static void TestUTextAPI(void);
    160 static void TestRefreshInput(void);
    161 static void TestBug8421(void);
    162 static void TestBug10815(void);
    163 
    164 void addURegexTest(TestNode** root);
    165 
    166 void addURegexTest(TestNode** root)
    167 {
    168     addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
    169     addTest(root, &TestBug4315,   "regex/TestBug4315");
    170     addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
    171     addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
    172     addTest(root, &TestBug8421,   "regex/TestBug8421");
    173     addTest(root, &TestBug10815,   "regex/TestBug10815");
    174 }
    175 
    176 /*
    177  * Call back function and context struct used for testing
    178  *    regular expression user callbacks.  This test is mostly the same as
    179  *   the corresponding C++ test in intltest.
    180  */
    181 typedef struct callBackContext {
    182     int32_t          maxCalls;
    183     int32_t          numCalls;
    184     int32_t          lastSteps;
    185 } callBackContext;
    186 
    187 static UBool U_EXPORT2 U_CALLCONV
    188 TestCallbackFn(const void *context, int32_t steps) {
    189   callBackContext  *info = (callBackContext *)context;
    190   if (info->lastSteps+1 != steps) {
    191       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
    192   }
    193   info->lastSteps = steps;
    194   info->numCalls++;
    195   return (info->numCalls < info->maxCalls);
    196 }
    197 
    198 /*
    199  *   Regular Expression C API Tests
    200  */
    201 static void TestRegexCAPI(void) {
    202     UErrorCode           status = U_ZERO_ERROR;
    203     URegularExpression  *re;
    204     UChar                pat[200];
    205     UChar               *minus1;
    206 
    207     memset(&minus1, -1, sizeof(minus1));
    208 
    209     /* Mimimalist open/close */
    210     u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
    211     re = uregex_open(pat, -1, 0, 0, &status);
    212     if (U_FAILURE(status)) {
    213          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
    214          return;
    215     }
    216     uregex_close(re);
    217 
    218     /* Open with all flag values set */
    219     status = U_ZERO_ERROR;
    220     re = uregex_open(pat, -1,
    221         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
    222         0, &status);
    223     TEST_ASSERT_SUCCESS(status);
    224     uregex_close(re);
    225 
    226     /* Open with an invalid flag */
    227     status = U_ZERO_ERROR;
    228     re = uregex_open(pat, -1, 0x40000000, 0, &status);
    229     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
    230     uregex_close(re);
    231 
    232     /* Open with an unimplemented flag */
    233     status = U_ZERO_ERROR;
    234     re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
    235     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
    236     uregex_close(re);
    237 
    238     /* openC with an invalid parameter */
    239     status = U_ZERO_ERROR;
    240     re = uregex_openC(NULL,
    241         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
    242     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
    243 
    244     /* openC with an invalid parameter */
    245     status = U_USELESS_COLLATOR_ERROR;
    246     re = uregex_openC(NULL,
    247         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
    248     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
    249 
    250     /* openC   open from a C string */
    251     {
    252         const UChar   *p;
    253         int32_t  len;
    254         status = U_ZERO_ERROR;
    255         re = uregex_openC("abc*", 0, 0, &status);
    256         TEST_ASSERT_SUCCESS(status);
    257         p = uregex_pattern(re, &len, &status);
    258         TEST_ASSERT_SUCCESS(status);
    259 
    260         /* The TEST_ASSERT_SUCCESS above should change too... */
    261         if(U_SUCCESS(status)) {
    262             u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
    263             TEST_ASSERT(u_strcmp(pat, p) == 0);
    264             TEST_ASSERT(len==(int32_t)strlen("abc*"));
    265         }
    266 
    267         uregex_close(re);
    268 
    269         /*  TODO:  Open with ParseError parameter */
    270     }
    271 
    272     /*
    273      *  clone
    274      */
    275     {
    276         URegularExpression *clone1;
    277         URegularExpression *clone2;
    278         URegularExpression *clone3;
    279         UChar  testString1[30];
    280         UChar  testString2[30];
    281         UBool  result;
    282 
    283 
    284         status = U_ZERO_ERROR;
    285         re = uregex_openC("abc*", 0, 0, &status);
    286         TEST_ASSERT_SUCCESS(status);
    287         clone1 = uregex_clone(re, &status);
    288         TEST_ASSERT_SUCCESS(status);
    289         TEST_ASSERT(clone1 != NULL);
    290 
    291         status = U_ZERO_ERROR;
    292         clone2 = uregex_clone(re, &status);
    293         TEST_ASSERT_SUCCESS(status);
    294         TEST_ASSERT(clone2 != NULL);
    295         uregex_close(re);
    296 
    297         status = U_ZERO_ERROR;
    298         clone3 = uregex_clone(clone2, &status);
    299         TEST_ASSERT_SUCCESS(status);
    300         TEST_ASSERT(clone3 != NULL);
    301 
    302         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
    303         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
    304 
    305         status = U_ZERO_ERROR;
    306         uregex_setText(clone1, testString1, -1, &status);
    307         TEST_ASSERT_SUCCESS(status);
    308         result = uregex_lookingAt(clone1, 0, &status);
    309         TEST_ASSERT_SUCCESS(status);
    310         TEST_ASSERT(result==TRUE);
    311 
    312         status = U_ZERO_ERROR;
    313         uregex_setText(clone2, testString2, -1, &status);
    314         TEST_ASSERT_SUCCESS(status);
    315         result = uregex_lookingAt(clone2, 0, &status);
    316         TEST_ASSERT_SUCCESS(status);
    317         TEST_ASSERT(result==FALSE);
    318         result = uregex_find(clone2, 0, &status);
    319         TEST_ASSERT_SUCCESS(status);
    320         TEST_ASSERT(result==TRUE);
    321 
    322         uregex_close(clone1);
    323         uregex_close(clone2);
    324         uregex_close(clone3);
    325 
    326     }
    327 
    328     /*
    329      *  pattern()
    330     */
    331     {
    332         const UChar  *resultPat;
    333         int32_t       resultLen;
    334         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
    335         status = U_ZERO_ERROR;
    336         re = uregex_open(pat, -1, 0, NULL, &status);
    337         resultPat = uregex_pattern(re, &resultLen, &status);
    338         TEST_ASSERT_SUCCESS(status);
    339 
    340         /* The TEST_ASSERT_SUCCESS above should change too... */
    341         if (U_SUCCESS(status)) {
    342             TEST_ASSERT(resultLen == -1);
    343             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
    344         }
    345 
    346         uregex_close(re);
    347 
    348         status = U_ZERO_ERROR;
    349         re = uregex_open(pat, 3, 0, NULL, &status);
    350         resultPat = uregex_pattern(re, &resultLen, &status);
    351         TEST_ASSERT_SUCCESS(status);
    352         TEST_ASSERT_SUCCESS(status);
    353 
    354         /* The TEST_ASSERT_SUCCESS above should change too... */
    355         if (U_SUCCESS(status)) {
    356             TEST_ASSERT(resultLen == 3);
    357             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
    358             TEST_ASSERT(u_strlen(resultPat) == 3);
    359         }
    360 
    361         uregex_close(re);
    362     }
    363 
    364     /*
    365      *  flags()
    366      */
    367     {
    368         int32_t  t;
    369 
    370         status = U_ZERO_ERROR;
    371         re = uregex_open(pat, -1, 0, NULL, &status);
    372         t  = uregex_flags(re, &status);
    373         TEST_ASSERT_SUCCESS(status);
    374         TEST_ASSERT(t == 0);
    375         uregex_close(re);
    376 
    377         status = U_ZERO_ERROR;
    378         re = uregex_open(pat, -1, 0, NULL, &status);
    379         t  = uregex_flags(re, &status);
    380         TEST_ASSERT_SUCCESS(status);
    381         TEST_ASSERT(t == 0);
    382         uregex_close(re);
    383 
    384         status = U_ZERO_ERROR;
    385         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
    386         t  = uregex_flags(re, &status);
    387         TEST_ASSERT_SUCCESS(status);
    388         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
    389         uregex_close(re);
    390     }
    391 
    392     /*
    393      *  setText() and lookingAt()
    394      */
    395     {
    396         UChar  text1[50];
    397         UChar  text2[50];
    398         UBool  result;
    399 
    400         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
    401         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
    402         status = U_ZERO_ERROR;
    403         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
    404         re = uregex_open(pat, -1, 0, NULL, &status);
    405         TEST_ASSERT_SUCCESS(status);
    406 
    407         /* Operation before doing a setText should fail... */
    408         status = U_ZERO_ERROR;
    409         uregex_lookingAt(re, 0, &status);
    410         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
    411 
    412         status = U_ZERO_ERROR;
    413         uregex_setText(re, text1, -1, &status);
    414         result = uregex_lookingAt(re, 0, &status);
    415         TEST_ASSERT(result == TRUE);
    416         TEST_ASSERT_SUCCESS(status);
    417 
    418         status = U_ZERO_ERROR;
    419         uregex_setText(re, text2, -1, &status);
    420         result = uregex_lookingAt(re, 0, &status);
    421         TEST_ASSERT(result == FALSE);
    422         TEST_ASSERT_SUCCESS(status);
    423 
    424         status = U_ZERO_ERROR;
    425         uregex_setText(re, text1, -1, &status);
    426         result = uregex_lookingAt(re, 0, &status);
    427         TEST_ASSERT(result == TRUE);
    428         TEST_ASSERT_SUCCESS(status);
    429 
    430         status = U_ZERO_ERROR;
    431         uregex_setText(re, text1, 5, &status);
    432         result = uregex_lookingAt(re, 0, &status);
    433         TEST_ASSERT(result == FALSE);
    434         TEST_ASSERT_SUCCESS(status);
    435 
    436         status = U_ZERO_ERROR;
    437         uregex_setText(re, text1, 6, &status);
    438         result = uregex_lookingAt(re, 0, &status);
    439         TEST_ASSERT(result == TRUE);
    440         TEST_ASSERT_SUCCESS(status);
    441 
    442         uregex_close(re);
    443     }
    444 
    445 
    446     /*
    447      *  getText()
    448      */
    449     {
    450         UChar    text1[50];
    451         UChar    text2[50];
    452         const UChar   *result;
    453         int32_t  textLength;
    454 
    455         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
    456         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
    457         status = U_ZERO_ERROR;
    458         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
    459         re = uregex_open(pat, -1, 0, NULL, &status);
    460 
    461         uregex_setText(re, text1, -1, &status);
    462         result = uregex_getText(re, &textLength, &status);
    463         TEST_ASSERT(result == text1);
    464         TEST_ASSERT(textLength == -1);
    465         TEST_ASSERT_SUCCESS(status);
    466 
    467         status = U_ZERO_ERROR;
    468         uregex_setText(re, text2, 7, &status);
    469         result = uregex_getText(re, &textLength, &status);
    470         TEST_ASSERT(result == text2);
    471         TEST_ASSERT(textLength == 7);
    472         TEST_ASSERT_SUCCESS(status);
    473 
    474         status = U_ZERO_ERROR;
    475         uregex_setText(re, text2, 4, &status);
    476         result = uregex_getText(re, &textLength, &status);
    477         TEST_ASSERT(result == text2);
    478         TEST_ASSERT(textLength == 4);
    479         TEST_ASSERT_SUCCESS(status);
    480         uregex_close(re);
    481     }
    482 
    483     /*
    484      *  matches()
    485      */
    486     {
    487         UChar   text1[50];
    488         UBool   result;
    489         int     len;
    490         UChar   nullString[] = {0,0,0};
    491 
    492         u_uastrncpy(text1, "abcccde",  UPRV_LENGTHOF(text1));
    493         status = U_ZERO_ERROR;
    494         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
    495         re = uregex_open(pat, -1, 0, NULL, &status);
    496 
    497         uregex_setText(re, text1, -1, &status);
    498         result = uregex_matches(re, 0, &status);
    499         TEST_ASSERT(result == FALSE);
    500         TEST_ASSERT_SUCCESS(status);
    501 
    502         status = U_ZERO_ERROR;
    503         uregex_setText(re, text1, 6, &status);
    504         result = uregex_matches(re, 0, &status);
    505         TEST_ASSERT(result == TRUE);
    506         TEST_ASSERT_SUCCESS(status);
    507 
    508         status = U_ZERO_ERROR;
    509         uregex_setText(re, text1, 6, &status);
    510         result = uregex_matches(re, 1, &status);
    511         TEST_ASSERT(result == FALSE);
    512         TEST_ASSERT_SUCCESS(status);
    513         uregex_close(re);
    514 
    515         status = U_ZERO_ERROR;
    516         re = uregex_openC(".?", 0, NULL, &status);
    517         uregex_setText(re, text1, -1, &status);
    518         len = u_strlen(text1);
    519         result = uregex_matches(re, len, &status);
    520         TEST_ASSERT(result == TRUE);
    521         TEST_ASSERT_SUCCESS(status);
    522 
    523         status = U_ZERO_ERROR;
    524         uregex_setText(re, nullString, -1, &status);
    525         TEST_ASSERT_SUCCESS(status);
    526         result = uregex_matches(re, 0, &status);
    527         TEST_ASSERT(result == TRUE);
    528         TEST_ASSERT_SUCCESS(status);
    529         uregex_close(re);
    530     }
    531 
    532 
    533     /*
    534      *  lookingAt()    Used in setText test.
    535      */
    536 
    537 
    538     /*
    539      *  find(), findNext, start, end, reset
    540      */
    541     {
    542         UChar    text1[50];
    543         UBool    result;
    544         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
    545         status = U_ZERO_ERROR;
    546         re = uregex_openC("rx", 0, NULL, &status);
    547 
    548         uregex_setText(re, text1, -1, &status);
    549         result = uregex_find(re, 0, &status);
    550         TEST_ASSERT(result == TRUE);
    551         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
    552         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
    553         TEST_ASSERT_SUCCESS(status);
    554 
    555         result = uregex_find(re, 9, &status);
    556         TEST_ASSERT(result == TRUE);
    557         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
    558         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
    559         TEST_ASSERT_SUCCESS(status);
    560 
    561         result = uregex_find(re, 14, &status);
    562         TEST_ASSERT(result == FALSE);
    563         TEST_ASSERT_SUCCESS(status);
    564 
    565         status = U_ZERO_ERROR;
    566         uregex_reset(re, 0, &status);
    567 
    568         result = uregex_findNext(re, &status);
    569         TEST_ASSERT(result == TRUE);
    570         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
    571         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
    572         TEST_ASSERT_SUCCESS(status);
    573 
    574         result = uregex_findNext(re, &status);
    575         TEST_ASSERT(result == TRUE);
    576         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
    577         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
    578         TEST_ASSERT_SUCCESS(status);
    579 
    580         status = U_ZERO_ERROR;
    581         uregex_reset(re, 12, &status);
    582 
    583         result = uregex_findNext(re, &status);
    584         TEST_ASSERT(result == TRUE);
    585         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
    586         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
    587         TEST_ASSERT_SUCCESS(status);
    588 
    589         result = uregex_findNext(re, &status);
    590         TEST_ASSERT(result == FALSE);
    591         TEST_ASSERT_SUCCESS(status);
    592 
    593         uregex_close(re);
    594     }
    595 
    596     /*
    597      *  groupCount
    598      */
    599     {
    600         int32_t result;
    601 
    602         status = U_ZERO_ERROR;
    603         re = uregex_openC("abc", 0, NULL, &status);
    604         result = uregex_groupCount(re, &status);
    605         TEST_ASSERT_SUCCESS(status);
    606         TEST_ASSERT(result == 0);
    607         uregex_close(re);
    608 
    609         status = U_ZERO_ERROR;
    610         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
    611         result = uregex_groupCount(re, &status);
    612         TEST_ASSERT_SUCCESS(status);
    613         TEST_ASSERT(result == 3);
    614         uregex_close(re);
    615 
    616     }
    617 
    618 
    619     /*
    620      *  group()
    621      */
    622     {
    623         UChar    text1[80];
    624         UChar    buf[80];
    625         UBool    result;
    626         int32_t  resultSz;
    627         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
    628 
    629         status = U_ZERO_ERROR;
    630         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
    631         TEST_ASSERT_SUCCESS(status);
    632 
    633 
    634         uregex_setText(re, text1, -1, &status);
    635         result = uregex_find(re, 0, &status);
    636         TEST_ASSERT(result==TRUE);
    637 
    638         /*  Capture Group 0, the full match.  Should succeed.  */
    639         status = U_ZERO_ERROR;
    640         resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
    641         TEST_ASSERT_SUCCESS(status);
    642         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
    643         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    644 
    645         /*  Capture group #1.  Should succeed. */
    646         status = U_ZERO_ERROR;
    647         resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
    648         TEST_ASSERT_SUCCESS(status);
    649         TEST_ASSERT_STRING(" interior ", buf, TRUE);
    650         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
    651 
    652         /*  Capture group out of range.  Error. */
    653         status = U_ZERO_ERROR;
    654         uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
    655         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
    656 
    657         /* NULL buffer, pure pre-flight */
    658         status = U_ZERO_ERROR;
    659         resultSz = uregex_group(re, 0, NULL, 0, &status);
    660         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    661         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    662 
    663         /* Too small buffer, truncated string */
    664         status = U_ZERO_ERROR;
    665         memset(buf, -1, sizeof(buf));
    666         resultSz = uregex_group(re, 0, buf, 5, &status);
    667         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    668         TEST_ASSERT_STRING("abc i", buf, FALSE);
    669         TEST_ASSERT(buf[5] == (UChar)0xffff);
    670         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    671 
    672         /* Output string just fits buffer, no NUL term. */
    673         status = U_ZERO_ERROR;
    674         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
    675         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    676         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
    677         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    678         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
    679 
    680         uregex_close(re);
    681 
    682     }
    683 
    684     /*
    685      *  Regions
    686      */
    687 
    688 
    689         /* SetRegion(), getRegion() do something  */
    690         TEST_SETUP(".*", "0123456789ABCDEF", 0)
    691         UChar resultString[40];
    692         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
    693         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
    694         uregex_setRegion(re, 3, 6, &status);
    695         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
    696         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
    697         TEST_ASSERT(uregex_findNext(re, &status));
    698         TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
    699         TEST_ASSERT_STRING("345", resultString, TRUE);
    700         TEST_TEARDOWN;
    701 
    702         /* find(start=-1) uses regions   */
    703         TEST_SETUP(".*", "0123456789ABCDEF", 0);
    704         uregex_setRegion(re, 4, 6, &status);
    705         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
    706         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    707         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
    708         TEST_TEARDOWN;
    709 
    710         /* find (start >=0) does not use regions   */
    711         TEST_SETUP(".*", "0123456789ABCDEF", 0);
    712         uregex_setRegion(re, 4, 6, &status);
    713         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    714         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    715         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
    716         TEST_TEARDOWN;
    717 
    718         /* findNext() obeys regions    */
    719         TEST_SETUP(".", "0123456789ABCDEF", 0);
    720         uregex_setRegion(re, 4, 6, &status);
    721         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
    722         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    723         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
    724         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
    725         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
    726         TEST_TEARDOWN;
    727 
    728         /* matches(start=-1) uses regions                                           */
    729         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
    730         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    731         uregex_setRegion(re, 4, 6, &status);
    732         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
    733         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    734         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
    735         TEST_TEARDOWN;
    736 
    737         /* matches (start >=0) does not use regions       */
    738         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    739         uregex_setRegion(re, 4, 6, &status);
    740         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
    741         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    742         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
    743         TEST_TEARDOWN;
    744 
    745         /* lookingAt(start=-1) uses regions                                         */
    746         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
    747         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    748         uregex_setRegion(re, 4, 6, &status);
    749         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
    750         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    751         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
    752         TEST_TEARDOWN;
    753 
    754         /* lookingAt (start >=0) does not use regions  */
    755         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    756         uregex_setRegion(re, 4, 6, &status);
    757         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
    758         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    759         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
    760         TEST_TEARDOWN;
    761 
    762         /* hitEnd()       */
    763         TEST_SETUP("[a-f]*", "abcdefghij", 0);
    764         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    765         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
    766         TEST_TEARDOWN;
    767 
    768         TEST_SETUP("[a-f]*", "abcdef", 0);
    769         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    770         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
    771         TEST_TEARDOWN;
    772 
    773         /* requireEnd   */
    774         TEST_SETUP("abcd", "abcd", 0);
    775         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    776         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
    777         TEST_TEARDOWN;
    778 
    779         TEST_SETUP("abcd$", "abcd", 0);
    780         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    781         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
    782         TEST_TEARDOWN;
    783 
    784         /* anchoringBounds        */
    785         TEST_SETUP("abc$", "abcdef", 0);
    786         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
    787         uregex_useAnchoringBounds(re, FALSE, &status);
    788         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
    789 
    790         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
    791         uregex_useAnchoringBounds(re, TRUE, &status);
    792         uregex_setRegion(re, 0, 3, &status);
    793         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
    794         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
    795         TEST_TEARDOWN;
    796 
    797         /* Transparent Bounds      */
    798         TEST_SETUP("abc(?=def)", "abcdef", 0);
    799         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
    800         uregex_useTransparentBounds(re, TRUE, &status);
    801         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
    802 
    803         uregex_useTransparentBounds(re, FALSE, &status);
    804         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
    805         uregex_setRegion(re, 0, 3, &status);
    806         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
    807         uregex_useTransparentBounds(re, TRUE, &status);
    808         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
    809         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
    810         TEST_TEARDOWN;
    811 
    812 
    813     /*
    814      *  replaceFirst()
    815      */
    816     {
    817         UChar    text1[80];
    818         UChar    text2[80];
    819         UChar    replText[80];
    820         UChar    buf[80];
    821         int32_t  resultSz;
    822         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
    823         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
    824         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
    825 
    826         status = U_ZERO_ERROR;
    827         re = uregex_openC("x(.*?)x", 0, NULL, &status);
    828         TEST_ASSERT_SUCCESS(status);
    829 
    830         /*  Normal case, with match */
    831         uregex_setText(re, text1, -1, &status);
    832         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
    833         TEST_ASSERT_SUCCESS(status);
    834         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
    835         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    836 
    837         /* No match.  Text should copy to output with no changes.  */
    838         status = U_ZERO_ERROR;
    839         uregex_setText(re, text2, -1, &status);
    840         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
    841         TEST_ASSERT_SUCCESS(status);
    842         TEST_ASSERT_STRING("No match here.", buf, TRUE);
    843         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
    844 
    845         /*  Match, output just fills buffer, no termination warning. */
    846         status = U_ZERO_ERROR;
    847         uregex_setText(re, text1, -1, &status);
    848         memset(buf, -1, sizeof(buf));
    849         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
    850         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    851         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
    852         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    853         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    854 
    855         /* Do the replaceFirst again, without first resetting anything.
    856          *  Should give the same results.
    857          */
    858         status = U_ZERO_ERROR;
    859         memset(buf, -1, sizeof(buf));
    860         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
    861         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    862         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
    863         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    864         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    865 
    866         /* NULL buffer, zero buffer length */
    867         status = U_ZERO_ERROR;
    868         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
    869         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    870         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    871 
    872         /* Buffer too small by one */
    873         status = U_ZERO_ERROR;
    874         memset(buf, -1, sizeof(buf));
    875         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
    876         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    877         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
    878         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    879         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    880 
    881         uregex_close(re);
    882     }
    883 
    884 
    885     /*
    886      *  replaceAll()
    887      */
    888     {
    889         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
    890         UChar    text2[80];          /*  "No match Here"           */
    891         UChar    replText[80];       /*  "<$1>"                    */
    892         UChar    replText2[80];      /*  "<<$1>>"                  */
    893         const char * pattern = "x(.*?)x";
    894         const char * expectedResult = "Replace <aa> <1> <...>.";
    895         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
    896         UChar    buf[80];
    897         int32_t  resultSize;
    898         int32_t  expectedResultSize;
    899         int32_t  expectedResultSize2;
    900         int32_t  i;
    901 
    902         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
    903         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
    904         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
    905         u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
    906         expectedResultSize = strlen(expectedResult);
    907         expectedResultSize2 = strlen(expectedResult2);
    908 
    909         status = U_ZERO_ERROR;
    910         re = uregex_openC(pattern, 0, NULL, &status);
    911         TEST_ASSERT_SUCCESS(status);
    912 
    913         /*  Normal case, with match */
    914         uregex_setText(re, text1, -1, &status);
    915         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
    916         TEST_ASSERT_SUCCESS(status);
    917         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
    918         TEST_ASSERT(resultSize == expectedResultSize);
    919 
    920         /* No match.  Text should copy to output with no changes.  */
    921         status = U_ZERO_ERROR;
    922         uregex_setText(re, text2, -1, &status);
    923         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
    924         TEST_ASSERT_SUCCESS(status);
    925         TEST_ASSERT_STRING("No match here.", buf, TRUE);
    926         TEST_ASSERT(resultSize == u_strlen(text2));
    927 
    928         /*  Match, output just fills buffer, no termination warning. */
    929         status = U_ZERO_ERROR;
    930         uregex_setText(re, text1, -1, &status);
    931         memset(buf, -1, sizeof(buf));
    932         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
    933         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    934         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
    935         TEST_ASSERT(resultSize == expectedResultSize);
    936         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
    937 
    938         /* Do the replaceAll again, without first resetting anything.
    939          *  Should give the same results.
    940          */
    941         status = U_ZERO_ERROR;
    942         memset(buf, -1, sizeof(buf));
    943         resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
    944         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    945         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
    946         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
    947         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
    948 
    949         /* NULL buffer, zero buffer length */
    950         status = U_ZERO_ERROR;
    951         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
    952         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    953         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
    954 
    955         /* Buffer too small.  Try every size, which will tickle edge cases
    956          * in uregex_appendReplacement (used by replaceAll)   */
    957         for (i=0; i<expectedResultSize; i++) {
    958             char  expected[80];
    959             status = U_ZERO_ERROR;
    960             memset(buf, -1, sizeof(buf));
    961             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
    962             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    963             strcpy(expected, expectedResult);
    964             expected[i] = 0;
    965             TEST_ASSERT_STRING(expected, buf, FALSE);
    966             TEST_ASSERT(resultSize == expectedResultSize);
    967             TEST_ASSERT(buf[i] == (UChar)0xffff);
    968         }
    969 
    970         /* Buffer too small.  Same as previous test, except this time the replacement
    971          * text is longer than the match capture group, making the length of the complete
    972          * replacement longer than the original string.
    973          */
    974         for (i=0; i<expectedResultSize2; i++) {
    975             char  expected[80];
    976             status = U_ZERO_ERROR;
    977             memset(buf, -1, sizeof(buf));
    978             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
    979             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    980             strcpy(expected, expectedResult2);
    981             expected[i] = 0;
    982             TEST_ASSERT_STRING(expected, buf, FALSE);
    983             TEST_ASSERT(resultSize == expectedResultSize2);
    984             TEST_ASSERT(buf[i] == (UChar)0xffff);
    985         }
    986 
    987 
    988         uregex_close(re);
    989     }
    990 
    991 
    992     /*
    993      *  appendReplacement()
    994      */
    995     {
    996         UChar    text[100];
    997         UChar    repl[100];
    998         UChar    buf[100];
    999         UChar   *bufPtr;
   1000         int32_t  bufCap;
   1001 
   1002 
   1003         status = U_ZERO_ERROR;
   1004         re = uregex_openC(".*", 0, 0, &status);
   1005         TEST_ASSERT_SUCCESS(status);
   1006 
   1007         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
   1008         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
   1009         uregex_setText(re, text, -1, &status);
   1010 
   1011         /* match covers whole target string */
   1012         uregex_find(re, 0, &status);
   1013         TEST_ASSERT_SUCCESS(status);
   1014         bufPtr = buf;
   1015         bufCap = UPRV_LENGTHOF(buf);
   1016         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1017         TEST_ASSERT_SUCCESS(status);
   1018         TEST_ASSERT_STRING("some other", buf, TRUE);
   1019 
   1020         /* Match has \u \U escapes */
   1021         uregex_find(re, 0, &status);
   1022         TEST_ASSERT_SUCCESS(status);
   1023         bufPtr = buf;
   1024         bufCap = UPRV_LENGTHOF(buf);
   1025         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
   1026         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1027         TEST_ASSERT_SUCCESS(status);
   1028         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
   1029 
   1030         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
   1031         status = U_ZERO_ERROR;
   1032         uregex_find(re, 0, &status);
   1033         TEST_ASSERT_SUCCESS(status);
   1034         bufPtr = buf;
   1035         status = U_BUFFER_OVERFLOW_ERROR;
   1036         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
   1037         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
   1038 
   1039         uregex_close(re);
   1040     }
   1041 
   1042 
   1043     /*
   1044      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
   1045      */
   1046 
   1047     /*
   1048      *  split()
   1049      */
   1050     {
   1051         UChar    textToSplit[80];
   1052         UChar    text2[80];
   1053         UChar    buf[200];
   1054         UChar    *fields[10];
   1055         int32_t  numFields;
   1056         int32_t  requiredCapacity;
   1057         int32_t  spaceNeeded;
   1058         int32_t  sz;
   1059 
   1060         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
   1061         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
   1062 
   1063         status = U_ZERO_ERROR;
   1064         re = uregex_openC(":", 0, NULL, &status);
   1065 
   1066 
   1067         /*  Simple split */
   1068 
   1069         uregex_setText(re, textToSplit, -1, &status);
   1070         TEST_ASSERT_SUCCESS(status);
   1071 
   1072         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1073         if (U_SUCCESS(status)) {
   1074             memset(fields, -1, sizeof(fields));
   1075             numFields =
   1076                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
   1077             TEST_ASSERT_SUCCESS(status);
   1078 
   1079             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1080             if(U_SUCCESS(status)) {
   1081                 TEST_ASSERT(numFields == 3);
   1082                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1083                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
   1084                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
   1085                 TEST_ASSERT(fields[3] == NULL);
   1086 
   1087                 spaceNeeded = u_strlen(textToSplit) -
   1088                             (numFields - 1)  +  /* Field delimiters do not appear in output */
   1089                             numFields;          /* Each field gets a NUL terminator */
   1090 
   1091                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1092             }
   1093         }
   1094 
   1095         uregex_close(re);
   1096 
   1097 
   1098         /*  Split with too few output strings available */
   1099         status = U_ZERO_ERROR;
   1100         re = uregex_openC(":", 0, NULL, &status);
   1101         uregex_setText(re, textToSplit, -1, &status);
   1102         TEST_ASSERT_SUCCESS(status);
   1103 
   1104         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1105         if(U_SUCCESS(status)) {
   1106             memset(fields, -1, sizeof(fields));
   1107             numFields =
   1108                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
   1109             TEST_ASSERT_SUCCESS(status);
   1110 
   1111             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1112             if(U_SUCCESS(status)) {
   1113                 TEST_ASSERT(numFields == 2);
   1114                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1115                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
   1116                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
   1117 
   1118                 spaceNeeded = u_strlen(textToSplit) -
   1119                             (numFields - 1)  +  /* Field delimiters do not appear in output */
   1120                             numFields;          /* Each field gets a NUL terminator */
   1121 
   1122                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1123 
   1124                 /* Split with a range of output buffer sizes.  */
   1125                 spaceNeeded = u_strlen(textToSplit) -
   1126                     (numFields - 1)  +  /* Field delimiters do not appear in output */
   1127                     numFields;          /* Each field gets a NUL terminator */
   1128 
   1129                 for (sz=0; sz < spaceNeeded+1; sz++) {
   1130                     memset(fields, -1, sizeof(fields));
   1131                     status = U_ZERO_ERROR;
   1132                     numFields =
   1133                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
   1134                     if (sz >= spaceNeeded) {
   1135                         TEST_ASSERT_SUCCESS(status);
   1136                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1137                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
   1138                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
   1139                     } else {
   1140                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
   1141                     }
   1142                     TEST_ASSERT(numFields == 3);
   1143                     TEST_ASSERT(fields[3] == NULL);
   1144                     TEST_ASSERT(spaceNeeded == requiredCapacity);
   1145                 }
   1146             }
   1147         }
   1148 
   1149         uregex_close(re);
   1150     }
   1151 
   1152 
   1153 
   1154 
   1155     /* Split(), part 2.  Patterns with capture groups.  The capture group text
   1156      *                   comes out as additional fields.  */
   1157     {
   1158         UChar    textToSplit[80];
   1159         UChar    buf[200];
   1160         UChar    *fields[10];
   1161         int32_t  numFields;
   1162         int32_t  requiredCapacity;
   1163         int32_t  spaceNeeded;
   1164         int32_t  sz;
   1165 
   1166         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
   1167 
   1168         status = U_ZERO_ERROR;
   1169         re = uregex_openC("<(.*?)>", 0, NULL, &status);
   1170 
   1171         uregex_setText(re, textToSplit, -1, &status);
   1172         TEST_ASSERT_SUCCESS(status);
   1173 
   1174         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1175         if(U_SUCCESS(status)) {
   1176             memset(fields, -1, sizeof(fields));
   1177             numFields =
   1178                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
   1179             TEST_ASSERT_SUCCESS(status);
   1180 
   1181             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1182             if(U_SUCCESS(status)) {
   1183                 TEST_ASSERT(numFields == 5);
   1184                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1185                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1186                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1187                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1188                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
   1189                 TEST_ASSERT(fields[5] == NULL);
   1190                 spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
   1191                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1192             }
   1193         }
   1194 
   1195         /*  Split with too few output strings available (2) */
   1196         status = U_ZERO_ERROR;
   1197         memset(fields, -1, sizeof(fields));
   1198         numFields =
   1199             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
   1200         TEST_ASSERT_SUCCESS(status);
   1201 
   1202         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1203         if(U_SUCCESS(status)) {
   1204             TEST_ASSERT(numFields == 2);
   1205             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1206             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
   1207             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
   1208 
   1209             spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
   1210             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1211         }
   1212 
   1213         /*  Split with too few output strings available (3) */
   1214         status = U_ZERO_ERROR;
   1215         memset(fields, -1, sizeof(fields));
   1216         numFields =
   1217             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
   1218         TEST_ASSERT_SUCCESS(status);
   1219 
   1220         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1221         if(U_SUCCESS(status)) {
   1222             TEST_ASSERT(numFields == 3);
   1223             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1224             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1225             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
   1226             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
   1227 
   1228             spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
   1229             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1230         }
   1231 
   1232         /*  Split with just enough output strings available (5) */
   1233         status = U_ZERO_ERROR;
   1234         memset(fields, -1, sizeof(fields));
   1235         numFields =
   1236             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
   1237         TEST_ASSERT_SUCCESS(status);
   1238 
   1239         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1240         if(U_SUCCESS(status)) {
   1241             TEST_ASSERT(numFields == 5);
   1242             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1243             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1244             TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1245             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1246             TEST_ASSERT_STRING("  third", fields[4], TRUE);
   1247             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
   1248 
   1249             spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
   1250             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1251         }
   1252 
   1253         /* Split, end of text is a field delimiter.   */
   1254         status = U_ZERO_ERROR;
   1255         sz = strlen("first <tag-a> second<tag-b>");
   1256         uregex_setText(re, textToSplit, sz, &status);
   1257         TEST_ASSERT_SUCCESS(status);
   1258 
   1259         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1260         if(U_SUCCESS(status)) {
   1261             memset(fields, -1, sizeof(fields));
   1262             numFields =
   1263                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
   1264             TEST_ASSERT_SUCCESS(status);
   1265 
   1266             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1267             if(U_SUCCESS(status)) {
   1268                 TEST_ASSERT(numFields == 5);
   1269                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1270                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1271                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1272                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1273                 TEST_ASSERT_STRING("",        fields[4], TRUE);
   1274                 TEST_ASSERT(fields[5] == NULL);
   1275                 TEST_ASSERT(fields[8] == NULL);
   1276                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
   1277                 spaceNeeded = strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
   1278                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1279             }
   1280         }
   1281 
   1282         uregex_close(re);
   1283     }
   1284 
   1285     /*
   1286      * set/getTimeLimit
   1287      */
   1288      TEST_SETUP("abc$", "abcdef", 0);
   1289      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
   1290      uregex_setTimeLimit(re, 1000, &status);
   1291      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
   1292      TEST_ASSERT_SUCCESS(status);
   1293      uregex_setTimeLimit(re, -1, &status);
   1294      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
   1295      status = U_ZERO_ERROR;
   1296      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
   1297      TEST_TEARDOWN;
   1298 
   1299      /*
   1300       * set/get Stack Limit
   1301       */
   1302      TEST_SETUP("abc$", "abcdef", 0);
   1303      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
   1304      uregex_setStackLimit(re, 40000, &status);
   1305      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
   1306      TEST_ASSERT_SUCCESS(status);
   1307      uregex_setStackLimit(re, -1, &status);
   1308      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
   1309      status = U_ZERO_ERROR;
   1310      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
   1311      TEST_TEARDOWN;
   1312 
   1313 
   1314      /*
   1315       * Get/Set callback functions
   1316       *     This test is copied from intltest regex/Callbacks
   1317       *     The pattern and test data will run long enough to cause the callback
   1318       *       to be invoked.  The nested '+' operators give exponential time
   1319       *       behavior with increasing string length.
   1320       */
   1321      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
   1322      callBackContext cbInfo = {4, 0, 0};
   1323      const void     *pContext   = &cbInfo;
   1324      URegexMatchCallback    *returnedFn = &TestCallbackFn;
   1325 
   1326      /*  Getting the callback fn when it hasn't been set must return NULL  */
   1327      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
   1328      TEST_ASSERT_SUCCESS(status);
   1329      TEST_ASSERT(returnedFn == NULL);
   1330      TEST_ASSERT(pContext == NULL);
   1331 
   1332      /* Set the callback and do a match.                                  */
   1333      /* The callback function should record that it has been called.      */
   1334      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
   1335      TEST_ASSERT_SUCCESS(status);
   1336      TEST_ASSERT(cbInfo.numCalls == 0);
   1337      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
   1338      TEST_ASSERT_SUCCESS(status);
   1339      TEST_ASSERT(cbInfo.numCalls > 0);
   1340 
   1341      /* Getting the callback should return the values that were set above.  */
   1342      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
   1343      TEST_ASSERT(returnedFn == &TestCallbackFn);
   1344      TEST_ASSERT(pContext == &cbInfo);
   1345 
   1346      TEST_TEARDOWN;
   1347 }
   1348 
   1349 
   1350 
   1351 static void TestBug4315(void) {
   1352     UErrorCode      theICUError = U_ZERO_ERROR;
   1353     URegularExpression *theRegEx;
   1354     UChar           *textBuff;
   1355     const char      *thePattern;
   1356     UChar            theString[100];
   1357     UChar           *destFields[24];
   1358     int32_t         neededLength1;
   1359     int32_t         neededLength2;
   1360 
   1361     int32_t         wordCount = 0;
   1362     int32_t         destFieldsSize = 24;
   1363 
   1364     thePattern  = "ck ";
   1365     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
   1366 
   1367     /* open a regex */
   1368     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
   1369     TEST_ASSERT_SUCCESS(theICUError);
   1370 
   1371     /* set the input string */
   1372     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
   1373     TEST_ASSERT_SUCCESS(theICUError);
   1374 
   1375     /* split */
   1376     /*explicitly pass NULL and 0 to force the overflow error -> this is where the
   1377      *  error occurs! */
   1378     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
   1379         destFieldsSize, &theICUError);
   1380 
   1381     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
   1382     TEST_ASSERT(wordCount==3);
   1383 
   1384     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
   1385     {
   1386         theICUError = U_ZERO_ERROR;
   1387         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
   1388         wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
   1389             destFields, destFieldsSize, &theICUError);
   1390         TEST_ASSERT(wordCount==3);
   1391         TEST_ASSERT_SUCCESS(theICUError);
   1392         TEST_ASSERT(neededLength1 == neededLength2);
   1393         TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
   1394         TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
   1395         TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
   1396         TEST_ASSERT(destFields[3] == NULL);
   1397         free(textBuff);
   1398     }
   1399     uregex_close(theRegEx);
   1400 }
   1401 
   1402 /* Based on TestRegexCAPI() */
   1403 static void TestUTextAPI(void) {
   1404     UErrorCode           status = U_ZERO_ERROR;
   1405     URegularExpression  *re;
   1406     UText                patternText = UTEXT_INITIALIZER;
   1407     UChar                pat[200];
   1408     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
   1409 
   1410     /* Minimalist open/close */
   1411     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
   1412     re = uregex_openUText(&patternText, 0, 0, &status);
   1413     if (U_FAILURE(status)) {
   1414          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
   1415          utext_close(&patternText);
   1416          return;
   1417     }
   1418     uregex_close(re);
   1419 
   1420     /* Open with all flag values set */
   1421     status = U_ZERO_ERROR;
   1422     re = uregex_openUText(&patternText,
   1423         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
   1424         0, &status);
   1425     TEST_ASSERT_SUCCESS(status);
   1426     uregex_close(re);
   1427 
   1428     /* Open with an invalid flag */
   1429     status = U_ZERO_ERROR;
   1430     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
   1431     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
   1432     uregex_close(re);
   1433 
   1434     /* open with an invalid parameter */
   1435     status = U_ZERO_ERROR;
   1436     re = uregex_openUText(NULL,
   1437         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
   1438     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
   1439 
   1440     /*
   1441      *  clone
   1442      */
   1443     {
   1444         URegularExpression *clone1;
   1445         URegularExpression *clone2;
   1446         URegularExpression *clone3;
   1447         UChar  testString1[30];
   1448         UChar  testString2[30];
   1449         UBool  result;
   1450 
   1451 
   1452         status = U_ZERO_ERROR;
   1453         re = uregex_openUText(&patternText, 0, 0, &status);
   1454         TEST_ASSERT_SUCCESS(status);
   1455         clone1 = uregex_clone(re, &status);
   1456         TEST_ASSERT_SUCCESS(status);
   1457         TEST_ASSERT(clone1 != NULL);
   1458 
   1459         status = U_ZERO_ERROR;
   1460         clone2 = uregex_clone(re, &status);
   1461         TEST_ASSERT_SUCCESS(status);
   1462         TEST_ASSERT(clone2 != NULL);
   1463         uregex_close(re);
   1464 
   1465         status = U_ZERO_ERROR;
   1466         clone3 = uregex_clone(clone2, &status);
   1467         TEST_ASSERT_SUCCESS(status);
   1468         TEST_ASSERT(clone3 != NULL);
   1469 
   1470         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
   1471         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
   1472 
   1473         status = U_ZERO_ERROR;
   1474         uregex_setText(clone1, testString1, -1, &status);
   1475         TEST_ASSERT_SUCCESS(status);
   1476         result = uregex_lookingAt(clone1, 0, &status);
   1477         TEST_ASSERT_SUCCESS(status);
   1478         TEST_ASSERT(result==TRUE);
   1479 
   1480         status = U_ZERO_ERROR;
   1481         uregex_setText(clone2, testString2, -1, &status);
   1482         TEST_ASSERT_SUCCESS(status);
   1483         result = uregex_lookingAt(clone2, 0, &status);
   1484         TEST_ASSERT_SUCCESS(status);
   1485         TEST_ASSERT(result==FALSE);
   1486         result = uregex_find(clone2, 0, &status);
   1487         TEST_ASSERT_SUCCESS(status);
   1488         TEST_ASSERT(result==TRUE);
   1489 
   1490         uregex_close(clone1);
   1491         uregex_close(clone2);
   1492         uregex_close(clone3);
   1493 
   1494     }
   1495 
   1496     /*
   1497      *  pattern() and patternText()
   1498      */
   1499     {
   1500         const UChar  *resultPat;
   1501         int32_t       resultLen;
   1502         UText        *resultText;
   1503         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
   1504         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
   1505         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
   1506         status = U_ZERO_ERROR;
   1507 
   1508         utext_openUTF8(&patternText, str_hello, -1, &status);
   1509         re = uregex_open(pat, -1, 0, NULL, &status);
   1510         resultPat = uregex_pattern(re, &resultLen, &status);
   1511         TEST_ASSERT_SUCCESS(status);
   1512 
   1513         /* The TEST_ASSERT_SUCCESS above should change too... */
   1514         if (U_SUCCESS(status)) {
   1515             TEST_ASSERT(resultLen == -1);
   1516             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
   1517         }
   1518 
   1519         resultText = uregex_patternUText(re, &status);
   1520         TEST_ASSERT_SUCCESS(status);
   1521         TEST_ASSERT_UTEXT(str_hello, resultText);
   1522 
   1523         uregex_close(re);
   1524 
   1525         status = U_ZERO_ERROR;
   1526         re = uregex_open(pat, 3, 0, NULL, &status);
   1527         resultPat = uregex_pattern(re, &resultLen, &status);
   1528         TEST_ASSERT_SUCCESS(status);
   1529 
   1530         /* The TEST_ASSERT_SUCCESS above should change too... */
   1531         if (U_SUCCESS(status)) {
   1532             TEST_ASSERT(resultLen == 3);
   1533             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
   1534             TEST_ASSERT(u_strlen(resultPat) == 3);
   1535         }
   1536 
   1537         resultText = uregex_patternUText(re, &status);
   1538         TEST_ASSERT_SUCCESS(status);
   1539         TEST_ASSERT_UTEXT(str_hel, resultText);
   1540 
   1541         uregex_close(re);
   1542     }
   1543 
   1544     /*
   1545      *  setUText() and lookingAt()
   1546      */
   1547     {
   1548         UText  text1 = UTEXT_INITIALIZER;
   1549         UText  text2 = UTEXT_INITIALIZER;
   1550         UBool  result;
   1551         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
   1552         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
   1553         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
   1554         status = U_ZERO_ERROR;
   1555         utext_openUTF8(&text1, str_abcccd, -1, &status);
   1556         utext_openUTF8(&text2, str_abcccxd, -1, &status);
   1557 
   1558         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1559         re = uregex_openUText(&patternText, 0, NULL, &status);
   1560         TEST_ASSERT_SUCCESS(status);
   1561 
   1562         /* Operation before doing a setText should fail... */
   1563         status = U_ZERO_ERROR;
   1564         uregex_lookingAt(re, 0, &status);
   1565         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
   1566 
   1567         status = U_ZERO_ERROR;
   1568         uregex_setUText(re, &text1, &status);
   1569         result = uregex_lookingAt(re, 0, &status);
   1570         TEST_ASSERT(result == TRUE);
   1571         TEST_ASSERT_SUCCESS(status);
   1572 
   1573         status = U_ZERO_ERROR;
   1574         uregex_setUText(re, &text2, &status);
   1575         result = uregex_lookingAt(re, 0, &status);
   1576         TEST_ASSERT(result == FALSE);
   1577         TEST_ASSERT_SUCCESS(status);
   1578 
   1579         status = U_ZERO_ERROR;
   1580         uregex_setUText(re, &text1, &status);
   1581         result = uregex_lookingAt(re, 0, &status);
   1582         TEST_ASSERT(result == TRUE);
   1583         TEST_ASSERT_SUCCESS(status);
   1584 
   1585         uregex_close(re);
   1586         utext_close(&text1);
   1587         utext_close(&text2);
   1588     }
   1589 
   1590 
   1591     /*
   1592      *  getText() and getUText()
   1593      */
   1594     {
   1595         UText  text1 = UTEXT_INITIALIZER;
   1596         UText  text2 = UTEXT_INITIALIZER;
   1597         UChar  text2Chars[20];
   1598         UText  *resultText;
   1599         const UChar   *result;
   1600         int32_t  textLength;
   1601         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
   1602         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
   1603         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
   1604 
   1605 
   1606         status = U_ZERO_ERROR;
   1607         utext_openUTF8(&text1, str_abcccd, -1, &status);
   1608         u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
   1609         utext_openUChars(&text2, text2Chars, -1, &status);
   1610 
   1611         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1612         re = uregex_openUText(&patternText, 0, NULL, &status);
   1613 
   1614         /* First set a UText */
   1615         uregex_setUText(re, &text1, &status);
   1616         resultText = uregex_getUText(re, NULL, &status);
   1617         TEST_ASSERT_SUCCESS(status);
   1618         TEST_ASSERT(resultText != &text1);
   1619         utext_setNativeIndex(resultText, 0);
   1620         utext_setNativeIndex(&text1, 0);
   1621         TEST_ASSERT(testUTextEqual(resultText, &text1));
   1622         utext_close(resultText);
   1623 
   1624         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
   1625         (void)result;    /* Suppress set but not used warning. */
   1626         TEST_ASSERT(textLength == -1 || textLength == 6);
   1627         resultText = uregex_getUText(re, NULL, &status);
   1628         TEST_ASSERT_SUCCESS(status);
   1629         TEST_ASSERT(resultText != &text1);
   1630         utext_setNativeIndex(resultText, 0);
   1631         utext_setNativeIndex(&text1, 0);
   1632         TEST_ASSERT(testUTextEqual(resultText, &text1));
   1633         utext_close(resultText);
   1634 
   1635         /* Then set a UChar * */
   1636         uregex_setText(re, text2Chars, 7, &status);
   1637         resultText = uregex_getUText(re, NULL, &status);
   1638         TEST_ASSERT_SUCCESS(status);
   1639         utext_setNativeIndex(resultText, 0);
   1640         utext_setNativeIndex(&text2, 0);
   1641         TEST_ASSERT(testUTextEqual(resultText, &text2));
   1642         utext_close(resultText);
   1643         result = uregex_getText(re, &textLength, &status);
   1644         TEST_ASSERT(textLength == 7);
   1645 
   1646         uregex_close(re);
   1647         utext_close(&text1);
   1648         utext_close(&text2);
   1649     }
   1650 
   1651     /*
   1652      *  matches()
   1653      */
   1654     {
   1655         UText   text1 = UTEXT_INITIALIZER;
   1656         UBool   result;
   1657         UText   nullText = UTEXT_INITIALIZER;
   1658         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
   1659         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
   1660 
   1661         status = U_ZERO_ERROR;
   1662         utext_openUTF8(&text1, str_abcccde, -1, &status);
   1663         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1664         re = uregex_openUText(&patternText, 0, NULL, &status);
   1665 
   1666         uregex_setUText(re, &text1, &status);
   1667         result = uregex_matches(re, 0, &status);
   1668         TEST_ASSERT(result == FALSE);
   1669         TEST_ASSERT_SUCCESS(status);
   1670         uregex_close(re);
   1671 
   1672         status = U_ZERO_ERROR;
   1673         re = uregex_openC(".?", 0, NULL, &status);
   1674         uregex_setUText(re, &text1, &status);
   1675         result = uregex_matches(re, 7, &status);
   1676         TEST_ASSERT(result == TRUE);
   1677         TEST_ASSERT_SUCCESS(status);
   1678 
   1679         status = U_ZERO_ERROR;
   1680         utext_openUTF8(&nullText, "", -1, &status);
   1681         uregex_setUText(re, &nullText, &status);
   1682         TEST_ASSERT_SUCCESS(status);
   1683         result = uregex_matches(re, 0, &status);
   1684         TEST_ASSERT(result == TRUE);
   1685         TEST_ASSERT_SUCCESS(status);
   1686 
   1687         uregex_close(re);
   1688         utext_close(&text1);
   1689         utext_close(&nullText);
   1690     }
   1691 
   1692 
   1693     /*
   1694      *  lookingAt()    Used in setText test.
   1695      */
   1696 
   1697 
   1698     /*
   1699      *  find(), findNext, start, end, reset
   1700      */
   1701     {
   1702         UChar    text1[50];
   1703         UBool    result;
   1704         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
   1705         status = U_ZERO_ERROR;
   1706         re = uregex_openC("rx", 0, NULL, &status);
   1707 
   1708         uregex_setText(re, text1, -1, &status);
   1709         result = uregex_find(re, 0, &status);
   1710         TEST_ASSERT(result == TRUE);
   1711         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
   1712         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
   1713         TEST_ASSERT_SUCCESS(status);
   1714 
   1715         result = uregex_find(re, 9, &status);
   1716         TEST_ASSERT(result == TRUE);
   1717         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
   1718         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
   1719         TEST_ASSERT_SUCCESS(status);
   1720 
   1721         result = uregex_find(re, 14, &status);
   1722         TEST_ASSERT(result == FALSE);
   1723         TEST_ASSERT_SUCCESS(status);
   1724 
   1725         status = U_ZERO_ERROR;
   1726         uregex_reset(re, 0, &status);
   1727 
   1728         result = uregex_findNext(re, &status);
   1729         TEST_ASSERT(result == TRUE);
   1730         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
   1731         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
   1732         TEST_ASSERT_SUCCESS(status);
   1733 
   1734         result = uregex_findNext(re, &status);
   1735         TEST_ASSERT(result == TRUE);
   1736         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
   1737         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
   1738         TEST_ASSERT_SUCCESS(status);
   1739 
   1740         status = U_ZERO_ERROR;
   1741         uregex_reset(re, 12, &status);
   1742 
   1743         result = uregex_findNext(re, &status);
   1744         TEST_ASSERT(result == TRUE);
   1745         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
   1746         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
   1747         TEST_ASSERT_SUCCESS(status);
   1748 
   1749         result = uregex_findNext(re, &status);
   1750         TEST_ASSERT(result == FALSE);
   1751         TEST_ASSERT_SUCCESS(status);
   1752 
   1753         uregex_close(re);
   1754     }
   1755 
   1756     /*
   1757      *  groupUText()
   1758      */
   1759     {
   1760         UChar    text1[80];
   1761         UText   *actual;
   1762         UBool    result;
   1763         int64_t  groupLen = 0;
   1764         UChar    groupBuf[20];
   1765 
   1766         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
   1767 
   1768         status = U_ZERO_ERROR;
   1769         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
   1770         TEST_ASSERT_SUCCESS(status);
   1771 
   1772         uregex_setText(re, text1, -1, &status);
   1773         result = uregex_find(re, 0, &status);
   1774         TEST_ASSERT(result==TRUE);
   1775 
   1776         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
   1777         status = U_ZERO_ERROR;
   1778         actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
   1779         TEST_ASSERT_SUCCESS(status);
   1780 
   1781         TEST_ASSERT(utext_getNativeIndex(actual) == 6);  /* index of "abc " within "noise abc ..." */
   1782         TEST_ASSERT(groupLen == 16);   /* length of "abc interior def"  */
   1783         utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
   1784 
   1785         TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
   1786         utext_close(actual);
   1787 
   1788         /*  Capture group #1.  Should succeed. */
   1789         status = U_ZERO_ERROR;
   1790 
   1791         actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
   1792         TEST_ASSERT_SUCCESS(status);
   1793         TEST_ASSERT(9 == utext_getNativeIndex(actual));    /* index of " interior " within "noise abc interior def ... " */
   1794                                                            /*    (within the string text1)           */
   1795         TEST_ASSERT(10 == groupLen);                       /* length of " interior " */
   1796         utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
   1797         TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
   1798 
   1799         utext_close(actual);
   1800 
   1801         /*  Capture group out of range.  Error. */
   1802         status = U_ZERO_ERROR;
   1803         actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
   1804         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
   1805         utext_close(actual);
   1806 
   1807         uregex_close(re);
   1808     }
   1809 
   1810     /*
   1811      *  replaceFirst()
   1812      */
   1813     {
   1814         UChar    text1[80];
   1815         UChar    text2[80];
   1816         UText    replText = UTEXT_INITIALIZER;
   1817         UText   *result;
   1818         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
   1819         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
   1820         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
   1821                0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
   1822         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
   1823         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
   1824         status = U_ZERO_ERROR;
   1825         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
   1826         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
   1827         utext_openUTF8(&replText, str_1x, -1, &status);
   1828 
   1829         re = uregex_openC("x(.*?)x", 0, NULL, &status);
   1830         TEST_ASSERT_SUCCESS(status);
   1831 
   1832         /*  Normal case, with match */
   1833         uregex_setText(re, text1, -1, &status);
   1834         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1835         TEST_ASSERT_SUCCESS(status);
   1836         TEST_ASSERT_UTEXT(str_Replxxx, result);
   1837         utext_close(result);
   1838 
   1839         /* No match.  Text should copy to output with no changes.  */
   1840         uregex_setText(re, text2, -1, &status);
   1841         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1842         TEST_ASSERT_SUCCESS(status);
   1843         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
   1844         utext_close(result);
   1845 
   1846         /* Unicode escapes */
   1847         uregex_setText(re, text1, -1, &status);
   1848         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
   1849         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1850         TEST_ASSERT_SUCCESS(status);
   1851         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
   1852         utext_close(result);
   1853 
   1854         uregex_close(re);
   1855         utext_close(&replText);
   1856     }
   1857 
   1858 
   1859     /*
   1860      *  replaceAll()
   1861      */
   1862     {
   1863         UChar    text1[80];
   1864         UChar    text2[80];
   1865         UText    replText = UTEXT_INITIALIZER;
   1866         UText   *result;
   1867         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
   1868         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
   1869         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
   1870         status = U_ZERO_ERROR;
   1871         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
   1872         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
   1873         utext_openUTF8(&replText, str_1, -1, &status);
   1874 
   1875         re = uregex_openC("x(.*?)x", 0, NULL, &status);
   1876         TEST_ASSERT_SUCCESS(status);
   1877 
   1878         /*  Normal case, with match */
   1879         uregex_setText(re, text1, -1, &status);
   1880         result = uregex_replaceAllUText(re, &replText, NULL, &status);
   1881         TEST_ASSERT_SUCCESS(status);
   1882         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
   1883         utext_close(result);
   1884 
   1885         /* No match.  Text should copy to output with no changes.  */
   1886         uregex_setText(re, text2, -1, &status);
   1887         result = uregex_replaceAllUText(re, &replText, NULL, &status);
   1888         TEST_ASSERT_SUCCESS(status);
   1889         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
   1890         utext_close(result);
   1891 
   1892         uregex_close(re);
   1893         utext_close(&replText);
   1894     }
   1895 
   1896 
   1897     /*
   1898      *  appendReplacement()
   1899      */
   1900     {
   1901         UChar    text[100];
   1902         UChar    repl[100];
   1903         UChar    buf[100];
   1904         UChar   *bufPtr;
   1905         int32_t  bufCap;
   1906 
   1907         status = U_ZERO_ERROR;
   1908         re = uregex_openC(".*", 0, 0, &status);
   1909         TEST_ASSERT_SUCCESS(status);
   1910 
   1911         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
   1912         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
   1913         uregex_setText(re, text, -1, &status);
   1914 
   1915         /* match covers whole target string */
   1916         uregex_find(re, 0, &status);
   1917         TEST_ASSERT_SUCCESS(status);
   1918         bufPtr = buf;
   1919         bufCap = UPRV_LENGTHOF(buf);
   1920         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1921         TEST_ASSERT_SUCCESS(status);
   1922         TEST_ASSERT_STRING("some other", buf, TRUE);
   1923 
   1924         /* Match has \u \U escapes */
   1925         uregex_find(re, 0, &status);
   1926         TEST_ASSERT_SUCCESS(status);
   1927         bufPtr = buf;
   1928         bufCap = UPRV_LENGTHOF(buf);
   1929         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
   1930         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1931         TEST_ASSERT_SUCCESS(status);
   1932         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
   1933 
   1934         uregex_close(re);
   1935     }
   1936 
   1937 
   1938     /*
   1939      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
   1940      */
   1941 
   1942     /*
   1943      *  splitUText()
   1944      */
   1945     {
   1946         UChar    textToSplit[80];
   1947         UChar    text2[80];
   1948         UText    *fields[10];
   1949         int32_t  numFields;
   1950         int32_t i;
   1951 
   1952         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
   1953         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
   1954 
   1955         status = U_ZERO_ERROR;
   1956         re = uregex_openC(":", 0, NULL, &status);
   1957 
   1958 
   1959         /*  Simple split */
   1960 
   1961         uregex_setText(re, textToSplit, -1, &status);
   1962         TEST_ASSERT_SUCCESS(status);
   1963 
   1964         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1965         if (U_SUCCESS(status)) {
   1966             memset(fields, 0, sizeof(fields));
   1967             numFields = uregex_splitUText(re, fields, 10, &status);
   1968             TEST_ASSERT_SUCCESS(status);
   1969 
   1970             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1971             if(U_SUCCESS(status)) {
   1972               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
   1973               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
   1974               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
   1975                 TEST_ASSERT(numFields == 3);
   1976                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   1977                 TEST_ASSERT_UTEXT(str_second, fields[1]);
   1978                 TEST_ASSERT_UTEXT(str_third, fields[2]);
   1979                 TEST_ASSERT(fields[3] == NULL);
   1980             }
   1981             for(i = 0; i < numFields; i++) {
   1982                 utext_close(fields[i]);
   1983             }
   1984         }
   1985 
   1986         uregex_close(re);
   1987 
   1988 
   1989         /*  Split with too few output strings available */
   1990         status = U_ZERO_ERROR;
   1991         re = uregex_openC(":", 0, NULL, &status);
   1992         uregex_setText(re, textToSplit, -1, &status);
   1993         TEST_ASSERT_SUCCESS(status);
   1994 
   1995         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1996         if(U_SUCCESS(status)) {
   1997             fields[0] = NULL;
   1998             fields[1] = NULL;
   1999             fields[2] = &patternText;
   2000             numFields = uregex_splitUText(re, fields, 2, &status);
   2001             TEST_ASSERT_SUCCESS(status);
   2002 
   2003             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2004             if(U_SUCCESS(status)) {
   2005                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2006                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
   2007                 TEST_ASSERT(numFields == 2);
   2008                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2009                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
   2010                 TEST_ASSERT(fields[2] == &patternText);
   2011             }
   2012             for(i = 0; i < numFields; i++) {
   2013                 utext_close(fields[i]);
   2014             }
   2015         }
   2016 
   2017         uregex_close(re);
   2018     }
   2019 
   2020     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
   2021      *                   comes out as additional fields.  */
   2022     {
   2023         UChar    textToSplit[80];
   2024         UText    *fields[10];
   2025         int32_t  numFields;
   2026         int32_t i;
   2027 
   2028         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
   2029 
   2030         status = U_ZERO_ERROR;
   2031         re = uregex_openC("<(.*?)>", 0, NULL, &status);
   2032 
   2033         uregex_setText(re, textToSplit, -1, &status);
   2034         TEST_ASSERT_SUCCESS(status);
   2035 
   2036         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2037         if(U_SUCCESS(status)) {
   2038             memset(fields, 0, sizeof(fields));
   2039             numFields = uregex_splitUText(re, fields, 10, &status);
   2040             TEST_ASSERT_SUCCESS(status);
   2041 
   2042             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2043             if(U_SUCCESS(status)) {
   2044                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2045                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2046                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2047                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2048                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
   2049 
   2050                 TEST_ASSERT(numFields == 5);
   2051                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2052                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2053                 TEST_ASSERT_UTEXT(str_second, fields[2]);
   2054                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2055                 TEST_ASSERT_UTEXT(str_third, fields[4]);
   2056                 TEST_ASSERT(fields[5] == NULL);
   2057             }
   2058             for(i = 0; i < numFields; i++) {
   2059                 utext_close(fields[i]);
   2060             }
   2061         }
   2062 
   2063         /*  Split with too few output strings available (2) */
   2064         status = U_ZERO_ERROR;
   2065         fields[0] = NULL;
   2066         fields[1] = NULL;
   2067         fields[2] = &patternText;
   2068         numFields = uregex_splitUText(re, fields, 2, &status);
   2069         TEST_ASSERT_SUCCESS(status);
   2070 
   2071         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2072         if(U_SUCCESS(status)) {
   2073             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2074             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
   2075             TEST_ASSERT(numFields == 2);
   2076             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2077             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
   2078             TEST_ASSERT(fields[2] == &patternText);
   2079         }
   2080         for(i = 0; i < numFields; i++) {
   2081             utext_close(fields[i]);
   2082         }
   2083 
   2084 
   2085         /*  Split with too few output strings available (3) */
   2086         status = U_ZERO_ERROR;
   2087         fields[0] = NULL;
   2088         fields[1] = NULL;
   2089         fields[2] = NULL;
   2090         fields[3] = &patternText;
   2091         numFields = uregex_splitUText(re, fields, 3, &status);
   2092         TEST_ASSERT_SUCCESS(status);
   2093 
   2094         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2095         if(U_SUCCESS(status)) {
   2096             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2097             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2098             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
   2099             TEST_ASSERT(numFields == 3);
   2100             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2101             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2102             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
   2103             TEST_ASSERT(fields[3] == &patternText);
   2104         }
   2105         for(i = 0; i < numFields; i++) {
   2106             utext_close(fields[i]);
   2107         }
   2108 
   2109         /*  Split with just enough output strings available (5) */
   2110         status = U_ZERO_ERROR;
   2111         fields[0] = NULL;
   2112         fields[1] = NULL;
   2113         fields[2] = NULL;
   2114         fields[3] = NULL;
   2115         fields[4] = NULL;
   2116         fields[5] = &patternText;
   2117         numFields = uregex_splitUText(re, fields, 5, &status);
   2118         TEST_ASSERT_SUCCESS(status);
   2119 
   2120         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2121         if(U_SUCCESS(status)) {
   2122             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2123             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2124             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2125             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2126             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
   2127 
   2128             TEST_ASSERT(numFields == 5);
   2129             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2130             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2131             TEST_ASSERT_UTEXT(str_second, fields[2]);
   2132             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2133             TEST_ASSERT_UTEXT(str_third, fields[4]);
   2134             TEST_ASSERT(fields[5] == &patternText);
   2135         }
   2136         for(i = 0; i < numFields; i++) {
   2137             utext_close(fields[i]);
   2138         }
   2139 
   2140         /* Split, end of text is a field delimiter.   */
   2141         status = U_ZERO_ERROR;
   2142         uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
   2143         TEST_ASSERT_SUCCESS(status);
   2144 
   2145         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2146         if(U_SUCCESS(status)) {
   2147             memset(fields, 0, sizeof(fields));
   2148             fields[9] = &patternText;
   2149             numFields = uregex_splitUText(re, fields, 9, &status);
   2150             TEST_ASSERT_SUCCESS(status);
   2151 
   2152             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2153             if(U_SUCCESS(status)) {
   2154                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2155                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2156                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2157                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2158                 const char str_empty[] = { 0x00 };
   2159 
   2160                 TEST_ASSERT(numFields == 5);
   2161                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2162                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2163                 TEST_ASSERT_UTEXT(str_second, fields[2]);
   2164                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2165                 TEST_ASSERT_UTEXT(str_empty,  fields[4]);
   2166                 TEST_ASSERT(fields[5] == NULL);
   2167                 TEST_ASSERT(fields[8] == NULL);
   2168                 TEST_ASSERT(fields[9] == &patternText);
   2169             }
   2170             for(i = 0; i < numFields; i++) {
   2171                 utext_close(fields[i]);
   2172             }
   2173         }
   2174 
   2175         uregex_close(re);
   2176     }
   2177     utext_close(&patternText);
   2178 }
   2179 
   2180 
   2181 static void TestRefreshInput(void) {
   2182     /*
   2183      *  RefreshInput changes out the input of a URegularExpression without
   2184      *    changing anything else in the match state.  Used with Java JNI,
   2185      *    when Java moves the underlying string storage.   This test
   2186      *    runs a find() loop, moving the text after the first match.
   2187      *    The right number of matches should still be found.
   2188      */
   2189     UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
   2190     UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
   2191     UErrorCode status = U_ZERO_ERROR;
   2192     URegularExpression *re;
   2193     UText ut1 = UTEXT_INITIALIZER;
   2194     UText ut2 = UTEXT_INITIALIZER;
   2195 
   2196     re = uregex_openC("[ABC]", 0, 0, &status);
   2197     TEST_ASSERT_SUCCESS(status);
   2198 
   2199     utext_openUChars(&ut1, testStr, -1, &status);
   2200     TEST_ASSERT_SUCCESS(status);
   2201     uregex_setUText(re, &ut1, &status);
   2202     TEST_ASSERT_SUCCESS(status);
   2203 
   2204     /* Find the first match "A" in the original string */
   2205     TEST_ASSERT(uregex_findNext(re, &status));
   2206     TEST_ASSERT(uregex_start(re, 0, &status) == 0);
   2207 
   2208     /* Move the string, kill the original string.  */
   2209     u_strcpy(movedStr, testStr);
   2210     u_memset(testStr, 0, u_strlen(testStr));
   2211     utext_openUChars(&ut2, movedStr, -1, &status);
   2212     TEST_ASSERT_SUCCESS(status);
   2213     uregex_refreshUText(re, &ut2, &status);
   2214     TEST_ASSERT_SUCCESS(status);
   2215 
   2216     /* Find the following two matches, now working in the moved string. */
   2217     TEST_ASSERT(uregex_findNext(re, &status));
   2218     TEST_ASSERT(uregex_start(re, 0, &status) == 2);
   2219     TEST_ASSERT(uregex_findNext(re, &status));
   2220     TEST_ASSERT(uregex_start(re, 0, &status) == 4);
   2221     TEST_ASSERT(FALSE == uregex_findNext(re, &status));
   2222 
   2223     uregex_close(re);
   2224 }
   2225 
   2226 
   2227 static void TestBug8421(void) {
   2228     /* Bug 8421:  setTimeLimit on a regular expresssion before setting text to be matched
   2229      *             was failing.
   2230      */
   2231     URegularExpression *re;
   2232     UErrorCode status = U_ZERO_ERROR;
   2233     int32_t  limit = -1;
   2234 
   2235     re = uregex_openC("abc", 0, 0, &status);
   2236     TEST_ASSERT_SUCCESS(status);
   2237 
   2238     limit = uregex_getTimeLimit(re, &status);
   2239     TEST_ASSERT_SUCCESS(status);
   2240     TEST_ASSERT(limit == 0);
   2241 
   2242     uregex_setTimeLimit(re, 100, &status);
   2243     TEST_ASSERT_SUCCESS(status);
   2244     limit = uregex_getTimeLimit(re, &status);
   2245     TEST_ASSERT_SUCCESS(status);
   2246     TEST_ASSERT(limit == 100);
   2247 
   2248     uregex_close(re);
   2249 }
   2250 
   2251 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
   2252     return FALSE;
   2253 }
   2254 
   2255 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
   2256     return FALSE;
   2257 }
   2258 
   2259 static void TestBug10815() {
   2260   /* Bug 10815:   uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
   2261    *              when the callback function specified by uregex_setMatchCallback() returns FALSE
   2262    */
   2263     URegularExpression *re;
   2264     UErrorCode status = U_ZERO_ERROR;
   2265     UChar    text[100];
   2266 
   2267 
   2268     // findNext() with a find progress callback function.
   2269 
   2270     re = uregex_openC(".z", 0, 0, &status);
   2271     TEST_ASSERT_SUCCESS(status);
   2272 
   2273     u_uastrncpy(text, "Hello, World.",  UPRV_LENGTHOF(text));
   2274     uregex_setText(re, text, -1, &status);
   2275     TEST_ASSERT_SUCCESS(status);
   2276 
   2277     uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
   2278     TEST_ASSERT_SUCCESS(status);
   2279 
   2280     uregex_findNext(re, &status);
   2281     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
   2282 
   2283     uregex_close(re);
   2284 
   2285     // findNext() with a match progress callback function.
   2286 
   2287     status = U_ZERO_ERROR;
   2288     re = uregex_openC("((xxx)*)*y", 0, 0, &status);
   2289     TEST_ASSERT_SUCCESS(status);
   2290 
   2291     // Pattern + this text gives an exponential time match. Without the callback to stop the match,
   2292     // it will appear to be stuck in a (near) infinite loop.
   2293     u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  UPRV_LENGTHOF(text));
   2294     uregex_setText(re, text, -1, &status);
   2295     TEST_ASSERT_SUCCESS(status);
   2296 
   2297     uregex_setMatchCallback(re, MatchCallback, NULL, &status);
   2298     TEST_ASSERT_SUCCESS(status);
   2299 
   2300     uregex_findNext(re, &status);
   2301     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
   2302 
   2303     uregex_close(re);
   2304 }
   2305 
   2306 
   2307 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
   2308