Home | History | Annotate | Download | only in cintltst
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /********************************************************************
      4  * COPYRIGHT:
      5  * Copyright (c) 2004-2015, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  ********************************************************************/
      8 /********************************************************************************
      9 *
     10 * File reapits.c
     11 *
     12 *********************************************************************************/
     13 /*C API TEST FOR Regular Expressions */
     14 /**
     15 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
     16 *   try to test the full functionality.  It just calls each function and verifies that it
     17 *   works on a basic level.
     18 *
     19 *   More complete testing of regular expression functionality is done with the C++ tests.
     20 **/
     21 
     22 #include "unicode/utypes.h"
     23 
     24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     25 
     26 #include <stdlib.h>
     27 #include <string.h>
     28 #include "unicode/uloc.h"
     29 #include "unicode/uregex.h"
     30 #include "unicode/ustring.h"
     31 #include "unicode/utext.h"
     32 #include "cintltst.h"
     33 #include "cmemory.h"
     34 
     /*
      * TEST_ASSERT_SUCCESS(status)
      *     When status holds a failure code, reports it through log_data_err()
      *     together with the file, line and u_errorName() of the code.
      *     Expands to a braced block, not an expression.
      */
     #define TEST_ASSERT_SUCCESS(status) { \
         if (U_FAILURE(status)) { \
             log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                          __FILE__, __LINE__, u_errorName(status)); \
         } \
     }
     37 
     /*
      * TEST_ASSERT(expr)
      *     Reports a test failure through log_err(), quoting the text of expr,
      *     when expr compares equal to FALSE.
      *     Deliberately a plain braced block: callers in this file invoke it both
      *     with and without a trailing semicolon.
      */
     #define TEST_ASSERT(expr) { \
         if ((expr) == FALSE) { \
             log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                     __FILE__, __LINE__, #expr); \
         } \
     }
     40 
     /*
      *   TEST_SETUP and TEST_TEARDOWN
      *         macros to handle the boilerplate around setting up regex test cases.
      *         parameters to setup:
      *              pattern:     The regex pattern, a (char *) null terminated C string.
      *              testString:  The string data, also a (char *) C string.
      *              flags:       Regex flags to set when compiling the pattern
      *
      *         Put arbitrary test code between SETUP and TEARDOWN.
      *         "re" is the compiled, ready-to-go regular expression.
      */
     /*
      * TEST_SETUP(pattern, testString, flags)
      *     Opens a brace scope, compiles pattern into the caller's "re" with
      *     uregex_openC(), converts testString into a malloc'ed UChar buffer
      *     (srcString) and installs it as the text of "re".
      *     Requires "status" (UErrorCode) and "re" (URegularExpression *) to be
      *     declared by the caller.
      *
      *     The expansion ends inside an open "if (U_SUCCESS(status)) {", so the
      *     test code that follows only runs when setup succeeded.  TEST_TEARDOWN
      *     closes that block and the scope.
      *
      *     A failed malloc() is recorded in status as U_MEMORY_ALLOCATION_ERROR
      *     (unless an earlier failure is already stored there) instead of handing
      *     a NULL destination to u_uastrncpy().
      */
     #define TEST_SETUP(pattern, testString, flags) {  \
         UChar   *srcString = NULL;  \
         status = U_ZERO_ERROR; \
         re = uregex_openC(pattern, flags, NULL, &status);  \
         TEST_ASSERT_SUCCESS(status);   \
         srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
         if (srcString == NULL) { \
             if (U_SUCCESS(status)) { \
                 status = U_MEMORY_ALLOCATION_ERROR; \
             } \
         } else { \
             u_uastrncpy(srcString, testString,  strlen(testString)+1); \
         } \
         uregex_setText(re, srcString, -1, &status); \
         TEST_ASSERT_SUCCESS(status);  \
         if (U_SUCCESS(status)) {
     62 
     /*
      * TEST_TEARDOWN
      *     Counterpart of TEST_SETUP.  Closes the "if (U_SUCCESS(status)) {" block
      *     that TEST_SETUP left open, reports any failure left in status, closes
      *     "re", frees srcString, and closes the scope TEST_SETUP opened.
      */
     #define TEST_TEARDOWN \
         }                               /* end of the guarded test body */ \
         TEST_ASSERT_SUCCESS(status); \
         uregex_close(re); \
         free(srcString); \
         }                               /* end of the TEST_SETUP scope */
     69 
     70 
     71 /**
     72  * @param expected utf-8 array of bytes to be expected
     73  */
     74 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
     75      char     buf_inside_macro[120];
     76      int32_t  len = (int32_t)strlen(expected);
     77      UBool    success;
     78      if (nulTerm) {
     79          u_austrncpy(buf_inside_macro, (actual), len+1);
     80          buf_inside_macro[len+2] = 0;
     81          success = (strcmp((expected), buf_inside_macro) == 0);
     82      } else {
     83          u_austrncpy(buf_inside_macro, (actual), len);
     84          buf_inside_macro[len+1] = 0;
     85          success = (strncmp((expected), buf_inside_macro, len) == 0);
     86      }
     87      if (success == FALSE) {
     88          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
     89              file, line, (expected), buf_inside_macro);
     90      }
     91 }
     92 
     /* Calls test_assert_string() with the file and line of the invocation. */
     #define TEST_ASSERT_STRING(expected, actual, nulTerm) \
         test_assert_string((expected), (actual), (nulTerm), __FILE__, __LINE__)
     94 
     95 
     96 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
     97     int32_t u8i = 0;
     98     UChar32 u8c = 0;
     99     UChar32 utc = 0;
    100     UBool   stringsEqual = TRUE;
    101     utext_setNativeIndex(utext, 0);
    102     for (;;) {
    103         U8_NEXT_UNSAFE(utf8, u8i, u8c);
    104         utc = utext_next32(utext);
    105         if (u8c == 0 && utc == U_SENTINEL) {
    106             break;
    107         }
    108         if (u8c != utc || u8c == 0) {
    109             stringsEqual = FALSE;
    110             break;
    111         }
    112     }
    113     return stringsEqual;
    114 }
    115 
    116 
    117 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
    118     utext_setNativeIndex(actual, 0);
    119     if (!equals_utf8_utext(expected, actual)) {
    120         UChar32 c;
    121         log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
    122         c = utext_next32From(actual, 0);
    123         while (c != U_SENTINEL) {
    124             if (0x20<c && c <0x7e) {
    125                 log_err("%c", c);
    126             } else {
    127                 log_err("%#x", c);
    128             }
    129             c = UTEXT_NEXT32(actual);
    130         }
    131         log_err("\"\n");
    132     }
    133 }
    134 
    /*
     * TEST_ASSERT_UTEXT(const char *expected, UText *actual)
     *     Calls test_assert_utext() with the file and line of the invocation.
     *     Note:  expected is a UTF-8 encoded string, _not_ the system code page.
     */
    #define TEST_ASSERT_UTEXT(expected, actual) \
        test_assert_utext((expected), (actual), __FILE__, __LINE__)
    140 
    141 static UBool testUTextEqual(UText *uta, UText *utb) {
    142     UChar32 ca = 0;
    143     UChar32 cb = 0;
    144     utext_setNativeIndex(uta, 0);
    145     utext_setNativeIndex(utb, 0);
    146     do {
    147         ca = utext_next32(uta);
    148         cb = utext_next32(utb);
    149         if (ca != cb) {
    150             break;
    151         }
    152     } while (ca != U_SENTINEL);
    153     return ca == cb;
    154 }
    155 
    156 
    157 
    158 
    159 static void TestRegexCAPI(void);
    160 static void TestBug4315(void);
    161 static void TestUTextAPI(void);
    162 static void TestRefreshInput(void);
    163 static void TestBug8421(void);
    164 static void TestBug10815(void);
    165 
    166 void addURegexTest(TestNode** root);
    167 
    168 void addURegexTest(TestNode** root)
    169 {
    170     addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
    171     addTest(root, &TestBug4315,   "regex/TestBug4315");
    172     addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
    173     addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
    174     addTest(root, &TestBug8421,   "regex/TestBug8421");
    175     addTest(root, &TestBug10815,   "regex/TestBug10815");
    176 }
    177 
    178 /*
    179  * Call back function and context struct used for testing
    180  *    regular expression user callbacks.  This test is mostly the same as
    181  *   the corresponding C++ test in intltest.
    182  */
    183 typedef struct callBackContext {
    184     int32_t          maxCalls;
    185     int32_t          numCalls;
    186     int32_t          lastSteps;
    187 } callBackContext;
    188 
    189 static UBool U_EXPORT2 U_CALLCONV
    190 TestCallbackFn(const void *context, int32_t steps) {
    191   callBackContext  *info = (callBackContext *)context;
    192   if (info->lastSteps+1 != steps) {
    193       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
    194   }
    195   info->lastSteps = steps;
    196   info->numCalls++;
    197   return (info->numCalls < info->maxCalls);
    198 }
    199 
    200 /*
    201  *   Regular Expression C API Tests
    202  */
    203 static void TestRegexCAPI(void) {
    204     UErrorCode           status = U_ZERO_ERROR;
    205     URegularExpression  *re;
    206     UChar                pat[200];
    207     UChar               *minus1;
    208 
    209     memset(&minus1, -1, sizeof(minus1));
    210 
    211     /* Mimimalist open/close */
    212     u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
    213     re = uregex_open(pat, -1, 0, 0, &status);
    214     if (U_FAILURE(status)) {
    215          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
    216          return;
    217     }
    218     uregex_close(re);
    219 
    220     /* Open with all flag values set */
    221     status = U_ZERO_ERROR;
    222     re = uregex_open(pat, -1,
    223         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
    224         0, &status);
    225     TEST_ASSERT_SUCCESS(status);
    226     uregex_close(re);
    227 
    228     /* Open with an invalid flag */
    229     status = U_ZERO_ERROR;
    230     re = uregex_open(pat, -1, 0x40000000, 0, &status);
    231     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
    232     uregex_close(re);
    233 
    234     /* Open with an unimplemented flag */
    235     status = U_ZERO_ERROR;
    236     re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
    237     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
    238     uregex_close(re);
    239 
    240     /* openC with an invalid parameter */
    241     status = U_ZERO_ERROR;
    242     re = uregex_openC(NULL,
    243         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
    244     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
    245 
    246     /* openC with an invalid parameter */
    247     status = U_USELESS_COLLATOR_ERROR;
    248     re = uregex_openC(NULL,
    249         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
    250     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
    251 
    252     /* openC   open from a C string */
    253     {
    254         const UChar   *p;
    255         int32_t  len;
    256         status = U_ZERO_ERROR;
    257         re = uregex_openC("abc*", 0, 0, &status);
    258         TEST_ASSERT_SUCCESS(status);
    259         p = uregex_pattern(re, &len, &status);
    260         TEST_ASSERT_SUCCESS(status);
    261 
    262         /* The TEST_ASSERT_SUCCESS above should change too... */
    263         if(U_SUCCESS(status)) {
    264             u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
    265             TEST_ASSERT(u_strcmp(pat, p) == 0);
    266             TEST_ASSERT(len==(int32_t)strlen("abc*"));
    267         }
    268 
    269         uregex_close(re);
    270 
    271         /*  TODO:  Open with ParseError parameter */
    272     }
    273 
    274     /*
    275      *  clone
    276      */
    277     {
    278         URegularExpression *clone1;
    279         URegularExpression *clone2;
    280         URegularExpression *clone3;
    281         UChar  testString1[30];
    282         UChar  testString2[30];
    283         UBool  result;
    284 
    285 
    286         status = U_ZERO_ERROR;
    287         re = uregex_openC("abc*", 0, 0, &status);
    288         TEST_ASSERT_SUCCESS(status);
    289         clone1 = uregex_clone(re, &status);
    290         TEST_ASSERT_SUCCESS(status);
    291         TEST_ASSERT(clone1 != NULL);
    292 
    293         status = U_ZERO_ERROR;
    294         clone2 = uregex_clone(re, &status);
    295         TEST_ASSERT_SUCCESS(status);
    296         TEST_ASSERT(clone2 != NULL);
    297         uregex_close(re);
    298 
    299         status = U_ZERO_ERROR;
    300         clone3 = uregex_clone(clone2, &status);
    301         TEST_ASSERT_SUCCESS(status);
    302         TEST_ASSERT(clone3 != NULL);
    303 
    304         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
    305         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
    306 
    307         status = U_ZERO_ERROR;
    308         uregex_setText(clone1, testString1, -1, &status);
    309         TEST_ASSERT_SUCCESS(status);
    310         result = uregex_lookingAt(clone1, 0, &status);
    311         TEST_ASSERT_SUCCESS(status);
    312         TEST_ASSERT(result==TRUE);
    313 
    314         status = U_ZERO_ERROR;
    315         uregex_setText(clone2, testString2, -1, &status);
    316         TEST_ASSERT_SUCCESS(status);
    317         result = uregex_lookingAt(clone2, 0, &status);
    318         TEST_ASSERT_SUCCESS(status);
    319         TEST_ASSERT(result==FALSE);
    320         result = uregex_find(clone2, 0, &status);
    321         TEST_ASSERT_SUCCESS(status);
    322         TEST_ASSERT(result==TRUE);
    323 
    324         uregex_close(clone1);
    325         uregex_close(clone2);
    326         uregex_close(clone3);
    327 
    328     }
    329 
    330     /*
    331      *  pattern()
    332     */
    333     {
    334         const UChar  *resultPat;
    335         int32_t       resultLen;
    336         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
    337         status = U_ZERO_ERROR;
    338         re = uregex_open(pat, -1, 0, NULL, &status);
    339         resultPat = uregex_pattern(re, &resultLen, &status);
    340         TEST_ASSERT_SUCCESS(status);
    341 
    342         /* The TEST_ASSERT_SUCCESS above should change too... */
    343         if (U_SUCCESS(status)) {
    344             TEST_ASSERT(resultLen == -1);
    345             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
    346         }
    347 
    348         uregex_close(re);
    349 
    350         status = U_ZERO_ERROR;
    351         re = uregex_open(pat, 3, 0, NULL, &status);
    352         resultPat = uregex_pattern(re, &resultLen, &status);
    353         TEST_ASSERT_SUCCESS(status);
    354         TEST_ASSERT_SUCCESS(status);
    355 
    356         /* The TEST_ASSERT_SUCCESS above should change too... */
    357         if (U_SUCCESS(status)) {
    358             TEST_ASSERT(resultLen == 3);
    359             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
    360             TEST_ASSERT(u_strlen(resultPat) == 3);
    361         }
    362 
    363         uregex_close(re);
    364     }
    365 
    366     /*
    367      *  flags()
    368      */
    369     {
    370         int32_t  t;
    371 
    372         status = U_ZERO_ERROR;
    373         re = uregex_open(pat, -1, 0, NULL, &status);
    374         t  = uregex_flags(re, &status);
    375         TEST_ASSERT_SUCCESS(status);
    376         TEST_ASSERT(t == 0);
    377         uregex_close(re);
    378 
    379         status = U_ZERO_ERROR;
    380         re = uregex_open(pat, -1, 0, NULL, &status);
    381         t  = uregex_flags(re, &status);
    382         TEST_ASSERT_SUCCESS(status);
    383         TEST_ASSERT(t == 0);
    384         uregex_close(re);
    385 
    386         status = U_ZERO_ERROR;
    387         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
    388         t  = uregex_flags(re, &status);
    389         TEST_ASSERT_SUCCESS(status);
    390         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
    391         uregex_close(re);
    392     }
    393 
    394     /*
    395      *  setText() and lookingAt()
    396      */
    397     {
    398         UChar  text1[50];
    399         UChar  text2[50];
    400         UBool  result;
    401 
    402         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
    403         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
    404         status = U_ZERO_ERROR;
    405         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
    406         re = uregex_open(pat, -1, 0, NULL, &status);
    407         TEST_ASSERT_SUCCESS(status);
    408 
    409         /* Operation before doing a setText should fail... */
    410         status = U_ZERO_ERROR;
    411         uregex_lookingAt(re, 0, &status);
    412         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
    413 
    414         status = U_ZERO_ERROR;
    415         uregex_setText(re, text1, -1, &status);
    416         result = uregex_lookingAt(re, 0, &status);
    417         TEST_ASSERT(result == TRUE);
    418         TEST_ASSERT_SUCCESS(status);
    419 
    420         status = U_ZERO_ERROR;
    421         uregex_setText(re, text2, -1, &status);
    422         result = uregex_lookingAt(re, 0, &status);
    423         TEST_ASSERT(result == FALSE);
    424         TEST_ASSERT_SUCCESS(status);
    425 
    426         status = U_ZERO_ERROR;
    427         uregex_setText(re, text1, -1, &status);
    428         result = uregex_lookingAt(re, 0, &status);
    429         TEST_ASSERT(result == TRUE);
    430         TEST_ASSERT_SUCCESS(status);
    431 
    432         status = U_ZERO_ERROR;
    433         uregex_setText(re, text1, 5, &status);
    434         result = uregex_lookingAt(re, 0, &status);
    435         TEST_ASSERT(result == FALSE);
    436         TEST_ASSERT_SUCCESS(status);
    437 
    438         status = U_ZERO_ERROR;
    439         uregex_setText(re, text1, 6, &status);
    440         result = uregex_lookingAt(re, 0, &status);
    441         TEST_ASSERT(result == TRUE);
    442         TEST_ASSERT_SUCCESS(status);
    443 
    444         uregex_close(re);
    445     }
    446 
    447 
    448     /*
    449      *  getText()
    450      */
    451     {
    452         UChar    text1[50];
    453         UChar    text2[50];
    454         const UChar   *result;
    455         int32_t  textLength;
    456 
    457         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
    458         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
    459         status = U_ZERO_ERROR;
    460         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
    461         re = uregex_open(pat, -1, 0, NULL, &status);
    462 
    463         uregex_setText(re, text1, -1, &status);
    464         result = uregex_getText(re, &textLength, &status);
    465         TEST_ASSERT(result == text1);
    466         TEST_ASSERT(textLength == -1);
    467         TEST_ASSERT_SUCCESS(status);
    468 
    469         status = U_ZERO_ERROR;
    470         uregex_setText(re, text2, 7, &status);
    471         result = uregex_getText(re, &textLength, &status);
    472         TEST_ASSERT(result == text2);
    473         TEST_ASSERT(textLength == 7);
    474         TEST_ASSERT_SUCCESS(status);
    475 
    476         status = U_ZERO_ERROR;
    477         uregex_setText(re, text2, 4, &status);
    478         result = uregex_getText(re, &textLength, &status);
    479         TEST_ASSERT(result == text2);
    480         TEST_ASSERT(textLength == 4);
    481         TEST_ASSERT_SUCCESS(status);
    482         uregex_close(re);
    483     }
    484 
    485     /*
    486      *  matches()
    487      */
    488     {
    489         UChar   text1[50];
    490         UBool   result;
    491         int     len;
    492         UChar   nullString[] = {0,0,0};
    493 
    494         u_uastrncpy(text1, "abcccde",  UPRV_LENGTHOF(text1));
    495         status = U_ZERO_ERROR;
    496         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
    497         re = uregex_open(pat, -1, 0, NULL, &status);
    498 
    499         uregex_setText(re, text1, -1, &status);
    500         result = uregex_matches(re, 0, &status);
    501         TEST_ASSERT(result == FALSE);
    502         TEST_ASSERT_SUCCESS(status);
    503 
    504         status = U_ZERO_ERROR;
    505         uregex_setText(re, text1, 6, &status);
    506         result = uregex_matches(re, 0, &status);
    507         TEST_ASSERT(result == TRUE);
    508         TEST_ASSERT_SUCCESS(status);
    509 
    510         status = U_ZERO_ERROR;
    511         uregex_setText(re, text1, 6, &status);
    512         result = uregex_matches(re, 1, &status);
    513         TEST_ASSERT(result == FALSE);
    514         TEST_ASSERT_SUCCESS(status);
    515         uregex_close(re);
    516 
    517         status = U_ZERO_ERROR;
    518         re = uregex_openC(".?", 0, NULL, &status);
    519         uregex_setText(re, text1, -1, &status);
    520         len = u_strlen(text1);
    521         result = uregex_matches(re, len, &status);
    522         TEST_ASSERT(result == TRUE);
    523         TEST_ASSERT_SUCCESS(status);
    524 
    525         status = U_ZERO_ERROR;
    526         uregex_setText(re, nullString, -1, &status);
    527         TEST_ASSERT_SUCCESS(status);
    528         result = uregex_matches(re, 0, &status);
    529         TEST_ASSERT(result == TRUE);
    530         TEST_ASSERT_SUCCESS(status);
    531         uregex_close(re);
    532     }
    533 
    534 
    535     /*
    536      *  lookingAt()    Used in setText test.
    537      */
    538 
    539 
    540     /*
    541      *  find(), findNext, start, end, reset
    542      */
    543     {
    544         UChar    text1[50];
    545         UBool    result;
    546         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
    547         status = U_ZERO_ERROR;
    548         re = uregex_openC("rx", 0, NULL, &status);
    549 
    550         uregex_setText(re, text1, -1, &status);
    551         result = uregex_find(re, 0, &status);
    552         TEST_ASSERT(result == TRUE);
    553         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
    554         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
    555         TEST_ASSERT_SUCCESS(status);
    556 
    557         result = uregex_find(re, 9, &status);
    558         TEST_ASSERT(result == TRUE);
    559         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
    560         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
    561         TEST_ASSERT_SUCCESS(status);
    562 
    563         result = uregex_find(re, 14, &status);
    564         TEST_ASSERT(result == FALSE);
    565         TEST_ASSERT_SUCCESS(status);
    566 
    567         status = U_ZERO_ERROR;
    568         uregex_reset(re, 0, &status);
    569 
    570         result = uregex_findNext(re, &status);
    571         TEST_ASSERT(result == TRUE);
    572         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
    573         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
    574         TEST_ASSERT_SUCCESS(status);
    575 
    576         result = uregex_findNext(re, &status);
    577         TEST_ASSERT(result == TRUE);
    578         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
    579         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
    580         TEST_ASSERT_SUCCESS(status);
    581 
    582         status = U_ZERO_ERROR;
    583         uregex_reset(re, 12, &status);
    584 
    585         result = uregex_findNext(re, &status);
    586         TEST_ASSERT(result == TRUE);
    587         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
    588         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
    589         TEST_ASSERT_SUCCESS(status);
    590 
    591         result = uregex_findNext(re, &status);
    592         TEST_ASSERT(result == FALSE);
    593         TEST_ASSERT_SUCCESS(status);
    594 
    595         uregex_close(re);
    596     }
    597 
    598     /*
    599      *  groupCount
    600      */
    601     {
    602         int32_t result;
    603 
    604         status = U_ZERO_ERROR;
    605         re = uregex_openC("abc", 0, NULL, &status);
    606         result = uregex_groupCount(re, &status);
    607         TEST_ASSERT_SUCCESS(status);
    608         TEST_ASSERT(result == 0);
    609         uregex_close(re);
    610 
    611         status = U_ZERO_ERROR;
    612         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
    613         result = uregex_groupCount(re, &status);
    614         TEST_ASSERT_SUCCESS(status);
    615         TEST_ASSERT(result == 3);
    616         uregex_close(re);
    617 
    618     }
    619 
    620 
    621     /*
    622      *  group()
    623      */
    624     {
    625         UChar    text1[80];
    626         UChar    buf[80];
    627         UBool    result;
    628         int32_t  resultSz;
    629         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
    630 
    631         status = U_ZERO_ERROR;
    632         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
    633         TEST_ASSERT_SUCCESS(status);
    634 
    635 
    636         uregex_setText(re, text1, -1, &status);
    637         result = uregex_find(re, 0, &status);
    638         TEST_ASSERT(result==TRUE);
    639 
    640         /*  Capture Group 0, the full match.  Should succeed.  */
    641         status = U_ZERO_ERROR;
    642         resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
    643         TEST_ASSERT_SUCCESS(status);
    644         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
    645         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    646 
    647         /*  Capture group #1.  Should succeed. */
    648         status = U_ZERO_ERROR;
    649         resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
    650         TEST_ASSERT_SUCCESS(status);
    651         TEST_ASSERT_STRING(" interior ", buf, TRUE);
    652         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
    653 
    654         /*  Capture group out of range.  Error. */
    655         status = U_ZERO_ERROR;
    656         uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
    657         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
    658 
    659         /* NULL buffer, pure pre-flight */
    660         status = U_ZERO_ERROR;
    661         resultSz = uregex_group(re, 0, NULL, 0, &status);
    662         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    663         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    664 
    665         /* Too small buffer, truncated string */
    666         status = U_ZERO_ERROR;
    667         memset(buf, -1, sizeof(buf));
    668         resultSz = uregex_group(re, 0, buf, 5, &status);
    669         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    670         TEST_ASSERT_STRING("abc i", buf, FALSE);
    671         TEST_ASSERT(buf[5] == (UChar)0xffff);
    672         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    673 
    674         /* Output string just fits buffer, no NUL term. */
    675         status = U_ZERO_ERROR;
    676         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
    677         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    678         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
    679         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    680         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
    681 
    682         uregex_close(re);
    683 
    684     }
    685 
    686     /*
    687      *  Regions
    688      */
    689 
    690 
    691         /* SetRegion(), getRegion() do something  */
    692         TEST_SETUP(".*", "0123456789ABCDEF", 0)
    693         UChar resultString[40];
    694         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
    695         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
    696         uregex_setRegion(re, 3, 6, &status);
    697         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
    698         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
    699         TEST_ASSERT(uregex_findNext(re, &status));
    700         TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
    701         TEST_ASSERT_STRING("345", resultString, TRUE);
    702         TEST_TEARDOWN;
    703 
    704         /* find(start=-1) uses regions   */
    705         TEST_SETUP(".*", "0123456789ABCDEF", 0);
    706         uregex_setRegion(re, 4, 6, &status);
    707         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
    708         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    709         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
    710         TEST_TEARDOWN;
    711 
    712         /* find (start >=0) does not use regions   */
    713         TEST_SETUP(".*", "0123456789ABCDEF", 0);
    714         uregex_setRegion(re, 4, 6, &status);
    715         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    716         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    717         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
    718         TEST_TEARDOWN;
    719 
    720         /* findNext() obeys regions    */
    721         TEST_SETUP(".", "0123456789ABCDEF", 0);
    722         uregex_setRegion(re, 4, 6, &status);
    723         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
    724         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    725         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
    726         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
    727         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
    728         TEST_TEARDOWN;
    729 
    730         /* matches(start=-1) uses regions                                           */
    731         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
    732         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    733         uregex_setRegion(re, 4, 6, &status);
    734         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
    735         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    736         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
    737         TEST_TEARDOWN;
    738 
    739         /* matches (start >=0) does not use regions       */
    740         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    741         uregex_setRegion(re, 4, 6, &status);
    742         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
    743         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    744         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
    745         TEST_TEARDOWN;
    746 
    747         /* lookingAt(start=-1) uses regions                                         */
    748         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
    749         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    750         uregex_setRegion(re, 4, 6, &status);
    751         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
    752         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    753         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
    754         TEST_TEARDOWN;
    755 
    756         /* lookingAt (start >=0) does not use regions  */
    757         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    758         uregex_setRegion(re, 4, 6, &status);
    759         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
    760         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    761         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
    762         TEST_TEARDOWN;
    763 
    764         /* hitEnd()       */
    765         TEST_SETUP("[a-f]*", "abcdefghij", 0);
    766         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    767         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
    768         TEST_TEARDOWN;
    769 
    770         TEST_SETUP("[a-f]*", "abcdef", 0);
    771         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    772         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
    773         TEST_TEARDOWN;
    774 
    775         /* requireEnd   */
    776         TEST_SETUP("abcd", "abcd", 0);
    777         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    778         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
    779         TEST_TEARDOWN;
    780 
    781         TEST_SETUP("abcd$", "abcd", 0);
    782         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    783         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
    784         TEST_TEARDOWN;
    785 
    786         /* anchoringBounds        */
    787         TEST_SETUP("abc$", "abcdef", 0);
    788         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
    789         uregex_useAnchoringBounds(re, FALSE, &status);
    790         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
    791 
    792         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
    793         uregex_useAnchoringBounds(re, TRUE, &status);
    794         uregex_setRegion(re, 0, 3, &status);
    795         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
    796         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
    797         TEST_TEARDOWN;
    798 
    799         /* Transparent Bounds      */
    800         TEST_SETUP("abc(?=def)", "abcdef", 0);
    801         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
    802         uregex_useTransparentBounds(re, TRUE, &status);
    803         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
    804 
    805         uregex_useTransparentBounds(re, FALSE, &status);
    806         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
    807         uregex_setRegion(re, 0, 3, &status);
    808         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
    809         uregex_useTransparentBounds(re, TRUE, &status);
    810         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
    811         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
    812         TEST_TEARDOWN;
    813 
    814 
    815     /*
    816      *  replaceFirst()
    817      */
    818     {
    819         UChar    text1[80];
    820         UChar    text2[80];
    821         UChar    replText[80];
    822         UChar    buf[80];
    823         int32_t  resultSz;
    824         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
    825         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
    826         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
    827 
    828         status = U_ZERO_ERROR;
    829         re = uregex_openC("x(.*?)x", 0, NULL, &status);
    830         TEST_ASSERT_SUCCESS(status);
    831 
    832         /*  Normal case, with match */
    833         uregex_setText(re, text1, -1, &status);
    834         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
    835         TEST_ASSERT_SUCCESS(status);
    836         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
    837         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    838 
    839         /* No match.  Text should copy to output with no changes.  */
    840         status = U_ZERO_ERROR;
    841         uregex_setText(re, text2, -1, &status);
    842         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
    843         TEST_ASSERT_SUCCESS(status);
    844         TEST_ASSERT_STRING("No match here.", buf, TRUE);
    845         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
    846 
    847         /*  Match, output just fills buffer, no termination warning. */
    848         status = U_ZERO_ERROR;
    849         uregex_setText(re, text1, -1, &status);
    850         memset(buf, -1, sizeof(buf));
    851         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
    852         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    853         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
    854         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    855         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    856 
    857         /* Do the replaceFirst again, without first resetting anything.
    858          *  Should give the same results.
    859          */
    860         status = U_ZERO_ERROR;
    861         memset(buf, -1, sizeof(buf));
    862         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
    863         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    864         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
    865         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    866         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    867 
    868         /* NULL buffer, zero buffer length */
    869         status = U_ZERO_ERROR;
    870         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
    871         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    872         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    873 
    874         /* Buffer too small by one */
    875         status = U_ZERO_ERROR;
    876         memset(buf, -1, sizeof(buf));
    877         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
    878         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    879         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
    880         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    881         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    882 
    883         uregex_close(re);
    884     }
    885 
    886 
    887     /*
    888      *  replaceAll()
    889      */
    890     {
    891         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
    892         UChar    text2[80];          /*  "No match Here"           */
    893         UChar    replText[80];       /*  "<$1>"                    */
    894         UChar    replText2[80];      /*  "<<$1>>"                  */
    895         const char * pattern = "x(.*?)x";
    896         const char * expectedResult = "Replace <aa> <1> <...>.";
    897         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
    898         UChar    buf[80];
    899         int32_t  resultSize;
    900         int32_t  expectedResultSize;
    901         int32_t  expectedResultSize2;
    902         int32_t  i;
    903 
    904         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
    905         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
    906         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
    907         u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
    908         expectedResultSize = strlen(expectedResult);
    909         expectedResultSize2 = strlen(expectedResult2);
    910 
    911         status = U_ZERO_ERROR;
    912         re = uregex_openC(pattern, 0, NULL, &status);
    913         TEST_ASSERT_SUCCESS(status);
    914 
    915         /*  Normal case, with match */
    916         uregex_setText(re, text1, -1, &status);
    917         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
    918         TEST_ASSERT_SUCCESS(status);
    919         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
    920         TEST_ASSERT(resultSize == expectedResultSize);
    921 
    922         /* No match.  Text should copy to output with no changes.  */
    923         status = U_ZERO_ERROR;
    924         uregex_setText(re, text2, -1, &status);
    925         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
    926         TEST_ASSERT_SUCCESS(status);
    927         TEST_ASSERT_STRING("No match here.", buf, TRUE);
    928         TEST_ASSERT(resultSize == u_strlen(text2));
    929 
    930         /*  Match, output just fills buffer, no termination warning. */
    931         status = U_ZERO_ERROR;
    932         uregex_setText(re, text1, -1, &status);
    933         memset(buf, -1, sizeof(buf));
    934         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
    935         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    936         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
    937         TEST_ASSERT(resultSize == expectedResultSize);
    938         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
    939 
    940         /* Do the replaceAll again, without first resetting anything.
    941          *  Should give the same results.
    942          */
    943         status = U_ZERO_ERROR;
    944         memset(buf, -1, sizeof(buf));
    945         resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
    946         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    947         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
    948         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
    949         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
    950 
    951         /* NULL buffer, zero buffer length */
    952         status = U_ZERO_ERROR;
    953         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
    954         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    955         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
    956 
    957         /* Buffer too small.  Try every size, which will tickle edge cases
    958          * in uregex_appendReplacement (used by replaceAll)   */
    959         for (i=0; i<expectedResultSize; i++) {
    960             char  expected[80];
    961             status = U_ZERO_ERROR;
    962             memset(buf, -1, sizeof(buf));
    963             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
    964             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    965             strcpy(expected, expectedResult);
    966             expected[i] = 0;
    967             TEST_ASSERT_STRING(expected, buf, FALSE);
    968             TEST_ASSERT(resultSize == expectedResultSize);
    969             TEST_ASSERT(buf[i] == (UChar)0xffff);
    970         }
    971 
    972         /* Buffer too small.  Same as previous test, except this time the replacement
    973          * text is longer than the match capture group, making the length of the complete
    974          * replacement longer than the original string.
    975          */
    976         for (i=0; i<expectedResultSize2; i++) {
    977             char  expected[80];
    978             status = U_ZERO_ERROR;
    979             memset(buf, -1, sizeof(buf));
    980             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
    981             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    982             strcpy(expected, expectedResult2);
    983             expected[i] = 0;
    984             TEST_ASSERT_STRING(expected, buf, FALSE);
    985             TEST_ASSERT(resultSize == expectedResultSize2);
    986             TEST_ASSERT(buf[i] == (UChar)0xffff);
    987         }
    988 
    989 
    990         uregex_close(re);
    991     }
    992 
    993 
    994     /*
    995      *  appendReplacement()
    996      */
    997     {
    998         UChar    text[100];
    999         UChar    repl[100];
   1000         UChar    buf[100];
   1001         UChar   *bufPtr;
   1002         int32_t  bufCap;
   1003 
   1004 
   1005         status = U_ZERO_ERROR;
   1006         re = uregex_openC(".*", 0, 0, &status);
   1007         TEST_ASSERT_SUCCESS(status);
   1008 
   1009         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
   1010         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
   1011         uregex_setText(re, text, -1, &status);
   1012 
   1013         /* match covers whole target string */
   1014         uregex_find(re, 0, &status);
   1015         TEST_ASSERT_SUCCESS(status);
   1016         bufPtr = buf;
   1017         bufCap = UPRV_LENGTHOF(buf);
   1018         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1019         TEST_ASSERT_SUCCESS(status);
   1020         TEST_ASSERT_STRING("some other", buf, TRUE);
   1021 
   1022         /* Match has \u \U escapes */
   1023         uregex_find(re, 0, &status);
   1024         TEST_ASSERT_SUCCESS(status);
   1025         bufPtr = buf;
   1026         bufCap = UPRV_LENGTHOF(buf);
   1027         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
   1028         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1029         TEST_ASSERT_SUCCESS(status);
   1030         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
   1031 
   1032         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
   1033         status = U_ZERO_ERROR;
   1034         uregex_find(re, 0, &status);
   1035         TEST_ASSERT_SUCCESS(status);
   1036         bufPtr = buf;
   1037         status = U_BUFFER_OVERFLOW_ERROR;
   1038         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
   1039         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
   1040 
   1041         uregex_close(re);
   1042     }
   1043 
   1044 
   1045     /*
   1046      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
   1047      */
   1048 
   1049     /*
   1050      *  split()
   1051      */
   1052     {
   1053         UChar    textToSplit[80];
   1054         UChar    text2[80];
   1055         UChar    buf[200];
   1056         UChar    *fields[10];
   1057         int32_t  numFields;
   1058         int32_t  requiredCapacity;
   1059         int32_t  spaceNeeded;
   1060         int32_t  sz;
   1061 
   1062         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
   1063         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
   1064 
   1065         status = U_ZERO_ERROR;
   1066         re = uregex_openC(":", 0, NULL, &status);
   1067 
   1068 
   1069         /*  Simple split */
   1070 
   1071         uregex_setText(re, textToSplit, -1, &status);
   1072         TEST_ASSERT_SUCCESS(status);
   1073 
   1074         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1075         if (U_SUCCESS(status)) {
   1076             memset(fields, -1, sizeof(fields));
   1077             numFields =
   1078                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
   1079             TEST_ASSERT_SUCCESS(status);
   1080 
   1081             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1082             if(U_SUCCESS(status)) {
   1083                 TEST_ASSERT(numFields == 3);
   1084                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1085                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
   1086                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
   1087                 TEST_ASSERT(fields[3] == NULL);
   1088 
   1089                 spaceNeeded = u_strlen(textToSplit) -
   1090                             (numFields - 1)  +  /* Field delimiters do not appear in output */
   1091                             numFields;          /* Each field gets a NUL terminator */
   1092 
   1093                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1094             }
   1095         }
   1096 
   1097         uregex_close(re);
   1098 
   1099 
   1100         /*  Split with too few output strings available */
   1101         status = U_ZERO_ERROR;
   1102         re = uregex_openC(":", 0, NULL, &status);
   1103         uregex_setText(re, textToSplit, -1, &status);
   1104         TEST_ASSERT_SUCCESS(status);
   1105 
   1106         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1107         if(U_SUCCESS(status)) {
   1108             memset(fields, -1, sizeof(fields));
   1109             numFields =
   1110                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
   1111             TEST_ASSERT_SUCCESS(status);
   1112 
   1113             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1114             if(U_SUCCESS(status)) {
   1115                 TEST_ASSERT(numFields == 2);
   1116                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1117                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
   1118                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
   1119 
   1120                 spaceNeeded = u_strlen(textToSplit) -
   1121                             (numFields - 1)  +  /* Field delimiters do not appear in output */
   1122                             numFields;          /* Each field gets a NUL terminator */
   1123 
   1124                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1125 
   1126                 /* Split with a range of output buffer sizes.  */
   1127                 spaceNeeded = u_strlen(textToSplit) -
   1128                     (numFields - 1)  +  /* Field delimiters do not appear in output */
   1129                     numFields;          /* Each field gets a NUL terminator */
   1130 
   1131                 for (sz=0; sz < spaceNeeded+1; sz++) {
   1132                     memset(fields, -1, sizeof(fields));
   1133                     status = U_ZERO_ERROR;
   1134                     numFields =
   1135                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
   1136                     if (sz >= spaceNeeded) {
   1137                         TEST_ASSERT_SUCCESS(status);
   1138                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1139                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
   1140                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
   1141                     } else {
   1142                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
   1143                     }
   1144                     TEST_ASSERT(numFields == 3);
   1145                     TEST_ASSERT(fields[3] == NULL);
   1146                     TEST_ASSERT(spaceNeeded == requiredCapacity);
   1147                 }
   1148             }
   1149         }
   1150 
   1151         uregex_close(re);
   1152     }
   1153 
   1154 
   1155 
   1156 
   1157     /* Split(), part 2.  Patterns with capture groups.  The capture group text
   1158      *                   comes out as additional fields.  */
   1159     {
   1160         UChar    textToSplit[80];
   1161         UChar    buf[200];
   1162         UChar    *fields[10];
   1163         int32_t  numFields;
   1164         int32_t  requiredCapacity;
   1165         int32_t  spaceNeeded;
   1166         int32_t  sz;
   1167 
   1168         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
   1169 
   1170         status = U_ZERO_ERROR;
   1171         re = uregex_openC("<(.*?)>", 0, NULL, &status);
   1172 
   1173         uregex_setText(re, textToSplit, -1, &status);
   1174         TEST_ASSERT_SUCCESS(status);
   1175 
   1176         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1177         if(U_SUCCESS(status)) {
   1178             memset(fields, -1, sizeof(fields));
   1179             numFields =
   1180                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
   1181             TEST_ASSERT_SUCCESS(status);
   1182 
   1183             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1184             if(U_SUCCESS(status)) {
   1185                 TEST_ASSERT(numFields == 5);
   1186                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1187                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1188                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1189                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1190                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
   1191                 TEST_ASSERT(fields[5] == NULL);
   1192                 spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
   1193                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1194             }
   1195         }
   1196 
   1197         /*  Split with too few output strings available (2) */
   1198         status = U_ZERO_ERROR;
   1199         memset(fields, -1, sizeof(fields));
   1200         numFields =
   1201             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
   1202         TEST_ASSERT_SUCCESS(status);
   1203 
   1204         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1205         if(U_SUCCESS(status)) {
   1206             TEST_ASSERT(numFields == 2);
   1207             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1208             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
   1209             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
   1210 
   1211             spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
   1212             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1213         }
   1214 
   1215         /*  Split with too few output strings available (3) */
   1216         status = U_ZERO_ERROR;
   1217         memset(fields, -1, sizeof(fields));
   1218         numFields =
   1219             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
   1220         TEST_ASSERT_SUCCESS(status);
   1221 
   1222         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1223         if(U_SUCCESS(status)) {
   1224             TEST_ASSERT(numFields == 3);
   1225             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1226             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1227             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
   1228             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
   1229 
   1230             spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
   1231             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1232         }
   1233 
   1234         /*  Split with just enough output strings available (5) */
   1235         status = U_ZERO_ERROR;
   1236         memset(fields, -1, sizeof(fields));
   1237         numFields =
   1238             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
   1239         TEST_ASSERT_SUCCESS(status);
   1240 
   1241         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1242         if(U_SUCCESS(status)) {
   1243             TEST_ASSERT(numFields == 5);
   1244             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1245             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1246             TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1247             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1248             TEST_ASSERT_STRING("  third", fields[4], TRUE);
   1249             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
   1250 
   1251             spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
   1252             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1253         }
   1254 
   1255         /* Split, end of text is a field delimiter.   */
   1256         status = U_ZERO_ERROR;
   1257         sz = strlen("first <tag-a> second<tag-b>");
   1258         uregex_setText(re, textToSplit, sz, &status);
   1259         TEST_ASSERT_SUCCESS(status);
   1260 
   1261         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1262         if(U_SUCCESS(status)) {
   1263             memset(fields, -1, sizeof(fields));
   1264             numFields =
   1265                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
   1266             TEST_ASSERT_SUCCESS(status);
   1267 
   1268             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1269             if(U_SUCCESS(status)) {
   1270                 TEST_ASSERT(numFields == 5);
   1271                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1272                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1273                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1274                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1275                 TEST_ASSERT_STRING("",        fields[4], TRUE);
   1276                 TEST_ASSERT(fields[5] == NULL);
   1277                 TEST_ASSERT(fields[8] == NULL);
   1278                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
   1279                 spaceNeeded = strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
   1280                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1281             }
   1282         }
   1283 
   1284         uregex_close(re);
   1285     }
   1286 
   1287     /*
   1288      * set/getTimeLimit
   1289      */
   1290      TEST_SETUP("abc$", "abcdef", 0);
   1291      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
   1292      uregex_setTimeLimit(re, 1000, &status);
   1293      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
   1294      TEST_ASSERT_SUCCESS(status);
   1295      uregex_setTimeLimit(re, -1, &status);
   1296      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
   1297      status = U_ZERO_ERROR;
   1298      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
   1299      TEST_TEARDOWN;
   1300 
   1301      /*
   1302       * set/get Stack Limit
   1303       */
   1304      TEST_SETUP("abc$", "abcdef", 0);
   1305      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
   1306      uregex_setStackLimit(re, 40000, &status);
   1307      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
   1308      TEST_ASSERT_SUCCESS(status);
   1309      uregex_setStackLimit(re, -1, &status);
   1310      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
   1311      status = U_ZERO_ERROR;
   1312      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
   1313      TEST_TEARDOWN;
   1314 
   1315 
   1316      /*
   1317       * Get/Set callback functions
   1318       *     This test is copied from intltest regex/Callbacks
   1319       *     The pattern and test data will run long enough to cause the callback
   1320       *       to be invoked.  The nested '+' operators give exponential time
   1321       *       behavior with increasing string length.
   1322       */
   1323      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
   1324      callBackContext cbInfo = {4, 0, 0};
   1325      const void     *pContext   = &cbInfo;
   1326      URegexMatchCallback    *returnedFn = &TestCallbackFn;
   1327 
   1328      /*  Getting the callback fn when it hasn't been set must return NULL  */
   1329      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
   1330      TEST_ASSERT_SUCCESS(status);
   1331      TEST_ASSERT(returnedFn == NULL);
   1332      TEST_ASSERT(pContext == NULL);
   1333 
   1334      /* Set the callback and do a match.                                  */
   1335      /* The callback function should record that it has been called.      */
   1336      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
   1337      TEST_ASSERT_SUCCESS(status);
   1338      TEST_ASSERT(cbInfo.numCalls == 0);
   1339      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
   1340      TEST_ASSERT_SUCCESS(status);
   1341      TEST_ASSERT(cbInfo.numCalls > 0);
   1342 
   1343      /* Getting the callback should return the values that were set above.  */
   1344      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
   1345      TEST_ASSERT(returnedFn == &TestCallbackFn);
   1346      TEST_ASSERT(pContext == &cbInfo);
   1347 
   1348      TEST_TEARDOWN;
   1349 }
   1350 
   1351 
   1352 
   1353 static void TestBug4315(void) {
   1354     UErrorCode      theICUError = U_ZERO_ERROR;
   1355     URegularExpression *theRegEx;
   1356     UChar           *textBuff;
   1357     const char      *thePattern;
   1358     UChar            theString[100];
   1359     UChar           *destFields[24];
   1360     int32_t         neededLength1;
   1361     int32_t         neededLength2;
   1362 
   1363     int32_t         wordCount = 0;
   1364     int32_t         destFieldsSize = 24;
   1365 
   1366     thePattern  = "ck ";
   1367     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
   1368 
   1369     /* open a regex */
   1370     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
   1371     TEST_ASSERT_SUCCESS(theICUError);
   1372 
   1373     /* set the input string */
   1374     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
   1375     TEST_ASSERT_SUCCESS(theICUError);
   1376 
   1377     /* split */
   1378     /*explicitly pass NULL and 0 to force the overflow error -> this is where the
   1379      *  error occurs! */
   1380     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
   1381         destFieldsSize, &theICUError);
   1382 
   1383     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
   1384     TEST_ASSERT(wordCount==3);
   1385 
   1386     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
   1387     {
   1388         theICUError = U_ZERO_ERROR;
   1389         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
   1390         wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
   1391             destFields, destFieldsSize, &theICUError);
   1392         TEST_ASSERT(wordCount==3);
   1393         TEST_ASSERT_SUCCESS(theICUError);
   1394         TEST_ASSERT(neededLength1 == neededLength2);
   1395         TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
   1396         TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
   1397         TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
   1398         TEST_ASSERT(destFields[3] == NULL);
   1399         free(textBuff);
   1400     }
   1401     uregex_close(theRegEx);
   1402 }
   1403 
   1404 /* Based on TestRegexCAPI() */
   1405 static void TestUTextAPI(void) {
   1406     UErrorCode           status = U_ZERO_ERROR;
   1407     URegularExpression  *re;
   1408     UText                patternText = UTEXT_INITIALIZER;
   1409     UChar                pat[200];
   1410     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
   1411 
   1412     /* Minimalist open/close */
   1413     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
   1414     re = uregex_openUText(&patternText, 0, 0, &status);
   1415     if (U_FAILURE(status)) {
   1416          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
   1417          utext_close(&patternText);
   1418          return;
   1419     }
   1420     uregex_close(re);
   1421 
   1422     /* Open with all flag values set */
   1423     status = U_ZERO_ERROR;
   1424     re = uregex_openUText(&patternText,
   1425         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
   1426         0, &status);
   1427     TEST_ASSERT_SUCCESS(status);
   1428     uregex_close(re);
   1429 
   1430     /* Open with an invalid flag */
   1431     status = U_ZERO_ERROR;
   1432     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
   1433     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
   1434     uregex_close(re);
   1435 
   1436     /* open with an invalid parameter */
   1437     status = U_ZERO_ERROR;
   1438     re = uregex_openUText(NULL,
   1439         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
   1440     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
   1441 
   1442     /*
   1443      *  clone
   1444      */
   1445     {
   1446         URegularExpression *clone1;
   1447         URegularExpression *clone2;
   1448         URegularExpression *clone3;
   1449         UChar  testString1[30];
   1450         UChar  testString2[30];
   1451         UBool  result;
   1452 
   1453 
   1454         status = U_ZERO_ERROR;
   1455         re = uregex_openUText(&patternText, 0, 0, &status);
   1456         TEST_ASSERT_SUCCESS(status);
   1457         clone1 = uregex_clone(re, &status);
   1458         TEST_ASSERT_SUCCESS(status);
   1459         TEST_ASSERT(clone1 != NULL);
   1460 
   1461         status = U_ZERO_ERROR;
   1462         clone2 = uregex_clone(re, &status);
   1463         TEST_ASSERT_SUCCESS(status);
   1464         TEST_ASSERT(clone2 != NULL);
   1465         uregex_close(re);
   1466 
   1467         status = U_ZERO_ERROR;
   1468         clone3 = uregex_clone(clone2, &status);
   1469         TEST_ASSERT_SUCCESS(status);
   1470         TEST_ASSERT(clone3 != NULL);
   1471 
   1472         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
   1473         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
   1474 
   1475         status = U_ZERO_ERROR;
   1476         uregex_setText(clone1, testString1, -1, &status);
   1477         TEST_ASSERT_SUCCESS(status);
   1478         result = uregex_lookingAt(clone1, 0, &status);
   1479         TEST_ASSERT_SUCCESS(status);
   1480         TEST_ASSERT(result==TRUE);
   1481 
   1482         status = U_ZERO_ERROR;
   1483         uregex_setText(clone2, testString2, -1, &status);
   1484         TEST_ASSERT_SUCCESS(status);
   1485         result = uregex_lookingAt(clone2, 0, &status);
   1486         TEST_ASSERT_SUCCESS(status);
   1487         TEST_ASSERT(result==FALSE);
   1488         result = uregex_find(clone2, 0, &status);
   1489         TEST_ASSERT_SUCCESS(status);
   1490         TEST_ASSERT(result==TRUE);
   1491 
   1492         uregex_close(clone1);
   1493         uregex_close(clone2);
   1494         uregex_close(clone3);
   1495 
   1496     }
   1497 
   1498     /*
   1499      *  pattern() and patternUText()
   1500      */
   1501     {
   1502         const UChar  *resultPat;
   1503         int32_t       resultLen;
   1504         UText        *resultText;
   1505         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
   1506         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
   1507         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
   1508         status = U_ZERO_ERROR;
   1509 
   1510         utext_openUTF8(&patternText, str_hello, -1, &status);
   1511         re = uregex_open(pat, -1, 0, NULL, &status);
   1512         resultPat = uregex_pattern(re, &resultLen, &status);
   1513         TEST_ASSERT_SUCCESS(status);
   1514 
   1515         /* The TEST_ASSERT_SUCCESS above should change too... */
   1516         if (U_SUCCESS(status)) {
   1517             TEST_ASSERT(resultLen == -1);
   1518             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
   1519         }
   1520 
   1521         resultText = uregex_patternUText(re, &status);
   1522         TEST_ASSERT_SUCCESS(status);
   1523         TEST_ASSERT_UTEXT(str_hello, resultText);
   1524 
   1525         uregex_close(re);
   1526 
   1527         status = U_ZERO_ERROR;
   1528         re = uregex_open(pat, 3, 0, NULL, &status);
   1529         resultPat = uregex_pattern(re, &resultLen, &status);
   1530         TEST_ASSERT_SUCCESS(status);
   1531 
   1532         /* The TEST_ASSERT_SUCCESS above should change too... */
   1533         if (U_SUCCESS(status)) {
   1534             TEST_ASSERT(resultLen == 3);
   1535             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
   1536             TEST_ASSERT(u_strlen(resultPat) == 3);
   1537         }
   1538 
   1539         resultText = uregex_patternUText(re, &status);
   1540         TEST_ASSERT_SUCCESS(status);
   1541         TEST_ASSERT_UTEXT(str_hel, resultText);
   1542 
   1543         uregex_close(re);
   1544     }
   1545 
   1546     /*
   1547      *  setUText() and lookingAt()
   1548      */
   1549     {
   1550         UText  text1 = UTEXT_INITIALIZER;
   1551         UText  text2 = UTEXT_INITIALIZER;
   1552         UBool  result;
   1553         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
   1554         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
   1555         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
   1556         status = U_ZERO_ERROR;
   1557         utext_openUTF8(&text1, str_abcccd, -1, &status);
   1558         utext_openUTF8(&text2, str_abcccxd, -1, &status);
   1559 
   1560         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1561         re = uregex_openUText(&patternText, 0, NULL, &status);
   1562         TEST_ASSERT_SUCCESS(status);
   1563 
   1564         /* Operation before doing a setText should fail... */
   1565         status = U_ZERO_ERROR;
   1566         uregex_lookingAt(re, 0, &status);
   1567         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
   1568 
   1569         status = U_ZERO_ERROR;
   1570         uregex_setUText(re, &text1, &status);
   1571         result = uregex_lookingAt(re, 0, &status);
   1572         TEST_ASSERT(result == TRUE);
   1573         TEST_ASSERT_SUCCESS(status);
   1574 
   1575         status = U_ZERO_ERROR;
   1576         uregex_setUText(re, &text2, &status);
   1577         result = uregex_lookingAt(re, 0, &status);
   1578         TEST_ASSERT(result == FALSE);
   1579         TEST_ASSERT_SUCCESS(status);
   1580 
   1581         status = U_ZERO_ERROR;
   1582         uregex_setUText(re, &text1, &status);
   1583         result = uregex_lookingAt(re, 0, &status);
   1584         TEST_ASSERT(result == TRUE);
   1585         TEST_ASSERT_SUCCESS(status);
   1586 
   1587         uregex_close(re);
   1588         utext_close(&text1);
   1589         utext_close(&text2);
   1590     }
   1591 
   1592 
   1593     /*
   1594      *  getText() and getUText()
   1595      */
   1596     {
   1597         UText  text1 = UTEXT_INITIALIZER;
   1598         UText  text2 = UTEXT_INITIALIZER;
   1599         UChar  text2Chars[20];
   1600         UText  *resultText;
   1601         const UChar   *result;
   1602         int32_t  textLength;
   1603         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
   1604         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
   1605         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
   1606 
   1607 
   1608         status = U_ZERO_ERROR;
   1609         utext_openUTF8(&text1, str_abcccd, -1, &status);
   1610         u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
   1611         utext_openUChars(&text2, text2Chars, -1, &status);
   1612 
   1613         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1614         re = uregex_openUText(&patternText, 0, NULL, &status);
   1615 
   1616         /* First set a UText */
   1617         uregex_setUText(re, &text1, &status);
   1618         resultText = uregex_getUText(re, NULL, &status);
   1619         TEST_ASSERT_SUCCESS(status);
   1620         TEST_ASSERT(resultText != &text1);
   1621         utext_setNativeIndex(resultText, 0);
   1622         utext_setNativeIndex(&text1, 0);
   1623         TEST_ASSERT(testUTextEqual(resultText, &text1));
   1624         utext_close(resultText);
   1625 
   1626         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
   1627         (void)result;    /* Suppress set but not used warning. */
   1628         TEST_ASSERT(textLength == -1 || textLength == 6);
   1629         resultText = uregex_getUText(re, NULL, &status);
   1630         TEST_ASSERT_SUCCESS(status);
   1631         TEST_ASSERT(resultText != &text1);
   1632         utext_setNativeIndex(resultText, 0);
   1633         utext_setNativeIndex(&text1, 0);
   1634         TEST_ASSERT(testUTextEqual(resultText, &text1));
   1635         utext_close(resultText);
   1636 
   1637         /* Then set a UChar * */
   1638         uregex_setText(re, text2Chars, 7, &status);
   1639         resultText = uregex_getUText(re, NULL, &status);
   1640         TEST_ASSERT_SUCCESS(status);
   1641         utext_setNativeIndex(resultText, 0);
   1642         utext_setNativeIndex(&text2, 0);
   1643         TEST_ASSERT(testUTextEqual(resultText, &text2));
   1644         utext_close(resultText);
   1645         result = uregex_getText(re, &textLength, &status);
   1646         TEST_ASSERT(textLength == 7);
   1647 
   1648         uregex_close(re);
   1649         utext_close(&text1);
   1650         utext_close(&text2);
   1651     }
   1652 
   1653     /*
   1654      *  matches()
   1655      */
   1656     {
   1657         UText   text1 = UTEXT_INITIALIZER;
   1658         UBool   result;
   1659         UText   nullText = UTEXT_INITIALIZER;
   1660         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
   1661         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
   1662 
   1663         status = U_ZERO_ERROR;
   1664         utext_openUTF8(&text1, str_abcccde, -1, &status);
   1665         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1666         re = uregex_openUText(&patternText, 0, NULL, &status);
   1667 
   1668         uregex_setUText(re, &text1, &status);
   1669         result = uregex_matches(re, 0, &status);
   1670         TEST_ASSERT(result == FALSE);
   1671         TEST_ASSERT_SUCCESS(status);
   1672         uregex_close(re);
   1673 
   1674         status = U_ZERO_ERROR;
   1675         re = uregex_openC(".?", 0, NULL, &status);
   1676         uregex_setUText(re, &text1, &status);
   1677         result = uregex_matches(re, 7, &status);
   1678         TEST_ASSERT(result == TRUE);
   1679         TEST_ASSERT_SUCCESS(status);
   1680 
   1681         status = U_ZERO_ERROR;
   1682         utext_openUTF8(&nullText, "", -1, &status);
   1683         uregex_setUText(re, &nullText, &status);
   1684         TEST_ASSERT_SUCCESS(status);
   1685         result = uregex_matches(re, 0, &status);
   1686         TEST_ASSERT(result == TRUE);
   1687         TEST_ASSERT_SUCCESS(status);
   1688 
   1689         uregex_close(re);
   1690         utext_close(&text1);
   1691         utext_close(&nullText);
   1692     }
   1693 
   1694 
   1695     /*
   1696      *  lookingAt()    Used in setText test.
   1697      */
   1698 
   1699 
   1700     /*
   1701      *  find(), findNext, start, end, reset
   1702      */
   1703     {
   1704         UChar    text1[50];
   1705         UBool    result;
   1706         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
   1707         status = U_ZERO_ERROR;
   1708         re = uregex_openC("rx", 0, NULL, &status);
   1709 
   1710         uregex_setText(re, text1, -1, &status);
   1711         result = uregex_find(re, 0, &status);
   1712         TEST_ASSERT(result == TRUE);
   1713         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
   1714         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
   1715         TEST_ASSERT_SUCCESS(status);
   1716 
   1717         result = uregex_find(re, 9, &status);
   1718         TEST_ASSERT(result == TRUE);
   1719         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
   1720         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
   1721         TEST_ASSERT_SUCCESS(status);
   1722 
   1723         result = uregex_find(re, 14, &status);
   1724         TEST_ASSERT(result == FALSE);
   1725         TEST_ASSERT_SUCCESS(status);
   1726 
   1727         status = U_ZERO_ERROR;
   1728         uregex_reset(re, 0, &status);
   1729 
   1730         result = uregex_findNext(re, &status);
   1731         TEST_ASSERT(result == TRUE);
   1732         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
   1733         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
   1734         TEST_ASSERT_SUCCESS(status);
   1735 
   1736         result = uregex_findNext(re, &status);
   1737         TEST_ASSERT(result == TRUE);
   1738         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
   1739         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
   1740         TEST_ASSERT_SUCCESS(status);
   1741 
   1742         status = U_ZERO_ERROR;
   1743         uregex_reset(re, 12, &status);
   1744 
   1745         result = uregex_findNext(re, &status);
   1746         TEST_ASSERT(result == TRUE);
   1747         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
   1748         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
   1749         TEST_ASSERT_SUCCESS(status);
   1750 
   1751         result = uregex_findNext(re, &status);
   1752         TEST_ASSERT(result == FALSE);
   1753         TEST_ASSERT_SUCCESS(status);
   1754 
   1755         uregex_close(re);
   1756     }
   1757 
   1758     /*
   1759      *  groupUText()
   1760      */
   1761     {
   1762         UChar    text1[80];
   1763         UText   *actual;
   1764         UBool    result;
   1765         int64_t  groupLen = 0;
   1766         UChar    groupBuf[20];
   1767 
   1768         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
   1769 
   1770         status = U_ZERO_ERROR;
   1771         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
   1772         TEST_ASSERT_SUCCESS(status);
   1773 
   1774         uregex_setText(re, text1, -1, &status);
   1775         result = uregex_find(re, 0, &status);
   1776         TEST_ASSERT(result==TRUE);
   1777 
   1778         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
   1779         status = U_ZERO_ERROR;
   1780         actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
   1781         TEST_ASSERT_SUCCESS(status);
   1782 
   1783         TEST_ASSERT(utext_getNativeIndex(actual) == 6);  /* index of "abc " within "noise abc ..." */
   1784         TEST_ASSERT(groupLen == 16);   /* length of "abc interior def"  */
   1785         utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
   1786 
   1787         TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
   1788         utext_close(actual);
   1789 
   1790         /*  Capture group #1.  Should succeed. */
   1791         status = U_ZERO_ERROR;
   1792 
   1793         actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
   1794         TEST_ASSERT_SUCCESS(status);
   1795         TEST_ASSERT(9 == utext_getNativeIndex(actual));    /* index of " interior " within "noise abc interior def ... " */
   1796                                                            /*    (within the string text1)           */
   1797         TEST_ASSERT(10 == groupLen);                       /* length of " interior " */
   1798         utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
   1799         TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
   1800 
   1801         utext_close(actual);
   1802 
   1803         /*  Capture group out of range.  Error. */
   1804         status = U_ZERO_ERROR;
   1805         actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
   1806         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
   1807         utext_close(actual);
   1808 
   1809         uregex_close(re);
   1810     }
   1811 
   1812     /*
   1813      *  replaceFirst()
   1814      */
   1815     {
   1816         UChar    text1[80];
   1817         UChar    text2[80];
   1818         UText    replText = UTEXT_INITIALIZER;
   1819         UText   *result;
   1820         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
   1821         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
   1822         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
   1823                0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
   1824         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
   1825         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
   1826         status = U_ZERO_ERROR;
   1827         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
   1828         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
   1829         utext_openUTF8(&replText, str_1x, -1, &status);
   1830 
   1831         re = uregex_openC("x(.*?)x", 0, NULL, &status);
   1832         TEST_ASSERT_SUCCESS(status);
   1833 
   1834         /*  Normal case, with match */
   1835         uregex_setText(re, text1, -1, &status);
   1836         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1837         TEST_ASSERT_SUCCESS(status);
   1838         TEST_ASSERT_UTEXT(str_Replxxx, result);
   1839         utext_close(result);
   1840 
   1841         /* No match.  Text should copy to output with no changes.  */
   1842         uregex_setText(re, text2, -1, &status);
   1843         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1844         TEST_ASSERT_SUCCESS(status);
   1845         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
   1846         utext_close(result);
   1847 
   1848         /* Unicode escapes */
   1849         uregex_setText(re, text1, -1, &status);
   1850         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
   1851         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1852         TEST_ASSERT_SUCCESS(status);
   1853         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
   1854         utext_close(result);
   1855 
   1856         uregex_close(re);
   1857         utext_close(&replText);
   1858     }
   1859 
   1860 
   1861     /*
   1862      *  replaceAll()
   1863      */
   1864     {
   1865         UChar    text1[80];
   1866         UChar    text2[80];
   1867         UText    replText = UTEXT_INITIALIZER;
   1868         UText   *result;
   1869         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
   1870         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
   1871         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
   1872         status = U_ZERO_ERROR;
   1873         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
   1874         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
   1875         utext_openUTF8(&replText, str_1, -1, &status);
   1876 
   1877         re = uregex_openC("x(.*?)x", 0, NULL, &status);
   1878         TEST_ASSERT_SUCCESS(status);
   1879 
   1880         /*  Normal case, with match */
   1881         uregex_setText(re, text1, -1, &status);
   1882         result = uregex_replaceAllUText(re, &replText, NULL, &status);
   1883         TEST_ASSERT_SUCCESS(status);
   1884         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
   1885         utext_close(result);
   1886 
   1887         /* No match.  Text should copy to output with no changes.  */
   1888         uregex_setText(re, text2, -1, &status);
   1889         result = uregex_replaceAllUText(re, &replText, NULL, &status);
   1890         TEST_ASSERT_SUCCESS(status);
   1891         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
   1892         utext_close(result);
   1893 
   1894         uregex_close(re);
   1895         utext_close(&replText);
   1896     }
   1897 
   1898 
   1899     /*
   1900      *  appendReplacement()
   1901      */
   1902     {
   1903         UChar    text[100];
   1904         UChar    repl[100];
   1905         UChar    buf[100];
   1906         UChar   *bufPtr;
   1907         int32_t  bufCap;
   1908 
   1909         status = U_ZERO_ERROR;
   1910         re = uregex_openC(".*", 0, 0, &status);
   1911         TEST_ASSERT_SUCCESS(status);
   1912 
   1913         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
   1914         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
   1915         uregex_setText(re, text, -1, &status);
   1916 
   1917         /* match covers whole target string */
   1918         uregex_find(re, 0, &status);
   1919         TEST_ASSERT_SUCCESS(status);
   1920         bufPtr = buf;
   1921         bufCap = UPRV_LENGTHOF(buf);
   1922         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1923         TEST_ASSERT_SUCCESS(status);
   1924         TEST_ASSERT_STRING("some other", buf, TRUE);
   1925 
   1926         /* Replacement string has \u \U escapes */
   1927         uregex_find(re, 0, &status);
   1928         TEST_ASSERT_SUCCESS(status);
   1929         bufPtr = buf;
   1930         bufCap = UPRV_LENGTHOF(buf);
   1931         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
   1932         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1933         TEST_ASSERT_SUCCESS(status);
   1934         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
   1935 
   1936         uregex_close(re);
   1937     }
   1938 
   1939 
   1940     /*
   1941      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
   1942      */
   1943 
   1944     /*
   1945      *  splitUText()
   1946      */
   1947     {
   1948         UChar    textToSplit[80];
   1949         UChar    text2[80];
   1950         UText    *fields[10];
   1951         int32_t  numFields;
   1952         int32_t i;
   1953 
   1954         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
   1955         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
   1956 
   1957         status = U_ZERO_ERROR;
   1958         re = uregex_openC(":", 0, NULL, &status);
   1959 
   1960 
   1961         /*  Simple split */
   1962 
   1963         uregex_setText(re, textToSplit, -1, &status);
   1964         TEST_ASSERT_SUCCESS(status);
   1965 
   1966         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1967         if (U_SUCCESS(status)) {
   1968             memset(fields, 0, sizeof(fields));
   1969             numFields = uregex_splitUText(re, fields, 10, &status);
   1970             TEST_ASSERT_SUCCESS(status);
   1971 
   1972             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1973             if(U_SUCCESS(status)) {
   1974               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
   1975               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
   1976               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
   1977                 TEST_ASSERT(numFields == 3);
   1978                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   1979                 TEST_ASSERT_UTEXT(str_second, fields[1]);
   1980                 TEST_ASSERT_UTEXT(str_third, fields[2]);
   1981                 TEST_ASSERT(fields[3] == NULL);
   1982             }
   1983             for(i = 0; i < numFields; i++) {
   1984                 utext_close(fields[i]);
   1985             }
   1986         }
   1987 
   1988         uregex_close(re);
   1989 
   1990 
   1991         /*  Split with too few output strings available */
   1992         status = U_ZERO_ERROR;
   1993         re = uregex_openC(":", 0, NULL, &status);
   1994         uregex_setText(re, textToSplit, -1, &status);
   1995         TEST_ASSERT_SUCCESS(status);
   1996 
   1997         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1998         if(U_SUCCESS(status)) {
   1999             fields[0] = NULL;
   2000             fields[1] = NULL;
   2001             fields[2] = &patternText;
   2002             numFields = uregex_splitUText(re, fields, 2, &status);
   2003             TEST_ASSERT_SUCCESS(status);
   2004 
   2005             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2006             if(U_SUCCESS(status)) {
   2007                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2008                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
   2009                 TEST_ASSERT(numFields == 2);
   2010                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2011                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
   2012                 TEST_ASSERT(fields[2] == &patternText);
   2013             }
   2014             for(i = 0; i < numFields; i++) {
   2015                 utext_close(fields[i]);
   2016             }
   2017         }
   2018 
   2019         uregex_close(re);
   2020     }
   2021 
   2022     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
   2023      *                   comes out as additional fields.  */
   2024     {
   2025         UChar    textToSplit[80];
   2026         UText    *fields[10];
   2027         int32_t  numFields;
   2028         int32_t i;
   2029 
   2030         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
   2031 
   2032         status = U_ZERO_ERROR;
   2033         re = uregex_openC("<(.*?)>", 0, NULL, &status);
   2034 
   2035         uregex_setText(re, textToSplit, -1, &status);
   2036         TEST_ASSERT_SUCCESS(status);
   2037 
   2038         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2039         if(U_SUCCESS(status)) {
   2040             memset(fields, 0, sizeof(fields));
   2041             numFields = uregex_splitUText(re, fields, 10, &status);
   2042             TEST_ASSERT_SUCCESS(status);
   2043 
   2044             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2045             if(U_SUCCESS(status)) {
   2046                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2047                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2048                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2049                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2050                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
   2051 
   2052                 TEST_ASSERT(numFields == 5);
   2053                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2054                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2055                 TEST_ASSERT_UTEXT(str_second, fields[2]);
   2056                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2057                 TEST_ASSERT_UTEXT(str_third, fields[4]);
   2058                 TEST_ASSERT(fields[5] == NULL);
   2059             }
   2060             for(i = 0; i < numFields; i++) {
   2061                 utext_close(fields[i]);
   2062             }
   2063         }
   2064 
   2065         /*  Split with too few output strings available (2) */
   2066         status = U_ZERO_ERROR;
   2067         fields[0] = NULL;
   2068         fields[1] = NULL;
   2069         fields[2] = &patternText;
   2070         numFields = uregex_splitUText(re, fields, 2, &status);
   2071         TEST_ASSERT_SUCCESS(status);
   2072 
   2073         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2074         if(U_SUCCESS(status)) {
   2075             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2076             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
   2077             TEST_ASSERT(numFields == 2);
   2078             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2079             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
   2080             TEST_ASSERT(fields[2] == &patternText);
   2081         }
   2082         for(i = 0; i < numFields; i++) {
   2083             utext_close(fields[i]);
   2084         }
   2085 
   2086 
   2087         /*  Split with too few output strings available (3) */
   2088         status = U_ZERO_ERROR;
   2089         fields[0] = NULL;
   2090         fields[1] = NULL;
   2091         fields[2] = NULL;
   2092         fields[3] = &patternText;
   2093         numFields = uregex_splitUText(re, fields, 3, &status);
   2094         TEST_ASSERT_SUCCESS(status);
   2095 
   2096         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2097         if(U_SUCCESS(status)) {
   2098             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2099             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2100             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
   2101             TEST_ASSERT(numFields == 3);
   2102             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2103             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2104             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
   2105             TEST_ASSERT(fields[3] == &patternText);
   2106         }
   2107         for(i = 0; i < numFields; i++) {
   2108             utext_close(fields[i]);
   2109         }
   2110 
   2111         /*  Split with just enough output strings available (5) */
   2112         status = U_ZERO_ERROR;
   2113         fields[0] = NULL;
   2114         fields[1] = NULL;
   2115         fields[2] = NULL;
   2116         fields[3] = NULL;
   2117         fields[4] = NULL;
   2118         fields[5] = &patternText;
   2119         numFields = uregex_splitUText(re, fields, 5, &status);
   2120         TEST_ASSERT_SUCCESS(status);
   2121 
   2122         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2123         if(U_SUCCESS(status)) {
   2124             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2125             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2126             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2127             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2128             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
   2129 
   2130             TEST_ASSERT(numFields == 5);
   2131             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2132             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2133             TEST_ASSERT_UTEXT(str_second, fields[2]);
   2134             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2135             TEST_ASSERT_UTEXT(str_third, fields[4]);
   2136             TEST_ASSERT(fields[5] == &patternText);
   2137         }
   2138         for(i = 0; i < numFields; i++) {
   2139             utext_close(fields[i]);
   2140         }
   2141 
   2142         /* Split, end of text is a field delimiter.   */
   2143         status = U_ZERO_ERROR;
   2144         uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
   2145         TEST_ASSERT_SUCCESS(status);
   2146 
   2147         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2148         if(U_SUCCESS(status)) {
   2149             memset(fields, 0, sizeof(fields));
   2150             fields[9] = &patternText;
   2151             numFields = uregex_splitUText(re, fields, 9, &status);
   2152             TEST_ASSERT_SUCCESS(status);
   2153 
   2154             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2155             if(U_SUCCESS(status)) {
   2156                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2157                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2158                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2159                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2160                 const char str_empty[] = { 0x00 };
   2161 
   2162                 TEST_ASSERT(numFields == 5);
   2163                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2164                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2165                 TEST_ASSERT_UTEXT(str_second, fields[2]);
   2166                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2167                 TEST_ASSERT_UTEXT(str_empty,  fields[4]);
   2168                 TEST_ASSERT(fields[5] == NULL);
   2169                 TEST_ASSERT(fields[8] == NULL);
   2170                 TEST_ASSERT(fields[9] == &patternText);
   2171             }
   2172             for(i = 0; i < numFields; i++) {
   2173                 utext_close(fields[i]);
   2174             }
   2175         }
   2176 
   2177         uregex_close(re);
   2178     }
   2179     utext_close(&patternText);
   2180 }
   2181 
   2182 
   2183 static void TestRefreshInput(void) {
   2184     /*
   2185      *  RefreshInput changes out the input of a URegularExpression without
   2186      *    changing anything else in the match state.  Used with Java JNI,
   2187      *    when Java moves the underlying string storage.   This test
   2188      *    runs a find() loop, moving the text after the first match.
   2189      *    The right number of matches should still be found.
   2190      */
   2191     UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
   2192     UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
   2193     UErrorCode status = U_ZERO_ERROR;
   2194     URegularExpression *re;
   2195     UText ut1 = UTEXT_INITIALIZER;
   2196     UText ut2 = UTEXT_INITIALIZER;
   2197 
   2198     re = uregex_openC("[ABC]", 0, 0, &status);
   2199     TEST_ASSERT_SUCCESS(status);
   2200 
   2201     utext_openUChars(&ut1, testStr, -1, &status);
   2202     TEST_ASSERT_SUCCESS(status);
   2203     uregex_setUText(re, &ut1, &status);
   2204     TEST_ASSERT_SUCCESS(status);
   2205 
   2206     /* Find the first match "A" in the original string */
   2207     TEST_ASSERT(uregex_findNext(re, &status));
   2208     TEST_ASSERT(uregex_start(re, 0, &status) == 0);
   2209 
   2210     /* Move the string, kill the original string.  */
   2211     u_strcpy(movedStr, testStr);
   2212     u_memset(testStr, 0, u_strlen(testStr));
   2213     utext_openUChars(&ut2, movedStr, -1, &status);
   2214     TEST_ASSERT_SUCCESS(status);
   2215     uregex_refreshUText(re, &ut2, &status);
   2216     TEST_ASSERT_SUCCESS(status);
   2217 
   2218     /* Find the following two matches, now working in the moved string. */
   2219     TEST_ASSERT(uregex_findNext(re, &status));
   2220     TEST_ASSERT(uregex_start(re, 0, &status) == 2);
   2221     TEST_ASSERT(uregex_findNext(re, &status));
   2222     TEST_ASSERT(uregex_start(re, 0, &status) == 4);
   2223     TEST_ASSERT(FALSE == uregex_findNext(re, &status));
   2224 
   2225     uregex_close(re);
   2226 }
   2227 
   2228 
   2229 static void TestBug8421(void) {
   2230     /* Bug 8421:  setTimeLimit on a regular expresssion before setting text to be matched
   2231      *             was failing.
   2232      */
   2233     URegularExpression *re;
   2234     UErrorCode status = U_ZERO_ERROR;
   2235     int32_t  limit = -1;
   2236 
   2237     re = uregex_openC("abc", 0, 0, &status);
   2238     TEST_ASSERT_SUCCESS(status);
   2239 
   2240     limit = uregex_getTimeLimit(re, &status);
   2241     TEST_ASSERT_SUCCESS(status);
   2242     TEST_ASSERT(limit == 0);
   2243 
   2244     uregex_setTimeLimit(re, 100, &status);
   2245     TEST_ASSERT_SUCCESS(status);
   2246     limit = uregex_getTimeLimit(re, &status);
   2247     TEST_ASSERT_SUCCESS(status);
   2248     TEST_ASSERT(limit == 100);
   2249 
   2250     uregex_close(re);
   2251 }
   2252 
   2253 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
   2254     return FALSE;
   2255 }
   2256 
   2257 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
   2258     return FALSE;
   2259 }
   2260 
   2261 static void TestBug10815() {
   2262   /* Bug 10815:   uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
   2263    *              when the callback function specified by uregex_setMatchCallback() returns FALSE
   2264    */
   2265     URegularExpression *re;
   2266     UErrorCode status = U_ZERO_ERROR;
   2267     UChar    text[100];
   2268 
   2269 
   2270     // findNext() with a find progress callback function.
   2271 
   2272     re = uregex_openC(".z", 0, 0, &status);
   2273     TEST_ASSERT_SUCCESS(status);
   2274 
   2275     u_uastrncpy(text, "Hello, World.",  UPRV_LENGTHOF(text));
   2276     uregex_setText(re, text, -1, &status);
   2277     TEST_ASSERT_SUCCESS(status);
   2278 
   2279     uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
   2280     TEST_ASSERT_SUCCESS(status);
   2281 
   2282     uregex_findNext(re, &status);
   2283     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
   2284 
   2285     uregex_close(re);
   2286 
   2287     // findNext() with a match progress callback function.
   2288 
   2289     status = U_ZERO_ERROR;
   2290     re = uregex_openC("((xxx)*)*y", 0, 0, &status);
   2291     TEST_ASSERT_SUCCESS(status);
   2292 
   2293     // Pattern + this text gives an exponential time match. Without the callback to stop the match,
   2294     // it will appear to be stuck in a (near) infinite loop.
   2295     u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  UPRV_LENGTHOF(text));
   2296     uregex_setText(re, text, -1, &status);
   2297     TEST_ASSERT_SUCCESS(status);
   2298 
   2299     uregex_setMatchCallback(re, MatchCallback, NULL, &status);
   2300     TEST_ASSERT_SUCCESS(status);
   2301 
   2302     uregex_findNext(re, &status);
   2303     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
   2304 
   2305     uregex_close(re);
   2306 }
   2307 
   2308 
   2309 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
   2310