Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 2004-2013, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /********************************************************************************
      7 *
      8 * File reapits.c
      9 *
     10 *********************************************************************************/
     11 /*C API TEST FOR Regular Expressions */
     12 /**
     13 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
     14 *   try to test the full functionality.  It just calls each function and verifies that it
     15 *   works on a basic level.
     16 *
     17 *   More complete testing of regular expression functionality is done with the C++ tests.
     18 **/
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     23 
     24 #include <stdlib.h>
     25 #include <string.h>
     26 #include "unicode/uloc.h"
     27 #include "unicode/uregex.h"
     28 #include "unicode/ustring.h"
     29 #include "unicode/utext.h"
     30 #include "cintltst.h"
     31 
     32 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
     33 log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
     34 
     35 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
     36 log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", __FILE__, __LINE__);}}
     37 
     38 /*
     39  *   TEST_SETUP and TEST_TEARDOWN
     40  *         macros to handle the boilerplate around setting up regex test cases.
     41  *         parameteres to setup:
     42  *              pattern:     The regex pattern, a (char *) null terminated C string.
     43  *              testString:  The string data, also a (char *) C string.
     44  *              flags:       Regex flags to set when compiling the pattern
     45  *
     46  *         Put arbitrary test code between SETUP and TEARDOWN.
     47  *         're" is the compiled, ready-to-go  regular expression.
     48  */
     49 #define TEST_SETUP(pattern, testString, flags) {  \
     50     UChar   *srcString = NULL;  \
     51     status = U_ZERO_ERROR; \
     52     re = uregex_openC(pattern, flags, NULL, &status);  \
     53     TEST_ASSERT_SUCCESS(status);   \
     54     srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
     55     u_uastrncpy(srcString, testString,  strlen(testString)+1); \
     56     uregex_setText(re, srcString, -1, &status); \
     57     TEST_ASSERT_SUCCESS(status);  \
     58     if (U_SUCCESS(status)) {
     59 
     60 #define TEST_TEARDOWN  \
     61     }  \
     62     TEST_ASSERT_SUCCESS(status);  \
     63     uregex_close(re);  \
     64     free(srcString);   \
     65     }
     66 
     67 
     68 /**
     69  * @param expected utf-8 array of bytes to be expected
     70  */
     71 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
     72      char     buf_inside_macro[120];
     73      int32_t  len = (int32_t)strlen(expected);
     74      UBool    success;
     75      if (nulTerm) {
     76          u_austrncpy(buf_inside_macro, (actual), len+1);
     77          buf_inside_macro[len+2] = 0;
     78          success = (strcmp((expected), buf_inside_macro) == 0);
     79      } else {
     80          u_austrncpy(buf_inside_macro, (actual), len);
     81          buf_inside_macro[len+1] = 0;
     82          success = (strncmp((expected), buf_inside_macro, len) == 0);
     83      }
     84      if (success == FALSE) {
     85          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
     86              file, line, (expected), buf_inside_macro);
     87      }
     88 }
     89 
     90 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
     91 
     92 
     93 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
     94     int32_t u8i = 0;
     95     UChar32 u8c = 0;
     96     UChar32 utc = 0;
     97     UBool   stringsEqual = TRUE;
     98     utext_setNativeIndex(utext, 0);
     99     for (;;) {
    100         U8_NEXT_UNSAFE(utf8, u8i, u8c);
    101         utc = utext_next32(utext);
    102         if (u8c == 0 && utc == U_SENTINEL) {
    103             break;
    104         }
    105         if (u8c != utc || u8c == 0) {
    106             stringsEqual = FALSE;
    107             break;
    108         }
    109     }
    110     return stringsEqual;
    111 }
    112 
    113 
    114 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
    115     utext_setNativeIndex(actual, 0);
    116     if (!equals_utf8_utext(expected, actual)) {
    117         UChar32 c;
    118         log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
    119         c = utext_next32From(actual, 0);
    120         while (c != U_SENTINEL) {
    121             if (0x20<c && c <0x7e) {
    122                 log_err("%c", c);
    123             } else {
    124                 log_err("%#x", c);
    125             }
    126             c = UTEXT_NEXT32(actual);
    127         }
    128         log_err("\"\n");
    129     }
    130 }
    131 
    132 /*
    133  * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
    134  *     Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
    135  */
    136 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
    137 
    138 static UBool testUTextEqual(UText *uta, UText *utb) {
    139     UChar32 ca = 0;
    140     UChar32 cb = 0;
    141     utext_setNativeIndex(uta, 0);
    142     utext_setNativeIndex(utb, 0);
    143     do {
    144         ca = utext_next32(uta);
    145         cb = utext_next32(utb);
    146         if (ca != cb) {
    147             break;
    148         }
    149     } while (ca != U_SENTINEL);
    150     return ca == cb;
    151 }
    152 
    153 
    154 
    155 
    156 static void TestRegexCAPI(void);
    157 static void TestBug4315(void);
    158 static void TestUTextAPI(void);
    159 static void TestRefreshInput(void);
    160 static void TestBug8421(void);
    161 
    162 void addURegexTest(TestNode** root);
    163 
    164 void addURegexTest(TestNode** root)
    165 {
    166     addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
    167     addTest(root, &TestBug4315,   "regex/TestBug4315");
    168     addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
    169     addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
    170     addTest(root, &TestBug8421,   "regex/TestBug8421");
    171 }
    172 
    173 /*
    174  * Call back function and context struct used for testing
    175  *    regular expression user callbacks.  This test is mostly the same as
    176  *   the corresponding C++ test in intltest.
    177  */
    178 typedef struct callBackContext {
    179     int32_t          maxCalls;
    180     int32_t          numCalls;
    181     int32_t          lastSteps;
    182 } callBackContext;
    183 
    184 static UBool U_EXPORT2 U_CALLCONV
    185 TestCallbackFn(const void *context, int32_t steps) {
    186   callBackContext  *info = (callBackContext *)context;
    187   if (info->lastSteps+1 != steps) {
    188       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
    189   }
    190   info->lastSteps = steps;
    191   info->numCalls++;
    192   return (info->numCalls < info->maxCalls);
    193 }
    194 
    195 /*
    196  *   Regular Expression C API Tests
    197  */
    198 static void TestRegexCAPI(void) {
    199     UErrorCode           status = U_ZERO_ERROR;
    200     URegularExpression  *re;
    201     UChar                pat[200];
    202     UChar               *minus1;
    203 
    204     memset(&minus1, -1, sizeof(minus1));
    205 
    206     /* Mimimalist open/close */
    207     u_uastrncpy(pat, "abc*", sizeof(pat)/2);
    208     re = uregex_open(pat, -1, 0, 0, &status);
    209     if (U_FAILURE(status)) {
    210          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
    211          return;
    212     }
    213     uregex_close(re);
    214 
    215     /* Open with all flag values set */
    216     status = U_ZERO_ERROR;
    217     re = uregex_open(pat, -1,
    218         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
    219         0, &status);
    220     TEST_ASSERT_SUCCESS(status);
    221     uregex_close(re);
    222 
    223     /* Open with an invalid flag */
    224     status = U_ZERO_ERROR;
    225     re = uregex_open(pat, -1, 0x40000000, 0, &status);
    226     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
    227     uregex_close(re);
    228 
    229     /* Open with an unimplemented flag */
    230     status = U_ZERO_ERROR;
    231     re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
    232     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
    233     uregex_close(re);
    234 
    235     /* openC with an invalid parameter */
    236     status = U_ZERO_ERROR;
    237     re = uregex_openC(NULL,
    238         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
    239     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
    240 
    241     /* openC with an invalid parameter */
    242     status = U_USELESS_COLLATOR_ERROR;
    243     re = uregex_openC(NULL,
    244         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
    245     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
    246 
    247     /* openC   open from a C string */
    248     {
    249         const UChar   *p;
    250         int32_t  len;
    251         status = U_ZERO_ERROR;
    252         re = uregex_openC("abc*", 0, 0, &status);
    253         TEST_ASSERT_SUCCESS(status);
    254         p = uregex_pattern(re, &len, &status);
    255         TEST_ASSERT_SUCCESS(status);
    256 
    257         /* The TEST_ASSERT_SUCCESS above should change too... */
    258         if(U_SUCCESS(status)) {
    259             u_uastrncpy(pat, "abc*", sizeof(pat)/2);
    260             TEST_ASSERT(u_strcmp(pat, p) == 0);
    261             TEST_ASSERT(len==(int32_t)strlen("abc*"));
    262         }
    263 
    264         uregex_close(re);
    265 
    266         /*  TODO:  Open with ParseError parameter */
    267     }
    268 
    269     /*
    270      *  clone
    271      */
    272     {
    273         URegularExpression *clone1;
    274         URegularExpression *clone2;
    275         URegularExpression *clone3;
    276         UChar  testString1[30];
    277         UChar  testString2[30];
    278         UBool  result;
    279 
    280 
    281         status = U_ZERO_ERROR;
    282         re = uregex_openC("abc*", 0, 0, &status);
    283         TEST_ASSERT_SUCCESS(status);
    284         clone1 = uregex_clone(re, &status);
    285         TEST_ASSERT_SUCCESS(status);
    286         TEST_ASSERT(clone1 != NULL);
    287 
    288         status = U_ZERO_ERROR;
    289         clone2 = uregex_clone(re, &status);
    290         TEST_ASSERT_SUCCESS(status);
    291         TEST_ASSERT(clone2 != NULL);
    292         uregex_close(re);
    293 
    294         status = U_ZERO_ERROR;
    295         clone3 = uregex_clone(clone2, &status);
    296         TEST_ASSERT_SUCCESS(status);
    297         TEST_ASSERT(clone3 != NULL);
    298 
    299         u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
    300         u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
    301 
    302         status = U_ZERO_ERROR;
    303         uregex_setText(clone1, testString1, -1, &status);
    304         TEST_ASSERT_SUCCESS(status);
    305         result = uregex_lookingAt(clone1, 0, &status);
    306         TEST_ASSERT_SUCCESS(status);
    307         TEST_ASSERT(result==TRUE);
    308 
    309         status = U_ZERO_ERROR;
    310         uregex_setText(clone2, testString2, -1, &status);
    311         TEST_ASSERT_SUCCESS(status);
    312         result = uregex_lookingAt(clone2, 0, &status);
    313         TEST_ASSERT_SUCCESS(status);
    314         TEST_ASSERT(result==FALSE);
    315         result = uregex_find(clone2, 0, &status);
    316         TEST_ASSERT_SUCCESS(status);
    317         TEST_ASSERT(result==TRUE);
    318 
    319         uregex_close(clone1);
    320         uregex_close(clone2);
    321         uregex_close(clone3);
    322 
    323     }
    324 
    325     /*
    326      *  pattern()
    327     */
    328     {
    329         const UChar  *resultPat;
    330         int32_t       resultLen;
    331         u_uastrncpy(pat, "hello", sizeof(pat)/2);
    332         status = U_ZERO_ERROR;
    333         re = uregex_open(pat, -1, 0, NULL, &status);
    334         resultPat = uregex_pattern(re, &resultLen, &status);
    335         TEST_ASSERT_SUCCESS(status);
    336 
    337         /* The TEST_ASSERT_SUCCESS above should change too... */
    338         if (U_SUCCESS(status)) {
    339             TEST_ASSERT(resultLen == -1);
    340             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
    341         }
    342 
    343         uregex_close(re);
    344 
    345         status = U_ZERO_ERROR;
    346         re = uregex_open(pat, 3, 0, NULL, &status);
    347         resultPat = uregex_pattern(re, &resultLen, &status);
    348         TEST_ASSERT_SUCCESS(status);
    349         TEST_ASSERT_SUCCESS(status);
    350 
    351         /* The TEST_ASSERT_SUCCESS above should change too... */
    352         if (U_SUCCESS(status)) {
    353             TEST_ASSERT(resultLen == 3);
    354             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
    355             TEST_ASSERT(u_strlen(resultPat) == 3);
    356         }
    357 
    358         uregex_close(re);
    359     }
    360 
    361     /*
    362      *  flags()
    363      */
    364     {
    365         int32_t  t;
    366 
    367         status = U_ZERO_ERROR;
    368         re = uregex_open(pat, -1, 0, NULL, &status);
    369         t  = uregex_flags(re, &status);
    370         TEST_ASSERT_SUCCESS(status);
    371         TEST_ASSERT(t == 0);
    372         uregex_close(re);
    373 
    374         status = U_ZERO_ERROR;
    375         re = uregex_open(pat, -1, 0, NULL, &status);
    376         t  = uregex_flags(re, &status);
    377         TEST_ASSERT_SUCCESS(status);
    378         TEST_ASSERT(t == 0);
    379         uregex_close(re);
    380 
    381         status = U_ZERO_ERROR;
    382         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
    383         t  = uregex_flags(re, &status);
    384         TEST_ASSERT_SUCCESS(status);
    385         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
    386         uregex_close(re);
    387     }
    388 
    389     /*
    390      *  setText() and lookingAt()
    391      */
    392     {
    393         UChar  text1[50];
    394         UChar  text2[50];
    395         UBool  result;
    396 
    397         u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
    398         u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
    399         status = U_ZERO_ERROR;
    400         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
    401         re = uregex_open(pat, -1, 0, NULL, &status);
    402         TEST_ASSERT_SUCCESS(status);
    403 
    404         /* Operation before doing a setText should fail... */
    405         status = U_ZERO_ERROR;
    406         uregex_lookingAt(re, 0, &status);
    407         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
    408 
    409         status = U_ZERO_ERROR;
    410         uregex_setText(re, text1, -1, &status);
    411         result = uregex_lookingAt(re, 0, &status);
    412         TEST_ASSERT(result == TRUE);
    413         TEST_ASSERT_SUCCESS(status);
    414 
    415         status = U_ZERO_ERROR;
    416         uregex_setText(re, text2, -1, &status);
    417         result = uregex_lookingAt(re, 0, &status);
    418         TEST_ASSERT(result == FALSE);
    419         TEST_ASSERT_SUCCESS(status);
    420 
    421         status = U_ZERO_ERROR;
    422         uregex_setText(re, text1, -1, &status);
    423         result = uregex_lookingAt(re, 0, &status);
    424         TEST_ASSERT(result == TRUE);
    425         TEST_ASSERT_SUCCESS(status);
    426 
    427         status = U_ZERO_ERROR;
    428         uregex_setText(re, text1, 5, &status);
    429         result = uregex_lookingAt(re, 0, &status);
    430         TEST_ASSERT(result == FALSE);
    431         TEST_ASSERT_SUCCESS(status);
    432 
    433         status = U_ZERO_ERROR;
    434         uregex_setText(re, text1, 6, &status);
    435         result = uregex_lookingAt(re, 0, &status);
    436         TEST_ASSERT(result == TRUE);
    437         TEST_ASSERT_SUCCESS(status);
    438 
    439         uregex_close(re);
    440     }
    441 
    442 
    443     /*
    444      *  getText()
    445      */
    446     {
    447         UChar    text1[50];
    448         UChar    text2[50];
    449         const UChar   *result;
    450         int32_t  textLength;
    451 
    452         u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
    453         u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
    454         status = U_ZERO_ERROR;
    455         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
    456         re = uregex_open(pat, -1, 0, NULL, &status);
    457 
    458         uregex_setText(re, text1, -1, &status);
    459         result = uregex_getText(re, &textLength, &status);
    460         TEST_ASSERT(result == text1);
    461         TEST_ASSERT(textLength == -1);
    462         TEST_ASSERT_SUCCESS(status);
    463 
    464         status = U_ZERO_ERROR;
    465         uregex_setText(re, text2, 7, &status);
    466         result = uregex_getText(re, &textLength, &status);
    467         TEST_ASSERT(result == text2);
    468         TEST_ASSERT(textLength == 7);
    469         TEST_ASSERT_SUCCESS(status);
    470 
    471         status = U_ZERO_ERROR;
    472         uregex_setText(re, text2, 4, &status);
    473         result = uregex_getText(re, &textLength, &status);
    474         TEST_ASSERT(result == text2);
    475         TEST_ASSERT(textLength == 4);
    476         TEST_ASSERT_SUCCESS(status);
    477         uregex_close(re);
    478     }
    479 
    480     /*
    481      *  matches()
    482      */
    483     {
    484         UChar   text1[50];
    485         UBool   result;
    486         int     len;
    487         UChar   nullString[] = {0,0,0};
    488 
    489         u_uastrncpy(text1, "abcccde",  sizeof(text1)/2);
    490         status = U_ZERO_ERROR;
    491         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
    492         re = uregex_open(pat, -1, 0, NULL, &status);
    493 
    494         uregex_setText(re, text1, -1, &status);
    495         result = uregex_matches(re, 0, &status);
    496         TEST_ASSERT(result == FALSE);
    497         TEST_ASSERT_SUCCESS(status);
    498 
    499         status = U_ZERO_ERROR;
    500         uregex_setText(re, text1, 6, &status);
    501         result = uregex_matches(re, 0, &status);
    502         TEST_ASSERT(result == TRUE);
    503         TEST_ASSERT_SUCCESS(status);
    504 
    505         status = U_ZERO_ERROR;
    506         uregex_setText(re, text1, 6, &status);
    507         result = uregex_matches(re, 1, &status);
    508         TEST_ASSERT(result == FALSE);
    509         TEST_ASSERT_SUCCESS(status);
    510         uregex_close(re);
    511 
    512         status = U_ZERO_ERROR;
    513         re = uregex_openC(".?", 0, NULL, &status);
    514         uregex_setText(re, text1, -1, &status);
    515         len = u_strlen(text1);
    516         result = uregex_matches(re, len, &status);
    517         TEST_ASSERT(result == TRUE);
    518         TEST_ASSERT_SUCCESS(status);
    519 
    520         status = U_ZERO_ERROR;
    521         uregex_setText(re, nullString, -1, &status);
    522         TEST_ASSERT_SUCCESS(status);
    523         result = uregex_matches(re, 0, &status);
    524         TEST_ASSERT(result == TRUE);
    525         TEST_ASSERT_SUCCESS(status);
    526         uregex_close(re);
    527     }
    528 
    529 
    530     /*
    531      *  lookingAt()    Used in setText test.
    532      */
    533 
    534 
    535     /*
    536      *  find(), findNext, start, end, reset
    537      */
    538     {
    539         UChar    text1[50];
    540         UBool    result;
    541         u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
    542         status = U_ZERO_ERROR;
    543         re = uregex_openC("rx", 0, NULL, &status);
    544 
    545         uregex_setText(re, text1, -1, &status);
    546         result = uregex_find(re, 0, &status);
    547         TEST_ASSERT(result == TRUE);
    548         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
    549         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
    550         TEST_ASSERT_SUCCESS(status);
    551 
    552         result = uregex_find(re, 9, &status);
    553         TEST_ASSERT(result == TRUE);
    554         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
    555         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
    556         TEST_ASSERT_SUCCESS(status);
    557 
    558         result = uregex_find(re, 14, &status);
    559         TEST_ASSERT(result == FALSE);
    560         TEST_ASSERT_SUCCESS(status);
    561 
    562         status = U_ZERO_ERROR;
    563         uregex_reset(re, 0, &status);
    564 
    565         result = uregex_findNext(re, &status);
    566         TEST_ASSERT(result == TRUE);
    567         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
    568         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
    569         TEST_ASSERT_SUCCESS(status);
    570 
    571         result = uregex_findNext(re, &status);
    572         TEST_ASSERT(result == TRUE);
    573         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
    574         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
    575         TEST_ASSERT_SUCCESS(status);
    576 
    577         status = U_ZERO_ERROR;
    578         uregex_reset(re, 12, &status);
    579 
    580         result = uregex_findNext(re, &status);
    581         TEST_ASSERT(result == TRUE);
    582         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
    583         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
    584         TEST_ASSERT_SUCCESS(status);
    585 
    586         result = uregex_findNext(re, &status);
    587         TEST_ASSERT(result == FALSE);
    588         TEST_ASSERT_SUCCESS(status);
    589 
    590         uregex_close(re);
    591     }
    592 
    593     /*
    594      *  groupCount
    595      */
    596     {
    597         int32_t result;
    598 
    599         status = U_ZERO_ERROR;
    600         re = uregex_openC("abc", 0, NULL, &status);
    601         result = uregex_groupCount(re, &status);
    602         TEST_ASSERT_SUCCESS(status);
    603         TEST_ASSERT(result == 0);
    604         uregex_close(re);
    605 
    606         status = U_ZERO_ERROR;
    607         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
    608         result = uregex_groupCount(re, &status);
    609         TEST_ASSERT_SUCCESS(status);
    610         TEST_ASSERT(result == 3);
    611         uregex_close(re);
    612 
    613     }
    614 
    615 
    616     /*
    617      *  group()
    618      */
    619     {
    620         UChar    text1[80];
    621         UChar    buf[80];
    622         UBool    result;
    623         int32_t  resultSz;
    624         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
    625 
    626         status = U_ZERO_ERROR;
    627         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
    628         TEST_ASSERT_SUCCESS(status);
    629 
    630 
    631         uregex_setText(re, text1, -1, &status);
    632         result = uregex_find(re, 0, &status);
    633         TEST_ASSERT(result==TRUE);
    634 
    635         /*  Capture Group 0, the full match.  Should succeed.  */
    636         status = U_ZERO_ERROR;
    637         resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status);
    638         TEST_ASSERT_SUCCESS(status);
    639         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
    640         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    641 
    642         /*  Capture group #1.  Should succeed. */
    643         status = U_ZERO_ERROR;
    644         resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status);
    645         TEST_ASSERT_SUCCESS(status);
    646         TEST_ASSERT_STRING(" interior ", buf, TRUE);
    647         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
    648 
    649         /*  Capture group out of range.  Error. */
    650         status = U_ZERO_ERROR;
    651         uregex_group(re, 2, buf, sizeof(buf)/2, &status);
    652         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
    653 
    654         /* NULL buffer, pure pre-flight */
    655         status = U_ZERO_ERROR;
    656         resultSz = uregex_group(re, 0, NULL, 0, &status);
    657         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    658         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    659 
    660         /* Too small buffer, truncated string */
    661         status = U_ZERO_ERROR;
    662         memset(buf, -1, sizeof(buf));
    663         resultSz = uregex_group(re, 0, buf, 5, &status);
    664         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    665         TEST_ASSERT_STRING("abc i", buf, FALSE);
    666         TEST_ASSERT(buf[5] == (UChar)0xffff);
    667         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    668 
    669         /* Output string just fits buffer, no NUL term. */
    670         status = U_ZERO_ERROR;
    671         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
    672         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    673         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
    674         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    675         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
    676 
    677         uregex_close(re);
    678 
    679     }
    680 
    681     /*
    682      *  Regions
    683      */
    684 
    685 
    686         /* SetRegion(), getRegion() do something  */
    687         TEST_SETUP(".*", "0123456789ABCDEF", 0)
    688         UChar resultString[40];
    689         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
    690         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
    691         uregex_setRegion(re, 3, 6, &status);
    692         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
    693         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
    694         TEST_ASSERT(uregex_findNext(re, &status));
    695         TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3)
    696         TEST_ASSERT_STRING("345", resultString, TRUE);
    697         TEST_TEARDOWN;
    698 
    699         /* find(start=-1) uses regions   */
    700         TEST_SETUP(".*", "0123456789ABCDEF", 0);
    701         uregex_setRegion(re, 4, 6, &status);
    702         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
    703         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    704         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
    705         TEST_TEARDOWN;
    706 
    707         /* find (start >=0) does not use regions   */
    708         TEST_SETUP(".*", "0123456789ABCDEF", 0);
    709         uregex_setRegion(re, 4, 6, &status);
    710         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    711         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    712         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
    713         TEST_TEARDOWN;
    714 
    715         /* findNext() obeys regions    */
    716         TEST_SETUP(".", "0123456789ABCDEF", 0);
    717         uregex_setRegion(re, 4, 6, &status);
    718         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
    719         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    720         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
    721         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
    722         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
    723         TEST_TEARDOWN;
    724 
    725         /* matches(start=-1) uses regions                                           */
    726         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
    727         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    728         uregex_setRegion(re, 4, 6, &status);
    729         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
    730         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    731         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
    732         TEST_TEARDOWN;
    733 
    734         /* matches (start >=0) does not use regions       */
    735         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    736         uregex_setRegion(re, 4, 6, &status);
    737         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
    738         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    739         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
    740         TEST_TEARDOWN;
    741 
    742         /* lookingAt(start=-1) uses regions                                         */
    743         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
    744         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    745         uregex_setRegion(re, 4, 6, &status);
    746         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
    747         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    748         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
    749         TEST_TEARDOWN;
    750 
    751         /* lookingAt (start >=0) does not use regions  */
    752         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    753         uregex_setRegion(re, 4, 6, &status);
    754         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
    755         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    756         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
    757         TEST_TEARDOWN;
    758 
    759         /* hitEnd()       */
    760         TEST_SETUP("[a-f]*", "abcdefghij", 0);
    761         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    762         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
    763         TEST_TEARDOWN;
    764 
    765         TEST_SETUP("[a-f]*", "abcdef", 0);
    766         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    767         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
    768         TEST_TEARDOWN;
    769 
    770         /* requireEnd   */
    771         TEST_SETUP("abcd", "abcd", 0);
    772         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    773         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
    774         TEST_TEARDOWN;
    775 
    776         TEST_SETUP("abcd$", "abcd", 0);
    777         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    778         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
    779         TEST_TEARDOWN;
    780 
    781         /* anchoringBounds        */
    782         TEST_SETUP("abc$", "abcdef", 0);
    783         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
    784         uregex_useAnchoringBounds(re, FALSE, &status);
    785         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
    786 
    787         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
    788         uregex_useAnchoringBounds(re, TRUE, &status);
    789         uregex_setRegion(re, 0, 3, &status);
    790         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
    791         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
    792         TEST_TEARDOWN;
    793 
    794         /* Transparent Bounds      */
    795         TEST_SETUP("abc(?=def)", "abcdef", 0);
    796         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
    797         uregex_useTransparentBounds(re, TRUE, &status);
    798         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
    799 
    800         uregex_useTransparentBounds(re, FALSE, &status);
    801         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
    802         uregex_setRegion(re, 0, 3, &status);
    803         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
    804         uregex_useTransparentBounds(re, TRUE, &status);
    805         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
    806         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
    807         TEST_TEARDOWN;
    808 
    809 
    810     /*
    811      *  replaceFirst()
    812      */
    813     {
    814         UChar    text1[80];
    815         UChar    text2[80];
    816         UChar    replText[80];
    817         UChar    buf[80];
    818         int32_t  resultSz;
    819         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
    820         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
    821         u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
    822 
    823         status = U_ZERO_ERROR;
    824         re = uregex_openC("x(.*?)x", 0, NULL, &status);
    825         TEST_ASSERT_SUCCESS(status);
    826 
    827         /*  Normal case, with match */
    828         uregex_setText(re, text1, -1, &status);
    829         resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
    830         TEST_ASSERT_SUCCESS(status);
    831         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
    832         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    833 
    834         /* No match.  Text should copy to output with no changes.  */
    835         status = U_ZERO_ERROR;
    836         uregex_setText(re, text2, -1, &status);
    837         resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
    838         TEST_ASSERT_SUCCESS(status);
    839         TEST_ASSERT_STRING("No match here.", buf, TRUE);
    840         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
    841 
    842         /*  Match, output exactly fills the buffer: no NUL terminator, so a warning status is expected. */
    843         status = U_ZERO_ERROR;
    844         uregex_setText(re, text1, -1, &status);
    845         memset(buf, -1, sizeof(buf));
    846         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
    847         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    848         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
    849         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    850         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    851 
    852         /* Do the replaceFirst again, without first resetting anything.
    853          *  Should give the same results.
    854          */
    855         status = U_ZERO_ERROR;
    856         memset(buf, -1, sizeof(buf));
    857         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
    858         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    859         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
    860         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    861         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    862 
    863         /* NULL buffer, zero buffer length */
    864         status = U_ZERO_ERROR;
    865         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
    866         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    867         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    868 
    869         /* Buffer too small by one */
    870         status = U_ZERO_ERROR;
    871         memset(buf, -1, sizeof(buf));
    872         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
    873         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    874         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
    875         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    876         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    877 
    878         uregex_close(re);
    879     }
    880 
    881 
    882     /*
    883      *  replaceAll()
    884      */
    885     {
    886         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
    887         UChar    text2[80];          /*  "No match Here"           */
    888         UChar    replText[80];       /*  "<$1>"                    */
    889         UChar    replText2[80];      /*  "<<$1>>"                  */
    890         const char * pattern = "x(.*?)x";
    891         const char * expectedResult = "Replace <aa> <1> <...>.";
    892         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
    893         UChar    buf[80];
    894         int32_t  resultSize;
    895         int32_t  expectedResultSize;
    896         int32_t  expectedResultSize2;
    897         int32_t  i;
    898 
    899         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
    900         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
    901         u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
    902         u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2);
    903         expectedResultSize = strlen(expectedResult);
    904         expectedResultSize2 = strlen(expectedResult2);
    905 
    906         status = U_ZERO_ERROR;
    907         re = uregex_openC(pattern, 0, NULL, &status);
    908         TEST_ASSERT_SUCCESS(status);
    909 
    910         /*  Normal case, with match */
    911         uregex_setText(re, text1, -1, &status);
    912         resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
    913         TEST_ASSERT_SUCCESS(status);
    914         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
    915         TEST_ASSERT(resultSize == expectedResultSize);
    916 
    917         /* No match.  Text should copy to output with no changes.  */
    918         status = U_ZERO_ERROR;
    919         uregex_setText(re, text2, -1, &status);
    920         resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
    921         TEST_ASSERT_SUCCESS(status);
    922         TEST_ASSERT_STRING("No match here.", buf, TRUE);
    923         TEST_ASSERT(resultSize == u_strlen(text2));
    924 
    925         /*  Match, output exactly fills the buffer: no NUL terminator, so a warning status is expected. */
    926         status = U_ZERO_ERROR;
    927         uregex_setText(re, text1, -1, &status);
    928         memset(buf, -1, sizeof(buf));
    929         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
    930         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    931         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
    932         TEST_ASSERT(resultSize == expectedResultSize);
    933         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
    934 
    935         /* Do the replaceAll again, without first resetting anything.
    936          *  Should give the same results.
    937          */
    938         status = U_ZERO_ERROR;
    939         memset(buf, -1, sizeof(buf));
    940         resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
    941         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    942         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
    943         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
    944         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
    945 
    946         /* NULL buffer, zero buffer length */
    947         status = U_ZERO_ERROR;
    948         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
    949         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    950         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
    951 
    952         /* Buffer too small.  Try every size, which will tickle edge cases
    953          * in uregex_appendReplacement (used by replaceAll)   */
    954         for (i=0; i<expectedResultSize; i++) {
    955             char  expected[80];
    956             status = U_ZERO_ERROR;
    957             memset(buf, -1, sizeof(buf));
    958             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
    959             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    960             strcpy(expected, expectedResult);
    961             expected[i] = 0;
    962             TEST_ASSERT_STRING(expected, buf, FALSE);
    963             TEST_ASSERT(resultSize == expectedResultSize);
    964             TEST_ASSERT(buf[i] == (UChar)0xffff);
    965         }
    966 
    967         /* Buffer too small.  Same as previous test, except this time the replacement
    968          * text is longer than the match capture group, making the length of the complete
    969          * replacement longer than the original string.
    970          */
    971         for (i=0; i<expectedResultSize2; i++) {
    972             char  expected[80];
    973             status = U_ZERO_ERROR;
    974             memset(buf, -1, sizeof(buf));
    975             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
    976             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    977             strcpy(expected, expectedResult2);
    978             expected[i] = 0;
    979             TEST_ASSERT_STRING(expected, buf, FALSE);
    980             TEST_ASSERT(resultSize == expectedResultSize2);
    981             TEST_ASSERT(buf[i] == (UChar)0xffff);
    982         }
    983 
    984 
    985         uregex_close(re);
    986     }
    987 
    988 
    989     /*
    990      *  appendReplacement()
    991      */
    992     {
    993         UChar    text[100];
    994         UChar    repl[100];
    995         UChar    buf[100];
    996         UChar   *bufPtr;
    997         int32_t  bufCap;
    998 
    999 
   1000         status = U_ZERO_ERROR;
   1001         re = uregex_openC(".*", 0, 0, &status);
   1002         TEST_ASSERT_SUCCESS(status);
   1003 
   1004         u_uastrncpy(text, "whatever",  sizeof(text)/2);
   1005         u_uastrncpy(repl, "some other", sizeof(repl)/2);
   1006         uregex_setText(re, text, -1, &status);
   1007 
   1008         /* match covers whole target string */
   1009         uregex_find(re, 0, &status);
   1010         TEST_ASSERT_SUCCESS(status);
   1011         bufPtr = buf;
   1012         bufCap = sizeof(buf) / 2;
   1013         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1014         TEST_ASSERT_SUCCESS(status);
   1015         TEST_ASSERT_STRING("some other", buf, TRUE);
   1016 
   1017         /* Replacement text has \u \U escapes */
   1018         uregex_find(re, 0, &status);
   1019         TEST_ASSERT_SUCCESS(status);
   1020         bufPtr = buf;
   1021         bufCap = sizeof(buf) / 2;
   1022         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
   1023         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1024         TEST_ASSERT_SUCCESS(status);
   1025         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
   1026 
   1027         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
   1028         status = U_ZERO_ERROR;
   1029         uregex_find(re, 0, &status);
   1030         TEST_ASSERT_SUCCESS(status);
   1031         bufPtr = buf;
   1032         status = U_BUFFER_OVERFLOW_ERROR;
   1033         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
   1034         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
   1035 
   1036         uregex_close(re);
   1037     }
   1038 
   1039 
   1040     /*
   1041      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
   1042      */
   1043 
   1044     /*
   1045      *  split()
   1046      */
   1047     {
   1048         UChar    textToSplit[80];
   1049         UChar    text2[80];
   1050         UChar    buf[200];
   1051         UChar    *fields[10];
   1052         int32_t  numFields;
   1053         int32_t  requiredCapacity;
   1054         int32_t  spaceNeeded;
   1055         int32_t  sz;
   1056 
   1057         u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
   1058         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
   1059 
   1060         status = U_ZERO_ERROR;
   1061         re = uregex_openC(":", 0, NULL, &status);
   1062 
   1063 
   1064         /*  Simple split */
   1065 
   1066         uregex_setText(re, textToSplit, -1, &status);
   1067         TEST_ASSERT_SUCCESS(status);
   1068 
   1069         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1070         if (U_SUCCESS(status)) {
   1071             memset(fields, -1, sizeof(fields));
   1072             numFields =
   1073                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
   1074             TEST_ASSERT_SUCCESS(status);
   1075 
   1076             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1077             if(U_SUCCESS(status)) {
   1078                 TEST_ASSERT(numFields == 3);
   1079                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1080                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
   1081                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
   1082                 TEST_ASSERT(fields[3] == NULL);
   1083 
   1084                 spaceNeeded = u_strlen(textToSplit) -
   1085                             (numFields - 1)  +  /* Field delimiters do not appear in output */
   1086                             numFields;          /* Each field gets a NUL terminator */
   1087 
   1088                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1089             }
   1090         }
   1091 
   1092         uregex_close(re);
   1093 
   1094 
   1095         /*  Split with too few output strings available */
   1096         status = U_ZERO_ERROR;
   1097         re = uregex_openC(":", 0, NULL, &status);
   1098         uregex_setText(re, textToSplit, -1, &status);
   1099         TEST_ASSERT_SUCCESS(status);
   1100 
   1101         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1102         if(U_SUCCESS(status)) {
   1103             memset(fields, -1, sizeof(fields));
   1104             numFields =
   1105                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
   1106             TEST_ASSERT_SUCCESS(status);
   1107 
   1108             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1109             if(U_SUCCESS(status)) {
   1110                 TEST_ASSERT(numFields == 2);
   1111                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1112                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
   1113                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
   1114 
   1115                 spaceNeeded = u_strlen(textToSplit) -
   1116                             (numFields - 1)  +  /* Field delimiters do not appear in output */
   1117                             numFields;          /* Each field gets a NUL terminator */
   1118 
   1119                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1120 
   1121                 /* Split with a range of output buffer sizes.  */
   1122                 spaceNeeded = u_strlen(textToSplit) -
   1123                     (numFields - 1)  +  /* Field delimiters do not appear in output */
   1124                     numFields;          /* Each field gets a NUL terminator */
   1125 
   1126                 for (sz=0; sz < spaceNeeded+1; sz++) {
   1127                     memset(fields, -1, sizeof(fields));
   1128                     status = U_ZERO_ERROR;
   1129                     numFields =
   1130                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
   1131                     if (sz >= spaceNeeded) {
   1132                         TEST_ASSERT_SUCCESS(status);
   1133                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1134                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
   1135                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
   1136                     } else {
   1137                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
   1138                     }
   1139                     TEST_ASSERT(numFields == 3);
   1140                     TEST_ASSERT(fields[3] == NULL);
   1141                     TEST_ASSERT(spaceNeeded == requiredCapacity);
   1142                 }
   1143             }
   1144         }
   1145 
   1146         uregex_close(re);
   1147     }
   1148 
   1149 
   1150 
   1151 
   1152     /* Split(), part 2.  Patterns with capture groups.  The capture group text
   1153      *                   comes out as additional fields.  */
   1154     {
   1155         UChar    textToSplit[80];
   1156         UChar    buf[200];
   1157         UChar    *fields[10];
   1158         int32_t  numFields;
   1159         int32_t  requiredCapacity;
   1160         int32_t  spaceNeeded;
   1161         int32_t  sz;
   1162 
   1163         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
   1164 
   1165         status = U_ZERO_ERROR;
   1166         re = uregex_openC("<(.*?)>", 0, NULL, &status);
   1167 
   1168         uregex_setText(re, textToSplit, -1, &status);
   1169         TEST_ASSERT_SUCCESS(status);
   1170 
   1171         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1172         if(U_SUCCESS(status)) {
   1173             memset(fields, -1, sizeof(fields));
   1174             numFields =
   1175                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
   1176             TEST_ASSERT_SUCCESS(status);
   1177 
   1178             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1179             if(U_SUCCESS(status)) {
   1180                 TEST_ASSERT(numFields == 5);
   1181                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1182                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1183                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1184                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1185                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
   1186                 TEST_ASSERT(fields[5] == NULL);
   1187                 spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
   1188                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1189             }
   1190         }
   1191 
   1192         /*  Split with too few output strings available (2) */
   1193         status = U_ZERO_ERROR;
   1194         memset(fields, -1, sizeof(fields));
   1195         numFields =
   1196             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
   1197         TEST_ASSERT_SUCCESS(status);
   1198 
   1199         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1200         if(U_SUCCESS(status)) {
   1201             TEST_ASSERT(numFields == 2);
   1202             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1203             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
   1204             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
   1205 
   1206             spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
   1207             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1208         }
   1209 
   1210         /*  Split with too few output strings available (3) */
   1211         status = U_ZERO_ERROR;
   1212         memset(fields, -1, sizeof(fields));
   1213         numFields =
   1214             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status);
   1215         TEST_ASSERT_SUCCESS(status);
   1216 
   1217         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1218         if(U_SUCCESS(status)) {
   1219             TEST_ASSERT(numFields == 3);
   1220             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1221             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1222             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
   1223             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
   1224 
   1225             spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
   1226             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1227         }
   1228 
   1229         /*  Split with just enough output strings available (5) */
   1230         status = U_ZERO_ERROR;
   1231         memset(fields, -1, sizeof(fields));
   1232         numFields =
   1233             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status);
   1234         TEST_ASSERT_SUCCESS(status);
   1235 
   1236         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1237         if(U_SUCCESS(status)) {
   1238             TEST_ASSERT(numFields == 5);
   1239             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1240             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1241             TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1242             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1243             TEST_ASSERT_STRING("  third", fields[4], TRUE);
   1244             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
   1245 
   1246             spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
   1247             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1248         }
   1249 
   1250         /* Split, end of text is a field delimiter.   */
   1251         status = U_ZERO_ERROR;
   1252         sz = strlen("first <tag-a> second<tag-b>");
   1253         uregex_setText(re, textToSplit, sz, &status);
   1254         TEST_ASSERT_SUCCESS(status);
   1255 
   1256         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1257         if(U_SUCCESS(status)) {
   1258             memset(fields, -1, sizeof(fields));
   1259             numFields =
   1260                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status);
   1261             TEST_ASSERT_SUCCESS(status);
   1262 
   1263             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1264             if(U_SUCCESS(status)) {
   1265                 TEST_ASSERT(numFields == 5);
   1266                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1267                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1268                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1269                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1270                 TEST_ASSERT_STRING("",        fields[4], TRUE);
   1271                 TEST_ASSERT(fields[5] == NULL);
   1272                 TEST_ASSERT(fields[8] == NULL);
   1273                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
   1274                 spaceNeeded = strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
   1275                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1276             }
   1277         }
   1278 
   1279         uregex_close(re);
   1280     }
   1281 
   1282     /*
   1283      * set/getTimeLimit
   1284      */
   1285      TEST_SETUP("abc$", "abcdef", 0);
   1286      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
   1287      uregex_setTimeLimit(re, 1000, &status);
   1288      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
   1289      TEST_ASSERT_SUCCESS(status);
   1290      uregex_setTimeLimit(re, -1, &status);
   1291      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
   1292      status = U_ZERO_ERROR;
   1293      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
   1294      TEST_TEARDOWN;
   1295 
   1296      /*
   1297       * set/get Stack Limit
   1298       */
   1299      TEST_SETUP("abc$", "abcdef", 0);
   1300      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
   1301      uregex_setStackLimit(re, 40000, &status);
   1302      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
   1303      TEST_ASSERT_SUCCESS(status);
   1304      uregex_setStackLimit(re, -1, &status);
   1305      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
   1306      status = U_ZERO_ERROR;
   1307      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
   1308      TEST_TEARDOWN;
   1309 
   1310 
   1311      /*
   1312       * Get/Set callback functions
   1313       *     This test is copied from intltest regex/Callbacks
   1314       *     The pattern and test data will run long enough to cause the callback
   1315       *       to be invoked.  The nested '+' operators give exponential time
   1316       *       behavior with increasing string length.
   1317       */
   1318      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
   1319      callBackContext cbInfo = {4, 0, 0};
   1320      const void     *pContext   = &cbInfo;
   1321      URegexMatchCallback    *returnedFn = &TestCallbackFn;
   1322 
   1323      /*  Getting the callback fn when it hasn't been set must return NULL  */
   1324      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
   1325      TEST_ASSERT_SUCCESS(status);
   1326      TEST_ASSERT(returnedFn == NULL);
   1327      TEST_ASSERT(pContext == NULL);
   1328 
   1329      /* Set the callback and do a match.                                  */
   1330      /* The callback function should record that it has been called.      */
   1331      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
   1332      TEST_ASSERT_SUCCESS(status);
   1333      TEST_ASSERT(cbInfo.numCalls == 0);
   1334      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
   1335      TEST_ASSERT_SUCCESS(status);
   1336      TEST_ASSERT(cbInfo.numCalls > 0);
   1337 
   1338      /* Getting the callback should return the values that were set above.  */
   1339      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
   1340      TEST_ASSERT(returnedFn == &TestCallbackFn);
   1341      TEST_ASSERT(pContext == &cbInfo);
   1342 
   1343      TEST_TEARDOWN;
   1344 }
   1345 
   1346 
   1347 
   1348 static void TestBug4315(void) {
            /*
             * Test named for bug 4315.  Splits a fixed sentence on the pattern "ck "
             * with uregex_split() in two passes:
             *   1. NULL destination buffer and zero capacity: expects
             *      U_BUFFER_OVERFLOW_ERROR, a field count of 3, and stores the
             *      reported buffer length in neededLength1.
             *   2. A malloc'd buffer of neededLength1 + 1 UChars: expects success,
             *      the same reported length, and the three expected field strings.
             * Takes no parameters and returns nothing; failures are reported
             * through the TEST_ASSERT* macros.
             */
   1349     UErrorCode      theICUError = U_ZERO_ERROR;
   1350     URegularExpression *theRegEx;
   1351     UChar           *textBuff;          /* heap buffer for the second split; malloc'd and freed below */
   1352     const char      *thePattern;
   1353     UChar            theString[100];    /* input text, filled from a C string literal below */
   1354     UChar           *destFields[24];    /* field pointers written by uregex_split() */
   1355     int32_t         neededLength1;      /* length reported by the NULL-buffer split */
   1356     int32_t         neededLength2;      /* length reported by the real-buffer split */
   1357 
   1358     int32_t         wordCount = 0;      /* return value of uregex_split() */
   1359     int32_t         destFieldsSize = 24;   /* same as the dimension of destFields[] */
   1360 
   1361     thePattern  = "ck ";
   1362     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
   1363 
   1364     /* open a regex */
   1365     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
   1366     TEST_ASSERT_SUCCESS(theICUError);
   1367 
   1368     /* set the input string */
   1369     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
   1370     TEST_ASSERT_SUCCESS(theICUError);
   1371 
   1372     /* split */
   1373     /*explicitly pass NULL and 0 to force the overflow error -> this is where the
   1374      *  error occurs! */
   1375     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
   1376         destFieldsSize, &theICUError);
   1377 
            /* The sizing call must report overflow and still count all three fields. */
   1378     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
   1379     TEST_ASSERT(wordCount==3);
   1380 
            /* Only attempt the real split when the sizing call behaved as expected,
             * since neededLength1 is used to size the buffer. */
   1381     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
   1382     {
   1383         theICUError = U_ZERO_ERROR;     /* clear the expected overflow before retrying */
   1384         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
                /* NOTE(review): the malloc() result is not checked for NULL before it is
                 * passed to uregex_split() below. */
   1385         wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
   1386             destFields, destFieldsSize, &theICUError);
   1387         TEST_ASSERT(wordCount==3);
   1388         TEST_ASSERT_SUCCESS(theICUError);
   1389         TEST_ASSERT(neededLength1 == neededLength2);   /* both passes must report the same length */
   1390         TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
   1391         TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
   1392         TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
   1393         TEST_ASSERT(destFields[3] == NULL);            /* slot after the last field is expected to be NULL */
   1394         free(textBuff);
   1395     }
   1396     uregex_close(theRegEx);
   1397 }
   1398 
   1399 /* Based on TestRegexCAPI() */
   1400 static void TestUTextAPI(void) {
   1401     UErrorCode           status = U_ZERO_ERROR;
   1402     URegularExpression  *re;
   1403     UText                patternText = UTEXT_INITIALIZER;
   1404     UChar                pat[200];
   1405     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
   1406 
   1407     /* Minimalist open/close */
   1408     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
   1409     re = uregex_openUText(&patternText, 0, 0, &status);
   1410     if (U_FAILURE(status)) {
   1411          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
   1412          utext_close(&patternText);
   1413          return;
   1414     }
   1415     uregex_close(re);
   1416 
   1417     /* Open with all flag values set */
   1418     status = U_ZERO_ERROR;
   1419     re = uregex_openUText(&patternText,
   1420         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
   1421         0, &status);
   1422     TEST_ASSERT_SUCCESS(status);
   1423     uregex_close(re);
   1424 
   1425     /* Open with an invalid flag */
   1426     status = U_ZERO_ERROR;
   1427     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
   1428     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
   1429     uregex_close(re);
   1430 
   1431     /* open with an invalid parameter */
   1432     status = U_ZERO_ERROR;
   1433     re = uregex_openUText(NULL,
   1434         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
   1435     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
   1436 
   1437     /*
   1438      *  clone
   1439      */
   1440     {
   1441         URegularExpression *clone1;
   1442         URegularExpression *clone2;
   1443         URegularExpression *clone3;
   1444         UChar  testString1[30];
   1445         UChar  testString2[30];
   1446         UBool  result;
   1447 
   1448 
   1449         status = U_ZERO_ERROR;
   1450         re = uregex_openUText(&patternText, 0, 0, &status);
   1451         TEST_ASSERT_SUCCESS(status);
   1452         clone1 = uregex_clone(re, &status);
   1453         TEST_ASSERT_SUCCESS(status);
   1454         TEST_ASSERT(clone1 != NULL);
   1455 
   1456         status = U_ZERO_ERROR;
   1457         clone2 = uregex_clone(re, &status);
   1458         TEST_ASSERT_SUCCESS(status);
   1459         TEST_ASSERT(clone2 != NULL);
   1460         uregex_close(re);
   1461 
   1462         status = U_ZERO_ERROR;
   1463         clone3 = uregex_clone(clone2, &status);
   1464         TEST_ASSERT_SUCCESS(status);
   1465         TEST_ASSERT(clone3 != NULL);
   1466 
   1467         u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
   1468         u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
   1469 
   1470         status = U_ZERO_ERROR;
   1471         uregex_setText(clone1, testString1, -1, &status);
   1472         TEST_ASSERT_SUCCESS(status);
   1473         result = uregex_lookingAt(clone1, 0, &status);
   1474         TEST_ASSERT_SUCCESS(status);
   1475         TEST_ASSERT(result==TRUE);
   1476 
   1477         status = U_ZERO_ERROR;
   1478         uregex_setText(clone2, testString2, -1, &status);
   1479         TEST_ASSERT_SUCCESS(status);
   1480         result = uregex_lookingAt(clone2, 0, &status);
   1481         TEST_ASSERT_SUCCESS(status);
   1482         TEST_ASSERT(result==FALSE);
   1483         result = uregex_find(clone2, 0, &status);
   1484         TEST_ASSERT_SUCCESS(status);
   1485         TEST_ASSERT(result==TRUE);
   1486 
   1487         uregex_close(clone1);
   1488         uregex_close(clone2);
   1489         uregex_close(clone3);
   1490 
   1491     }
   1492 
   1493     /*
   1494      *  pattern() and patternText()
   1495      */
   1496     {
   1497         const UChar  *resultPat;
   1498         int32_t       resultLen;
   1499         UText        *resultText;
   1500         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
   1501         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
   1502         u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */
   1503         status = U_ZERO_ERROR;
   1504 
   1505         utext_openUTF8(&patternText, str_hello, -1, &status);
   1506         re = uregex_open(pat, -1, 0, NULL, &status);
   1507         resultPat = uregex_pattern(re, &resultLen, &status);
   1508         TEST_ASSERT_SUCCESS(status);
   1509 
   1510         /* The TEST_ASSERT_SUCCESS above should change too... */
   1511         if (U_SUCCESS(status)) {
   1512             TEST_ASSERT(resultLen == -1);
   1513             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
   1514         }
   1515 
   1516         resultText = uregex_patternUText(re, &status);
   1517         TEST_ASSERT_SUCCESS(status);
   1518         TEST_ASSERT_UTEXT(str_hello, resultText);
   1519 
   1520         uregex_close(re);
   1521 
   1522         status = U_ZERO_ERROR;
   1523         re = uregex_open(pat, 3, 0, NULL, &status);
   1524         resultPat = uregex_pattern(re, &resultLen, &status);
   1525         TEST_ASSERT_SUCCESS(status);
   1526 
   1527         /* The TEST_ASSERT_SUCCESS above should change too... */
   1528         if (U_SUCCESS(status)) {
   1529             TEST_ASSERT(resultLen == 3);
   1530             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
   1531             TEST_ASSERT(u_strlen(resultPat) == 3);
   1532         }
   1533 
   1534         resultText = uregex_patternUText(re, &status);
   1535         TEST_ASSERT_SUCCESS(status);
   1536         TEST_ASSERT_UTEXT(str_hel, resultText);
   1537 
   1538         uregex_close(re);
   1539     }
   1540 
   1541     /*
   1542      *  setUText() and lookingAt()
   1543      */
   1544     {
   1545         UText  text1 = UTEXT_INITIALIZER;
   1546         UText  text2 = UTEXT_INITIALIZER;
   1547         UBool  result;
   1548         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
   1549         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
   1550         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
   1551         status = U_ZERO_ERROR;
   1552         utext_openUTF8(&text1, str_abcccd, -1, &status);
   1553         utext_openUTF8(&text2, str_abcccxd, -1, &status);
   1554 
   1555         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1556         re = uregex_openUText(&patternText, 0, NULL, &status);
   1557         TEST_ASSERT_SUCCESS(status);
   1558 
   1559         /* Operation before doing a setText should fail... */
   1560         status = U_ZERO_ERROR;
   1561         uregex_lookingAt(re, 0, &status);
   1562         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
   1563 
   1564         status = U_ZERO_ERROR;
   1565         uregex_setUText(re, &text1, &status);
   1566         result = uregex_lookingAt(re, 0, &status);
   1567         TEST_ASSERT(result == TRUE);
   1568         TEST_ASSERT_SUCCESS(status);
   1569 
   1570         status = U_ZERO_ERROR;
   1571         uregex_setUText(re, &text2, &status);
   1572         result = uregex_lookingAt(re, 0, &status);
   1573         TEST_ASSERT(result == FALSE);
   1574         TEST_ASSERT_SUCCESS(status);
   1575 
   1576         status = U_ZERO_ERROR;
   1577         uregex_setUText(re, &text1, &status);
   1578         result = uregex_lookingAt(re, 0, &status);
   1579         TEST_ASSERT(result == TRUE);
   1580         TEST_ASSERT_SUCCESS(status);
   1581 
   1582         uregex_close(re);
   1583         utext_close(&text1);
   1584         utext_close(&text2);
   1585     }
   1586 
   1587 
   1588     /*
   1589      *  getText() and getUText()
   1590      */
   1591     {
   1592         UText  text1 = UTEXT_INITIALIZER;
   1593         UText  text2 = UTEXT_INITIALIZER;
   1594         UChar  text2Chars[20];
   1595         UText  *resultText;
   1596         const UChar   *result;
   1597         int32_t  textLength;
   1598         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
   1599         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
   1600         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
   1601 
   1602 
   1603         status = U_ZERO_ERROR;
   1604         utext_openUTF8(&text1, str_abcccd, -1, &status);
   1605         u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2);
   1606         utext_openUChars(&text2, text2Chars, -1, &status);
   1607 
   1608         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1609         re = uregex_openUText(&patternText, 0, NULL, &status);
   1610 
   1611         /* First set a UText */
   1612         uregex_setUText(re, &text1, &status);
   1613         resultText = uregex_getUText(re, NULL, &status);
   1614         TEST_ASSERT_SUCCESS(status);
   1615         TEST_ASSERT(resultText != &text1);
   1616         utext_setNativeIndex(resultText, 0);
   1617         utext_setNativeIndex(&text1, 0);
   1618         TEST_ASSERT(testUTextEqual(resultText, &text1));
   1619         utext_close(resultText);
   1620 
   1621         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
   1622         (void)result;    /* Suppress set but not used warning. */
   1623         TEST_ASSERT(textLength == -1 || textLength == 6);
   1624         resultText = uregex_getUText(re, NULL, &status);
   1625         TEST_ASSERT_SUCCESS(status);
   1626         TEST_ASSERT(resultText != &text1);
   1627         utext_setNativeIndex(resultText, 0);
   1628         utext_setNativeIndex(&text1, 0);
   1629         TEST_ASSERT(testUTextEqual(resultText, &text1));
   1630         utext_close(resultText);
   1631 
   1632         /* Then set a UChar * */
   1633         uregex_setText(re, text2Chars, 7, &status);
   1634         resultText = uregex_getUText(re, NULL, &status);
   1635         TEST_ASSERT_SUCCESS(status);
   1636         utext_setNativeIndex(resultText, 0);
   1637         utext_setNativeIndex(&text2, 0);
   1638         TEST_ASSERT(testUTextEqual(resultText, &text2));
   1639         utext_close(resultText);
   1640         result = uregex_getText(re, &textLength, &status);
   1641         TEST_ASSERT(textLength == 7);
   1642 
   1643         uregex_close(re);
   1644         utext_close(&text1);
   1645         utext_close(&text2);
   1646     }
   1647 
   1648     /*
   1649      *  matches()
   1650      */
   1651     {
   1652         UText   text1 = UTEXT_INITIALIZER;
   1653         UBool   result;
   1654         UText   nullText = UTEXT_INITIALIZER;
   1655         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
   1656         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
   1657 
   1658         status = U_ZERO_ERROR;
   1659         utext_openUTF8(&text1, str_abcccde, -1, &status);
   1660         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1661         re = uregex_openUText(&patternText, 0, NULL, &status);
   1662 
   1663         uregex_setUText(re, &text1, &status);
   1664         result = uregex_matches(re, 0, &status);
   1665         TEST_ASSERT(result == FALSE);
   1666         TEST_ASSERT_SUCCESS(status);
   1667         uregex_close(re);
   1668 
   1669         status = U_ZERO_ERROR;
   1670         re = uregex_openC(".?", 0, NULL, &status);
   1671         uregex_setUText(re, &text1, &status);
   1672         result = uregex_matches(re, 7, &status);
   1673         TEST_ASSERT(result == TRUE);
   1674         TEST_ASSERT_SUCCESS(status);
   1675 
   1676         status = U_ZERO_ERROR;
   1677         utext_openUTF8(&nullText, "", -1, &status);
   1678         uregex_setUText(re, &nullText, &status);
   1679         TEST_ASSERT_SUCCESS(status);
   1680         result = uregex_matches(re, 0, &status);
   1681         TEST_ASSERT(result == TRUE);
   1682         TEST_ASSERT_SUCCESS(status);
   1683 
   1684         uregex_close(re);
   1685         utext_close(&text1);
   1686         utext_close(&nullText);
   1687     }
   1688 
   1689 
   1690     /*
   1691      *  lookingAt()    Exercised in the setUText() test above.
   1692      */
   1693 
   1694 
   1695     /*
   1696      *  find(), findNext, start, end, reset
   1697      */
   1698     {
   1699         UChar    text1[50];
   1700         UBool    result;
   1701         u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
   1702         status = U_ZERO_ERROR;
   1703         re = uregex_openC("rx", 0, NULL, &status);
   1704 
   1705         uregex_setText(re, text1, -1, &status);
   1706         result = uregex_find(re, 0, &status);
   1707         TEST_ASSERT(result == TRUE);
   1708         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
   1709         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
   1710         TEST_ASSERT_SUCCESS(status);
   1711 
   1712         result = uregex_find(re, 9, &status);
   1713         TEST_ASSERT(result == TRUE);
   1714         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
   1715         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
   1716         TEST_ASSERT_SUCCESS(status);
   1717 
   1718         result = uregex_find(re, 14, &status);
   1719         TEST_ASSERT(result == FALSE);
   1720         TEST_ASSERT_SUCCESS(status);
   1721 
   1722         status = U_ZERO_ERROR;
   1723         uregex_reset(re, 0, &status);
   1724 
   1725         result = uregex_findNext(re, &status);
   1726         TEST_ASSERT(result == TRUE);
   1727         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
   1728         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
   1729         TEST_ASSERT_SUCCESS(status);
   1730 
   1731         result = uregex_findNext(re, &status);
   1732         TEST_ASSERT(result == TRUE);
   1733         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
   1734         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
   1735         TEST_ASSERT_SUCCESS(status);
   1736 
   1737         status = U_ZERO_ERROR;
   1738         uregex_reset(re, 12, &status);
   1739 
   1740         result = uregex_findNext(re, &status);
   1741         TEST_ASSERT(result == TRUE);
   1742         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
   1743         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
   1744         TEST_ASSERT_SUCCESS(status);
   1745 
   1746         result = uregex_findNext(re, &status);
   1747         TEST_ASSERT(result == FALSE);
   1748         TEST_ASSERT_SUCCESS(status);
   1749 
   1750         uregex_close(re);
   1751     }
   1752 
   1753     /*
   1754      *  group()
   1755      */
   1756     {
   1757         UChar    text1[80];
   1758         UText   *actual;
   1759         UBool    result;
   1760 
   1761         const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */
   1762         const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
   1763 
   1764 
   1765         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
   1766 
   1767         status = U_ZERO_ERROR;
   1768         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
   1769         TEST_ASSERT_SUCCESS(status);
   1770 
   1771         uregex_setText(re, text1, -1, &status);
   1772         result = uregex_find(re, 0, &status);
   1773         TEST_ASSERT(result==TRUE);
   1774 
   1775         /*  Capture Group 0, the full match.  Should succeed.  */
   1776         status = U_ZERO_ERROR;
   1777         actual = uregex_groupUTextDeep(re, 0, NULL, &status);
   1778         TEST_ASSERT_SUCCESS(status);
   1779         TEST_ASSERT_UTEXT(str_abcinteriordef, actual);
   1780         utext_close(actual);
   1781 
   1782         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
   1783         status = U_ZERO_ERROR;
   1784         {
   1785             int64_t      group_len;
   1786             int32_t      len16;
   1787             UErrorCode   shallowStatus = U_ZERO_ERROR;
   1788             int64_t      nativeIndex;
   1789             UChar *groupChars;
   1790             UText groupText = UTEXT_INITIALIZER;
   1791 
   1792             actual = uregex_groupUText(re, 0, NULL, &group_len, &status);
   1793             TEST_ASSERT_SUCCESS(status);
   1794 
   1795             nativeIndex = utext_getNativeIndex(actual);
   1796             /*  Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug in ucstrFuncs UTextFuncs [utext.cpp]  */
   1797             /*  len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus);  */
   1798             len16 = (int32_t)group_len;
   1799 
   1800             groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1));
   1801             utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus);
   1802 
   1803             utext_openUChars(&groupText, groupChars, len16, &shallowStatus);
   1804 
   1805             TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText);
   1806             utext_close(&groupText);
   1807             free(groupChars);
   1808         }
   1809         utext_close(actual);
   1810 
   1811         /*  Capture group #1.  Should succeed. */
   1812         status = U_ZERO_ERROR;
   1813         actual = uregex_groupUTextDeep(re, 1, NULL, &status);
   1814         TEST_ASSERT_SUCCESS(status);
   1815         TEST_ASSERT_UTEXT(str_interior, actual);
   1816         utext_close(actual);
   1817 
   1818         /*  Capture group out of range.  Error. */
   1819         status = U_ZERO_ERROR;
   1820         actual = uregex_groupUTextDeep(re, 2, NULL, &status);
   1821         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
   1822         TEST_ASSERT(utext_nativeLength(actual) == 0);
   1823         utext_close(actual);
   1824 
   1825         uregex_close(re);
   1826 
   1827     }
   1828 
   1829     /*
   1830      *  replaceFirst()
   1831      */
   1832     {
   1833         UChar    text1[80];
   1834         UChar    text2[80];
   1835         UText    replText = UTEXT_INITIALIZER;
   1836         UText   *result;
   1837         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
   1838         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
   1839         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */
   1840         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
   1841         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
   1842         status = U_ZERO_ERROR;
   1843         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
   1844         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
   1845         utext_openUTF8(&replText, str_1x, -1, &status);
   1846 
   1847         re = uregex_openC("x(.*?)x", 0, NULL, &status);
   1848         TEST_ASSERT_SUCCESS(status);
   1849 
   1850         /*  Normal case, with match */
   1851         uregex_setText(re, text1, -1, &status);
   1852         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1853         TEST_ASSERT_SUCCESS(status);
   1854         TEST_ASSERT_UTEXT(str_Replxxx, result);
   1855         utext_close(result);
   1856 
   1857         /* No match.  Text should copy to output with no changes.  */
   1858         uregex_setText(re, text2, -1, &status);
   1859         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1860         TEST_ASSERT_SUCCESS(status);
   1861         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
   1862         utext_close(result);
   1863 
   1864         /* Unicode escapes */
   1865         uregex_setText(re, text1, -1, &status);
   1866         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
   1867         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1868         TEST_ASSERT_SUCCESS(status);
   1869         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
   1870         utext_close(result);
   1871 
   1872         uregex_close(re);
   1873         utext_close(&replText);
   1874     }
   1875 
   1876 
   1877     /*
   1878      *  replaceAll()
   1879      */
   1880     {
   1881         UChar    text1[80];
   1882         UChar    text2[80];
   1883         UText    replText = UTEXT_INITIALIZER;
   1884         UText   *result;
   1885         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
   1886         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
   1887         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
   1888         status = U_ZERO_ERROR;
   1889         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
   1890         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
   1891         utext_openUTF8(&replText, str_1, -1, &status);
   1892 
   1893         re = uregex_openC("x(.*?)x", 0, NULL, &status);
   1894         TEST_ASSERT_SUCCESS(status);
   1895 
   1896         /*  Normal case, with match */
   1897         uregex_setText(re, text1, -1, &status);
   1898         result = uregex_replaceAllUText(re, &replText, NULL, &status);
   1899         TEST_ASSERT_SUCCESS(status);
   1900         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
   1901         utext_close(result);
   1902 
   1903         /* No match.  Text should copy to output with no changes.  */
   1904         uregex_setText(re, text2, -1, &status);
   1905         result = uregex_replaceAllUText(re, &replText, NULL, &status);
   1906         TEST_ASSERT_SUCCESS(status);
   1907         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
   1908         utext_close(result);
   1909 
   1910         uregex_close(re);
   1911         utext_close(&replText);
   1912     }
   1913 
   1914 
   1915     /*
   1916      *  appendReplacement()
   1917      */
   1918     {
   1919         UChar    text[100];
   1920         UChar    repl[100];
   1921         UChar    buf[100];
   1922         UChar   *bufPtr;
   1923         int32_t  bufCap;
   1924 
   1925         status = U_ZERO_ERROR;
   1926         re = uregex_openC(".*", 0, 0, &status);
   1927         TEST_ASSERT_SUCCESS(status);
   1928 
   1929         u_uastrncpy(text, "whatever",  sizeof(text)/2);
   1930         u_uastrncpy(repl, "some other", sizeof(repl)/2);
   1931         uregex_setText(re, text, -1, &status);
   1932 
   1933         /* match covers whole target string */
   1934         uregex_find(re, 0, &status);
   1935         TEST_ASSERT_SUCCESS(status);
   1936         bufPtr = buf;
   1937         bufCap = sizeof(buf) / 2;
   1938         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1939         TEST_ASSERT_SUCCESS(status);
   1940         TEST_ASSERT_STRING("some other", buf, TRUE);
   1941 
   1942         /* Replacement string has \u \U escapes */
   1943         uregex_find(re, 0, &status);
   1944         TEST_ASSERT_SUCCESS(status);
   1945         bufPtr = buf;
   1946         bufCap = sizeof(buf) / 2;
   1947         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
   1948         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1949         TEST_ASSERT_SUCCESS(status);
   1950         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
   1951 
   1952         uregex_close(re);
   1953     }
   1954 
   1955 
   1956     /*
   1957      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
   1958      */
   1959 
   1960     /*
   1961      *  splitUText()
   1962      */
   1963     {
   1964         UChar    textToSplit[80];
   1965         UChar    text2[80];
   1966         UText    *fields[10];
   1967         int32_t  numFields;
   1968         int32_t i;
   1969 
   1970         u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
   1971         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
   1972 
   1973         status = U_ZERO_ERROR;
   1974         re = uregex_openC(":", 0, NULL, &status);
   1975 
   1976 
   1977         /*  Simple split */
   1978 
   1979         uregex_setText(re, textToSplit, -1, &status);
   1980         TEST_ASSERT_SUCCESS(status);
   1981 
   1982         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1983         if (U_SUCCESS(status)) {
   1984             memset(fields, 0, sizeof(fields));
   1985             numFields = uregex_splitUText(re, fields, 10, &status);
   1986             TEST_ASSERT_SUCCESS(status);
   1987 
   1988             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1989             if(U_SUCCESS(status)) {
   1990               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
   1991               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
   1992               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
   1993                 TEST_ASSERT(numFields == 3);
   1994                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   1995                 TEST_ASSERT_UTEXT(str_second, fields[1]);
   1996                 TEST_ASSERT_UTEXT(str_third, fields[2]);
   1997                 TEST_ASSERT(fields[3] == NULL);
   1998             }
   1999             for(i = 0; i < numFields; i++) {
   2000                 utext_close(fields[i]);
   2001             }
   2002         }
   2003 
   2004         uregex_close(re);
   2005 
   2006 
   2007         /*  Split with too few output strings available */
   2008         status = U_ZERO_ERROR;
   2009         re = uregex_openC(":", 0, NULL, &status);
   2010         uregex_setText(re, textToSplit, -1, &status);
   2011         TEST_ASSERT_SUCCESS(status);
   2012 
   2013         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2014         if(U_SUCCESS(status)) {
   2015             fields[0] = NULL;
   2016             fields[1] = NULL;
   2017             fields[2] = &patternText;
   2018             numFields = uregex_splitUText(re, fields, 2, &status);
   2019             TEST_ASSERT_SUCCESS(status);
   2020 
   2021             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2022             if(U_SUCCESS(status)) {
   2023                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2024                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
   2025                 TEST_ASSERT(numFields == 2);
   2026                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2027                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
   2028                 TEST_ASSERT(fields[2] == &patternText);
   2029             }
   2030             for(i = 0; i < numFields; i++) {
   2031                 utext_close(fields[i]);
   2032             }
   2033         }
   2034 
   2035         uregex_close(re);
   2036     }
   2037 
   2038     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
   2039      *                   comes out as additional fields.  */
   2040     {
   2041         UChar    textToSplit[80];
   2042         UText    *fields[10];
   2043         int32_t  numFields;
   2044         int32_t i;
   2045 
   2046         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
   2047 
   2048         status = U_ZERO_ERROR;
   2049         re = uregex_openC("<(.*?)>", 0, NULL, &status);
   2050 
   2051         uregex_setText(re, textToSplit, -1, &status);
   2052         TEST_ASSERT_SUCCESS(status);
   2053 
   2054         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2055         if(U_SUCCESS(status)) {
   2056             memset(fields, 0, sizeof(fields));
   2057             numFields = uregex_splitUText(re, fields, 10, &status);
   2058             TEST_ASSERT_SUCCESS(status);
   2059 
   2060             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2061             if(U_SUCCESS(status)) {
   2062                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2063                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2064                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2065                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2066                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
   2067 
   2068                 TEST_ASSERT(numFields == 5);
   2069                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2070                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2071                 TEST_ASSERT_UTEXT(str_second, fields[2]);
   2072                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2073                 TEST_ASSERT_UTEXT(str_third, fields[4]);
   2074                 TEST_ASSERT(fields[5] == NULL);
   2075             }
   2076             for(i = 0; i < numFields; i++) {
   2077                 utext_close(fields[i]);
   2078             }
   2079         }
   2080 
   2081         /*  Split with too few output strings available (2) */
   2082         status = U_ZERO_ERROR;
   2083         fields[0] = NULL;
   2084         fields[1] = NULL;
   2085         fields[2] = &patternText;
   2086         numFields = uregex_splitUText(re, fields, 2, &status);
   2087         TEST_ASSERT_SUCCESS(status);
   2088 
   2089         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2090         if(U_SUCCESS(status)) {
   2091             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2092             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
   2093             TEST_ASSERT(numFields == 2);
   2094             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2095             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
   2096             TEST_ASSERT(fields[2] == &patternText);
   2097         }
   2098         for(i = 0; i < numFields; i++) {
   2099             utext_close(fields[i]);
   2100         }
   2101 
   2102 
   2103         /*  Split with too few output strings available (3) */
   2104         status = U_ZERO_ERROR;
   2105         fields[0] = NULL;
   2106         fields[1] = NULL;
   2107         fields[2] = NULL;
   2108         fields[3] = &patternText;
   2109         numFields = uregex_splitUText(re, fields, 3, &status);
   2110         TEST_ASSERT_SUCCESS(status);
   2111 
   2112         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2113         if(U_SUCCESS(status)) {
   2114             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2115             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2116             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
   2117             TEST_ASSERT(numFields == 3);
   2118             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2119             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2120             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
   2121             TEST_ASSERT(fields[3] == &patternText);
   2122         }
   2123         for(i = 0; i < numFields; i++) {
   2124             utext_close(fields[i]);
   2125         }
   2126 
   2127         /*  Split with just enough output strings available (5) */
   2128         status = U_ZERO_ERROR;
   2129         fields[0] = NULL;
   2130         fields[1] = NULL;
   2131         fields[2] = NULL;
   2132         fields[3] = NULL;
   2133         fields[4] = NULL;
   2134         fields[5] = &patternText;
   2135         numFields = uregex_splitUText(re, fields, 5, &status);
   2136         TEST_ASSERT_SUCCESS(status);
   2137 
   2138         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2139         if(U_SUCCESS(status)) {
   2140             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2141             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2142             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2143             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2144             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
   2145 
   2146             TEST_ASSERT(numFields == 5);
   2147             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2148             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2149             TEST_ASSERT_UTEXT(str_second, fields[2]);
   2150             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2151             TEST_ASSERT_UTEXT(str_third, fields[4]);
   2152             TEST_ASSERT(fields[5] == &patternText);
   2153         }
   2154         for(i = 0; i < numFields; i++) {
   2155             utext_close(fields[i]);
   2156         }
   2157 
   2158         /* Split, end of text is a field delimiter.   */
   2159         status = U_ZERO_ERROR;
   2160         uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
   2161         TEST_ASSERT_SUCCESS(status);
   2162 
   2163         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2164         if(U_SUCCESS(status)) {
   2165             memset(fields, 0, sizeof(fields));
   2166             fields[9] = &patternText;
   2167             numFields = uregex_splitUText(re, fields, 9, &status);
   2168             TEST_ASSERT_SUCCESS(status);
   2169 
   2170             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2171             if(U_SUCCESS(status)) {
   2172                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2173                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2174                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2175                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2176                 const char str_empty[] = { 0x00 };
   2177 
   2178                 TEST_ASSERT(numFields == 5);
   2179                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2180                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2181                 TEST_ASSERT_UTEXT(str_second, fields[2]);
   2182                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2183                 TEST_ASSERT_UTEXT(str_empty,  fields[4]);
   2184                 TEST_ASSERT(fields[5] == NULL);
   2185                 TEST_ASSERT(fields[8] == NULL);
   2186                 TEST_ASSERT(fields[9] == &patternText);
   2187             }
   2188             for(i = 0; i < numFields; i++) {
   2189                 utext_close(fields[i]);
   2190             }
   2191         }
   2192 
   2193         uregex_close(re);
   2194     }
   2195     utext_close(&patternText);
   2196 }
   2197 
   2198 
   2199 static void TestRefreshInput(void) {
   2200     /*
   2201      *  RefreshInput changes out the input of a URegularExpression without
   2202      *    changing anything else in the match state.  Used with Java JNI,
   2203      *    when Java moves the underlying string storage.   This test
   2204      *    runs a find() loop, moving the text after the first match.
   2205      *    The right number of matches should still be found.
   2206      */
   2207     UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
   2208     UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
   2209     UErrorCode status = U_ZERO_ERROR;
   2210     URegularExpression *re;
   2211     UText ut1 = UTEXT_INITIALIZER;
   2212     UText ut2 = UTEXT_INITIALIZER;
   2213 
   2214     re = uregex_openC("[ABC]", 0, 0, &status);
   2215     TEST_ASSERT_SUCCESS(status);
   2216 
   2217     utext_openUChars(&ut1, testStr, -1, &status);
   2218     TEST_ASSERT_SUCCESS(status);
   2219     uregex_setUText(re, &ut1, &status);
   2220     TEST_ASSERT_SUCCESS(status);
   2221 
   2222     /* Find the first match "A" in the original string */
   2223     TEST_ASSERT(uregex_findNext(re, &status));
   2224     TEST_ASSERT(uregex_start(re, 0, &status) == 0);
   2225 
   2226     /* Move the string, kill the original string.  */
   2227     u_strcpy(movedStr, testStr);
   2228     u_memset(testStr, 0, u_strlen(testStr));
   2229     utext_openUChars(&ut2, movedStr, -1, &status);
   2230     TEST_ASSERT_SUCCESS(status);
   2231     uregex_refreshUText(re, &ut2, &status);
   2232     TEST_ASSERT_SUCCESS(status);
   2233 
   2234     /* Find the following two matches, now working in the moved string. */
   2235     TEST_ASSERT(uregex_findNext(re, &status));
   2236     TEST_ASSERT(uregex_start(re, 0, &status) == 2);
   2237     TEST_ASSERT(uregex_findNext(re, &status));
   2238     TEST_ASSERT(uregex_start(re, 0, &status) == 4);
   2239     TEST_ASSERT(FALSE == uregex_findNext(re, &status));
   2240 
   2241     uregex_close(re);
   2242 }
   2243 
   2244 
   2245 static void TestBug8421(void) {
   2246     /* Bug 8421:  setTimeLimit on a regular expresssion before setting text to be matched
   2247      *             was failing.
   2248      */
   2249     URegularExpression *re;
   2250     UErrorCode status = U_ZERO_ERROR;
   2251     int32_t  limit = -1;
   2252 
   2253     re = uregex_openC("abc", 0, 0, &status);
   2254     TEST_ASSERT_SUCCESS(status);
   2255 
   2256     limit = uregex_getTimeLimit(re, &status);
   2257     TEST_ASSERT_SUCCESS(status);
   2258     TEST_ASSERT(limit == 0);
   2259 
   2260     uregex_setTimeLimit(re, 100, &status);
   2261     TEST_ASSERT_SUCCESS(status);
   2262     limit = uregex_getTimeLimit(re, &status);
   2263     TEST_ASSERT_SUCCESS(status);
   2264     TEST_ASSERT(limit == 100);
   2265 
   2266     uregex_close(re);
   2267 }
   2268 
   2269 
   2270 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
   2271