Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 2004-2010, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /********************************************************************************
      7 *
      8 * File reapits.c
      9 *
     10 *********************************************************************************/
     11 /*C API TEST FOR Regular Expressions */
     12 /**
     13 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
     14 *   try to test the full functionality.  It just calls each function and verifies that it
     15 *   works on a basic level.
     16 *
     17 *   More complete testing of regular expression functionality is done with the C++ tests.
     18 **/
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     23 
     24 #include <stdlib.h>
     25 #include <string.h>
     26 #include "unicode/uloc.h"
     27 #include "unicode/uregex.h"
     28 #include "unicode/ustring.h"
     29 #include "unicode/utext.h"
     30 #include "cintltst.h"
     31 
     32 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
     33 log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
     34 
     35 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
     36 log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", __FILE__, __LINE__);}}
     37 
     38 /*
     39  *   TEST_SETUP and TEST_TEARDOWN
     40  *         macros to handle the boilerplate around setting up regex test cases.
     41  *         parameteres to setup:
     42  *              pattern:     The regex pattern, a (char *) null terminated C string.
     43  *              testString:  The string data, also a (char *) C string.
     44  *              flags:       Regex flags to set when compiling the pattern
     45  *
     46  *         Put arbitrary test code between SETUP and TEARDOWN.
     47  *         're" is the compiled, ready-to-go  regular expression.
     48  */
     49 #define TEST_SETUP(pattern, testString, flags) {  \
     50     UChar   *srcString = NULL;  \
     51     status = U_ZERO_ERROR; \
     52     re = uregex_openC(pattern, flags, NULL, &status);  \
     53     TEST_ASSERT_SUCCESS(status);   \
     54     srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
     55     u_uastrncpy(srcString, testString,  strlen(testString)+1); \
     56     uregex_setText(re, srcString, -1, &status); \
     57     TEST_ASSERT_SUCCESS(status);  \
     58     if (U_SUCCESS(status)) {
     59 
     60 #define TEST_TEARDOWN  \
     61     }  \
     62     TEST_ASSERT_SUCCESS(status);  \
     63     uregex_close(re);  \
     64     free(srcString);   \
     65     }
     66 
     67 
     68 /**
     69  * @param expected utf-8 array of bytes to be expected
     70  */
     71 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
     72      char     buf_inside_macro[120];
     73      int32_t  len = (int32_t)strlen(expected);
     74      UBool    success;
     75      if (nulTerm) {
     76          u_austrncpy(buf_inside_macro, (actual), len+1);
     77          buf_inside_macro[len+2] = 0;
     78          success = (strcmp((expected), buf_inside_macro) == 0);
     79      } else {
     80          u_austrncpy(buf_inside_macro, (actual), len);
     81          buf_inside_macro[len+1] = 0;
     82          success = (strncmp((expected), buf_inside_macro, len) == 0);
     83      }
     84      if (success == FALSE) {
     85          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
     86              file, line, (expected), buf_inside_macro);
     87      }
     88 }
     89 
     90 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
     91 
     92 
     93 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
     94     UErrorCode status = U_ZERO_ERROR;
     95     UText expectedText = UTEXT_INITIALIZER;
     96     utext_openUTF8(&expectedText, expected, -1, &status);
     97     utext_setNativeIndex(actual, 0);
     98     if (utext_compare(&expectedText, -1, actual, -1) != 0) {
     99         UChar32 c;
    100         log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
    101         c = utext_next32From(actual, 0);
    102         while (c != U_SENTINEL) {
    103             if (0x20<c && c <0x7e) {
    104                 log_err("%c", c);
    105             } else {
    106                 log_err("%#x", c);
    107             }
    108             c = UTEXT_NEXT32(actual);
    109         }
    110         log_err("\"\n");
    111     }
    112     utext_close(&expectedText);
    113 }
    114 
    115 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
    116 
    117 
    118 
    119 static void TestRegexCAPI(void);
    120 static void TestBug4315(void);
    121 static void TestUTextAPI(void);
    122 /* BEGIN android-added
    123    Removed this function after Android upgrade to ICU4.6.
    124 */
    125 static void TestRefreshInput(void);
    126 /* END android-added */
    127 
    128 void addURegexTest(TestNode** root);
    129 
    130 void addURegexTest(TestNode** root)
    131 {
    132     addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
    133     addTest(root, &TestBug4315,   "regex/TestBug4315");
    134     addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
    135     /* BEGIN android-added
    136        Removed this after Android upgrade to ICU4.6.
    137     */
    138     addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
    139     /* END android-added */
    140 }
    141 
    142 /*
    143  * Call back function and context struct used for testing
    144  *    regular expression user callbacks.  This test is mostly the same as
    145  *   the corresponding C++ test in intltest.
    146  */
    147 typedef struct callBackContext {
    148     int32_t          maxCalls;
    149     int32_t          numCalls;
    150     int32_t          lastSteps;
    151 } callBackContext;
    152 
    153 static UBool U_EXPORT2 U_CALLCONV
    154 TestCallbackFn(const void *context, int32_t steps) {
    155   callBackContext  *info = (callBackContext *)context;
    156   if (info->lastSteps+1 != steps) {
    157       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
    158   }
    159   info->lastSteps = steps;
    160   info->numCalls++;
    161   return (info->numCalls < info->maxCalls);
    162 }
    163 
    164 /*
    165  *   Regular Expression C API Tests
    166  */
    167 static void TestRegexCAPI(void) {
    168     UErrorCode           status = U_ZERO_ERROR;
    169     URegularExpression  *re;
    170     UChar                pat[200];
    171     UChar               *minus1;
    172 
    173     memset(&minus1, -1, sizeof(minus1));
    174 
    175     /* Minimalist open/close */
    176     u_uastrncpy(pat, "abc*", sizeof(pat)/2);
    177     re = uregex_open(pat, -1, 0, 0, &status);
    178     if (U_FAILURE(status)) {
    179          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
    180          return;
    181     }
    182     uregex_close(re);
    183 
    184     /* Open with all flag values set */
    185     status = U_ZERO_ERROR;
    186     re = uregex_open(pat, -1,
    187         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
    188         0, &status);
    189     TEST_ASSERT_SUCCESS(status);
    190     uregex_close(re);
    191 
    192     /* Open with an invalid flag */
    193     status = U_ZERO_ERROR;
    194     re = uregex_open(pat, -1, 0x40000000, 0, &status);
    195     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
    196     uregex_close(re);
    197 
    198     /* Open with an unimplemented flag */
    199     status = U_ZERO_ERROR;
    200     re = uregex_open(pat, -1, UREGEX_LITERAL, 0, &status);
    201     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
    202     uregex_close(re);
    203 
    204     /* openC with an invalid parameter */
    205     status = U_ZERO_ERROR;
    206     re = uregex_openC(NULL,
    207         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
    208     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
    209 
    210     /* openC with an invalid parameter */
    211     status = U_USELESS_COLLATOR_ERROR;
    212     re = uregex_openC(NULL,
    213         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
    214     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
    215 
    216     /* openC   open from a C string */
    217     {
    218         const UChar   *p;
    219         int32_t  len;
    220         status = U_ZERO_ERROR;
    221         re = uregex_openC("abc*", 0, 0, &status);
    222         TEST_ASSERT_SUCCESS(status);
    223         p = uregex_pattern(re, &len, &status);
    224         TEST_ASSERT_SUCCESS(status);
    225 
    226         /* The TEST_ASSERT_SUCCESS above should change too... */
    227         if(U_SUCCESS(status)) {
    228             u_uastrncpy(pat, "abc*", sizeof(pat)/2);
    229             TEST_ASSERT(u_strcmp(pat, p) == 0);
    230             TEST_ASSERT(len==(int32_t)strlen("abc*"));
    231         }
    232 
    233         uregex_close(re);
    234 
    235         /*  TODO:  Open with ParseError parameter */
    236     }
    237 
    238     /*
    239      *  clone
    240      */
    241     {
    242         URegularExpression *clone1;
    243         URegularExpression *clone2;
    244         URegularExpression *clone3;
    245         UChar  testString1[30];
    246         UChar  testString2[30];
    247         UBool  result;
    248 
    249 
    250         status = U_ZERO_ERROR;
    251         re = uregex_openC("abc*", 0, 0, &status);
    252         TEST_ASSERT_SUCCESS(status);
    253         clone1 = uregex_clone(re, &status);
    254         TEST_ASSERT_SUCCESS(status);
    255         TEST_ASSERT(clone1 != NULL);
    256 
    257         status = U_ZERO_ERROR;
    258         clone2 = uregex_clone(re, &status);
    259         TEST_ASSERT_SUCCESS(status);
    260         TEST_ASSERT(clone2 != NULL);
    261         uregex_close(re);
    262 
    263         status = U_ZERO_ERROR;
    264         clone3 = uregex_clone(clone2, &status);
    265         TEST_ASSERT_SUCCESS(status);
    266         TEST_ASSERT(clone3 != NULL);
    267 
    268         u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
    269         u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
    270 
    271         status = U_ZERO_ERROR;
    272         uregex_setText(clone1, testString1, -1, &status);
    273         TEST_ASSERT_SUCCESS(status);
    274         result = uregex_lookingAt(clone1, 0, &status);
    275         TEST_ASSERT_SUCCESS(status);
    276         TEST_ASSERT(result==TRUE);
    277 
    278         status = U_ZERO_ERROR;
    279         uregex_setText(clone2, testString2, -1, &status);
    280         TEST_ASSERT_SUCCESS(status);
    281         result = uregex_lookingAt(clone2, 0, &status);
    282         TEST_ASSERT_SUCCESS(status);
    283         TEST_ASSERT(result==FALSE);
    284         result = uregex_find(clone2, 0, &status);
    285         TEST_ASSERT_SUCCESS(status);
    286         TEST_ASSERT(result==TRUE);
    287 
    288         uregex_close(clone1);
    289         uregex_close(clone2);
    290         uregex_close(clone3);
    291 
    292     }
    293 
    294     /*
    295      *  pattern()
    296     */
    297     {
    298         const UChar  *resultPat;
    299         int32_t       resultLen;
    300         u_uastrncpy(pat, "hello", sizeof(pat)/2);
    301         status = U_ZERO_ERROR;
    302         re = uregex_open(pat, -1, 0, NULL, &status);
    303         resultPat = uregex_pattern(re, &resultLen, &status);
    304         TEST_ASSERT_SUCCESS(status);
    305 
    306         /* The TEST_ASSERT_SUCCESS above should change too... */
    307         if (U_SUCCESS(status)) {
    308             TEST_ASSERT(resultLen == -1);
    309             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
    310         }
    311 
    312         uregex_close(re);
    313 
    314         status = U_ZERO_ERROR;
    315         re = uregex_open(pat, 3, 0, NULL, &status);
    316         resultPat = uregex_pattern(re, &resultLen, &status);
    317         TEST_ASSERT_SUCCESS(status);
    318         TEST_ASSERT_SUCCESS(status);
    319 
    320         /* The TEST_ASSERT_SUCCESS above should change too... */
    321         if (U_SUCCESS(status)) {
    322             TEST_ASSERT(resultLen == 3);
    323             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
    324             TEST_ASSERT(u_strlen(resultPat) == 3);
    325         }
    326 
    327         uregex_close(re);
    328     }
    329 
    330     /*
    331      *  flags()
    332      */
    333     {
    334         int32_t  t;
    335 
    336         status = U_ZERO_ERROR;
    337         re = uregex_open(pat, -1, 0, NULL, &status);
    338         t  = uregex_flags(re, &status);
    339         TEST_ASSERT_SUCCESS(status);
    340         TEST_ASSERT(t == 0);
    341         uregex_close(re);
    342 
    343         status = U_ZERO_ERROR;
    344         re = uregex_open(pat, -1, 0, NULL, &status);
    345         t  = uregex_flags(re, &status);
    346         TEST_ASSERT_SUCCESS(status);
    347         TEST_ASSERT(t == 0);
    348         uregex_close(re);
    349 
    350         status = U_ZERO_ERROR;
    351         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
    352         t  = uregex_flags(re, &status);
    353         TEST_ASSERT_SUCCESS(status);
    354         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
    355         uregex_close(re);
    356     }
    357 
    358     /*
    359      *  setText() and lookingAt()
    360      */
    361     {
    362         UChar  text1[50];
    363         UChar  text2[50];
    364         UBool  result;
    365 
    366         u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
    367         u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
    368         status = U_ZERO_ERROR;
    369         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
    370         re = uregex_open(pat, -1, 0, NULL, &status);
    371         TEST_ASSERT_SUCCESS(status);
    372 
    373         /* Operation before doing a setText should fail... */
    374         status = U_ZERO_ERROR;
    375         uregex_lookingAt(re, 0, &status);
    376         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
    377 
    378         status = U_ZERO_ERROR;
    379         uregex_setText(re, text1, -1, &status);
    380         result = uregex_lookingAt(re, 0, &status);
    381         TEST_ASSERT(result == TRUE);
    382         TEST_ASSERT_SUCCESS(status);
    383 
    384         status = U_ZERO_ERROR;
    385         uregex_setText(re, text2, -1, &status);
    386         result = uregex_lookingAt(re, 0, &status);
    387         TEST_ASSERT(result == FALSE);
    388         TEST_ASSERT_SUCCESS(status);
    389 
    390         status = U_ZERO_ERROR;
    391         uregex_setText(re, text1, -1, &status);
    392         result = uregex_lookingAt(re, 0, &status);
    393         TEST_ASSERT(result == TRUE);
    394         TEST_ASSERT_SUCCESS(status);
    395 
    396         status = U_ZERO_ERROR;
    397         uregex_setText(re, text1, 5, &status);
    398         result = uregex_lookingAt(re, 0, &status);
    399         TEST_ASSERT(result == FALSE);
    400         TEST_ASSERT_SUCCESS(status);
    401 
    402         status = U_ZERO_ERROR;
    403         uregex_setText(re, text1, 6, &status);
    404         result = uregex_lookingAt(re, 0, &status);
    405         TEST_ASSERT(result == TRUE);
    406         TEST_ASSERT_SUCCESS(status);
    407 
    408         uregex_close(re);
    409     }
    410 
    411 
    412     /*
    413      *  getText()
    414      */
    415     {
    416         UChar    text1[50];
    417         UChar    text2[50];
    418         const UChar   *result;
    419         int32_t  textLength;
    420 
    421         u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
    422         u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
    423         status = U_ZERO_ERROR;
    424         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
    425         re = uregex_open(pat, -1, 0, NULL, &status);
    426 
    427         uregex_setText(re, text1, -1, &status);
    428         result = uregex_getText(re, &textLength, &status);
    429         TEST_ASSERT(result == text1);
    430         TEST_ASSERT(textLength == -1);
    431         TEST_ASSERT_SUCCESS(status);
    432 
    433         status = U_ZERO_ERROR;
    434         uregex_setText(re, text2, 7, &status);
    435         result = uregex_getText(re, &textLength, &status);
    436         TEST_ASSERT(result == text2);
    437         TEST_ASSERT(textLength == 7);
    438         TEST_ASSERT_SUCCESS(status);
    439 
    440         status = U_ZERO_ERROR;
    441         uregex_setText(re, text2, 4, &status);
    442         result = uregex_getText(re, &textLength, &status);
    443         TEST_ASSERT(result == text2);
    444         TEST_ASSERT(textLength == 4);
    445         TEST_ASSERT_SUCCESS(status);
    446         uregex_close(re);
    447     }
    448 
    449     /*
    450      *  matches()
    451      */
    452     {
    453         UChar   text1[50];
    454         UBool   result;
    455         int     len;
    456         UChar   nullString[] = {0,0,0};
    457 
    458         u_uastrncpy(text1, "abcccde",  sizeof(text1)/2);
    459         status = U_ZERO_ERROR;
    460         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
    461         re = uregex_open(pat, -1, 0, NULL, &status);
    462 
    463         uregex_setText(re, text1, -1, &status);
    464         result = uregex_matches(re, 0, &status);
    465         TEST_ASSERT(result == FALSE);
    466         TEST_ASSERT_SUCCESS(status);
    467 
    468         status = U_ZERO_ERROR;
    469         uregex_setText(re, text1, 6, &status);
    470         result = uregex_matches(re, 0, &status);
    471         TEST_ASSERT(result == TRUE);
    472         TEST_ASSERT_SUCCESS(status);
    473 
    474         status = U_ZERO_ERROR;
    475         uregex_setText(re, text1, 6, &status);
    476         result = uregex_matches(re, 1, &status);
    477         TEST_ASSERT(result == FALSE);
    478         TEST_ASSERT_SUCCESS(status);
    479         uregex_close(re);
    480 
    481         status = U_ZERO_ERROR;
    482         re = uregex_openC(".?", 0, NULL, &status);
    483         uregex_setText(re, text1, -1, &status);
    484         len = u_strlen(text1);
    485         result = uregex_matches(re, len, &status);
    486         TEST_ASSERT(result == TRUE);
    487         TEST_ASSERT_SUCCESS(status);
    488 
    489         status = U_ZERO_ERROR;
    490         uregex_setText(re, nullString, -1, &status);
    491         TEST_ASSERT_SUCCESS(status);
    492         result = uregex_matches(re, 0, &status);
    493         TEST_ASSERT(result == TRUE);
    494         TEST_ASSERT_SUCCESS(status);
    495         uregex_close(re);
    496     }
    497 
    498 
    499     /*
    500      *  lookingAt()    Used in setText test.
    501      */
    502 
    503 
    504     /*
    505      *  find(), findNext, start, end, reset
    506      */
    507     {
    508         UChar    text1[50];
    509         UBool    result;
    510         u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
    511         status = U_ZERO_ERROR;
    512         re = uregex_openC("rx", 0, NULL, &status);
    513 
    514         uregex_setText(re, text1, -1, &status);
    515         result = uregex_find(re, 0, &status);
    516         TEST_ASSERT(result == TRUE);
    517         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
    518         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
    519         TEST_ASSERT_SUCCESS(status);
    520 
    521         result = uregex_find(re, 9, &status);
    522         TEST_ASSERT(result == TRUE);
    523         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
    524         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
    525         TEST_ASSERT_SUCCESS(status);
    526 
    527         result = uregex_find(re, 14, &status);
    528         TEST_ASSERT(result == FALSE);
    529         TEST_ASSERT_SUCCESS(status);
    530 
    531         status = U_ZERO_ERROR;
    532         uregex_reset(re, 0, &status);
    533 
    534         result = uregex_findNext(re, &status);
    535         TEST_ASSERT(result == TRUE);
    536         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
    537         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
    538         TEST_ASSERT_SUCCESS(status);
    539 
    540         result = uregex_findNext(re, &status);
    541         TEST_ASSERT(result == TRUE);
    542         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
    543         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
    544         TEST_ASSERT_SUCCESS(status);
    545 
    546         status = U_ZERO_ERROR;
    547         uregex_reset(re, 12, &status);
    548 
    549         result = uregex_findNext(re, &status);
    550         TEST_ASSERT(result == TRUE);
    551         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
    552         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
    553         TEST_ASSERT_SUCCESS(status);
    554 
    555         result = uregex_findNext(re, &status);
    556         TEST_ASSERT(result == FALSE);
    557         TEST_ASSERT_SUCCESS(status);
    558 
    559         uregex_close(re);
    560     }
    561 
    562     /*
    563      *  groupCount
    564      */
    565     {
    566         int32_t result;
    567 
    568         status = U_ZERO_ERROR;
    569         re = uregex_openC("abc", 0, NULL, &status);
    570         result = uregex_groupCount(re, &status);
    571         TEST_ASSERT_SUCCESS(status);
    572         TEST_ASSERT(result == 0);
    573         uregex_close(re);
    574 
    575         status = U_ZERO_ERROR;
    576         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
    577         result = uregex_groupCount(re, &status);
    578         TEST_ASSERT_SUCCESS(status);
    579         TEST_ASSERT(result == 3);
    580         uregex_close(re);
    581 
    582     }
    583 
    584 
    585     /*
    586      *  group()
    587      */
    588     {
    589         UChar    text1[80];
    590         UChar    buf[80];
    591         UBool    result;
    592         int32_t  resultSz;
    593         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
    594 
    595         status = U_ZERO_ERROR;
    596         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
    597         TEST_ASSERT_SUCCESS(status);
    598 
    599 
    600         uregex_setText(re, text1, -1, &status);
    601         result = uregex_find(re, 0, &status);
    602         TEST_ASSERT(result==TRUE);
    603 
    604         /*  Capture Group 0, the full match.  Should succeed.  */
    605         status = U_ZERO_ERROR;
    606         resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status);
    607         TEST_ASSERT_SUCCESS(status);
    608         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
    609         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    610 
    611         /*  Capture group #1.  Should succeed. */
    612         status = U_ZERO_ERROR;
    613         resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status);
    614         TEST_ASSERT_SUCCESS(status);
    615         TEST_ASSERT_STRING(" interior ", buf, TRUE);
    616         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
    617 
    618         /*  Capture group out of range.  Error. */
    619         status = U_ZERO_ERROR;
    620         uregex_group(re, 2, buf, sizeof(buf)/2, &status);
    621         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
    622 
    623         /* NULL buffer, pure pre-flight */
    624         status = U_ZERO_ERROR;
    625         resultSz = uregex_group(re, 0, NULL, 0, &status);
    626         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    627         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    628 
    629         /* Too small buffer, truncated string */
    630         status = U_ZERO_ERROR;
    631         memset(buf, -1, sizeof(buf));
    632         resultSz = uregex_group(re, 0, buf, 5, &status);
    633         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    634         TEST_ASSERT_STRING("abc i", buf, FALSE);
    635         TEST_ASSERT(buf[5] == (UChar)0xffff);
    636         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    637 
    638         /* Output string just fits buffer, no NUL term. */
    639         status = U_ZERO_ERROR;
    640         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
    641         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    642         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
    643         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
    644         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
    645 
    646         uregex_close(re);
    647 
    648     }
    649 
    650     /*
    651      *  Regions
    652      */
    653 
    654 
    655         /* SetRegion(), getRegion() do something  */
    656         TEST_SETUP(".*", "0123456789ABCDEF", 0)
    657         UChar resultString[40];
    658         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
    659         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
    660         uregex_setRegion(re, 3, 6, &status);
    661         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
    662         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
    663         TEST_ASSERT(uregex_findNext(re, &status));
    664         TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3)
    665         TEST_ASSERT_STRING("345", resultString, TRUE);
    666         TEST_TEARDOWN;
    667 
    668         /* find(start=-1) uses regions   */
    669         TEST_SETUP(".*", "0123456789ABCDEF", 0);
    670         uregex_setRegion(re, 4, 6, &status);
    671         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
    672         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    673         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
    674         TEST_TEARDOWN;
    675 
    676         /* find (start >=0) does not use regions   */
    677         TEST_SETUP(".*", "0123456789ABCDEF", 0);
    678         uregex_setRegion(re, 4, 6, &status);
    679         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    680         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    681         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
    682         TEST_TEARDOWN;
    683 
    684         /* findNext() obeys regions    */
    685         TEST_SETUP(".", "0123456789ABCDEF", 0);
    686         uregex_setRegion(re, 4, 6, &status);
    687         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
    688         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    689         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
    690         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
    691         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
    692         TEST_TEARDOWN;
    693 
    694         /* matches(start=-1) uses regions                                           */
    695         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
    696         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    697         uregex_setRegion(re, 4, 6, &status);
    698         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
    699         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    700         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
    701         TEST_TEARDOWN;
    702 
    703         /* matches (start >=0) does not use regions       */
    704         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    705         uregex_setRegion(re, 4, 6, &status);
    706         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
    707         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    708         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
    709         TEST_TEARDOWN;
    710 
    711         /* lookingAt(start=-1) uses regions                                         */
    712         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
    713         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    714         uregex_setRegion(re, 4, 6, &status);
    715         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
    716         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    717         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
    718         TEST_TEARDOWN;
    719 
    720         /* lookingAt (start >=0) does not use regions  */
    721         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
    722         uregex_setRegion(re, 4, 6, &status);
    723         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
    724         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
    725         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
    726         TEST_TEARDOWN;
    727 
    728         /* hitEnd()       */
    729         TEST_SETUP("[a-f]*", "abcdefghij", 0);
    730         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    731         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
    732         TEST_TEARDOWN;
    733 
    734         TEST_SETUP("[a-f]*", "abcdef", 0);
    735         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    736         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
    737         TEST_TEARDOWN;
    738 
    739         /* requireEnd   */
    740         TEST_SETUP("abcd", "abcd", 0);
    741         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    742         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
    743         TEST_TEARDOWN;
    744 
    745         TEST_SETUP("abcd$", "abcd", 0);
    746         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
    747         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
    748         TEST_TEARDOWN;
    749 
    750         /* anchoringBounds        */
    751         TEST_SETUP("abc$", "abcdef", 0);
    752         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
    753         uregex_useAnchoringBounds(re, FALSE, &status);
    754         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
    755 
    756         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
    757         uregex_useAnchoringBounds(re, TRUE, &status);
    758         uregex_setRegion(re, 0, 3, &status);
    759         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
    760         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
    761         TEST_TEARDOWN;
    762 
    763         /* Transparent Bounds      */
    764         TEST_SETUP("abc(?=def)", "abcdef", 0);
    765         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
    766         uregex_useTransparentBounds(re, TRUE, &status);
    767         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
    768 
    769         uregex_useTransparentBounds(re, FALSE, &status);
    770         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
    771         uregex_setRegion(re, 0, 3, &status);
    772         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
    773         uregex_useTransparentBounds(re, TRUE, &status);
    774         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
    775         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
    776         TEST_TEARDOWN;
    777 
    778 
    779     /*
    780      *  replaceFirst()
    781      */
    782     {
    783         UChar    text1[80];
    784         UChar    text2[80];
    785         UChar    replText[80];
    786         UChar    buf[80];
    787         int32_t  resultSz;
    788         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
    789         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
    790         u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
    791 
    792         status = U_ZERO_ERROR;
    793         re = uregex_openC("x(.*?)x", 0, NULL, &status);
    794         TEST_ASSERT_SUCCESS(status);
    795 
    796         /*  Normal case, with match */
    797         uregex_setText(re, text1, -1, &status);
    798         resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
    799         TEST_ASSERT_SUCCESS(status);
    800         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
    801         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    802 
    803         /* No match.  Text should copy to output with no changes.  */
    804         status = U_ZERO_ERROR;
    805         uregex_setText(re, text2, -1, &status);
    806         resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
    807         TEST_ASSERT_SUCCESS(status);
    808         TEST_ASSERT_STRING("No match here.", buf, TRUE);
    809         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
    810 
    811         /*  Match, output just fills buffer, no termination warning. */
    812         status = U_ZERO_ERROR;
    813         uregex_setText(re, text1, -1, &status);
    814         memset(buf, -1, sizeof(buf));
    815         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
    816         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    817         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
    818         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    819         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    820 
    821         /* Do the replaceFirst again, without first resetting anything.
    822          *  Should give the same results.
    823          */
    824         status = U_ZERO_ERROR;
    825         memset(buf, -1, sizeof(buf));
    826         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
    827         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    828         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
    829         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    830         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    831 
    832         /* NULL buffer, zero buffer length */
    833         status = U_ZERO_ERROR;
    834         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
    835         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    836         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    837 
    838         /* Buffer too small by one */
    839         status = U_ZERO_ERROR;
    840         memset(buf, -1, sizeof(buf));
    841         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
    842         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    843         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
    844         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
    845         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
    846 
    847         uregex_close(re);
    848     }
    849 
    850 
    851     /*
    852      *  replaceAll()
    853      */
    854     {
    855         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
    856         UChar    text2[80];          /*  "No match Here"           */
    857         UChar    replText[80];       /*  "<$1>"                    */
    858         UChar    replText2[80];      /*  "<<$1>>"                  */
    859         const char * pattern = "x(.*?)x";
    860         const char * expectedResult = "Replace <aa> <1> <...>.";
    861         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
    862         UChar    buf[80];
    863         int32_t  resultSize;
    864         int32_t  expectedResultSize;
    865         int32_t  expectedResultSize2;
    866         int32_t  i;
    867 
    868         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
    869         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
    870         u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
    871         u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2);
    872         expectedResultSize = strlen(expectedResult);
    873         expectedResultSize2 = strlen(expectedResult2);
    874 
    875         status = U_ZERO_ERROR;
    876         re = uregex_openC(pattern, 0, NULL, &status);
    877         TEST_ASSERT_SUCCESS(status);
    878 
    879         /*  Normal case, with match */
    880         uregex_setText(re, text1, -1, &status);
    881         resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
    882         TEST_ASSERT_SUCCESS(status);
    883         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
    884         TEST_ASSERT(resultSize == expectedResultSize);
    885 
    886         /* No match.  Text should copy to output with no changes.  */
    887         status = U_ZERO_ERROR;
    888         uregex_setText(re, text2, -1, &status);
    889         resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
    890         TEST_ASSERT_SUCCESS(status);
    891         TEST_ASSERT_STRING("No match here.", buf, TRUE);
    892         TEST_ASSERT(resultSize == u_strlen(text2));
    893 
    894         /*  Match, output just fills buffer, no termination warning. */
    895         status = U_ZERO_ERROR;
    896         uregex_setText(re, text1, -1, &status);
    897         memset(buf, -1, sizeof(buf));
    898         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
    899         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    900         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
    901         TEST_ASSERT(resultSize == expectedResultSize);
    902         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
    903 
    904         /* Do the replaceAll again, without first resetting anything.
    905          *  Should give the same results.
    906          */
    907         status = U_ZERO_ERROR;
    908         memset(buf, -1, sizeof(buf));
    909         resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
    910         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
    911         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
    912         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
    913         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
    914 
    915         /* NULL buffer, zero buffer length */
    916         status = U_ZERO_ERROR;
    917         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
    918         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    919         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
    920 
    921         /* Buffer too small.  Try every size, which will tickle edge cases
    922          * in uregex_appendReplacement (used by replaceAll)   */
    923         for (i=0; i<expectedResultSize; i++) {
    924             char  expected[80];
    925             status = U_ZERO_ERROR;
    926             memset(buf, -1, sizeof(buf));
    927             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
    928             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    929             strcpy(expected, expectedResult);
    930             expected[i] = 0;
    931             TEST_ASSERT_STRING(expected, buf, FALSE);
    932             TEST_ASSERT(resultSize == expectedResultSize);
    933             TEST_ASSERT(buf[i] == (UChar)0xffff);
    934         }
    935 
    936         /* Buffer too small.  Same as previous test, except this time the replacement
    937          * text is longer than the match capture group, making the length of the complete
    938          * replacement longer than the original string.
    939          */
    940         for (i=0; i<expectedResultSize2; i++) {
    941             char  expected[80];
    942             status = U_ZERO_ERROR;
    943             memset(buf, -1, sizeof(buf));
    944             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
    945             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    946             strcpy(expected, expectedResult2);
    947             expected[i] = 0;
    948             TEST_ASSERT_STRING(expected, buf, FALSE);
    949             TEST_ASSERT(resultSize == expectedResultSize2);
    950             TEST_ASSERT(buf[i] == (UChar)0xffff);
    951         }
    952 
    953 
    954         uregex_close(re);
    955     }
    956 
    957 
    958     /*
    959      *  appendReplacement()
    960      */
    961     {
    962         UChar    text[100];
    963         UChar    repl[100];
    964         UChar    buf[100];
    965         UChar   *bufPtr;
    966         int32_t  bufCap;
    967 
    968 
    969         status = U_ZERO_ERROR;
    970         re = uregex_openC(".*", 0, 0, &status);
    971         TEST_ASSERT_SUCCESS(status);
    972 
    973         u_uastrncpy(text, "whatever",  sizeof(text)/2);
    974         u_uastrncpy(repl, "some other", sizeof(repl)/2);
    975         uregex_setText(re, text, -1, &status);
    976 
    977         /* match covers whole target string */
    978         uregex_find(re, 0, &status);
    979         TEST_ASSERT_SUCCESS(status);
    980         bufPtr = buf;
    981         bufCap = sizeof(buf) / 2;
    982         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
    983         TEST_ASSERT_SUCCESS(status);
    984         TEST_ASSERT_STRING("some other", buf, TRUE);
    985 
    986         /* Match has \u \U escapes */
    987         uregex_find(re, 0, &status);
    988         TEST_ASSERT_SUCCESS(status);
    989         bufPtr = buf;
    990         bufCap = sizeof(buf) / 2;
    991         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
    992         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
    993         TEST_ASSERT_SUCCESS(status);
    994         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
    995 
    996         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
    997         status = U_ZERO_ERROR;
    998         uregex_find(re, 0, &status);
    999         TEST_ASSERT_SUCCESS(status);
   1000         bufPtr = buf;
   1001         status = U_BUFFER_OVERFLOW_ERROR;
   1002         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
   1003         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
   1004 
   1005         uregex_close(re);
   1006     }
   1007 
   1008 
   1009     /*
   1010      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
   1011      */
   1012 
   1013     /*
   1014      *  split()
   1015      */
   1016     {
   1017         UChar    textToSplit[80];
   1018         UChar    text2[80];
   1019         UChar    buf[200];
   1020         UChar    *fields[10];
   1021         int32_t  numFields;
   1022         int32_t  requiredCapacity;
   1023         int32_t  spaceNeeded;
   1024         int32_t  sz;
   1025 
   1026         u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
   1027         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
   1028 
   1029         status = U_ZERO_ERROR;
   1030         re = uregex_openC(":", 0, NULL, &status);
   1031 
   1032 
   1033         /*  Simple split */
   1034 
   1035         uregex_setText(re, textToSplit, -1, &status);
   1036         TEST_ASSERT_SUCCESS(status);
   1037 
   1038         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1039         if (U_SUCCESS(status)) {
   1040             memset(fields, -1, sizeof(fields));
   1041             numFields =
   1042                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
   1043             TEST_ASSERT_SUCCESS(status);
   1044 
   1045             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1046             if(U_SUCCESS(status)) {
   1047                 TEST_ASSERT(numFields == 3);
   1048                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1049                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
   1050                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
   1051                 TEST_ASSERT(fields[3] == NULL);
   1052 
   1053                 spaceNeeded = u_strlen(textToSplit) -
   1054                             (numFields - 1)  +  /* Field delimiters do not appear in output */
   1055                             numFields;          /* Each field gets a NUL terminator */
   1056 
   1057                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1058             }
   1059         }
   1060 
   1061         uregex_close(re);
   1062 
   1063 
   1064         /*  Split with too few output strings available */
   1065         status = U_ZERO_ERROR;
   1066         re = uregex_openC(":", 0, NULL, &status);
   1067         uregex_setText(re, textToSplit, -1, &status);
   1068         TEST_ASSERT_SUCCESS(status);
   1069 
   1070         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1071         if(U_SUCCESS(status)) {
   1072             memset(fields, -1, sizeof(fields));
   1073             numFields =
   1074                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
   1075             TEST_ASSERT_SUCCESS(status);
   1076 
   1077             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1078             if(U_SUCCESS(status)) {
   1079                 TEST_ASSERT(numFields == 2);
   1080                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1081                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
   1082                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
   1083 
   1084                 spaceNeeded = u_strlen(textToSplit) -
   1085                             (numFields - 1)  +  /* Field delimiters do not appear in output */
   1086                             numFields;          /* Each field gets a NUL terminator */
   1087 
   1088                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1089 
   1090                 /* Split with a range of output buffer sizes.  */
   1091                 spaceNeeded = u_strlen(textToSplit) -
   1092                     (numFields - 1)  +  /* Field delimiters do not appear in output */
   1093                     numFields;          /* Each field gets a NUL terminator */
   1094 
   1095                 for (sz=0; sz < spaceNeeded+1; sz++) {
   1096                     memset(fields, -1, sizeof(fields));
   1097                     status = U_ZERO_ERROR;
   1098                     numFields =
   1099                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
   1100                     if (sz >= spaceNeeded) {
   1101                         TEST_ASSERT_SUCCESS(status);
   1102                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1103                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
   1104                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
   1105                     } else {
   1106                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
   1107                     }
   1108                     TEST_ASSERT(numFields == 3);
   1109                     TEST_ASSERT(fields[3] == NULL);
   1110                     TEST_ASSERT(spaceNeeded == requiredCapacity);
   1111                 }
   1112             }
   1113         }
   1114 
   1115         uregex_close(re);
   1116     }
   1117 
   1118 
   1119 
   1120 
   1121     /* Split(), part 2.  Patterns with capture groups.  The capture group text
   1122      *                   comes out as additional fields.  */
   1123     {
   1124         UChar    textToSplit[80];
   1125         UChar    buf[200];
   1126         UChar    *fields[10];
   1127         int32_t  numFields;
   1128         int32_t  requiredCapacity;
   1129         int32_t  spaceNeeded;
   1130         int32_t  sz;
   1131 
   1132         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
   1133 
   1134         status = U_ZERO_ERROR;
   1135         re = uregex_openC("<(.*?)>", 0, NULL, &status);
   1136 
   1137         uregex_setText(re, textToSplit, -1, &status);
   1138         TEST_ASSERT_SUCCESS(status);
   1139 
   1140         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1141         if(U_SUCCESS(status)) {
   1142             memset(fields, -1, sizeof(fields));
   1143             numFields =
   1144                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
   1145             TEST_ASSERT_SUCCESS(status);
   1146 
   1147             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1148             if(U_SUCCESS(status)) {
   1149                 TEST_ASSERT(numFields == 5);
   1150                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1151                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1152                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1153                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1154                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
   1155                 TEST_ASSERT(fields[5] == NULL);
   1156                 spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
   1157                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1158             }
   1159         }
   1160 
   1161         /*  Split with too few output strings available (2) */
   1162         status = U_ZERO_ERROR;
   1163         memset(fields, -1, sizeof(fields));
   1164         numFields =
   1165             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
   1166         TEST_ASSERT_SUCCESS(status);
   1167 
   1168         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1169         if(U_SUCCESS(status)) {
   1170             TEST_ASSERT(numFields == 2);
   1171             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1172             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
   1173             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
   1174 
   1175             spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
   1176             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1177         }
   1178 
   1179         /*  Split with too few output strings available (3) */
   1180         status = U_ZERO_ERROR;
   1181         memset(fields, -1, sizeof(fields));
   1182         numFields =
   1183             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status);
   1184         TEST_ASSERT_SUCCESS(status);
   1185 
   1186         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1187         if(U_SUCCESS(status)) {
   1188             TEST_ASSERT(numFields == 3);
   1189             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1190             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1191             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
   1192             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
   1193 
   1194             spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
   1195             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1196         }
   1197 
   1198         /*  Split with just enough output strings available (5) */
   1199         status = U_ZERO_ERROR;
   1200         memset(fields, -1, sizeof(fields));
   1201         numFields =
   1202             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status);
   1203         TEST_ASSERT_SUCCESS(status);
   1204 
   1205         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1206         if(U_SUCCESS(status)) {
   1207             TEST_ASSERT(numFields == 5);
   1208             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1209             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1210             TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1211             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1212             TEST_ASSERT_STRING("  third", fields[4], TRUE);
   1213             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
   1214 
   1215             spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
   1216             TEST_ASSERT(spaceNeeded == requiredCapacity);
   1217         }
   1218 
   1219         /* Split, end of text is a field delimiter.   */
   1220         status = U_ZERO_ERROR;
   1221         sz = strlen("first <tag-a> second<tag-b>");
   1222         uregex_setText(re, textToSplit, sz, &status);
   1223         TEST_ASSERT_SUCCESS(status);
   1224 
   1225         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1226         if(U_SUCCESS(status)) {
   1227             memset(fields, -1, sizeof(fields));
   1228             numFields =
   1229                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status);
   1230             TEST_ASSERT_SUCCESS(status);
   1231 
   1232             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1233             if(U_SUCCESS(status)) {
   1234                 TEST_ASSERT(numFields == 4);
   1235                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
   1236                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
   1237                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
   1238                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
   1239                 TEST_ASSERT(fields[4] == NULL);
   1240                 TEST_ASSERT(fields[8] == NULL);
   1241                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
   1242                 spaceNeeded = strlen("first .tag-a. second.tag-b.");  /* "." at NUL positions */
   1243                 TEST_ASSERT(spaceNeeded == requiredCapacity);
   1244             }
   1245         }
   1246 
   1247         uregex_close(re);
   1248     }
   1249 
   1250     /*
   1251      * set/getTimeLimit
   1252      */
   1253      TEST_SETUP("abc$", "abcdef", 0);
   1254      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
   1255      uregex_setTimeLimit(re, 1000, &status);
   1256      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
   1257      TEST_ASSERT_SUCCESS(status);
   1258      uregex_setTimeLimit(re, -1, &status);
   1259      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
   1260      status = U_ZERO_ERROR;
   1261      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
   1262      TEST_TEARDOWN;
   1263 
   1264      /*
   1265       * set/get Stack Limit
   1266       */
   1267      TEST_SETUP("abc$", "abcdef", 0);
   1268      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
   1269      uregex_setStackLimit(re, 40000, &status);
   1270      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
   1271      TEST_ASSERT_SUCCESS(status);
   1272      uregex_setStackLimit(re, -1, &status);
   1273      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
   1274      status = U_ZERO_ERROR;
   1275      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
   1276      TEST_TEARDOWN;
   1277 
   1278 
   1279      /*
   1280       * Get/Set callback functions
   1281       *     This test is copied from intltest regex/Callbacks
   1282       *     The pattern and test data will run long enough to cause the callback
   1283       *       to be invoked.  The nested '+' operators give exponential time
   1284       *       behavior with increasing string length.
   1285       */
   1286      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
   1287      callBackContext cbInfo = {4, 0, 0};
   1288      const void     *pContext   = &cbInfo;
   1289      URegexMatchCallback    *returnedFn = &TestCallbackFn;
   1290 
   1291      /*  Getting the callback fn when it hasn't been set must return NULL  */
   1292      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
   1293      TEST_ASSERT_SUCCESS(status);
   1294      TEST_ASSERT(returnedFn == NULL);
   1295      TEST_ASSERT(pContext == NULL);
   1296 
   1297      /* Set the callback and do a match.                                  */
   1298      /* The callback function should record that it has been called.      */
   1299      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
   1300      TEST_ASSERT_SUCCESS(status);
   1301      TEST_ASSERT(cbInfo.numCalls == 0);
   1302      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
   1303      TEST_ASSERT_SUCCESS(status);
   1304      TEST_ASSERT(cbInfo.numCalls > 0);
   1305 
   1306      /* Getting the callback should return the values that were set above.  */
   1307      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
   1308      TEST_ASSERT(returnedFn == &TestCallbackFn);
   1309      TEST_ASSERT(pContext == &cbInfo);
   1310 
   1311      TEST_TEARDOWN;
   1312 }
   1313 
   1314 
   1315 
   1316 static void TestBug4315(void) {
            /*
             *  Regression test (named after bug 4315) for pre-flighting uregex_split():
             *    1. split with a NULL buffer of capacity 0; the test expects
             *       U_BUFFER_OVERFLOW_ERROR together with a field count of 3 and the
             *       needed buffer length;
             *    2. split again into a heap buffer of that length (+1) and check the
             *       three resulting fields.
             */
   1317     UErrorCode      theICUError = U_ZERO_ERROR;
   1318     URegularExpression *theRegEx;
   1319     UChar           *textBuff;
   1320     const char      *thePattern;
   1321     UChar            theString[100];
   1322     UChar           *destFields[24];
            /* Zero-initialised so the length comparison below never reads garbage
             * when a split call returns early without storing a length. */
   1323     int32_t         neededLength1 = 0;
   1324     int32_t         neededLength2 = 0;
   1325 
   1326     int32_t         wordCount = 0;
   1327     int32_t         destFieldsSize = 24;
   1328 
   1329     thePattern  = "ck ";
   1330     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
   1331 
   1332     /* open a regex */
   1333     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
   1334     TEST_ASSERT_SUCCESS(theICUError);
   1335 
   1336     /* set the input string */
   1337     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
   1338     TEST_ASSERT_SUCCESS(theICUError);
   1339 
   1340     /* split */
   1341     /* Explicitly pass NULL and 0 to force the overflow error -> this is where the
   1342      *  error occurs! */
   1343     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
   1344         destFieldsSize, &theICUError);
   1345 
   1346     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
   1347     TEST_ASSERT(wordCount==3);
   1348 
   1349     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
   1350     {
   1351         theICUError = U_ZERO_ERROR;
   1352         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
                /* Without a buffer the second split cannot be exercised; report the
                 * allocation failure and skip it instead of passing NULL along. */
                TEST_ASSERT(textBuff != NULL);
                if (textBuff != NULL) {
   1353             wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
   1354                 destFields, destFieldsSize, &theICUError);
   1355             TEST_ASSERT(wordCount==3);
   1356             TEST_ASSERT_SUCCESS(theICUError);
   1357             TEST_ASSERT(neededLength1 == neededLength2);
                    /* If the second split failed or produced a different field count,
                     * destFields is not known to hold usable strings, so the content
                     * checks are skipped rather than dereferencing the entries. */
                    if (U_SUCCESS(theICUError) && wordCount == 3) {
   1358                 TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
   1359                 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
   1360                 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
   1361                 TEST_ASSERT(destFields[3] == NULL);
                    }
   1362             free(textBuff);
                }
   1363     }
   1364     uregex_close(theRegEx);
   1365 }
   1366 
   1367 /* Based on TestRegexCAPI() */
   1368 static void TestUTextAPI(void) {
   1369     UErrorCode           status = U_ZERO_ERROR;
   1370     URegularExpression  *re;
   1371     UText                patternText = UTEXT_INITIALIZER;
   1372     UChar                pat[200];
   1373     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
   1374 
   1375     /* Minimalist open/close */
   1376     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
   1377     re = uregex_openUText(&patternText, 0, 0, &status);
   1378     if (U_FAILURE(status)) {
   1379          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
   1380          utext_close(&patternText);
   1381          return;
   1382     }
   1383     uregex_close(re);
   1384 
   1385     /* Open with all flag values set */
   1386     status = U_ZERO_ERROR;
   1387     re = uregex_openUText(&patternText,
   1388         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
   1389         0, &status);
   1390     TEST_ASSERT_SUCCESS(status);
   1391     uregex_close(re);
   1392 
   1393     /* Open with an invalid flag */
   1394     status = U_ZERO_ERROR;
   1395     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
   1396     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
   1397     uregex_close(re);
   1398 
   1399     /* open with an invalid parameter */
   1400     status = U_ZERO_ERROR;
   1401     re = uregex_openUText(NULL,
   1402         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
   1403     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
   1404 
   1405     /*
   1406      *  clone
   1407      */
   1408     {
   1409         URegularExpression *clone1;
   1410         URegularExpression *clone2;
   1411         URegularExpression *clone3;
   1412         UChar  testString1[30];
   1413         UChar  testString2[30];
   1414         UBool  result;
   1415 
   1416 
   1417         status = U_ZERO_ERROR;
   1418         re = uregex_openUText(&patternText, 0, 0, &status);
   1419         TEST_ASSERT_SUCCESS(status);
   1420         clone1 = uregex_clone(re, &status);
   1421         TEST_ASSERT_SUCCESS(status);
   1422         TEST_ASSERT(clone1 != NULL);
   1423 
   1424         status = U_ZERO_ERROR;
   1425         clone2 = uregex_clone(re, &status);
   1426         TEST_ASSERT_SUCCESS(status);
   1427         TEST_ASSERT(clone2 != NULL);
   1428         uregex_close(re);
   1429 
   1430         status = U_ZERO_ERROR;
   1431         clone3 = uregex_clone(clone2, &status);
   1432         TEST_ASSERT_SUCCESS(status);
   1433         TEST_ASSERT(clone3 != NULL);
   1434 
   1435         u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
   1436         u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
   1437 
   1438         status = U_ZERO_ERROR;
   1439         uregex_setText(clone1, testString1, -1, &status);
   1440         TEST_ASSERT_SUCCESS(status);
   1441         result = uregex_lookingAt(clone1, 0, &status);
   1442         TEST_ASSERT_SUCCESS(status);
   1443         TEST_ASSERT(result==TRUE);
   1444 
   1445         status = U_ZERO_ERROR;
   1446         uregex_setText(clone2, testString2, -1, &status);
   1447         TEST_ASSERT_SUCCESS(status);
   1448         result = uregex_lookingAt(clone2, 0, &status);
   1449         TEST_ASSERT_SUCCESS(status);
   1450         TEST_ASSERT(result==FALSE);
   1451         result = uregex_find(clone2, 0, &status);
   1452         TEST_ASSERT_SUCCESS(status);
   1453         TEST_ASSERT(result==TRUE);
   1454 
   1455         uregex_close(clone1);
   1456         uregex_close(clone2);
   1457         uregex_close(clone3);
   1458 
   1459     }
   1460 
   1461     /*
   1462      *  pattern() and patternText()
   1463      */
   1464     {
   1465         const UChar  *resultPat;
   1466         int32_t       resultLen;
   1467         UText        *resultText;
   1468         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
   1469         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
   1470         u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */
   1471         status = U_ZERO_ERROR;
   1472 
   1473         utext_openUTF8(&patternText, str_hello, -1, &status);
   1474         re = uregex_open(pat, -1, 0, NULL, &status);
   1475         resultPat = uregex_pattern(re, &resultLen, &status);
   1476         TEST_ASSERT_SUCCESS(status);
   1477 
   1478         /* The TEST_ASSERT_SUCCESS above should change too... */
   1479         if (U_SUCCESS(status)) {
   1480             TEST_ASSERT(resultLen == -1);
   1481             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
   1482         }
   1483 
   1484         resultText = uregex_patternUText(re, &status);
   1485         TEST_ASSERT_SUCCESS(status);
   1486         TEST_ASSERT_UTEXT(str_hello, resultText);
   1487 
   1488         uregex_close(re);
   1489 
   1490         status = U_ZERO_ERROR;
   1491         re = uregex_open(pat, 3, 0, NULL, &status);
   1492         resultPat = uregex_pattern(re, &resultLen, &status);
   1493         TEST_ASSERT_SUCCESS(status);
   1494 
   1495         /* The TEST_ASSERT_SUCCESS above should change too... */
   1496         if (U_SUCCESS(status)) {
   1497             TEST_ASSERT(resultLen == 3);
   1498             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
   1499             TEST_ASSERT(u_strlen(resultPat) == 3);
   1500         }
   1501 
   1502         resultText = uregex_patternUText(re, &status);
   1503         TEST_ASSERT_SUCCESS(status);
   1504         TEST_ASSERT_UTEXT(str_hel, resultText);
   1505 
   1506         uregex_close(re);
   1507     }
   1508 
   1509     /*
   1510      *  setUText() and lookingAt()
   1511      */
   1512     {
   1513         UText  text1 = UTEXT_INITIALIZER;
   1514         UText  text2 = UTEXT_INITIALIZER;
   1515         UBool  result;
   1516         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
   1517         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
   1518         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
   1519         status = U_ZERO_ERROR;
   1520         utext_openUTF8(&text1, str_abcccd, -1, &status);
   1521         utext_openUTF8(&text2, str_abcccxd, -1, &status);
   1522 
   1523         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1524         re = uregex_openUText(&patternText, 0, NULL, &status);
   1525         TEST_ASSERT_SUCCESS(status);
   1526 
   1527         /* Operation before doing a setText should fail... */
   1528         status = U_ZERO_ERROR;
   1529         uregex_lookingAt(re, 0, &status);
   1530         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
   1531 
   1532         status = U_ZERO_ERROR;
   1533         uregex_setUText(re, &text1, &status);
   1534         result = uregex_lookingAt(re, 0, &status);
   1535         TEST_ASSERT(result == TRUE);
   1536         TEST_ASSERT_SUCCESS(status);
   1537 
   1538         status = U_ZERO_ERROR;
   1539         uregex_setUText(re, &text2, &status);
   1540         result = uregex_lookingAt(re, 0, &status);
   1541         TEST_ASSERT(result == FALSE);
   1542         TEST_ASSERT_SUCCESS(status);
   1543 
   1544         status = U_ZERO_ERROR;
   1545         uregex_setUText(re, &text1, &status);
   1546         result = uregex_lookingAt(re, 0, &status);
   1547         TEST_ASSERT(result == TRUE);
   1548         TEST_ASSERT_SUCCESS(status);
   1549 
   1550         uregex_close(re);
   1551         utext_close(&text1);
   1552         utext_close(&text2);
   1553     }
   1554 
   1555 
   1556     /*
   1557      *  getText() and getUText()
   1558      */
   1559     {
   1560         UText  text1 = UTEXT_INITIALIZER;
   1561         UText  text2 = UTEXT_INITIALIZER;
   1562         UChar  text2Chars[20];
   1563         UText  *resultText;
   1564         const UChar   *result;
   1565         int32_t  textLength;
   1566         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
   1567         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
   1568         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
   1569 
   1570 
   1571         status = U_ZERO_ERROR;
   1572         utext_openUTF8(&text1, str_abcccd, -1, &status);
   1573         u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2);
   1574         utext_openUChars(&text2, text2Chars, -1, &status);
   1575 
   1576         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1577         re = uregex_openUText(&patternText, 0, NULL, &status);
   1578 
   1579         /* First set a UText */
   1580         uregex_setUText(re, &text1, &status);
   1581         resultText = uregex_getUText(re, NULL, &status);
   1582         TEST_ASSERT_SUCCESS(status);
   1583         TEST_ASSERT(resultText != &text1);
   1584         utext_setNativeIndex(resultText, 0);
   1585         utext_setNativeIndex(&text1, 0);
   1586         TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0);
   1587         utext_close(resultText);
   1588 
   1589         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
   1590         TEST_ASSERT(textLength == -1 || textLength == 6);
   1591         resultText = uregex_getUText(re, NULL, &status);
   1592         TEST_ASSERT_SUCCESS(status);
   1593         TEST_ASSERT(resultText != &text1);
   1594         utext_setNativeIndex(resultText, 0);
   1595         utext_setNativeIndex(&text1, 0);
   1596         TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0);
   1597         utext_close(resultText);
   1598 
   1599         /* Then set a UChar * */
   1600         uregex_setText(re, text2Chars, 7, &status);
   1601         resultText = uregex_getUText(re, NULL, &status);
   1602         TEST_ASSERT_SUCCESS(status);
   1603         utext_setNativeIndex(resultText, 0);
   1604         utext_setNativeIndex(&text2, 0);
   1605         TEST_ASSERT(utext_compare(resultText, -1, &text2, -1) == 0);
   1606         utext_close(resultText);
   1607         result = uregex_getText(re, &textLength, &status);
   1608         TEST_ASSERT(textLength == 7);
   1609 
   1610         uregex_close(re);
   1611         utext_close(&text1);
   1612         utext_close(&text2);
   1613     }
   1614 
   1615     /*
   1616      *  matches()
   1617      */
   1618     {
   1619         UText   text1 = UTEXT_INITIALIZER;
   1620         UBool   result;
   1621         UText   nullText = UTEXT_INITIALIZER;
   1622         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
   1623         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
   1624 
   1625         status = U_ZERO_ERROR;
   1626         utext_openUTF8(&text1, str_abcccde, -1, &status);
   1627         utext_openUTF8(&patternText, str_abcd, -1, &status);
   1628         re = uregex_openUText(&patternText, 0, NULL, &status);
   1629 
   1630         uregex_setUText(re, &text1, &status);
   1631         result = uregex_matches(re, 0, &status);
   1632         TEST_ASSERT(result == FALSE);
   1633         TEST_ASSERT_SUCCESS(status);
   1634         uregex_close(re);
   1635 
   1636         status = U_ZERO_ERROR;
   1637         re = uregex_openC(".?", 0, NULL, &status);
   1638         uregex_setUText(re, &text1, &status);
   1639         result = uregex_matches(re, 7, &status);
   1640         TEST_ASSERT(result == TRUE);
   1641         TEST_ASSERT_SUCCESS(status);
   1642 
   1643         status = U_ZERO_ERROR;
   1644         utext_openUTF8(&nullText, "", -1, &status);
   1645         uregex_setUText(re, &nullText, &status);
   1646         TEST_ASSERT_SUCCESS(status);
   1647         result = uregex_matches(re, 0, &status);
   1648         TEST_ASSERT(result == TRUE);
   1649         TEST_ASSERT_SUCCESS(status);
   1650 
   1651         uregex_close(re);
   1652         utext_close(&text1);
   1653         utext_close(&nullText);
   1654     }
   1655 
   1656 
   1657     /*
   1658      *  lookingAt()    Used in setText test.
   1659      */
   1660 
   1661 
   1662     /*
   1663      *  find(), findNext, start, end, reset
   1664      */
   1665     {
   1666         UChar    text1[50];
   1667         UBool    result;
   1668         u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
   1669         status = U_ZERO_ERROR;
   1670         re = uregex_openC("rx", 0, NULL, &status);
   1671 
   1672         uregex_setText(re, text1, -1, &status);
   1673         result = uregex_find(re, 0, &status);
   1674         TEST_ASSERT(result == TRUE);
   1675         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
   1676         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
   1677         TEST_ASSERT_SUCCESS(status);
   1678 
   1679         result = uregex_find(re, 9, &status);
   1680         TEST_ASSERT(result == TRUE);
   1681         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
   1682         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
   1683         TEST_ASSERT_SUCCESS(status);
   1684 
   1685         result = uregex_find(re, 14, &status);
   1686         TEST_ASSERT(result == FALSE);
   1687         TEST_ASSERT_SUCCESS(status);
   1688 
   1689         status = U_ZERO_ERROR;
   1690         uregex_reset(re, 0, &status);
   1691 
   1692         result = uregex_findNext(re, &status);
   1693         TEST_ASSERT(result == TRUE);
   1694         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
   1695         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
   1696         TEST_ASSERT_SUCCESS(status);
   1697 
   1698         result = uregex_findNext(re, &status);
   1699         TEST_ASSERT(result == TRUE);
   1700         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
   1701         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
   1702         TEST_ASSERT_SUCCESS(status);
   1703 
   1704         status = U_ZERO_ERROR;
   1705         uregex_reset(re, 12, &status);
   1706 
   1707         result = uregex_findNext(re, &status);
   1708         TEST_ASSERT(result == TRUE);
   1709         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
   1710         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
   1711         TEST_ASSERT_SUCCESS(status);
   1712 
   1713         result = uregex_findNext(re, &status);
   1714         TEST_ASSERT(result == FALSE);
   1715         TEST_ASSERT_SUCCESS(status);
   1716 
   1717         uregex_close(re);
   1718     }
   1719 
   1720     /*
   1721      *  group()
   1722      */
   1723     {
   1724         UChar    text1[80];
   1725         UText   *actual;
   1726         UBool    result;
   1727 
   1728         const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */
   1729         const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
   1730 
   1731 
   1732         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
   1733 
   1734         status = U_ZERO_ERROR;
   1735         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
   1736         TEST_ASSERT_SUCCESS(status);
   1737 
   1738         uregex_setText(re, text1, -1, &status);
   1739         result = uregex_find(re, 0, &status);
   1740         TEST_ASSERT(result==TRUE);
   1741 
   1742         /*  Capture Group 0, the full match.  Should succeed.  */
   1743         status = U_ZERO_ERROR;
   1744         actual = uregex_groupUText(re, 0, NULL, &status);
   1745         TEST_ASSERT_SUCCESS(status);
   1746         TEST_ASSERT_UTEXT(str_abcinteriordef, actual);
   1747         utext_close(actual);
   1748 
   1749         /*  Capture group #1.  Should succeed. */
   1750         status = U_ZERO_ERROR;
   1751         actual = uregex_groupUText(re, 1, NULL, &status);
   1752         TEST_ASSERT_SUCCESS(status);
   1753         TEST_ASSERT_UTEXT(str_interior, actual);
   1754         utext_close(actual);
   1755 
   1756         /*  Capture group out of range.  Error. */
   1757         status = U_ZERO_ERROR;
   1758         actual = uregex_groupUText(re, 2, NULL, &status);
   1759         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
   1760         TEST_ASSERT(utext_nativeLength(actual) == 0);
   1761         utext_close(actual);
   1762 
   1763         uregex_close(re);
   1764 
   1765     }
   1766 
   1767     /*
   1768      *  replaceFirst()
   1769      */
   1770     {
   1771         UChar    text1[80];
   1772         UChar    text2[80];
   1773         UText    replText = UTEXT_INITIALIZER;
   1774         UText   *result;
   1775         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
   1776         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
   1777         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */
   1778         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
   1779         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
   1780         status = U_ZERO_ERROR;
   1781         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
   1782         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
   1783         utext_openUTF8(&replText, str_1x, -1, &status);
   1784 
   1785         re = uregex_openC("x(.*?)x", 0, NULL, &status);
   1786         TEST_ASSERT_SUCCESS(status);
   1787 
   1788         /*  Normal case, with match */
   1789         uregex_setText(re, text1, -1, &status);
   1790         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1791         TEST_ASSERT_SUCCESS(status);
   1792         TEST_ASSERT_UTEXT(str_Replxxx, result);
   1793         utext_close(result);
   1794 
   1795         /* No match.  Text should copy to output with no changes.  */
   1796         uregex_setText(re, text2, -1, &status);
   1797         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1798         TEST_ASSERT_SUCCESS(status);
   1799         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
   1800         utext_close(result);
   1801 
   1802         /* Unicode escapes */
   1803         uregex_setText(re, text1, -1, &status);
   1804         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
   1805         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
   1806         TEST_ASSERT_SUCCESS(status);
   1807         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
   1808         utext_close(result);
   1809 
   1810         uregex_close(re);
   1811         utext_close(&replText);
   1812     }
   1813 
   1814 
   1815     /*
   1816      *  replaceAll()
   1817      */
   1818     {
   1819         UChar    text1[80];
   1820         UChar    text2[80];
   1821         UText    replText = UTEXT_INITIALIZER;
   1822         UText   *result;
   1823         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
   1824         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
   1825         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
   1826         status = U_ZERO_ERROR;
   1827         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
   1828         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
   1829         utext_openUTF8(&replText, str_1, -1, &status);
   1830 
   1831         re = uregex_openC("x(.*?)x", 0, NULL, &status);
   1832         TEST_ASSERT_SUCCESS(status);
   1833 
   1834         /*  Normal case, with match */
   1835         uregex_setText(re, text1, -1, &status);
   1836         result = uregex_replaceAllUText(re, &replText, NULL, &status);
   1837         TEST_ASSERT_SUCCESS(status);
   1838         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
   1839         utext_close(result);
   1840 
   1841         /* No match.  Text should copy to output with no changes.  */
   1842         uregex_setText(re, text2, -1, &status);
   1843         result = uregex_replaceAllUText(re, &replText, NULL, &status);
   1844         TEST_ASSERT_SUCCESS(status);
   1845         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
   1846         utext_close(result);
   1847 
   1848         uregex_close(re);
   1849         utext_close(&replText);
   1850     }
   1851 
   1852 
   1853     /*
   1854      *  appendReplacement()
   1855      */
   1856     {
   1857         UChar    text[100];
   1858         UChar    repl[100];
   1859         UChar    buf[100];
   1860         UChar   *bufPtr;
   1861         int32_t  bufCap;
   1862 
   1863         status = U_ZERO_ERROR;
   1864         re = uregex_openC(".*", 0, 0, &status);
   1865         TEST_ASSERT_SUCCESS(status);
   1866 
   1867         u_uastrncpy(text, "whatever",  sizeof(text)/2);
   1868         u_uastrncpy(repl, "some other", sizeof(repl)/2);
   1869         uregex_setText(re, text, -1, &status);
   1870 
   1871         /* match covers whole target string */
   1872         uregex_find(re, 0, &status);
   1873         TEST_ASSERT_SUCCESS(status);
   1874         bufPtr = buf;
   1875         bufCap = sizeof(buf) / 2;
   1876         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1877         TEST_ASSERT_SUCCESS(status);
   1878         TEST_ASSERT_STRING("some other", buf, TRUE);
   1879 
   1880         /* Match has \u \U escapes */
   1881         uregex_find(re, 0, &status);
   1882         TEST_ASSERT_SUCCESS(status);
   1883         bufPtr = buf;
   1884         bufCap = sizeof(buf) / 2;
   1885         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
   1886         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
   1887         TEST_ASSERT_SUCCESS(status);
   1888         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
   1889 
   1890         uregex_close(re);
   1891     }
   1892 
   1893 
   1894     /*
   1895      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
   1896      */
   1897 
   1898     /*
   1899      *  splitUText()
   1900      */
   1901     {
   1902         UChar    textToSplit[80];
   1903         UChar    text2[80];
   1904         UText    *fields[10];
   1905         int32_t  numFields;
   1906         int32_t i;
   1907 
   1908         u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
   1909         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
   1910 
   1911         status = U_ZERO_ERROR;
   1912         re = uregex_openC(":", 0, NULL, &status);
   1913 
   1914 
   1915         /*  Simple split */
   1916 
   1917         uregex_setText(re, textToSplit, -1, &status);
   1918         TEST_ASSERT_SUCCESS(status);
   1919 
   1920         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1921         if (U_SUCCESS(status)) {
   1922             memset(fields, 0, sizeof(fields));
   1923             numFields = uregex_splitUText(re, fields, 10, &status);
   1924             TEST_ASSERT_SUCCESS(status);
   1925 
   1926             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1927             if(U_SUCCESS(status)) {
   1928               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
   1929               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
   1930               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
   1931                 TEST_ASSERT(numFields == 3);
   1932                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   1933                 TEST_ASSERT_UTEXT(str_second, fields[1]);
   1934                 TEST_ASSERT_UTEXT(str_third, fields[2]);
   1935                 TEST_ASSERT(fields[3] == NULL);
   1936             }
   1937             for(i = 0; i < numFields; i++) {
   1938                 utext_close(fields[i]);
   1939             }
   1940         }
   1941 
   1942         uregex_close(re);
   1943 
   1944 
   1945         /*  Split with too few output strings available */
   1946         status = U_ZERO_ERROR;
   1947         re = uregex_openC(":", 0, NULL, &status);
   1948         uregex_setText(re, textToSplit, -1, &status);
   1949         TEST_ASSERT_SUCCESS(status);
   1950 
   1951         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1952         if(U_SUCCESS(status)) {
   1953             fields[0] = NULL;
   1954             fields[1] = NULL;
   1955             fields[2] = &patternText;
   1956             numFields = uregex_splitUText(re, fields, 2, &status);
   1957             TEST_ASSERT_SUCCESS(status);
   1958 
   1959             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1960             if(U_SUCCESS(status)) {
   1961                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   1962                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
   1963                 TEST_ASSERT(numFields == 2);
   1964                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   1965                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
   1966                 TEST_ASSERT(fields[2] == &patternText);
   1967             }
   1968             for(i = 0; i < numFields; i++) {
   1969                 utext_close(fields[i]);
   1970             }
   1971         }
   1972 
   1973         uregex_close(re);
   1974     }
   1975 
   1976     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
   1977      *                   comes out as additional fields.  */
   1978     {
   1979         UChar    textToSplit[80];
   1980         UText    *fields[10];
   1981         int32_t  numFields;
   1982         int32_t i;
   1983 
   1984         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
   1985 
   1986         status = U_ZERO_ERROR;
   1987         re = uregex_openC("<(.*?)>", 0, NULL, &status);
   1988 
   1989         uregex_setText(re, textToSplit, -1, &status);
   1990         TEST_ASSERT_SUCCESS(status);
   1991 
   1992         /* The TEST_ASSERT_SUCCESS call above should change too... */
   1993         if(U_SUCCESS(status)) {
   1994             memset(fields, 0, sizeof(fields));
   1995             numFields = uregex_splitUText(re, fields, 10, &status);
   1996             TEST_ASSERT_SUCCESS(status);
   1997 
   1998             /* The TEST_ASSERT_SUCCESS call above should change too... */
   1999             if(U_SUCCESS(status)) {
   2000                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2001                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2002                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2003                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2004                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
   2005 
   2006                 TEST_ASSERT(numFields == 5);
   2007                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2008                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2009                 TEST_ASSERT_UTEXT(str_second, fields[2]);
   2010                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2011                 TEST_ASSERT_UTEXT(str_third, fields[4]);
   2012                 TEST_ASSERT(fields[5] == NULL);
   2013             }
   2014             for(i = 0; i < numFields; i++) {
   2015                 utext_close(fields[i]);
   2016             }
   2017         }
   2018 
   2019         /*  Split with too few output strings available (2) */
   2020         status = U_ZERO_ERROR;
   2021         fields[0] = NULL;
   2022         fields[1] = NULL;
   2023         fields[2] = &patternText;
   2024         numFields = uregex_splitUText(re, fields, 2, &status);
   2025         TEST_ASSERT_SUCCESS(status);
   2026 
   2027         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2028         if(U_SUCCESS(status)) {
   2029             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2030             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
   2031             TEST_ASSERT(numFields == 2);
   2032             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2033             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
   2034             TEST_ASSERT(fields[2] == &patternText);
   2035         }
   2036         for(i = 0; i < numFields; i++) {
   2037             utext_close(fields[i]);
   2038         }
   2039 
   2040 
   2041         /*  Split with too few output strings available (3) */
   2042         status = U_ZERO_ERROR;
   2043         fields[0] = NULL;
   2044         fields[1] = NULL;
   2045         fields[2] = NULL;
   2046         fields[3] = &patternText;
   2047         numFields = uregex_splitUText(re, fields, 3, &status);
   2048         TEST_ASSERT_SUCCESS(status);
   2049 
   2050         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2051         if(U_SUCCESS(status)) {
   2052             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2053             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2054             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
   2055             TEST_ASSERT(numFields == 3);
   2056             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2057             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2058             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
   2059             TEST_ASSERT(fields[3] == &patternText);
   2060         }
   2061         for(i = 0; i < numFields; i++) {
   2062             utext_close(fields[i]);
   2063         }
   2064 
   2065         /*  Split with just enough output strings available (5) */
   2066         status = U_ZERO_ERROR;
   2067         fields[0] = NULL;
   2068         fields[1] = NULL;
   2069         fields[2] = NULL;
   2070         fields[3] = NULL;
   2071         fields[4] = NULL;
   2072         fields[5] = &patternText;
   2073         numFields = uregex_splitUText(re, fields, 5, &status);
   2074         TEST_ASSERT_SUCCESS(status);
   2075 
   2076         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2077         if(U_SUCCESS(status)) {
   2078             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2079             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2080             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2081             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2082             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
   2083 
   2084             TEST_ASSERT(numFields == 5);
   2085             TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2086             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2087             TEST_ASSERT_UTEXT(str_second, fields[2]);
   2088             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2089             TEST_ASSERT_UTEXT(str_third, fields[4]);
   2090             TEST_ASSERT(fields[5] == &patternText);
   2091         }
   2092         for(i = 0; i < numFields; i++) {
   2093             utext_close(fields[i]);
   2094         }
   2095 
   2096         /* Split, end of text is a field delimiter.   */
   2097         status = U_ZERO_ERROR;
   2098         uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
   2099         TEST_ASSERT_SUCCESS(status);
   2100 
   2101         /* The TEST_ASSERT_SUCCESS call above should change too... */
   2102         if(U_SUCCESS(status)) {
   2103             memset(fields, 0, sizeof(fields));
   2104             fields[9] = &patternText;
   2105             numFields = uregex_splitUText(re, fields, 9, &status);
   2106             TEST_ASSERT_SUCCESS(status);
   2107 
   2108             /* The TEST_ASSERT_SUCCESS call above should change too... */
   2109             if(U_SUCCESS(status)) {
   2110                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
   2111                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
   2112                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
   2113                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
   2114 
   2115                 TEST_ASSERT(numFields == 4);
   2116                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
   2117                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
   2118                 TEST_ASSERT_UTEXT(str_second, fields[2]);
   2119                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
   2120                 TEST_ASSERT(fields[4] == NULL);
   2121                 TEST_ASSERT(fields[8] == NULL);
   2122                 TEST_ASSERT(fields[9] == &patternText);
   2123             }
   2124             for(i = 0; i < numFields; i++) {
   2125                 utext_close(fields[i]);
   2126             }
   2127         }
   2128 
   2129         uregex_close(re);
   2130     }
   2131     utext_close(&patternText);
   2132 }
   2133 
   2134 /* BEGIN android-added
   2135    Remove this function after Android upgrades to ICU 4.6.
   2136 */
   2137 static void TestRefreshInput(void) {
   2138     /*
   2139      *  RefreshInput changes out the input of a URegularExpression without
   2140      *    changing anything else in the match state.  Used with Java JNI,
   2141      *    when Java moves the underlying string storage.   This test
   2142      *    runs a find() loop, moving the text after the first match.
   2143      *    The right number of matches should still be found.
   2144      */
   2145     UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
   2146     UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
   2147     UErrorCode status = U_ZERO_ERROR;
   2148     URegularExpression *re;
   2149     UText ut1 = UTEXT_INITIALIZER;
   2150     UText ut2 = UTEXT_INITIALIZER;
   2151 
   2152     re = uregex_openC("[ABC]", 0, 0, &status);
   2153     TEST_ASSERT_SUCCESS(status);
   2154 
   2155     utext_openUChars(&ut1, testStr, -1, &status);
   2156     TEST_ASSERT_SUCCESS(status);
   2157     uregex_setUText(re, &ut1, &status);
   2158     TEST_ASSERT_SUCCESS(status);
   2159 
   2160     /* Find the first match "A" in the original string */
   2161     TEST_ASSERT(uregex_findNext(re, &status));
   2162     TEST_ASSERT(uregex_start(re, 0, &status) == 0);
   2163 
   2164     /* Move the string, kill the original string.  */
   2165     u_strcpy(movedStr, testStr);
   2166     u_memset(testStr, 0, u_strlen(testStr));
   2167     utext_openUChars(&ut2, movedStr, -1, &status);
   2168     TEST_ASSERT_SUCCESS(status);
   2169     uregex_refreshUText(re, &ut2, &status);
   2170     TEST_ASSERT_SUCCESS(status);
   2171 
   2172     /* Find the following two matches, now working in the moved string. */
   2173     TEST_ASSERT(uregex_findNext(re, &status));
   2174     TEST_ASSERT(uregex_start(re, 0, &status) == 2);
   2175     TEST_ASSERT(uregex_findNext(re, &status));
   2176     TEST_ASSERT(uregex_start(re, 0, &status) == 4);
   2177     TEST_ASSERT(FALSE == uregex_findNext(re, &status));
   2178 
   2179     uregex_close(re);
   2180 }
   2181 /* END android-added */
   2182 
   2183 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
   2184