Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2004-2015, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  uregex.cpp
      7 */
      8 
      9 #include "unicode/utypes.h"
     10 
     11 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     12 
     13 #include "unicode/regex.h"
     14 #include "unicode/uregex.h"
     15 #include "unicode/unistr.h"
     16 #include "unicode/ustring.h"
     17 #include "unicode/uchar.h"
     18 #include "unicode/uobject.h"
     19 #include "unicode/utf16.h"
     20 #include "cmemory.h"
     21 #include "uassert.h"
     22 #include "uhash.h"
     23 #include "umutex.h"
     24 #include "uvectr32.h"
     25 
     26 #include "regextxt.h"
     27 
     28 U_NAMESPACE_BEGIN
     29 
// Evaluates to (len - idx) when that difference is positive, and to 0 otherwise.
// Both arguments are expanded more than once, so they must be free of side effects.
#define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0)
     31 
// Implementation object behind the opaque URegularExpression handle that the
// uregex_ functions in this file cast to and from.
struct RegularExpression: public UMemory {
public:
    RegularExpression();
    ~RegularExpression();
    int32_t           fMagic;        // REXP_MAGIC while the object is alive; zeroed by the destructor.
    RegexPattern     *fPat;          // Compiled pattern; the same pointer is shared with clones.
    u_atomic_int32_t *fPatRefCount;  // Number of objects sharing fPat and fPatString.
    UChar            *fPatString;    // Private, NUL-terminated copy of the pattern source.
    int32_t           fPatStringLen; // Length recorded when the pattern was opened.
    RegexMatcher     *fMatcher;      // Matcher belonging to this object; deleted by the destructor.
    const UChar      *fText;         // Text from setText()
    int32_t           fTextLength;   // Length provided by user with setText(), which
                                     //  may be -1.
    UBool             fOwnsText;     // TRUE if a non-NULL fText must be freed by this object.
};
     47 
     48 static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
     49 
     50 RegularExpression::RegularExpression() {
     51     fMagic        = REXP_MAGIC;
     52     fPat          = NULL;
     53     fPatRefCount  = NULL;
     54     fPatString    = NULL;
     55     fPatStringLen = 0;
     56     fMatcher      = NULL;
     57     fText         = NULL;
     58     fTextLength   = 0;
     59     fOwnsText     = FALSE;
     60 }
     61 
     62 RegularExpression::~RegularExpression() {
     63     delete fMatcher;
     64     fMatcher = NULL;
     65     if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) {
     66         delete fPat;
     67         uprv_free(fPatString);
     68         uprv_free((void *)fPatRefCount);
     69     }
     70     if (fOwnsText && fText!=NULL) {
     71         uprv_free((void *)fText);
     72     }
     73     fMagic = 0;
     74 }
     75 
     76 U_NAMESPACE_END
     77 
     78 U_NAMESPACE_USE
     79 
     80 //----------------------------------------------------------------------------------------
     81 //
     82 //   validateRE    Do boilerplate style checks on API function parameters.
     83 //                 Return TRUE if they look OK.
     84 //----------------------------------------------------------------------------------------
     85 static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) {
     86     if (U_FAILURE(*status)) {
     87         return FALSE;
     88     }
     89     if (re == NULL || re->fMagic != REXP_MAGIC) {
     90         *status = U_ILLEGAL_ARGUMENT_ERROR;
     91         return FALSE;
     92     }
     93     // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway
     94     if (requiresText && re->fText == NULL && !re->fOwnsText) {
     95         *status = U_REGEX_INVALID_STATE;
     96         return FALSE;
     97     }
     98     return TRUE;
     99 }
    100 
    101 //----------------------------------------------------------------------------------------
    102 //
    103 //    uregex_open
    104 //
    105 //----------------------------------------------------------------------------------------
    106 U_CAPI URegularExpression *  U_EXPORT2
    107 uregex_open( const  UChar          *pattern,
    108                     int32_t         patternLength,
    109                     uint32_t        flags,
    110                     UParseError    *pe,
    111                     UErrorCode     *status) {
    112 
    113     if (U_FAILURE(*status)) {
    114         return NULL;
    115     }
    116     if (pattern == NULL || patternLength < -1 || patternLength == 0) {
    117         *status = U_ILLEGAL_ARGUMENT_ERROR;
    118         return NULL;
    119     }
    120     int32_t actualPatLen = patternLength;
    121     if (actualPatLen == -1) {
    122         actualPatLen = u_strlen(pattern);
    123     }
    124 
    125     RegularExpression  *re     = new RegularExpression;
    126     u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
    127     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1));
    128     if (re == NULL || refC == NULL || patBuf == NULL) {
    129         *status = U_MEMORY_ALLOCATION_ERROR;
    130         delete re;
    131         uprv_free((void *)refC);
    132         uprv_free(patBuf);
    133         return NULL;
    134     }
    135     re->fPatRefCount = refC;
    136     *re->fPatRefCount = 1;
    137 
    138     //
    139     // Make a copy of the pattern string, so we can return it later if asked.
    140     //    For compiling the pattern, we will use a UText wrapper around
    141     //    this local copy, to avoid making even more copies.
    142     //
    143     re->fPatString    = patBuf;
    144     re->fPatStringLen = patternLength;
    145     u_memcpy(patBuf, pattern, actualPatLen);
    146     patBuf[actualPatLen] = 0;
    147 
    148     UText patText = UTEXT_INITIALIZER;
    149     utext_openUChars(&patText, patBuf, patternLength, status);
    150 
    151     //
    152     // Compile the pattern
    153     //
    154     if (pe != NULL) {
    155         re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
    156     } else {
    157         re->fPat = RegexPattern::compile(&patText, flags, *status);
    158     }
    159     utext_close(&patText);
    160 
    161     if (U_FAILURE(*status)) {
    162         goto ErrorExit;
    163     }
    164 
    165     //
    166     // Create the matcher object
    167     //
    168     re->fMatcher = re->fPat->matcher(*status);
    169     if (U_SUCCESS(*status)) {
    170         return (URegularExpression*)re;
    171     }
    172 
    173 ErrorExit:
    174     delete re;
    175     return NULL;
    176 
    177 }
    178 
    179 //----------------------------------------------------------------------------------------
    180 //
    181 //    uregex_openUText
    182 //
    183 //----------------------------------------------------------------------------------------
    184 U_CAPI URegularExpression *  U_EXPORT2
    185 uregex_openUText(UText          *pattern,
    186                  uint32_t        flags,
    187                  UParseError    *pe,
    188                  UErrorCode     *status) {
    189 
    190     if (U_FAILURE(*status)) {
    191         return NULL;
    192     }
    193     if (pattern == NULL) {
    194         *status = U_ILLEGAL_ARGUMENT_ERROR;
    195         return NULL;
    196     }
    197 
    198     int64_t patternNativeLength = utext_nativeLength(pattern);
    199 
    200     if (patternNativeLength == 0) {
    201         *status = U_ILLEGAL_ARGUMENT_ERROR;
    202         return NULL;
    203     }
    204 
    205     RegularExpression *re     = new RegularExpression;
    206 
    207     UErrorCode lengthStatus = U_ZERO_ERROR;
    208     int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
    209 
    210     u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
    211     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
    212     if (re == NULL || refC == NULL || patBuf == NULL) {
    213         *status = U_MEMORY_ALLOCATION_ERROR;
    214         delete re;
    215         uprv_free((void *)refC);
    216         uprv_free(patBuf);
    217         return NULL;
    218     }
    219     re->fPatRefCount = refC;
    220     *re->fPatRefCount = 1;
    221 
    222     //
    223     // Make a copy of the pattern string, so we can return it later if asked.
    224     //    For compiling the pattern, we will use a read-only UText wrapper
    225     //    around this local copy, to avoid making even more copies.
    226     //
    227     re->fPatString    = patBuf;
    228     re->fPatStringLen = pattern16Length;
    229     utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
    230 
    231     UText patText = UTEXT_INITIALIZER;
    232     utext_openUChars(&patText, patBuf, pattern16Length, status);
    233 
    234     //
    235     // Compile the pattern
    236     //
    237     if (pe != NULL) {
    238         re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
    239     } else {
    240         re->fPat = RegexPattern::compile(&patText, flags, *status);
    241     }
    242     utext_close(&patText);
    243 
    244     if (U_FAILURE(*status)) {
    245         goto ErrorExit;
    246     }
    247 
    248     //
    249     // Create the matcher object
    250     //
    251     re->fMatcher = re->fPat->matcher(*status);
    252     if (U_SUCCESS(*status)) {
    253         return (URegularExpression*)re;
    254     }
    255 
    256 ErrorExit:
    257     delete re;
    258     return NULL;
    259 
    260 }
    261 
    262 //----------------------------------------------------------------------------------------
    263 //
    264 //    uregex_close
    265 //
    266 //----------------------------------------------------------------------------------------
    267 U_CAPI void  U_EXPORT2
    268 uregex_close(URegularExpression  *re2) {
    269     RegularExpression *re = (RegularExpression*)re2;
    270     UErrorCode  status = U_ZERO_ERROR;
    271     if (validateRE(re, FALSE, &status) == FALSE) {
    272         return;
    273     }
    274     delete re;
    275 }
    276 
    277 
    278 //----------------------------------------------------------------------------------------
    279 //
    280 //    uregex_clone
    281 //
    282 //----------------------------------------------------------------------------------------
    283 U_CAPI URegularExpression * U_EXPORT2
    284 uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
    285     RegularExpression *source = (RegularExpression*)source2;
    286     if (validateRE(source, FALSE, status) == FALSE) {
    287         return NULL;
    288     }
    289 
    290     RegularExpression *clone = new RegularExpression;
    291     if (clone == NULL) {
    292         *status = U_MEMORY_ALLOCATION_ERROR;
    293         return NULL;
    294     }
    295 
    296     clone->fMatcher = source->fPat->matcher(*status);
    297     if (U_FAILURE(*status)) {
    298         delete clone;
    299         return NULL;
    300     }
    301 
    302     clone->fPat          = source->fPat;
    303     clone->fPatRefCount  = source->fPatRefCount;
    304     clone->fPatString    = source->fPatString;
    305     clone->fPatStringLen = source->fPatStringLen;
    306     umtx_atomic_inc(source->fPatRefCount);
    307     // Note:  fText is not cloned.
    308 
    309     return (URegularExpression*)clone;
    310 }
    311 
    312 
    313 
    314 
    315 //------------------------------------------------------------------------------
    316 //
    317 //    uregex_pattern
    318 //
    319 //------------------------------------------------------------------------------
    320 U_CAPI const UChar * U_EXPORT2
    321 uregex_pattern(const  URegularExpression *regexp2,
    322                       int32_t            *patLength,
    323                       UErrorCode         *status)  {
    324     RegularExpression *regexp = (RegularExpression*)regexp2;
    325 
    326     if (validateRE(regexp, FALSE, status) == FALSE) {
    327         return NULL;
    328     }
    329     if (patLength != NULL) {
    330         *patLength = regexp->fPatStringLen;
    331     }
    332     return regexp->fPatString;
    333 }
    334 
    335 
    336 //------------------------------------------------------------------------------
    337 //
    338 //    uregex_patternUText
    339 //
    340 //------------------------------------------------------------------------------
    341 U_CAPI UText * U_EXPORT2
    342 uregex_patternUText(const URegularExpression *regexp2,
    343                           UErrorCode         *status)  {
    344     RegularExpression *regexp = (RegularExpression*)regexp2;
    345     return regexp->fPat->patternText(*status);
    346 }
    347 
    348 
    349 //------------------------------------------------------------------------------
    350 //
    351 //    uregex_flags
    352 //
    353 //------------------------------------------------------------------------------
    354 U_CAPI int32_t U_EXPORT2
    355 uregex_flags(const URegularExpression *regexp2, UErrorCode *status)  {
    356     RegularExpression *regexp = (RegularExpression*)regexp2;
    357     if (validateRE(regexp, FALSE, status) == FALSE) {
    358         return 0;
    359     }
    360     int32_t flags = regexp->fPat->flags();
    361     return flags;
    362 }
    363 
    364 
    365 //------------------------------------------------------------------------------
    366 //
    367 //    uregex_setText
    368 //
    369 //------------------------------------------------------------------------------
    370 U_CAPI void U_EXPORT2
    371 uregex_setText(URegularExpression *regexp2,
    372                const UChar        *text,
    373                int32_t             textLength,
    374                UErrorCode         *status)  {
    375     RegularExpression *regexp = (RegularExpression*)regexp2;
    376     if (validateRE(regexp, FALSE, status) == FALSE) {
    377         return;
    378     }
    379     if (text == NULL || textLength < -1) {
    380         *status = U_ILLEGAL_ARGUMENT_ERROR;
    381         return;
    382     }
    383 
    384     if (regexp->fOwnsText && regexp->fText != NULL) {
    385         uprv_free((void *)regexp->fText);
    386     }
    387 
    388     regexp->fText       = text;
    389     regexp->fTextLength = textLength;
    390     regexp->fOwnsText   = FALSE;
    391 
    392     UText input = UTEXT_INITIALIZER;
    393     utext_openUChars(&input, text, textLength, status);
    394     regexp->fMatcher->reset(&input);
    395     utext_close(&input); // reset() made a shallow clone, so we don't need this copy
    396 }
    397 
    398 
    399 //------------------------------------------------------------------------------
    400 //
    401 //    uregex_setUText
    402 //
    403 //------------------------------------------------------------------------------
    404 U_CAPI void U_EXPORT2
    405 uregex_setUText(URegularExpression *regexp2,
    406                 UText              *text,
    407                 UErrorCode         *status) {
    408     RegularExpression *regexp = (RegularExpression*)regexp2;
    409     if (validateRE(regexp, FALSE, status) == FALSE) {
    410         return;
    411     }
    412     if (text == NULL) {
    413         *status = U_ILLEGAL_ARGUMENT_ERROR;
    414         return;
    415     }
    416 
    417     if (regexp->fOwnsText && regexp->fText != NULL) {
    418         uprv_free((void *)regexp->fText);
    419     }
    420 
    421     regexp->fText       = NULL; // only fill it in on request
    422     regexp->fTextLength = -1;
    423     regexp->fOwnsText   = TRUE;
    424     regexp->fMatcher->reset(text);
    425 }
    426 
    427 
    428 
    429 //------------------------------------------------------------------------------
    430 //
    431 //    uregex_getText
    432 //
    433 //------------------------------------------------------------------------------
    434 U_CAPI const UChar * U_EXPORT2
    435 uregex_getText(URegularExpression *regexp2,
    436                int32_t            *textLength,
    437                UErrorCode         *status)  {
    438     RegularExpression *regexp = (RegularExpression*)regexp2;
    439     if (validateRE(regexp, FALSE, status) == FALSE) {
    440         return NULL;
    441     }
    442 
    443     if (regexp->fText == NULL) {
    444         // need to fill in the text
    445         UText *inputText = regexp->fMatcher->inputText();
    446         int64_t inputNativeLength = utext_nativeLength(inputText);
    447         if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
    448             regexp->fText = inputText->chunkContents;
    449             regexp->fTextLength = (int32_t)inputNativeLength;
    450             regexp->fOwnsText = FALSE; // because the UText owns it
    451         } else {
    452             UErrorCode lengthStatus = U_ZERO_ERROR;
    453             regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error
    454             UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1));
    455 
    456             utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
    457             regexp->fText = inputChars;
    458             regexp->fOwnsText = TRUE; // should already be set but just in case
    459         }
    460     }
    461 
    462     if (textLength != NULL) {
    463         *textLength = regexp->fTextLength;
    464     }
    465     return regexp->fText;
    466 }
    467 
    468 
    469 //------------------------------------------------------------------------------
    470 //
    471 //    uregex_getUText
    472 //
    473 //------------------------------------------------------------------------------
    474 U_CAPI UText * U_EXPORT2
    475 uregex_getUText(URegularExpression *regexp2,
    476                 UText              *dest,
    477                 UErrorCode         *status)  {
    478     RegularExpression *regexp = (RegularExpression*)regexp2;
    479     if (validateRE(regexp, FALSE, status) == FALSE) {
    480         return dest;
    481     }
    482     return regexp->fMatcher->getInput(dest, *status);
    483 }
    484 
    485 
    486 //------------------------------------------------------------------------------
    487 //
    488 //    uregex_refreshUText
    489 //
    490 //------------------------------------------------------------------------------
    491 U_CAPI void U_EXPORT2
    492 uregex_refreshUText(URegularExpression *regexp2,
    493                     UText              *text,
    494                     UErrorCode         *status) {
    495     RegularExpression *regexp = (RegularExpression*)regexp2;
    496     if (validateRE(regexp, FALSE, status) == FALSE) {
    497         return;
    498     }
    499     regexp->fMatcher->refreshInputText(text, *status);
    500 }
    501 
    502 
    503 //------------------------------------------------------------------------------
    504 //
    505 //    uregex_matches
    506 //
    507 //------------------------------------------------------------------------------
    508 U_CAPI UBool U_EXPORT2
    509 uregex_matches(URegularExpression *regexp2,
    510                int32_t            startIndex,
    511                UErrorCode        *status)  {
    512     return uregex_matches64( regexp2, (int64_t)startIndex, status);
    513 }
    514 
    515 U_CAPI UBool U_EXPORT2
    516 uregex_matches64(URegularExpression *regexp2,
    517                  int64_t            startIndex,
    518                  UErrorCode        *status)  {
    519     RegularExpression *regexp = (RegularExpression*)regexp2;
    520     UBool result = FALSE;
    521     if (validateRE(regexp, TRUE, status) == FALSE) {
    522         return result;
    523     }
    524     if (startIndex == -1) {
    525         result = regexp->fMatcher->matches(*status);
    526     } else {
    527         result = regexp->fMatcher->matches(startIndex, *status);
    528     }
    529     return result;
    530 }
    531 
    532 
    533 //------------------------------------------------------------------------------
    534 //
    535 //    uregex_lookingAt
    536 //
    537 //------------------------------------------------------------------------------
    538 U_CAPI UBool U_EXPORT2
    539 uregex_lookingAt(URegularExpression *regexp2,
    540                  int32_t             startIndex,
    541                  UErrorCode         *status)  {
    542     return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
    543 }
    544 
    545 U_CAPI UBool U_EXPORT2
    546 uregex_lookingAt64(URegularExpression *regexp2,
    547                    int64_t             startIndex,
    548                    UErrorCode         *status)  {
    549     RegularExpression *regexp = (RegularExpression*)regexp2;
    550     UBool result = FALSE;
    551     if (validateRE(regexp, TRUE, status) == FALSE) {
    552         return result;
    553     }
    554     if (startIndex == -1) {
    555         result = regexp->fMatcher->lookingAt(*status);
    556     } else {
    557         result = regexp->fMatcher->lookingAt(startIndex, *status);
    558     }
    559     return result;
    560 }
    561 
    562 
    563 
    564 //------------------------------------------------------------------------------
    565 //
    566 //    uregex_find
    567 //
    568 //------------------------------------------------------------------------------
    569 U_CAPI UBool U_EXPORT2
    570 uregex_find(URegularExpression *regexp2,
    571             int32_t             startIndex,
    572             UErrorCode         *status)  {
    573     return uregex_find64( regexp2, (int64_t)startIndex, status);
    574 }
    575 
    576 U_CAPI UBool U_EXPORT2
    577 uregex_find64(URegularExpression *regexp2,
    578               int64_t             startIndex,
    579               UErrorCode         *status)  {
    580     RegularExpression *regexp = (RegularExpression*)regexp2;
    581     UBool result = FALSE;
    582     if (validateRE(regexp, TRUE, status) == FALSE) {
    583         return result;
    584     }
    585     if (startIndex == -1) {
    586         regexp->fMatcher->resetPreserveRegion();
    587         result = regexp->fMatcher->find(*status);
    588     } else {
    589         result = regexp->fMatcher->find(startIndex, *status);
    590     }
    591     return result;
    592 }
    593 
    594 
    595 //------------------------------------------------------------------------------
    596 //
    597 //    uregex_findNext
    598 //
    599 //------------------------------------------------------------------------------
    600 U_CAPI UBool U_EXPORT2
    601 uregex_findNext(URegularExpression *regexp2,
    602                 UErrorCode         *status)  {
    603     RegularExpression *regexp = (RegularExpression*)regexp2;
    604     if (validateRE(regexp, TRUE, status) == FALSE) {
    605         return FALSE;
    606     }
    607     UBool result = regexp->fMatcher->find(*status);
    608     return result;
    609 }
    610 
    611 //------------------------------------------------------------------------------
    612 //
    613 //    uregex_groupCount
    614 //
    615 //------------------------------------------------------------------------------
    616 U_CAPI int32_t U_EXPORT2
    617 uregex_groupCount(URegularExpression *regexp2,
    618                   UErrorCode         *status)  {
    619     RegularExpression *regexp = (RegularExpression*)regexp2;
    620     if (validateRE(regexp, FALSE, status) == FALSE) {
    621         return 0;
    622     }
    623     int32_t  result = regexp->fMatcher->groupCount();
    624     return result;
    625 }
    626 
    627 
    628 //------------------------------------------------------------------------------
    629 //
    630 //    uregex_groupNumberFromName
    631 //
    632 //------------------------------------------------------------------------------
    633 int32_t
    634 uregex_groupNumberFromName(URegularExpression *regexp2,
    635                            const UChar        *groupName,
    636                            int32_t             nameLength,
    637                            UErrorCode          *status) {
    638     RegularExpression *regexp = (RegularExpression*)regexp2;
    639     if (validateRE(regexp, FALSE, status) == FALSE) {
    640         return 0;
    641     }
    642     int32_t  result = regexp->fPat->groupNumberFromName(UnicodeString(groupName, nameLength), *status);
    643     return result;
    644 }
    645 
    646 int32_t
    647 uregex_groupNumberFromCName(URegularExpression *regexp2,
    648                             const char         *groupName,
    649                             int32_t             nameLength,
    650                             UErrorCode          *status) {
    651     RegularExpression *regexp = (RegularExpression*)regexp2;
    652     if (validateRE(regexp, FALSE, status) == FALSE) {
    653         return 0;
    654     }
    655     return regexp->fPat->groupNumberFromName(groupName, nameLength, *status);
    656 }
    657 
    658 //------------------------------------------------------------------------------
    659 //
    660 //    uregex_group
    661 //
    662 //------------------------------------------------------------------------------
    663 U_CAPI int32_t U_EXPORT2
    664 uregex_group(URegularExpression *regexp2,
    665              int32_t             groupNum,
    666              UChar              *dest,
    667              int32_t             destCapacity,
    668              UErrorCode          *status)  {
    669     RegularExpression *regexp = (RegularExpression*)regexp2;
    670     if (validateRE(regexp, TRUE, status) == FALSE) {
    671         return 0;
    672     }
    673     if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
    674         *status = U_ILLEGAL_ARGUMENT_ERROR;
    675         return 0;
    676     }
    677 
    678     if (destCapacity == 0 || regexp->fText != NULL) {
    679         // If preflighting or if we already have the text as UChars,
    680         // this is a little cheaper than extracting from the UText
    681 
    682         //
    683         // Pick up the range of characters from the matcher
    684         //
    685         int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
    686         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
    687         if (U_FAILURE(*status)) {
    688             return 0;
    689         }
    690 
    691         //
    692         // Trim length based on buffer capacity
    693         //
    694         int32_t fullLength = endIx - startIx;
    695         int32_t copyLength = fullLength;
    696         if (copyLength < destCapacity) {
    697             dest[copyLength] = 0;
    698         } else if (copyLength == destCapacity) {
    699             *status = U_STRING_NOT_TERMINATED_WARNING;
    700         } else {
    701             copyLength = destCapacity;
    702             *status = U_BUFFER_OVERFLOW_ERROR;
    703         }
    704 
    705         //
    706         // Copy capture group to user's buffer
    707         //
    708         if (copyLength > 0) {
    709             u_memcpy(dest, &regexp->fText[startIx], copyLength);
    710         }
    711         return fullLength;
    712     } else {
    713         int64_t  start = regexp->fMatcher->start64(groupNum, *status);
    714         int64_t  limit = regexp->fMatcher->end64(groupNum, *status);
    715         if (U_FAILURE(*status)) {
    716             return 0;
    717         }
    718         // Note edge cases:
    719         //   Group didn't match: start == end == -1. UText trims to 0, UText gives zero length result.
    720         //   Zero Length Match: start == end.
    721         int32_t length = utext_extract(regexp->fMatcher->inputText(), start, limit, dest, destCapacity, status);
    722         return length;
    723     }
    724 
    725 }
    726 
    727 
    728 //------------------------------------------------------------------------------
    729 //
    730 //    uregex_groupUText
    731 //
    732 //------------------------------------------------------------------------------
    733 U_CAPI UText * U_EXPORT2
    734 uregex_groupUText(URegularExpression *regexp2,
    735                   int32_t             groupNum,
    736                   UText              *dest,
    737                   int64_t            *groupLength,
    738                   UErrorCode         *status)  {
    739     RegularExpression *regexp = (RegularExpression*)regexp2;
    740     if (validateRE(regexp, TRUE, status) == FALSE) {
    741         UErrorCode emptyTextStatus = U_ZERO_ERROR;
    742         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
    743     }
    744 
    745     return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
    746 }
    747 
    748 //------------------------------------------------------------------------------
    749 //
    750 //    uregex_start
    751 //
    752 //------------------------------------------------------------------------------
    753 U_CAPI int32_t U_EXPORT2
    754 uregex_start(URegularExpression *regexp2,
    755              int32_t             groupNum,
    756              UErrorCode          *status)  {
    757     return (int32_t)uregex_start64( regexp2, groupNum, status);
    758 }
    759 
    760 U_CAPI int64_t U_EXPORT2
    761 uregex_start64(URegularExpression *regexp2,
    762                int32_t             groupNum,
    763                UErrorCode          *status)  {
    764     RegularExpression *regexp = (RegularExpression*)regexp2;
    765     if (validateRE(regexp, TRUE, status) == FALSE) {
    766         return 0;
    767     }
    768     int32_t result = regexp->fMatcher->start(groupNum, *status);
    769     return result;
    770 }
    771 
    772 //------------------------------------------------------------------------------
    773 //
    774 //    uregex_end
    775 //
    776 //------------------------------------------------------------------------------
    777 U_CAPI int32_t U_EXPORT2
    778 uregex_end(URegularExpression   *regexp2,
    779            int32_t               groupNum,
    780            UErrorCode           *status)  {
    781     return (int32_t)uregex_end64( regexp2, groupNum, status);
    782 }
    783 
    784 U_CAPI int64_t U_EXPORT2
    785 uregex_end64(URegularExpression   *regexp2,
    786              int32_t               groupNum,
    787              UErrorCode           *status)  {
    788     RegularExpression *regexp = (RegularExpression*)regexp2;
    789     if (validateRE(regexp, TRUE, status) == FALSE) {
    790         return 0;
    791     }
    792     int32_t result = regexp->fMatcher->end(groupNum, *status);
    793     return result;
    794 }
    795 
    796 //------------------------------------------------------------------------------
    797 //
    798 //    uregex_reset
    799 //
    800 //------------------------------------------------------------------------------
    801 U_CAPI void U_EXPORT2
    802 uregex_reset(URegularExpression    *regexp2,
    803              int32_t               index,
    804              UErrorCode            *status)  {
    805     uregex_reset64( regexp2, (int64_t)index, status);
    806 }
    807 
    808 U_CAPI void U_EXPORT2
    809 uregex_reset64(URegularExpression    *regexp2,
    810                int64_t               index,
    811                UErrorCode            *status)  {
    812     RegularExpression *regexp = (RegularExpression*)regexp2;
    813     if (validateRE(regexp, TRUE, status) == FALSE) {
    814         return;
    815     }
    816     regexp->fMatcher->reset(index, *status);
    817 }
    818 
    819 
    820 //------------------------------------------------------------------------------
    821 //
    822 //    uregex_setRegion
    823 //
    824 //------------------------------------------------------------------------------
    825 U_CAPI void U_EXPORT2
    826 uregex_setRegion(URegularExpression   *regexp2,
    827                  int32_t               regionStart,
    828                  int32_t               regionLimit,
    829                  UErrorCode           *status)  {
    830     uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
    831 }
    832 
    833 U_CAPI void U_EXPORT2
    834 uregex_setRegion64(URegularExpression   *regexp2,
    835                    int64_t               regionStart,
    836                    int64_t               regionLimit,
    837                    UErrorCode           *status)  {
    838     RegularExpression *regexp = (RegularExpression*)regexp2;
    839     if (validateRE(regexp, TRUE, status) == FALSE) {
    840         return;
    841     }
    842     regexp->fMatcher->region(regionStart, regionLimit, *status);
    843 }
    844 
    845 
    846 //------------------------------------------------------------------------------
    847 //
    848 //    uregex_setRegionAndStart
    849 //
    850 //------------------------------------------------------------------------------
    851 U_CAPI void U_EXPORT2
    852 uregex_setRegionAndStart(URegularExpression   *regexp2,
    853                  int64_t               regionStart,
    854                  int64_t               regionLimit,
    855                  int64_t               startIndex,
    856                  UErrorCode           *status)  {
    857     RegularExpression *regexp = (RegularExpression*)regexp2;
    858     if (validateRE(regexp, TRUE, status) == FALSE) {
    859         return;
    860     }
    861     regexp->fMatcher->region(regionStart, regionLimit, startIndex, *status);
    862 }
    863 
    864 //------------------------------------------------------------------------------
    865 //
    866 //    uregex_regionStart
    867 //
    868 //------------------------------------------------------------------------------
    869 U_CAPI int32_t U_EXPORT2
    870 uregex_regionStart(const  URegularExpression   *regexp2,
    871                           UErrorCode           *status)  {
    872     return (int32_t)uregex_regionStart64(regexp2, status);
    873 }
    874 
    875 U_CAPI int64_t U_EXPORT2
    876 uregex_regionStart64(const  URegularExpression   *regexp2,
    877                             UErrorCode           *status)  {
    878     RegularExpression *regexp = (RegularExpression*)regexp2;
    879     if (validateRE(regexp, TRUE, status) == FALSE) {
    880         return 0;
    881     }
    882     return regexp->fMatcher->regionStart();
    883 }
    884 
    885 
    886 //------------------------------------------------------------------------------
    887 //
    888 //    uregex_regionEnd
    889 //
    890 //------------------------------------------------------------------------------
    891 U_CAPI int32_t U_EXPORT2
    892 uregex_regionEnd(const  URegularExpression   *regexp2,
    893                         UErrorCode           *status)  {
    894     return (int32_t)uregex_regionEnd64(regexp2, status);
    895 }
    896 
    897 U_CAPI int64_t U_EXPORT2
    898 uregex_regionEnd64(const  URegularExpression   *regexp2,
    899                           UErrorCode           *status)  {
    900     RegularExpression *regexp = (RegularExpression*)regexp2;
    901     if (validateRE(regexp, TRUE, status) == FALSE) {
    902         return 0;
    903     }
    904     return regexp->fMatcher->regionEnd();
    905 }
    906 
    907 
    908 //------------------------------------------------------------------------------
    909 //
    910 //    uregex_hasTransparentBounds
    911 //
    912 //------------------------------------------------------------------------------
    913 U_CAPI UBool U_EXPORT2
    914 uregex_hasTransparentBounds(const  URegularExpression   *regexp2,
    915                                    UErrorCode           *status)  {
    916     RegularExpression *regexp = (RegularExpression*)regexp2;
    917     if (validateRE(regexp, FALSE, status) == FALSE) {
    918         return FALSE;
    919     }
    920     return regexp->fMatcher->hasTransparentBounds();
    921 }
    922 
    923 
    924 //------------------------------------------------------------------------------
    925 //
    926 //    uregex_useTransparentBounds
    927 //
    928 //------------------------------------------------------------------------------
    929 U_CAPI void U_EXPORT2
    930 uregex_useTransparentBounds(URegularExpression    *regexp2,
    931                             UBool                  b,
    932                             UErrorCode            *status)  {
    933     RegularExpression *regexp = (RegularExpression*)regexp2;
    934     if (validateRE(regexp, FALSE, status) == FALSE) {
    935         return;
    936     }
    937     regexp->fMatcher->useTransparentBounds(b);
    938 }
    939 
    940 
    941 //------------------------------------------------------------------------------
    942 //
    943 //    uregex_hasAnchoringBounds
    944 //
    945 //------------------------------------------------------------------------------
    946 U_CAPI UBool U_EXPORT2
    947 uregex_hasAnchoringBounds(const  URegularExpression   *regexp2,
    948                                  UErrorCode           *status)  {
    949     RegularExpression *regexp = (RegularExpression*)regexp2;
    950     if (validateRE(regexp, FALSE, status) == FALSE) {
    951         return FALSE;
    952     }
    953     return regexp->fMatcher->hasAnchoringBounds();
    954 }
    955 
    956 
    957 //------------------------------------------------------------------------------
    958 //
    959 //    uregex_useAnchoringBounds
    960 //
    961 //------------------------------------------------------------------------------
    962 U_CAPI void U_EXPORT2
    963 uregex_useAnchoringBounds(URegularExpression    *regexp2,
    964                           UBool                  b,
    965                           UErrorCode            *status)  {
    966     RegularExpression *regexp = (RegularExpression*)regexp2;
    967     if (validateRE(regexp, FALSE, status) == FALSE) {
    968         return;
    969     }
    970     regexp->fMatcher->useAnchoringBounds(b);
    971 }
    972 
    973 
    974 //------------------------------------------------------------------------------
    975 //
    976 //    uregex_hitEnd
    977 //
    978 //------------------------------------------------------------------------------
    979 U_CAPI UBool U_EXPORT2
    980 uregex_hitEnd(const  URegularExpression   *regexp2,
    981                      UErrorCode           *status)  {
    982     RegularExpression *regexp = (RegularExpression*)regexp2;
    983     if (validateRE(regexp, TRUE, status) == FALSE) {
    984         return FALSE;
    985     }
    986     return regexp->fMatcher->hitEnd();
    987 }
    988 
    989 
    990 //------------------------------------------------------------------------------
    991 //
    992 //    uregex_requireEnd
    993 //
    994 //------------------------------------------------------------------------------
    995 U_CAPI UBool U_EXPORT2
    996 uregex_requireEnd(const  URegularExpression   *regexp2,
    997                          UErrorCode           *status)  {
    998     RegularExpression *regexp = (RegularExpression*)regexp2;
    999     if (validateRE(regexp, TRUE, status) == FALSE) {
   1000         return FALSE;
   1001     }
   1002     return regexp->fMatcher->requireEnd();
   1003 }
   1004 
   1005 
   1006 //------------------------------------------------------------------------------
   1007 //
   1008 //    uregex_setTimeLimit
   1009 //
   1010 //------------------------------------------------------------------------------
   1011 U_CAPI void U_EXPORT2
   1012 uregex_setTimeLimit(URegularExpression   *regexp2,
   1013                     int32_t               limit,
   1014                     UErrorCode           *status) {
   1015     RegularExpression *regexp = (RegularExpression*)regexp2;
   1016     if (validateRE(regexp, FALSE, status)) {
   1017         regexp->fMatcher->setTimeLimit(limit, *status);
   1018     }
   1019 }
   1020 
   1021 
   1022 
   1023 //------------------------------------------------------------------------------
   1024 //
   1025 //    uregex_getTimeLimit
   1026 //
   1027 //------------------------------------------------------------------------------
   1028 U_CAPI int32_t U_EXPORT2
   1029 uregex_getTimeLimit(const  URegularExpression   *regexp2,
   1030                            UErrorCode           *status) {
   1031     int32_t retVal = 0;
   1032     RegularExpression *regexp = (RegularExpression*)regexp2;
   1033     if (validateRE(regexp, FALSE, status)) {
   1034         retVal = regexp->fMatcher->getTimeLimit();
   1035     }
   1036     return retVal;
   1037 }
   1038 
   1039 
   1040 
   1041 //------------------------------------------------------------------------------
   1042 //
   1043 //    uregex_setStackLimit
   1044 //
   1045 //------------------------------------------------------------------------------
   1046 U_CAPI void U_EXPORT2
   1047 uregex_setStackLimit(URegularExpression   *regexp2,
   1048                      int32_t               limit,
   1049                      UErrorCode           *status) {
   1050     RegularExpression *regexp = (RegularExpression*)regexp2;
   1051     if (validateRE(regexp, FALSE, status)) {
   1052         regexp->fMatcher->setStackLimit(limit, *status);
   1053     }
   1054 }
   1055 
   1056 
   1057 
   1058 //------------------------------------------------------------------------------
   1059 //
   1060 //    uregex_getStackLimit
   1061 //
   1062 //------------------------------------------------------------------------------
   1063 U_CAPI int32_t U_EXPORT2
   1064 uregex_getStackLimit(const  URegularExpression   *regexp2,
   1065                             UErrorCode           *status) {
   1066     int32_t retVal = 0;
   1067     RegularExpression *regexp = (RegularExpression*)regexp2;
   1068     if (validateRE(regexp, FALSE, status)) {
   1069         retVal = regexp->fMatcher->getStackLimit();
   1070     }
   1071     return retVal;
   1072 }
   1073 
   1074 
   1075 //------------------------------------------------------------------------------
   1076 //
   1077 //    uregex_setMatchCallback
   1078 //
   1079 //------------------------------------------------------------------------------
   1080 U_CAPI void U_EXPORT2
   1081 uregex_setMatchCallback(URegularExpression      *regexp2,
   1082                         URegexMatchCallback     *callback,
   1083                         const void              *context,
   1084                         UErrorCode              *status) {
   1085     RegularExpression *regexp = (RegularExpression*)regexp2;
   1086     if (validateRE(regexp, FALSE, status)) {
   1087         regexp->fMatcher->setMatchCallback(callback, context, *status);
   1088     }
   1089 }
   1090 
   1091 
   1092 //------------------------------------------------------------------------------
   1093 //
   1094 //    uregex_getMatchCallback
   1095 //
   1096 //------------------------------------------------------------------------------
   1097 U_CAPI void U_EXPORT2
   1098 uregex_getMatchCallback(const URegularExpression    *regexp2,
   1099                         URegexMatchCallback        **callback,
   1100                         const void                 **context,
   1101                         UErrorCode                  *status) {
   1102     RegularExpression *regexp = (RegularExpression*)regexp2;
   1103      if (validateRE(regexp, FALSE, status)) {
   1104          regexp->fMatcher->getMatchCallback(*callback, *context, *status);
   1105      }
   1106 }
   1107 
   1108 
   1109 //------------------------------------------------------------------------------
   1110 //
//    uregex_setFindProgressCallback
   1112 //
   1113 //------------------------------------------------------------------------------
   1114 U_CAPI void U_EXPORT2
   1115 uregex_setFindProgressCallback(URegularExpression              *regexp2,
   1116                                 URegexFindProgressCallback      *callback,
   1117                                 const void                      *context,
   1118                                 UErrorCode                      *status) {
   1119     RegularExpression *regexp = (RegularExpression*)regexp2;
   1120     if (validateRE(regexp, FALSE, status)) {
   1121         regexp->fMatcher->setFindProgressCallback(callback, context, *status);
   1122     }
   1123 }
   1124 
   1125 
   1126 //------------------------------------------------------------------------------
   1127 //
//    uregex_getFindProgressCallback
   1129 //
   1130 //------------------------------------------------------------------------------
   1131 U_CAPI void U_EXPORT2
   1132 uregex_getFindProgressCallback(const URegularExpression          *regexp2,
   1133                                 URegexFindProgressCallback        **callback,
   1134                                 const void                        **context,
   1135                                 UErrorCode                        *status) {
   1136     RegularExpression *regexp = (RegularExpression*)regexp2;
   1137      if (validateRE(regexp, FALSE, status)) {
   1138          regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
   1139      }
   1140 }
   1141 
   1142 
   1143 //------------------------------------------------------------------------------
   1144 //
   1145 //    uregex_replaceAll
   1146 //
   1147 //------------------------------------------------------------------------------
   1148 U_CAPI int32_t U_EXPORT2
   1149 uregex_replaceAll(URegularExpression    *regexp2,
   1150                   const UChar           *replacementText,
   1151                   int32_t                replacementLength,
   1152                   UChar                 *destBuf,
   1153                   int32_t                destCapacity,
   1154                   UErrorCode            *status)  {
   1155     RegularExpression *regexp = (RegularExpression*)regexp2;
   1156     if (validateRE(regexp, TRUE, status) == FALSE) {
   1157         return 0;
   1158     }
   1159     if (replacementText == NULL || replacementLength < -1 ||
   1160         (destBuf == NULL && destCapacity > 0) ||
   1161         destCapacity < 0) {
   1162         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1163         return 0;
   1164     }
   1165 
   1166     int32_t   len = 0;
   1167 
   1168     uregex_reset(regexp2, 0, status);
   1169 
   1170     // Note: Seperate error code variables for findNext() and appendReplacement()
   1171     //       are used so that destination buffer overflow errors
   1172     //       in appendReplacement won't stop findNext() from working.
   1173     //       appendReplacement() and appendTail() special case incoming buffer
   1174     //       overflow errors, continuing to return the correct length.
   1175     UErrorCode  findStatus = *status;
   1176     while (uregex_findNext(regexp2, &findStatus)) {
   1177         len += uregex_appendReplacement(regexp2, replacementText, replacementLength,
   1178                                         &destBuf, &destCapacity, status);
   1179     }
   1180     len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
   1181 
   1182     if (U_FAILURE(findStatus)) {
   1183         // If anything went wrong with the findNext(), make that error trump
   1184         //   whatever may have happened with the append() operations.
   1185         //   Errors in findNext() are not expected.
   1186         *status = findStatus;
   1187     }
   1188 
   1189     return len;
   1190 }
   1191 
   1192 
   1193 //------------------------------------------------------------------------------
   1194 //
   1195 //    uregex_replaceAllUText
   1196 //
   1197 //------------------------------------------------------------------------------
   1198 U_CAPI UText * U_EXPORT2
   1199 uregex_replaceAllUText(URegularExpression    *regexp2,
   1200                        UText                 *replacementText,
   1201                        UText                 *dest,
   1202                        UErrorCode            *status)  {
   1203     RegularExpression *regexp = (RegularExpression*)regexp2;
   1204     if (validateRE(regexp, TRUE, status) == FALSE) {
   1205         return 0;
   1206     }
   1207     if (replacementText == NULL) {
   1208         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1209         return 0;
   1210     }
   1211 
   1212     dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
   1213     return dest;
   1214 }
   1215 
   1216 
   1217 //------------------------------------------------------------------------------
   1218 //
   1219 //    uregex_replaceFirst
   1220 //
   1221 //------------------------------------------------------------------------------
   1222 U_CAPI int32_t U_EXPORT2
   1223 uregex_replaceFirst(URegularExpression  *regexp2,
   1224                     const UChar         *replacementText,
   1225                     int32_t              replacementLength,
   1226                     UChar               *destBuf,
   1227                     int32_t              destCapacity,
   1228                     UErrorCode          *status)  {
   1229     RegularExpression *regexp = (RegularExpression*)regexp2;
   1230     if (validateRE(regexp, TRUE, status) == FALSE) {
   1231         return 0;
   1232     }
   1233     if (replacementText == NULL || replacementLength < -1 ||
   1234         (destBuf == NULL && destCapacity > 0) ||
   1235         destCapacity < 0) {
   1236         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1237         return 0;
   1238     }
   1239 
   1240     int32_t   len = 0;
   1241     UBool     findSucceeded;
   1242     uregex_reset(regexp2, 0, status);
   1243     findSucceeded = uregex_find(regexp2, 0, status);
   1244     if (findSucceeded) {
   1245         len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
   1246                                        &destBuf, &destCapacity, status);
   1247     }
   1248     len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
   1249 
   1250     return len;
   1251 }
   1252 
   1253 
   1254 //------------------------------------------------------------------------------
   1255 //
   1256 //    uregex_replaceFirstUText
   1257 //
   1258 //------------------------------------------------------------------------------
   1259 U_CAPI UText * U_EXPORT2
   1260 uregex_replaceFirstUText(URegularExpression  *regexp2,
   1261                          UText                 *replacementText,
   1262                          UText                 *dest,
   1263                          UErrorCode            *status)  {
   1264     RegularExpression *regexp = (RegularExpression*)regexp2;
   1265     if (validateRE(regexp, TRUE, status) == FALSE) {
   1266         return 0;
   1267     }
   1268     if (replacementText == NULL) {
   1269         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1270         return 0;
   1271     }
   1272 
   1273     dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
   1274     return dest;
   1275 }
   1276 
   1277 
   1278 //------------------------------------------------------------------------------
   1279 //
   1280 //    uregex_appendReplacement
   1281 //
   1282 //------------------------------------------------------------------------------
   1283 
   1284 U_NAMESPACE_BEGIN
   1285 //
   1286 //  Dummy class, because these functions need to be friends of class RegexMatcher,
   1287 //               and stand-alone C functions don't work as friends
   1288 //
   1289 class RegexCImpl {
   1290  public:
   1291    inline static  int32_t appendReplacement(RegularExpression    *regexp,
   1292                       const UChar           *replacementText,
   1293                       int32_t                replacementLength,
   1294                       UChar                **destBuf,
   1295                       int32_t               *destCapacity,
   1296                       UErrorCode            *status);
   1297 
   1298    inline static int32_t appendTail(RegularExpression    *regexp,
   1299         UChar                **destBuf,
   1300         int32_t               *destCapacity,
   1301         UErrorCode            *status);
   1302 
   1303     inline static int32_t split(RegularExpression    *regexp,
   1304         UChar                 *destBuf,
   1305         int32_t                destCapacity,
   1306         int32_t               *requiredCapacity,
   1307         UChar                 *destFields[],
   1308         int32_t                destFieldsCapacity,
   1309         UErrorCode            *status);
   1310 };
   1311 
   1312 U_NAMESPACE_END
   1313 
   1314 
   1315 
   1316 static const UChar BACKSLASH  = 0x5c;
   1317 static const UChar DOLLARSIGN = 0x24;
   1318 static const UChar LEFTBRACKET = 0x7b;
   1319 static const UChar RIGHTBRACKET = 0x7d;
   1320 
   1321 //
   1322 //  Move a character to an output buffer, with bounds checking on the index.
   1323 //      Index advances even if capacity is exceeded, for preflight size computations.
   1324 //      This little sequence is used a LOT.
   1325 //
   1326 static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) {
   1327     if (*idx < bufCapacity) {
   1328         buf[*idx] = c;
   1329     }
   1330     (*idx)++;
   1331 }
   1332 
   1333 
   1334 //
   1335 //  appendReplacement, the actual implementation.
   1336 //
   1337 int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
   1338                                       const UChar           *replacementText,
   1339                                       int32_t                replacementLength,
   1340                                       UChar                **destBuf,
   1341                                       int32_t               *destCapacity,
   1342                                       UErrorCode            *status)  {
   1343 
   1344     // If we come in with a buffer overflow error, don't suppress the operation.
   1345     //  A series of appendReplacements, appendTail need to correctly preflight
   1346     //  the buffer size when an overflow happens somewhere in the middle.
   1347     UBool pendingBufferOverflow = FALSE;
   1348     if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
   1349         pendingBufferOverflow = TRUE;
   1350         *status = U_ZERO_ERROR;
   1351     }
   1352 
   1353     //
   1354     // Validate all paramters
   1355     //
   1356     if (validateRE(regexp, TRUE, status) == FALSE) {
   1357         return 0;
   1358     }
   1359     if (replacementText == NULL || replacementLength < -1 ||
   1360         destCapacity == NULL || destBuf == NULL ||
   1361         (*destBuf == NULL && *destCapacity > 0) ||
   1362         *destCapacity < 0) {
   1363         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1364         return 0;
   1365     }
   1366 
   1367     RegexMatcher *m = regexp->fMatcher;
   1368     if (m->fMatch == FALSE) {
   1369         *status = U_REGEX_INVALID_STATE;
   1370         return 0;
   1371     }
   1372 
   1373     UChar    *dest             = *destBuf;
   1374     int32_t   capacity         = *destCapacity;
   1375     int32_t   destIdx          =  0;
   1376     int32_t   i;
   1377 
   1378     // If it wasn't supplied by the caller,  get the length of the replacement text.
   1379     //   TODO:  slightly smarter logic in the copy loop could watch for the NUL on
   1380     //          the fly and avoid this step.
   1381     if (replacementLength == -1) {
   1382         replacementLength = u_strlen(replacementText);
   1383     }
   1384 
   1385     // Copy input string from the end of previous match to start of current match
   1386     if (regexp->fText != NULL) {
   1387         int32_t matchStart;
   1388         int32_t lastMatchEnd;
   1389         if (UTEXT_USES_U16(m->fInputText)) {
   1390             lastMatchEnd = (int32_t)m->fLastMatchEnd;
   1391             matchStart = (int32_t)m->fMatchStart;
   1392         } else {
   1393             // !!!: Would like a better way to do this!
   1394             UErrorCode tempStatus = U_ZERO_ERROR;
   1395             lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &tempStatus);
   1396             tempStatus = U_ZERO_ERROR;
   1397             matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &tempStatus);
   1398         }
   1399         for (i=lastMatchEnd; i<matchStart; i++) {
   1400             appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
   1401         }
   1402     } else {
   1403         UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
   1404         destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
   1405                                  dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity),
   1406                                  &possibleOverflowError);
   1407     }
   1408     U_ASSERT(destIdx >= 0);
   1409 
   1410     // scan the replacement text, looking for substitutions ($n) and \escapes.
   1411     int32_t  replIdx = 0;
   1412     while (replIdx < replacementLength && U_SUCCESS(*status)) {
   1413         UChar  c = replacementText[replIdx];
   1414         replIdx++;
   1415         if (c != DOLLARSIGN && c != BACKSLASH) {
   1416             // Common case, no substitution, no escaping,
   1417             //  just copy the char to the dest buf.
   1418             appendToBuf(c, &destIdx, dest, capacity);
   1419             continue;
   1420         }
   1421 
   1422         if (c == BACKSLASH) {
   1423             // Backslash Escape.  Copy the following char out without further checks.
   1424             //                    Note:  Surrogate pairs don't need any special handling
   1425             //                           The second half wont be a '$' or a '\', and
   1426             //                           will move to the dest normally on the next
   1427             //                           loop iteration.
   1428             if (replIdx >= replacementLength) {
   1429                 break;
   1430             }
   1431             c = replacementText[replIdx];
   1432 
   1433             if (c==0x55/*U*/ || c==0x75/*u*/) {
   1434                 // We have a \udddd or \Udddddddd escape sequence.
   1435                 UChar32 escapedChar =
   1436                     u_unescapeAt(uregex_ucstr_unescape_charAt,
   1437                        &replIdx,                   // Index is updated by unescapeAt
   1438                        replacementLength,          // Length of replacement text
   1439                        (void *)replacementText);
   1440 
   1441                 if (escapedChar != (UChar32)0xFFFFFFFF) {
   1442                     if (escapedChar <= 0xffff) {
   1443                         appendToBuf((UChar)escapedChar, &destIdx, dest, capacity);
   1444                     } else {
   1445                         appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
   1446                         appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
   1447                     }
   1448                     continue;
   1449                 }
   1450                 // Note:  if the \u escape was invalid, just fall through and
   1451                 //        treat it as a plain \<anything> escape.
   1452             }
   1453 
   1454             // Plain backslash escape.  Just put out the escaped character.
   1455             appendToBuf(c, &destIdx, dest, capacity);
   1456 
   1457             replIdx++;
   1458             continue;
   1459         }
   1460 
   1461         // We've got a $.  Pick up the following capture group name or number.
   1462         // For numbers, consume only digits that produce a valid capture group for the pattern.
   1463 
   1464         int32_t groupNum  = 0;
   1465         U_ASSERT(c == DOLLARSIGN);
   1466         UChar32 c32;
   1467         U16_GET(replacementText, 0, replIdx, replacementLength, c32);
   1468         if (u_isdigit(c32)) {
   1469             int32_t numDigits = 0;
   1470             int32_t numCaptureGroups = m->fPattern->fGroupMap->size();
   1471             for (;;) {
   1472                 if (replIdx >= replacementLength) {
   1473                     break;
   1474                 }
   1475                 U16_GET(replacementText, 0, replIdx, replacementLength, c32);
   1476                 if (u_isdigit(c32) == FALSE) {
   1477                     break;
   1478                 }
   1479 
   1480                 int32_t digitVal = u_charDigitValue(c32);
   1481                 if (groupNum * 10 + digitVal <= numCaptureGroups) {
   1482                     groupNum = groupNum * 10 + digitVal;
   1483                     U16_FWD_1(replacementText, replIdx, replacementLength);
   1484                     numDigits++;
   1485                 } else {
   1486                     if (numDigits == 0) {
   1487                         *status = U_INDEX_OUTOFBOUNDS_ERROR;
   1488                     }
   1489                     break;
   1490                 }
   1491             }
   1492         } else if (c32 == LEFTBRACKET) {
   1493             // Scan for Named Capture Group, ${name}.
   1494             UnicodeString groupName;
   1495             U16_FWD_1(replacementText, replIdx, replacementLength);
   1496             while (U_SUCCESS(*status) && c32 != RIGHTBRACKET) {
   1497                 if (replIdx >= replacementLength) {
   1498                     *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
   1499                     break;
   1500                 }
   1501                 U16_NEXT(replacementText, replIdx, replacementLength, c32);
   1502                 if ((c32 >= 0x41 && c32 <= 0x5a) ||           // A..Z
   1503                         (c32 >= 0x61 && c32 <= 0x7a) ||       // a..z
   1504                         (c32 >= 0x31 && c32 <= 0x39)) {       // 0..9
   1505                     groupName.append(c32);
   1506                 } else if (c32 == RIGHTBRACKET) {
   1507                     groupNum = uhash_geti(regexp->fPat->fNamedCaptureMap, &groupName);
   1508                     if (groupNum == 0) {
   1509                         // Name not defined by pattern.
   1510                         *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
   1511                     }
   1512                 } else {
   1513                     // Character was something other than a name char or a closing '}'
   1514                     *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
   1515                 }
   1516             }
   1517         } else {
   1518             // $ not followed by {name} or digits.
   1519             *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
   1520         }
   1521 
   1522 
   1523         // Finally, append the capture group data to the destination.
   1524         if (U_SUCCESS(*status)) {
   1525             destIdx += uregex_group((URegularExpression*)regexp, groupNum,
   1526                                     dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
   1527             if (*status == U_BUFFER_OVERFLOW_ERROR) {
   1528                 // Ignore buffer overflow when extracting the group.  We need to
   1529                 //   continue on to get full size of the untruncated result.  We will
   1530                 //   raise our own buffer overflow error at the end.
   1531                 *status = U_ZERO_ERROR;
   1532             }
   1533         }
   1534 
   1535         if (U_FAILURE(*status)) {
   1536             // bad group number or name.
   1537             break;
   1538         }
   1539     }
   1540 
   1541     //
   1542     //  Nul Terminate the dest buffer if possible.
   1543     //  Set the appropriate buffer overflow or not terminated error, if needed.
   1544     //
   1545     if (destIdx < capacity) {
   1546         dest[destIdx] = 0;
   1547     } else if (U_SUCCESS(*status)) {
   1548         if (destIdx == *destCapacity) {
   1549             *status = U_STRING_NOT_TERMINATED_WARNING;
   1550         } else {
   1551             *status = U_BUFFER_OVERFLOW_ERROR;
   1552         }
   1553     }
   1554 
   1555     //
   1556     // Return an updated dest buffer and capacity to the caller.
   1557     //
   1558     if (destIdx > 0 &&  *destCapacity > 0) {
   1559         if (destIdx < capacity) {
   1560             *destBuf      += destIdx;
   1561             *destCapacity -= destIdx;
   1562         } else {
   1563             *destBuf      += capacity;
   1564             *destCapacity =  0;
   1565         }
   1566     }
   1567 
   1568     // If we came in with a buffer overflow, make sure we go out with one also.
   1569     //   (A zero length match right at the end of the previous match could
   1570     //    make this function succeed even though a previous call had overflowed the buf)
   1571     if (pendingBufferOverflow && U_SUCCESS(*status)) {
   1572         *status = U_BUFFER_OVERFLOW_ERROR;
   1573     }
   1574 
   1575     return destIdx;
   1576 }
   1577 
   1578 //
   1579 //   appendReplacement   the actual API function,
   1580 //
   1581 U_CAPI int32_t U_EXPORT2
   1582 uregex_appendReplacement(URegularExpression    *regexp2,
   1583                          const UChar           *replacementText,
   1584                          int32_t                replacementLength,
   1585                          UChar                **destBuf,
   1586                          int32_t               *destCapacity,
   1587                          UErrorCode            *status) {
   1588 
   1589     RegularExpression *regexp = (RegularExpression*)regexp2;
   1590     return RegexCImpl::appendReplacement(
   1591         regexp, replacementText, replacementLength,destBuf, destCapacity, status);
   1592 }
   1593 
   1594 //
   1595 //   uregex_appendReplacementUText...can just use the normal C++ method
   1596 //
   1597 U_CAPI void U_EXPORT2
   1598 uregex_appendReplacementUText(URegularExpression    *regexp2,
   1599                               UText                 *replText,
   1600                               UText                 *dest,
   1601                               UErrorCode            *status)  {
   1602     RegularExpression *regexp = (RegularExpression*)regexp2;
   1603     regexp->fMatcher->appendReplacement(dest, replText, *status);
   1604 }
   1605 
   1606 
   1607 //------------------------------------------------------------------------------
   1608 //
   1609 //    uregex_appendTail
   1610 //
   1611 //------------------------------------------------------------------------------
   1612 int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
   1613                                UChar                **destBuf,
   1614                                int32_t               *destCapacity,
   1615                                UErrorCode            *status)
   1616 {
   1617 
   1618     // If we come in with a buffer overflow error, don't suppress the operation.
   1619     //  A series of appendReplacements, appendTail need to correctly preflight
   1620     //  the buffer size when an overflow happens somewhere in the middle.
   1621     UBool pendingBufferOverflow = FALSE;
   1622     if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
   1623         pendingBufferOverflow = TRUE;
   1624         *status = U_ZERO_ERROR;
   1625     }
   1626 
   1627     if (validateRE(regexp, TRUE, status) == FALSE) {
   1628         return 0;
   1629     }
   1630 
   1631     if (destCapacity == NULL || destBuf == NULL ||
   1632         (*destBuf == NULL && *destCapacity > 0) ||
   1633         *destCapacity < 0)
   1634     {
   1635         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1636         return 0;
   1637     }
   1638 
   1639     RegexMatcher *m = regexp->fMatcher;
   1640 
   1641     int32_t  destIdx     = 0;
   1642     int32_t  destCap     = *destCapacity;
   1643     UChar    *dest       = *destBuf;
   1644 
   1645     if (regexp->fText != NULL) {
   1646         int32_t srcIdx;
   1647         int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
   1648         if (nativeIdx == -1) {
   1649             srcIdx = 0;
   1650         } else if (UTEXT_USES_U16(m->fInputText)) {
   1651             srcIdx = (int32_t)nativeIdx;
   1652         } else {
   1653             UErrorCode status = U_ZERO_ERROR;
   1654             srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status);
   1655         }
   1656 
   1657         for (;;) {
   1658             U_ASSERT(destIdx >= 0);
   1659 
   1660             if (srcIdx == regexp->fTextLength) {
   1661                 break;
   1662             }
   1663             UChar c = regexp->fText[srcIdx];
   1664             if (c == 0 && regexp->fTextLength == -1) {
   1665                 regexp->fTextLength = srcIdx;
   1666                 break;
   1667             }
   1668 
   1669             if (destIdx < destCap) {
   1670                 dest[destIdx] = c;
   1671             } else {
   1672                 // We've overflowed the dest buffer.
   1673                 //  If the total input string length is known, we can
   1674                 //    compute the total buffer size needed without scanning through the string.
   1675                 if (regexp->fTextLength > 0) {
   1676                     destIdx += (regexp->fTextLength - srcIdx);
   1677                     break;
   1678                 }
   1679             }
   1680             srcIdx++;
   1681             destIdx++;
   1682         }
   1683     } else {
   1684         int64_t  srcIdx;
   1685         if (m->fMatch) {
   1686             // The most recent call to find() succeeded.
   1687             srcIdx = m->fMatchEnd;
   1688         } else {
   1689             // The last call to find() on this matcher failed().
   1690             //   Look back to the end of the last find() that succeeded for src index.
   1691             srcIdx = m->fLastMatchEnd;
   1692             if (srcIdx == -1)  {
   1693                 // There has been no successful match with this matcher.
   1694                 //   We want to copy the whole string.
   1695                 srcIdx = 0;
   1696             }
   1697         }
   1698 
   1699         destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
   1700     }
   1701 
   1702     //
   1703     //  NUL terminate the output string, if possible, otherwise issue the
   1704     //   appropriate error or warning.
   1705     //
   1706     if (destIdx < destCap) {
   1707         dest[destIdx] = 0;
   1708     } else  if (destIdx == destCap) {
   1709         *status = U_STRING_NOT_TERMINATED_WARNING;
   1710     } else {
   1711         *status = U_BUFFER_OVERFLOW_ERROR;
   1712     }
   1713 
   1714     //
   1715     // Update the user's buffer ptr and capacity vars to reflect the
   1716     //   amount used.
   1717     //
   1718     if (destIdx < destCap) {
   1719         *destBuf      += destIdx;
   1720         *destCapacity -= destIdx;
   1721     } else if (*destBuf != NULL) {
   1722         *destBuf      += destCap;
   1723         *destCapacity  = 0;
   1724     }
   1725 
   1726     if (pendingBufferOverflow && U_SUCCESS(*status)) {
   1727         *status = U_BUFFER_OVERFLOW_ERROR;
   1728     }
   1729 
   1730     return destIdx;
   1731 }
   1732 
   1733 
   1734 //
   1735 //   appendTail   the actual API function
   1736 //
   1737 U_CAPI int32_t U_EXPORT2
   1738 uregex_appendTail(URegularExpression    *regexp2,
   1739                   UChar                **destBuf,
   1740                   int32_t               *destCapacity,
   1741                   UErrorCode            *status)  {
   1742     RegularExpression *regexp = (RegularExpression*)regexp2;
   1743     return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status);
   1744 }
   1745 
   1746 
   1747 //
   1748 //   uregex_appendTailUText...can just use the normal C++ method
   1749 //
   1750 U_CAPI UText * U_EXPORT2
   1751 uregex_appendTailUText(URegularExpression    *regexp2,
   1752                        UText                 *dest,
   1753                        UErrorCode            *status)  {
   1754     RegularExpression *regexp = (RegularExpression*)regexp2;
   1755     return regexp->fMatcher->appendTail(dest, *status);
   1756 }
   1757 
   1758 
   1759 //------------------------------------------------------------------------------
   1760 //
   1761 //    copyString     Internal utility to copy a string to an output buffer,
   1762 //                   while managing buffer overflow and preflight size
   1763 //                   computation.  NUL termination is added to destination,
   1764 //                   and the NUL is counted in the output size.
   1765 //
   1766 //------------------------------------------------------------------------------
   1767 #if 0
   1768 static void copyString(UChar        *destBuffer,    //  Destination buffer.
   1769                        int32_t       destCapacity,  //  Total capacity of dest buffer
   1770                        int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
   1771                                                     //    Update not clipped to destCapacity.
   1772                        const UChar  *srcPtr,        //  Pointer to source string
   1773                        int32_t       srcLen)        //  Source string len.
   1774 {
   1775     int32_t  si;
   1776     int32_t  di = *destIndex;
   1777     UChar    c;
   1778 
   1779     for (si=0; si<srcLen;  si++) {
   1780         c = srcPtr[si];
   1781         if (di < destCapacity) {
   1782             destBuffer[di] = c;
   1783             di++;
   1784         } else {
   1785             di += srcLen - si;
   1786             break;
   1787         }
   1788     }
   1789     if (di<destCapacity) {
   1790         destBuffer[di] = 0;
   1791     }
   1792     di++;
   1793     *destIndex = di;
   1794 }
   1795 #endif
   1796 
   1797 //------------------------------------------------------------------------------
   1798 //
   1799 //    uregex_split
   1800 //
   1801 //------------------------------------------------------------------------------
   1802 int32_t RegexCImpl::split(RegularExpression     *regexp,
   1803                           UChar                 *destBuf,
   1804                           int32_t                destCapacity,
   1805                           int32_t               *requiredCapacity,
   1806                           UChar                 *destFields[],
   1807                           int32_t                destFieldsCapacity,
   1808                           UErrorCode            *status) {
   1809     //
   1810     // Reset for the input text
   1811     //
   1812     regexp->fMatcher->reset();
   1813     UText *inputText = regexp->fMatcher->fInputText;
   1814     int64_t   nextOutputStringStart = 0;
   1815     int64_t   inputLen = regexp->fMatcher->fInputLength;
   1816     if (inputLen == 0) {
   1817         return 0;
   1818     }
   1819 
   1820     //
   1821     // Loop through the input text, searching for the delimiter pattern
   1822     //
   1823     int32_t   i;             // Index of the field being processed.
   1824     int32_t   destIdx = 0;   // Next available position in destBuf;
   1825     int32_t   numCaptureGroups = regexp->fMatcher->groupCount();
   1826     UErrorCode  tStatus = U_ZERO_ERROR;   // Want to ignore any buffer overflow errors so that the strings are still counted
   1827     for (i=0; ; i++) {
   1828         if (i>=destFieldsCapacity-1) {
   1829             // There are one or zero output strings left.
   1830             // Fill the last output string with whatever is left from the input, then exit the loop.
   1831             //  ( i will be == destFieldsCapacity if we filled the output array while processing
   1832             //    capture groups of the delimiter expression, in which case we will discard the
   1833             //    last capture group saved in favor of the unprocessed remainder of the
   1834             //    input string.)
   1835             if (inputLen > nextOutputStringStart) {
   1836                 if (i != destFieldsCapacity-1) {
   1837                     // No fields are left.  Recycle the last one for holding the trailing part of
   1838                     //   the input string.
   1839                     i = destFieldsCapacity-1;
   1840                     destIdx = (int32_t)(destFields[i] - destFields[0]);
   1841                 }
   1842 
   1843                 destFields[i] = &destBuf[destIdx];
   1844                 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
   1845                                              &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
   1846             }
   1847             break;
   1848         }
   1849 
   1850         if (regexp->fMatcher->find()) {
   1851             // We found another delimiter.  Move everything from where we started looking
   1852             //  up until the start of the delimiter into the next output string.
   1853             destFields[i] = &destBuf[destIdx];
   1854 
   1855             destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
   1856                                          &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
   1857             if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
   1858                 tStatus = U_ZERO_ERROR;
   1859             } else {
   1860                 *status = tStatus;
   1861             }
   1862             nextOutputStringStart = regexp->fMatcher->fMatchEnd;
   1863 
   1864             // If the delimiter pattern has capturing parentheses, the captured
   1865             //  text goes out into the next n destination strings.
   1866             int32_t groupNum;
   1867             for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
   1868                 // If we've run out of output string slots, bail out.
   1869                 if (i==destFieldsCapacity-1) {
   1870                     break;
   1871                 }
   1872                 i++;
   1873 
   1874                 // Set up to extract the capture group contents into the dest buffer.
   1875                 destFields[i] = &destBuf[destIdx];
   1876                 tStatus = U_ZERO_ERROR;
   1877                 int32_t t = uregex_group((URegularExpression*)regexp,
   1878                                          groupNum,
   1879                                          destFields[i],
   1880                                          REMAINING_CAPACITY(destIdx, destCapacity),
   1881                                          &tStatus);
   1882                 destIdx += t + 1;    // Record the space used in the output string buffer.
   1883                                      //  +1 for the NUL that terminates the string.
   1884                 if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
   1885                     tStatus = U_ZERO_ERROR;
   1886                 } else {
   1887                     *status = tStatus;
   1888                 }
   1889             }
   1890 
   1891             if (nextOutputStringStart == inputLen) {
   1892                 // The delimiter was at the end of the string.
   1893                 // Output an empty string, and then we are done.
   1894                 if (destIdx < destCapacity) {
   1895                     destBuf[destIdx] = 0;
   1896                 }
   1897                 if (i < destFieldsCapacity-1) {
   1898                    ++i;
   1899                 }
   1900                 if (destIdx < destCapacity) {
   1901                     destFields[i] = destBuf + destIdx;
   1902                 }
   1903                 ++destIdx;
   1904                 break;
   1905             }
   1906 
   1907         }
   1908         else
   1909         {
   1910             // We ran off the end of the input while looking for the next delimiter.
   1911             // All the remaining text goes into the current output string.
   1912             destFields[i] = &destBuf[destIdx];
   1913             destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
   1914                                          &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
   1915             break;
   1916         }
   1917     }
   1918 
   1919     // Zero out any unused portion of the destFields array
   1920     int j;
   1921     for (j=i+1; j<destFieldsCapacity; j++) {
   1922         destFields[j] = NULL;
   1923     }
   1924 
   1925     if (requiredCapacity != NULL) {
   1926         *requiredCapacity = destIdx;
   1927     }
   1928     if (destIdx > destCapacity) {
   1929         *status = U_BUFFER_OVERFLOW_ERROR;
   1930     }
   1931     return i+1;
   1932 }
   1933 
   1934 //
   1935 //   uregex_split   The actual API function
   1936 //
   1937 U_CAPI int32_t U_EXPORT2
   1938 uregex_split(URegularExpression      *regexp2,
   1939              UChar                   *destBuf,
   1940              int32_t                  destCapacity,
   1941              int32_t                 *requiredCapacity,
   1942              UChar                   *destFields[],
   1943              int32_t                  destFieldsCapacity,
   1944              UErrorCode              *status) {
   1945     RegularExpression *regexp = (RegularExpression*)regexp2;
   1946     if (validateRE(regexp, TRUE, status) == FALSE) {
   1947         return 0;
   1948     }
   1949     if ((destBuf == NULL && destCapacity > 0) ||
   1950         destCapacity < 0 ||
   1951         destFields == NULL ||
   1952         destFieldsCapacity < 1 ) {
   1953         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1954         return 0;
   1955     }
   1956 
   1957     return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
   1958 }
   1959 
   1960 
   1961 //
   1962 //   uregex_splitUText...can just use the normal C++ method
   1963 //
   1964 U_CAPI int32_t U_EXPORT2
   1965 uregex_splitUText(URegularExpression    *regexp2,
   1966                   UText                 *destFields[],
   1967                   int32_t                destFieldsCapacity,
   1968                   UErrorCode            *status) {
   1969     RegularExpression *regexp = (RegularExpression*)regexp2;
   1970     return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
   1971 }
   1972 
   1973 
   1974 #endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS
   1975 
   1976