Home | History | Annotate | Download | only in i18n
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2004-2015, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  uregex.cpp
      9 */
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     14 
     15 #include "unicode/regex.h"
     16 #include "unicode/uregex.h"
     17 #include "unicode/unistr.h"
     18 #include "unicode/ustring.h"
     19 #include "unicode/uchar.h"
     20 #include "unicode/uobject.h"
     21 #include "unicode/utf16.h"
     22 #include "cmemory.h"
     23 #include "uassert.h"
     24 #include "uhash.h"
     25 #include "umutex.h"
     26 #include "uvectr32.h"
     27 
     28 #include "regextxt.h"
     29 
     30 U_NAMESPACE_BEGIN
     31 
     32 #define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0)
     33 
     34 struct RegularExpression: public UMemory {
     35 public:
     36     RegularExpression();
     37     ~RegularExpression();
     38     int32_t           fMagic;
     39     RegexPattern     *fPat;
     40     u_atomic_int32_t *fPatRefCount;
     41     UChar            *fPatString;
     42     int32_t           fPatStringLen;
     43     RegexMatcher     *fMatcher;
     44     const UChar      *fText;         // Text from setText()
     45     int32_t           fTextLength;   // Length provided by user with setText(), which
     46                                      //  may be -1.
     47     UBool             fOwnsText;
     48 };
     49 
     50 static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
     51 
     52 RegularExpression::RegularExpression() {
     53     fMagic        = REXP_MAGIC;
     54     fPat          = NULL;
     55     fPatRefCount  = NULL;
     56     fPatString    = NULL;
     57     fPatStringLen = 0;
     58     fMatcher      = NULL;
     59     fText         = NULL;
     60     fTextLength   = 0;
     61     fOwnsText     = FALSE;
     62 }
     63 
     64 RegularExpression::~RegularExpression() {
     65     delete fMatcher;
     66     fMatcher = NULL;
     67     if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) {
     68         delete fPat;
     69         uprv_free(fPatString);
     70         uprv_free((void *)fPatRefCount);
     71     }
     72     if (fOwnsText && fText!=NULL) {
     73         uprv_free((void *)fText);
     74     }
     75     fMagic = 0;
     76 }
     77 
     78 U_NAMESPACE_END
     79 
     80 U_NAMESPACE_USE
     81 
     82 //----------------------------------------------------------------------------------------
     83 //
     84 //   validateRE    Do boilerplate style checks on API function parameters.
     85 //                 Return TRUE if they look OK.
     86 //----------------------------------------------------------------------------------------
     87 static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) {
     88     if (U_FAILURE(*status)) {
     89         return FALSE;
     90     }
     91     if (re == NULL || re->fMagic != REXP_MAGIC) {
     92         *status = U_ILLEGAL_ARGUMENT_ERROR;
     93         return FALSE;
     94     }
     95     // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway
     96     if (requiresText && re->fText == NULL && !re->fOwnsText) {
     97         *status = U_REGEX_INVALID_STATE;
     98         return FALSE;
     99     }
    100     return TRUE;
    101 }
    102 
    103 //----------------------------------------------------------------------------------------
    104 //
    105 //    uregex_open
    106 //
    107 //----------------------------------------------------------------------------------------
    108 U_CAPI URegularExpression *  U_EXPORT2
    109 uregex_open( const  UChar          *pattern,
    110                     int32_t         patternLength,
    111                     uint32_t        flags,
    112                     UParseError    *pe,
    113                     UErrorCode     *status) {
    114 
    115     if (U_FAILURE(*status)) {
    116         return NULL;
    117     }
    118     if (pattern == NULL || patternLength < -1 || patternLength == 0) {
    119         *status = U_ILLEGAL_ARGUMENT_ERROR;
    120         return NULL;
    121     }
    122     int32_t actualPatLen = patternLength;
    123     if (actualPatLen == -1) {
    124         actualPatLen = u_strlen(pattern);
    125     }
    126 
    127     RegularExpression  *re     = new RegularExpression;
    128     u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
    129     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1));
    130     if (re == NULL || refC == NULL || patBuf == NULL) {
    131         *status = U_MEMORY_ALLOCATION_ERROR;
    132         delete re;
    133         uprv_free((void *)refC);
    134         uprv_free(patBuf);
    135         return NULL;
    136     }
    137     re->fPatRefCount = refC;
    138     *re->fPatRefCount = 1;
    139 
    140     //
    141     // Make a copy of the pattern string, so we can return it later if asked.
    142     //    For compiling the pattern, we will use a UText wrapper around
    143     //    this local copy, to avoid making even more copies.
    144     //
    145     re->fPatString    = patBuf;
    146     re->fPatStringLen = patternLength;
    147     u_memcpy(patBuf, pattern, actualPatLen);
    148     patBuf[actualPatLen] = 0;
    149 
    150     UText patText = UTEXT_INITIALIZER;
    151     utext_openUChars(&patText, patBuf, patternLength, status);
    152 
    153     //
    154     // Compile the pattern
    155     //
    156     if (pe != NULL) {
    157         re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
    158     } else {
    159         re->fPat = RegexPattern::compile(&patText, flags, *status);
    160     }
    161     utext_close(&patText);
    162 
    163     if (U_FAILURE(*status)) {
    164         goto ErrorExit;
    165     }
    166 
    167     //
    168     // Create the matcher object
    169     //
    170     re->fMatcher = re->fPat->matcher(*status);
    171     if (U_SUCCESS(*status)) {
    172         return (URegularExpression*)re;
    173     }
    174 
    175 ErrorExit:
    176     delete re;
    177     return NULL;
    178 
    179 }
    180 
    181 //----------------------------------------------------------------------------------------
    182 //
    183 //    uregex_openUText
    184 //
    185 //----------------------------------------------------------------------------------------
    186 U_CAPI URegularExpression *  U_EXPORT2
    187 uregex_openUText(UText          *pattern,
    188                  uint32_t        flags,
    189                  UParseError    *pe,
    190                  UErrorCode     *status) {
    191 
    192     if (U_FAILURE(*status)) {
    193         return NULL;
    194     }
    195     if (pattern == NULL) {
    196         *status = U_ILLEGAL_ARGUMENT_ERROR;
    197         return NULL;
    198     }
    199 
    200     int64_t patternNativeLength = utext_nativeLength(pattern);
    201 
    202     if (patternNativeLength == 0) {
    203         *status = U_ILLEGAL_ARGUMENT_ERROR;
    204         return NULL;
    205     }
    206 
    207     RegularExpression *re     = new RegularExpression;
    208 
    209     UErrorCode lengthStatus = U_ZERO_ERROR;
    210     int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
    211 
    212     u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
    213     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
    214     if (re == NULL || refC == NULL || patBuf == NULL) {
    215         *status = U_MEMORY_ALLOCATION_ERROR;
    216         delete re;
    217         uprv_free((void *)refC);
    218         uprv_free(patBuf);
    219         return NULL;
    220     }
    221     re->fPatRefCount = refC;
    222     *re->fPatRefCount = 1;
    223 
    224     //
    225     // Make a copy of the pattern string, so we can return it later if asked.
    226     //    For compiling the pattern, we will use a read-only UText wrapper
    227     //    around this local copy, to avoid making even more copies.
    228     //
    229     re->fPatString    = patBuf;
    230     re->fPatStringLen = pattern16Length;
    231     utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
    232 
    233     UText patText = UTEXT_INITIALIZER;
    234     utext_openUChars(&patText, patBuf, pattern16Length, status);
    235 
    236     //
    237     // Compile the pattern
    238     //
    239     if (pe != NULL) {
    240         re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
    241     } else {
    242         re->fPat = RegexPattern::compile(&patText, flags, *status);
    243     }
    244     utext_close(&patText);
    245 
    246     if (U_FAILURE(*status)) {
    247         goto ErrorExit;
    248     }
    249 
    250     //
    251     // Create the matcher object
    252     //
    253     re->fMatcher = re->fPat->matcher(*status);
    254     if (U_SUCCESS(*status)) {
    255         return (URegularExpression*)re;
    256     }
    257 
    258 ErrorExit:
    259     delete re;
    260     return NULL;
    261 
    262 }
    263 
    264 //----------------------------------------------------------------------------------------
    265 //
    266 //    uregex_close
    267 //
    268 //----------------------------------------------------------------------------------------
    269 U_CAPI void  U_EXPORT2
    270 uregex_close(URegularExpression  *re2) {
    271     RegularExpression *re = (RegularExpression*)re2;
    272     UErrorCode  status = U_ZERO_ERROR;
    273     if (validateRE(re, FALSE, &status) == FALSE) {
    274         return;
    275     }
    276     delete re;
    277 }
    278 
    279 
    280 //----------------------------------------------------------------------------------------
    281 //
    282 //    uregex_clone
    283 //
    284 //----------------------------------------------------------------------------------------
    285 U_CAPI URegularExpression * U_EXPORT2
    286 uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
    287     RegularExpression *source = (RegularExpression*)source2;
    288     if (validateRE(source, FALSE, status) == FALSE) {
    289         return NULL;
    290     }
    291 
    292     RegularExpression *clone = new RegularExpression;
    293     if (clone == NULL) {
    294         *status = U_MEMORY_ALLOCATION_ERROR;
    295         return NULL;
    296     }
    297 
    298     clone->fMatcher = source->fPat->matcher(*status);
    299     if (U_FAILURE(*status)) {
    300         delete clone;
    301         return NULL;
    302     }
    303 
    304     clone->fPat          = source->fPat;
    305     clone->fPatRefCount  = source->fPatRefCount;
    306     clone->fPatString    = source->fPatString;
    307     clone->fPatStringLen = source->fPatStringLen;
    308     umtx_atomic_inc(source->fPatRefCount);
    309     // Note:  fText is not cloned.
    310 
    311     return (URegularExpression*)clone;
    312 }
    313 
    314 
    315 
    316 
    317 //------------------------------------------------------------------------------
    318 //
    319 //    uregex_pattern
    320 //
    321 //------------------------------------------------------------------------------
    322 U_CAPI const UChar * U_EXPORT2
    323 uregex_pattern(const  URegularExpression *regexp2,
    324                       int32_t            *patLength,
    325                       UErrorCode         *status)  {
    326     RegularExpression *regexp = (RegularExpression*)regexp2;
    327 
    328     if (validateRE(regexp, FALSE, status) == FALSE) {
    329         return NULL;
    330     }
    331     if (patLength != NULL) {
    332         *patLength = regexp->fPatStringLen;
    333     }
    334     return regexp->fPatString;
    335 }
    336 
    337 
    338 //------------------------------------------------------------------------------
    339 //
    340 //    uregex_patternUText
    341 //
    342 //------------------------------------------------------------------------------
    343 U_CAPI UText * U_EXPORT2
    344 uregex_patternUText(const URegularExpression *regexp2,
    345                           UErrorCode         *status)  {
    346     RegularExpression *regexp = (RegularExpression*)regexp2;
    347     return regexp->fPat->patternText(*status);
    348 }
    349 
    350 
    351 //------------------------------------------------------------------------------
    352 //
    353 //    uregex_flags
    354 //
    355 //------------------------------------------------------------------------------
    356 U_CAPI int32_t U_EXPORT2
    357 uregex_flags(const URegularExpression *regexp2, UErrorCode *status)  {
    358     RegularExpression *regexp = (RegularExpression*)regexp2;
    359     if (validateRE(regexp, FALSE, status) == FALSE) {
    360         return 0;
    361     }
    362     int32_t flags = regexp->fPat->flags();
    363     return flags;
    364 }
    365 
    366 
    367 //------------------------------------------------------------------------------
    368 //
    369 //    uregex_setText
    370 //
    371 //------------------------------------------------------------------------------
    372 U_CAPI void U_EXPORT2
    373 uregex_setText(URegularExpression *regexp2,
    374                const UChar        *text,
    375                int32_t             textLength,
    376                UErrorCode         *status)  {
    377     RegularExpression *regexp = (RegularExpression*)regexp2;
    378     if (validateRE(regexp, FALSE, status) == FALSE) {
    379         return;
    380     }
    381     if (text == NULL || textLength < -1) {
    382         *status = U_ILLEGAL_ARGUMENT_ERROR;
    383         return;
    384     }
    385 
    386     if (regexp->fOwnsText && regexp->fText != NULL) {
    387         uprv_free((void *)regexp->fText);
    388     }
    389 
    390     regexp->fText       = text;
    391     regexp->fTextLength = textLength;
    392     regexp->fOwnsText   = FALSE;
    393 
    394     UText input = UTEXT_INITIALIZER;
    395     utext_openUChars(&input, text, textLength, status);
    396     regexp->fMatcher->reset(&input);
    397     utext_close(&input); // reset() made a shallow clone, so we don't need this copy
    398 }
    399 
    400 
    401 //------------------------------------------------------------------------------
    402 //
    403 //    uregex_setUText
    404 //
    405 //------------------------------------------------------------------------------
    406 U_CAPI void U_EXPORT2
    407 uregex_setUText(URegularExpression *regexp2,
    408                 UText              *text,
    409                 UErrorCode         *status) {
    410     RegularExpression *regexp = (RegularExpression*)regexp2;
    411     if (validateRE(regexp, FALSE, status) == FALSE) {
    412         return;
    413     }
    414     if (text == NULL) {
    415         *status = U_ILLEGAL_ARGUMENT_ERROR;
    416         return;
    417     }
    418 
    419     if (regexp->fOwnsText && regexp->fText != NULL) {
    420         uprv_free((void *)regexp->fText);
    421     }
    422 
    423     regexp->fText       = NULL; // only fill it in on request
    424     regexp->fTextLength = -1;
    425     regexp->fOwnsText   = TRUE;
    426     regexp->fMatcher->reset(text);
    427 }
    428 
    429 
    430 
    431 //------------------------------------------------------------------------------
    432 //
    433 //    uregex_getText
    434 //
    435 //------------------------------------------------------------------------------
    436 U_CAPI const UChar * U_EXPORT2
    437 uregex_getText(URegularExpression *regexp2,
    438                int32_t            *textLength,
    439                UErrorCode         *status)  {
    440     RegularExpression *regexp = (RegularExpression*)regexp2;
    441     if (validateRE(regexp, FALSE, status) == FALSE) {
    442         return NULL;
    443     }
    444 
    445     if (regexp->fText == NULL) {
    446         // need to fill in the text
    447         UText *inputText = regexp->fMatcher->inputText();
    448         int64_t inputNativeLength = utext_nativeLength(inputText);
    449         if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
    450             regexp->fText = inputText->chunkContents;
    451             regexp->fTextLength = (int32_t)inputNativeLength;
    452             regexp->fOwnsText = FALSE; // because the UText owns it
    453         } else {
    454             UErrorCode lengthStatus = U_ZERO_ERROR;
    455             regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error
    456             UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1));
    457 
    458             utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
    459             regexp->fText = inputChars;
    460             regexp->fOwnsText = TRUE; // should already be set but just in case
    461         }
    462     }
    463 
    464     if (textLength != NULL) {
    465         *textLength = regexp->fTextLength;
    466     }
    467     return regexp->fText;
    468 }
    469 
    470 
    471 //------------------------------------------------------------------------------
    472 //
    473 //    uregex_getUText
    474 //
    475 //------------------------------------------------------------------------------
    476 U_CAPI UText * U_EXPORT2
    477 uregex_getUText(URegularExpression *regexp2,
    478                 UText              *dest,
    479                 UErrorCode         *status)  {
    480     RegularExpression *regexp = (RegularExpression*)regexp2;
    481     if (validateRE(regexp, FALSE, status) == FALSE) {
    482         return dest;
    483     }
    484     return regexp->fMatcher->getInput(dest, *status);
    485 }
    486 
    487 
    488 //------------------------------------------------------------------------------
    489 //
    490 //    uregex_refreshUText
    491 //
    492 //------------------------------------------------------------------------------
    493 U_CAPI void U_EXPORT2
    494 uregex_refreshUText(URegularExpression *regexp2,
    495                     UText              *text,
    496                     UErrorCode         *status) {
    497     RegularExpression *regexp = (RegularExpression*)regexp2;
    498     if (validateRE(regexp, FALSE, status) == FALSE) {
    499         return;
    500     }
    501     regexp->fMatcher->refreshInputText(text, *status);
    502 }
    503 
    504 
    505 //------------------------------------------------------------------------------
    506 //
    507 //    uregex_matches
    508 //
    509 //------------------------------------------------------------------------------
    510 U_CAPI UBool U_EXPORT2
    511 uregex_matches(URegularExpression *regexp2,
    512                int32_t            startIndex,
    513                UErrorCode        *status)  {
    514     return uregex_matches64( regexp2, (int64_t)startIndex, status);
    515 }
    516 
    517 U_CAPI UBool U_EXPORT2
    518 uregex_matches64(URegularExpression *regexp2,
    519                  int64_t            startIndex,
    520                  UErrorCode        *status)  {
    521     RegularExpression *regexp = (RegularExpression*)regexp2;
    522     UBool result = FALSE;
    523     if (validateRE(regexp, TRUE, status) == FALSE) {
    524         return result;
    525     }
    526     if (startIndex == -1) {
    527         result = regexp->fMatcher->matches(*status);
    528     } else {
    529         result = regexp->fMatcher->matches(startIndex, *status);
    530     }
    531     return result;
    532 }
    533 
    534 
    535 //------------------------------------------------------------------------------
    536 //
    537 //    uregex_lookingAt
    538 //
    539 //------------------------------------------------------------------------------
    540 U_CAPI UBool U_EXPORT2
    541 uregex_lookingAt(URegularExpression *regexp2,
    542                  int32_t             startIndex,
    543                  UErrorCode         *status)  {
    544     return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
    545 }
    546 
    547 U_CAPI UBool U_EXPORT2
    548 uregex_lookingAt64(URegularExpression *regexp2,
    549                    int64_t             startIndex,
    550                    UErrorCode         *status)  {
    551     RegularExpression *regexp = (RegularExpression*)regexp2;
    552     UBool result = FALSE;
    553     if (validateRE(regexp, TRUE, status) == FALSE) {
    554         return result;
    555     }
    556     if (startIndex == -1) {
    557         result = regexp->fMatcher->lookingAt(*status);
    558     } else {
    559         result = regexp->fMatcher->lookingAt(startIndex, *status);
    560     }
    561     return result;
    562 }
    563 
    564 
    565 
    566 //------------------------------------------------------------------------------
    567 //
    568 //    uregex_find
    569 //
    570 //------------------------------------------------------------------------------
    571 U_CAPI UBool U_EXPORT2
    572 uregex_find(URegularExpression *regexp2,
    573             int32_t             startIndex,
    574             UErrorCode         *status)  {
    575     return uregex_find64( regexp2, (int64_t)startIndex, status);
    576 }
    577 
    578 U_CAPI UBool U_EXPORT2
    579 uregex_find64(URegularExpression *regexp2,
    580               int64_t             startIndex,
    581               UErrorCode         *status)  {
    582     RegularExpression *regexp = (RegularExpression*)regexp2;
    583     UBool result = FALSE;
    584     if (validateRE(regexp, TRUE, status) == FALSE) {
    585         return result;
    586     }
    587     if (startIndex == -1) {
    588         regexp->fMatcher->resetPreserveRegion();
    589         result = regexp->fMatcher->find(*status);
    590     } else {
    591         result = regexp->fMatcher->find(startIndex, *status);
    592     }
    593     return result;
    594 }
    595 
    596 
    597 //------------------------------------------------------------------------------
    598 //
    599 //    uregex_findNext
    600 //
    601 //------------------------------------------------------------------------------
    602 U_CAPI UBool U_EXPORT2
    603 uregex_findNext(URegularExpression *regexp2,
    604                 UErrorCode         *status)  {
    605     RegularExpression *regexp = (RegularExpression*)regexp2;
    606     if (validateRE(regexp, TRUE, status) == FALSE) {
    607         return FALSE;
    608     }
    609     UBool result = regexp->fMatcher->find(*status);
    610     return result;
    611 }
    612 
    613 //------------------------------------------------------------------------------
    614 //
    615 //    uregex_groupCount
    616 //
    617 //------------------------------------------------------------------------------
    618 U_CAPI int32_t U_EXPORT2
    619 uregex_groupCount(URegularExpression *regexp2,
    620                   UErrorCode         *status)  {
    621     RegularExpression *regexp = (RegularExpression*)regexp2;
    622     if (validateRE(regexp, FALSE, status) == FALSE) {
    623         return 0;
    624     }
    625     int32_t  result = regexp->fMatcher->groupCount();
    626     return result;
    627 }
    628 
    629 
    630 //------------------------------------------------------------------------------
    631 //
    632 //    uregex_groupNumberFromName
    633 //
    634 //------------------------------------------------------------------------------
    635 int32_t
    636 uregex_groupNumberFromName(URegularExpression *regexp2,
    637                            const UChar        *groupName,
    638                            int32_t             nameLength,
    639                            UErrorCode          *status) {
    640     RegularExpression *regexp = (RegularExpression*)regexp2;
    641     if (validateRE(regexp, FALSE, status) == FALSE) {
    642         return 0;
    643     }
    644     int32_t  result = regexp->fPat->groupNumberFromName(UnicodeString(groupName, nameLength), *status);
    645     return result;
    646 }
    647 
    648 int32_t
    649 uregex_groupNumberFromCName(URegularExpression *regexp2,
    650                             const char         *groupName,
    651                             int32_t             nameLength,
    652                             UErrorCode          *status) {
    653     RegularExpression *regexp = (RegularExpression*)regexp2;
    654     if (validateRE(regexp, FALSE, status) == FALSE) {
    655         return 0;
    656     }
    657     return regexp->fPat->groupNumberFromName(groupName, nameLength, *status);
    658 }
    659 
    660 //------------------------------------------------------------------------------
    661 //
    662 //    uregex_group
    663 //
    664 //------------------------------------------------------------------------------
    665 U_CAPI int32_t U_EXPORT2
    666 uregex_group(URegularExpression *regexp2,
    667              int32_t             groupNum,
    668              UChar              *dest,
    669              int32_t             destCapacity,
    670              UErrorCode          *status)  {
    671     RegularExpression *regexp = (RegularExpression*)regexp2;
    672     if (validateRE(regexp, TRUE, status) == FALSE) {
    673         return 0;
    674     }
    675     if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
    676         *status = U_ILLEGAL_ARGUMENT_ERROR;
    677         return 0;
    678     }
    679 
    680     if (destCapacity == 0 || regexp->fText != NULL) {
    681         // If preflighting or if we already have the text as UChars,
    682         // this is a little cheaper than extracting from the UText
    683 
    684         //
    685         // Pick up the range of characters from the matcher
    686         //
    687         int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
    688         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
    689         if (U_FAILURE(*status)) {
    690             return 0;
    691         }
    692 
    693         //
    694         // Trim length based on buffer capacity
    695         //
    696         int32_t fullLength = endIx - startIx;
    697         int32_t copyLength = fullLength;
    698         if (copyLength < destCapacity) {
    699             dest[copyLength] = 0;
    700         } else if (copyLength == destCapacity) {
    701             *status = U_STRING_NOT_TERMINATED_WARNING;
    702         } else {
    703             copyLength = destCapacity;
    704             *status = U_BUFFER_OVERFLOW_ERROR;
    705         }
    706 
    707         //
    708         // Copy capture group to user's buffer
    709         //
    710         if (copyLength > 0) {
    711             u_memcpy(dest, &regexp->fText[startIx], copyLength);
    712         }
    713         return fullLength;
    714     } else {
    715         int64_t  start = regexp->fMatcher->start64(groupNum, *status);
    716         int64_t  limit = regexp->fMatcher->end64(groupNum, *status);
    717         if (U_FAILURE(*status)) {
    718             return 0;
    719         }
    720         // Note edge cases:
    721         //   Group didn't match: start == end == -1. UText trims to 0, UText gives zero length result.
    722         //   Zero Length Match: start == end.
    723         int32_t length = utext_extract(regexp->fMatcher->inputText(), start, limit, dest, destCapacity, status);
    724         return length;
    725     }
    726 
    727 }
    728 
    729 
    730 //------------------------------------------------------------------------------
    731 //
    732 //    uregex_groupUText
    733 //
    734 //------------------------------------------------------------------------------
    735 U_CAPI UText * U_EXPORT2
    736 uregex_groupUText(URegularExpression *regexp2,
    737                   int32_t             groupNum,
    738                   UText              *dest,
    739                   int64_t            *groupLength,
    740                   UErrorCode         *status)  {
    741     RegularExpression *regexp = (RegularExpression*)regexp2;
    742     if (validateRE(regexp, TRUE, status) == FALSE) {
    743         UErrorCode emptyTextStatus = U_ZERO_ERROR;
    744         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
    745     }
    746 
    747     return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
    748 }
    749 
    750 //------------------------------------------------------------------------------
    751 //
    752 //    uregex_start
    753 //
    754 //------------------------------------------------------------------------------
    755 U_CAPI int32_t U_EXPORT2
    756 uregex_start(URegularExpression *regexp2,
    757              int32_t             groupNum,
    758              UErrorCode          *status)  {
    759     return (int32_t)uregex_start64( regexp2, groupNum, status);
    760 }
    761 
    762 U_CAPI int64_t U_EXPORT2
    763 uregex_start64(URegularExpression *regexp2,
    764                int32_t             groupNum,
    765                UErrorCode          *status)  {
    766     RegularExpression *regexp = (RegularExpression*)regexp2;
    767     if (validateRE(regexp, TRUE, status) == FALSE) {
    768         return 0;
    769     }
    770     int32_t result = regexp->fMatcher->start(groupNum, *status);
    771     return result;
    772 }
    773 
    774 //------------------------------------------------------------------------------
    775 //
    776 //    uregex_end
    777 //
    778 //------------------------------------------------------------------------------
    779 U_CAPI int32_t U_EXPORT2
    780 uregex_end(URegularExpression   *regexp2,
    781            int32_t               groupNum,
    782            UErrorCode           *status)  {
    783     return (int32_t)uregex_end64( regexp2, groupNum, status);
    784 }
    785 
    786 U_CAPI int64_t U_EXPORT2
    787 uregex_end64(URegularExpression   *regexp2,
    788              int32_t               groupNum,
    789              UErrorCode           *status)  {
    790     RegularExpression *regexp = (RegularExpression*)regexp2;
    791     if (validateRE(regexp, TRUE, status) == FALSE) {
    792         return 0;
    793     }
    794     int32_t result = regexp->fMatcher->end(groupNum, *status);
    795     return result;
    796 }
    797 
    798 //------------------------------------------------------------------------------
    799 //
    800 //    uregex_reset
    801 //
    802 //------------------------------------------------------------------------------
    803 U_CAPI void U_EXPORT2
    804 uregex_reset(URegularExpression    *regexp2,
    805              int32_t               index,
    806              UErrorCode            *status)  {
    807     uregex_reset64( regexp2, (int64_t)index, status);
    808 }
    809 
    810 U_CAPI void U_EXPORT2
    811 uregex_reset64(URegularExpression    *regexp2,
    812                int64_t               index,
    813                UErrorCode            *status)  {
    814     RegularExpression *regexp = (RegularExpression*)regexp2;
    815     if (validateRE(regexp, TRUE, status) == FALSE) {
    816         return;
    817     }
    818     regexp->fMatcher->reset(index, *status);
    819 }
    820 
    821 
    822 //------------------------------------------------------------------------------
    823 //
    824 //    uregex_setRegion
    825 //
    826 //------------------------------------------------------------------------------
    827 U_CAPI void U_EXPORT2
    828 uregex_setRegion(URegularExpression   *regexp2,
    829                  int32_t               regionStart,
    830                  int32_t               regionLimit,
    831                  UErrorCode           *status)  {
    832     uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
    833 }
    834 
    835 U_CAPI void U_EXPORT2
    836 uregex_setRegion64(URegularExpression   *regexp2,
    837                    int64_t               regionStart,
    838                    int64_t               regionLimit,
    839                    UErrorCode           *status)  {
    840     RegularExpression *regexp = (RegularExpression*)regexp2;
    841     if (validateRE(regexp, TRUE, status) == FALSE) {
    842         return;
    843     }
    844     regexp->fMatcher->region(regionStart, regionLimit, *status);
    845 }
    846 
    847 
    848 //------------------------------------------------------------------------------
    849 //
    850 //    uregex_setRegionAndStart
    851 //
    852 //------------------------------------------------------------------------------
    853 U_CAPI void U_EXPORT2
    854 uregex_setRegionAndStart(URegularExpression   *regexp2,
    855                  int64_t               regionStart,
    856                  int64_t               regionLimit,
    857                  int64_t               startIndex,
    858                  UErrorCode           *status)  {
    859     RegularExpression *regexp = (RegularExpression*)regexp2;
    860     if (validateRE(regexp, TRUE, status) == FALSE) {
    861         return;
    862     }
    863     regexp->fMatcher->region(regionStart, regionLimit, startIndex, *status);
    864 }
    865 
    866 //------------------------------------------------------------------------------
    867 //
    868 //    uregex_regionStart
    869 //
    870 //------------------------------------------------------------------------------
    871 U_CAPI int32_t U_EXPORT2
    872 uregex_regionStart(const  URegularExpression   *regexp2,
    873                           UErrorCode           *status)  {
    874     return (int32_t)uregex_regionStart64(regexp2, status);
    875 }
    876 
    877 U_CAPI int64_t U_EXPORT2
    878 uregex_regionStart64(const  URegularExpression   *regexp2,
    879                             UErrorCode           *status)  {
    880     RegularExpression *regexp = (RegularExpression*)regexp2;
    881     if (validateRE(regexp, TRUE, status) == FALSE) {
    882         return 0;
    883     }
    884     return regexp->fMatcher->regionStart();
    885 }
    886 
    887 
    888 //------------------------------------------------------------------------------
    889 //
    890 //    uregex_regionEnd
    891 //
    892 //------------------------------------------------------------------------------
    893 U_CAPI int32_t U_EXPORT2
    894 uregex_regionEnd(const  URegularExpression   *regexp2,
    895                         UErrorCode           *status)  {
    896     return (int32_t)uregex_regionEnd64(regexp2, status);
    897 }
    898 
    899 U_CAPI int64_t U_EXPORT2
    900 uregex_regionEnd64(const  URegularExpression   *regexp2,
    901                           UErrorCode           *status)  {
    902     RegularExpression *regexp = (RegularExpression*)regexp2;
    903     if (validateRE(regexp, TRUE, status) == FALSE) {
    904         return 0;
    905     }
    906     return regexp->fMatcher->regionEnd();
    907 }
    908 
    909 
    910 //------------------------------------------------------------------------------
    911 //
    912 //    uregex_hasTransparentBounds
    913 //
    914 //------------------------------------------------------------------------------
    915 U_CAPI UBool U_EXPORT2
    916 uregex_hasTransparentBounds(const  URegularExpression   *regexp2,
    917                                    UErrorCode           *status)  {
    918     RegularExpression *regexp = (RegularExpression*)regexp2;
    919     if (validateRE(regexp, FALSE, status) == FALSE) {
    920         return FALSE;
    921     }
    922     return regexp->fMatcher->hasTransparentBounds();
    923 }
    924 
    925 
    926 //------------------------------------------------------------------------------
    927 //
    928 //    uregex_useTransparentBounds
    929 //
    930 //------------------------------------------------------------------------------
    931 U_CAPI void U_EXPORT2
    932 uregex_useTransparentBounds(URegularExpression    *regexp2,
    933                             UBool                  b,
    934                             UErrorCode            *status)  {
    935     RegularExpression *regexp = (RegularExpression*)regexp2;
    936     if (validateRE(regexp, FALSE, status) == FALSE) {
    937         return;
    938     }
    939     regexp->fMatcher->useTransparentBounds(b);
    940 }
    941 
    942 
    943 //------------------------------------------------------------------------------
    944 //
    945 //    uregex_hasAnchoringBounds
    946 //
    947 //------------------------------------------------------------------------------
    948 U_CAPI UBool U_EXPORT2
    949 uregex_hasAnchoringBounds(const  URegularExpression   *regexp2,
    950                                  UErrorCode           *status)  {
    951     RegularExpression *regexp = (RegularExpression*)regexp2;
    952     if (validateRE(regexp, FALSE, status) == FALSE) {
    953         return FALSE;
    954     }
    955     return regexp->fMatcher->hasAnchoringBounds();
    956 }
    957 
    958 
    959 //------------------------------------------------------------------------------
    960 //
    961 //    uregex_useAnchoringBounds
    962 //
    963 //------------------------------------------------------------------------------
    964 U_CAPI void U_EXPORT2
    965 uregex_useAnchoringBounds(URegularExpression    *regexp2,
    966                           UBool                  b,
    967                           UErrorCode            *status)  {
    968     RegularExpression *regexp = (RegularExpression*)regexp2;
    969     if (validateRE(regexp, FALSE, status) == FALSE) {
    970         return;
    971     }
    972     regexp->fMatcher->useAnchoringBounds(b);
    973 }
    974 
    975 
    976 //------------------------------------------------------------------------------
    977 //
    978 //    uregex_hitEnd
    979 //
    980 //------------------------------------------------------------------------------
    981 U_CAPI UBool U_EXPORT2
    982 uregex_hitEnd(const  URegularExpression   *regexp2,
    983                      UErrorCode           *status)  {
    984     RegularExpression *regexp = (RegularExpression*)regexp2;
    985     if (validateRE(regexp, TRUE, status) == FALSE) {
    986         return FALSE;
    987     }
    988     return regexp->fMatcher->hitEnd();
    989 }
    990 
    991 
    992 //------------------------------------------------------------------------------
    993 //
    994 //    uregex_requireEnd
    995 //
    996 //------------------------------------------------------------------------------
    997 U_CAPI UBool U_EXPORT2
    998 uregex_requireEnd(const  URegularExpression   *regexp2,
    999                          UErrorCode           *status)  {
   1000     RegularExpression *regexp = (RegularExpression*)regexp2;
   1001     if (validateRE(regexp, TRUE, status) == FALSE) {
   1002         return FALSE;
   1003     }
   1004     return regexp->fMatcher->requireEnd();
   1005 }
   1006 
   1007 
   1008 //------------------------------------------------------------------------------
   1009 //
   1010 //    uregex_setTimeLimit
   1011 //
   1012 //------------------------------------------------------------------------------
   1013 U_CAPI void U_EXPORT2
   1014 uregex_setTimeLimit(URegularExpression   *regexp2,
   1015                     int32_t               limit,
   1016                     UErrorCode           *status) {
   1017     RegularExpression *regexp = (RegularExpression*)regexp2;
   1018     if (validateRE(regexp, FALSE, status)) {
   1019         regexp->fMatcher->setTimeLimit(limit, *status);
   1020     }
   1021 }
   1022 
   1023 
   1024 
   1025 //------------------------------------------------------------------------------
   1026 //
   1027 //    uregex_getTimeLimit
   1028 //
   1029 //------------------------------------------------------------------------------
   1030 U_CAPI int32_t U_EXPORT2
   1031 uregex_getTimeLimit(const  URegularExpression   *regexp2,
   1032                            UErrorCode           *status) {
   1033     int32_t retVal = 0;
   1034     RegularExpression *regexp = (RegularExpression*)regexp2;
   1035     if (validateRE(regexp, FALSE, status)) {
   1036         retVal = regexp->fMatcher->getTimeLimit();
   1037     }
   1038     return retVal;
   1039 }
   1040 
   1041 
   1042 
   1043 //------------------------------------------------------------------------------
   1044 //
   1045 //    uregex_setStackLimit
   1046 //
   1047 //------------------------------------------------------------------------------
   1048 U_CAPI void U_EXPORT2
   1049 uregex_setStackLimit(URegularExpression   *regexp2,
   1050                      int32_t               limit,
   1051                      UErrorCode           *status) {
   1052     RegularExpression *regexp = (RegularExpression*)regexp2;
   1053     if (validateRE(regexp, FALSE, status)) {
   1054         regexp->fMatcher->setStackLimit(limit, *status);
   1055     }
   1056 }
   1057 
   1058 
   1059 
   1060 //------------------------------------------------------------------------------
   1061 //
   1062 //    uregex_getStackLimit
   1063 //
   1064 //------------------------------------------------------------------------------
   1065 U_CAPI int32_t U_EXPORT2
   1066 uregex_getStackLimit(const  URegularExpression   *regexp2,
   1067                             UErrorCode           *status) {
   1068     int32_t retVal = 0;
   1069     RegularExpression *regexp = (RegularExpression*)regexp2;
   1070     if (validateRE(regexp, FALSE, status)) {
   1071         retVal = regexp->fMatcher->getStackLimit();
   1072     }
   1073     return retVal;
   1074 }
   1075 
   1076 
   1077 //------------------------------------------------------------------------------
   1078 //
   1079 //    uregex_setMatchCallback
   1080 //
   1081 //------------------------------------------------------------------------------
   1082 U_CAPI void U_EXPORT2
   1083 uregex_setMatchCallback(URegularExpression      *regexp2,
   1084                         URegexMatchCallback     *callback,
   1085                         const void              *context,
   1086                         UErrorCode              *status) {
   1087     RegularExpression *regexp = (RegularExpression*)regexp2;
   1088     if (validateRE(regexp, FALSE, status)) {
   1089         regexp->fMatcher->setMatchCallback(callback, context, *status);
   1090     }
   1091 }
   1092 
   1093 
   1094 //------------------------------------------------------------------------------
   1095 //
   1096 //    uregex_getMatchCallback
   1097 //
   1098 //------------------------------------------------------------------------------
   1099 U_CAPI void U_EXPORT2
   1100 uregex_getMatchCallback(const URegularExpression    *regexp2,
   1101                         URegexMatchCallback        **callback,
   1102                         const void                 **context,
   1103                         UErrorCode                  *status) {
   1104     RegularExpression *regexp = (RegularExpression*)regexp2;
   1105      if (validateRE(regexp, FALSE, status)) {
   1106          regexp->fMatcher->getMatchCallback(*callback, *context, *status);
   1107      }
   1108 }
   1109 
   1110 
   1111 //------------------------------------------------------------------------------
   1112 //
    1113 //    uregex_setFindProgressCallback
   1114 //
   1115 //------------------------------------------------------------------------------
   1116 U_CAPI void U_EXPORT2
   1117 uregex_setFindProgressCallback(URegularExpression              *regexp2,
   1118                                 URegexFindProgressCallback      *callback,
   1119                                 const void                      *context,
   1120                                 UErrorCode                      *status) {
   1121     RegularExpression *regexp = (RegularExpression*)regexp2;
   1122     if (validateRE(regexp, FALSE, status)) {
   1123         regexp->fMatcher->setFindProgressCallback(callback, context, *status);
   1124     }
   1125 }
   1126 
   1127 
   1128 //------------------------------------------------------------------------------
   1129 //
    1130 //    uregex_getFindProgressCallback
   1131 //
   1132 //------------------------------------------------------------------------------
   1133 U_CAPI void U_EXPORT2
   1134 uregex_getFindProgressCallback(const URegularExpression          *regexp2,
   1135                                 URegexFindProgressCallback        **callback,
   1136                                 const void                        **context,
   1137                                 UErrorCode                        *status) {
   1138     RegularExpression *regexp = (RegularExpression*)regexp2;
   1139      if (validateRE(regexp, FALSE, status)) {
   1140          regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
   1141      }
   1142 }
   1143 
   1144 
   1145 //------------------------------------------------------------------------------
   1146 //
   1147 //    uregex_replaceAll
   1148 //
   1149 //------------------------------------------------------------------------------
   1150 U_CAPI int32_t U_EXPORT2
   1151 uregex_replaceAll(URegularExpression    *regexp2,
   1152                   const UChar           *replacementText,
   1153                   int32_t                replacementLength,
   1154                   UChar                 *destBuf,
   1155                   int32_t                destCapacity,
   1156                   UErrorCode            *status)  {
   1157     RegularExpression *regexp = (RegularExpression*)regexp2;
   1158     if (validateRE(regexp, TRUE, status) == FALSE) {
   1159         return 0;
   1160     }
   1161     if (replacementText == NULL || replacementLength < -1 ||
   1162         (destBuf == NULL && destCapacity > 0) ||
   1163         destCapacity < 0) {
   1164         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1165         return 0;
   1166     }
   1167 
   1168     int32_t   len = 0;
   1169 
   1170     uregex_reset(regexp2, 0, status);
   1171 
   1172     // Note: Seperate error code variables for findNext() and appendReplacement()
   1173     //       are used so that destination buffer overflow errors
   1174     //       in appendReplacement won't stop findNext() from working.
   1175     //       appendReplacement() and appendTail() special case incoming buffer
   1176     //       overflow errors, continuing to return the correct length.
   1177     UErrorCode  findStatus = *status;
   1178     while (uregex_findNext(regexp2, &findStatus)) {
   1179         len += uregex_appendReplacement(regexp2, replacementText, replacementLength,
   1180                                         &destBuf, &destCapacity, status);
   1181     }
   1182     len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
   1183 
   1184     if (U_FAILURE(findStatus)) {
   1185         // If anything went wrong with the findNext(), make that error trump
   1186         //   whatever may have happened with the append() operations.
   1187         //   Errors in findNext() are not expected.
   1188         *status = findStatus;
   1189     }
   1190 
   1191     return len;
   1192 }
   1193 
   1194 
   1195 //------------------------------------------------------------------------------
   1196 //
   1197 //    uregex_replaceAllUText
   1198 //
   1199 //------------------------------------------------------------------------------
   1200 U_CAPI UText * U_EXPORT2
   1201 uregex_replaceAllUText(URegularExpression    *regexp2,
   1202                        UText                 *replacementText,
   1203                        UText                 *dest,
   1204                        UErrorCode            *status)  {
   1205     RegularExpression *regexp = (RegularExpression*)regexp2;
   1206     if (validateRE(regexp, TRUE, status) == FALSE) {
   1207         return 0;
   1208     }
   1209     if (replacementText == NULL) {
   1210         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1211         return 0;
   1212     }
   1213 
   1214     dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
   1215     return dest;
   1216 }
   1217 
   1218 
   1219 //------------------------------------------------------------------------------
   1220 //
   1221 //    uregex_replaceFirst
   1222 //
   1223 //------------------------------------------------------------------------------
   1224 U_CAPI int32_t U_EXPORT2
   1225 uregex_replaceFirst(URegularExpression  *regexp2,
   1226                     const UChar         *replacementText,
   1227                     int32_t              replacementLength,
   1228                     UChar               *destBuf,
   1229                     int32_t              destCapacity,
   1230                     UErrorCode          *status)  {
   1231     RegularExpression *regexp = (RegularExpression*)regexp2;
   1232     if (validateRE(regexp, TRUE, status) == FALSE) {
   1233         return 0;
   1234     }
   1235     if (replacementText == NULL || replacementLength < -1 ||
   1236         (destBuf == NULL && destCapacity > 0) ||
   1237         destCapacity < 0) {
   1238         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1239         return 0;
   1240     }
   1241 
   1242     int32_t   len = 0;
   1243     UBool     findSucceeded;
   1244     uregex_reset(regexp2, 0, status);
   1245     findSucceeded = uregex_find(regexp2, 0, status);
   1246     if (findSucceeded) {
   1247         len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
   1248                                        &destBuf, &destCapacity, status);
   1249     }
   1250     len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
   1251 
   1252     return len;
   1253 }
   1254 
   1255 
   1256 //------------------------------------------------------------------------------
   1257 //
   1258 //    uregex_replaceFirstUText
   1259 //
   1260 //------------------------------------------------------------------------------
   1261 U_CAPI UText * U_EXPORT2
   1262 uregex_replaceFirstUText(URegularExpression  *regexp2,
   1263                          UText                 *replacementText,
   1264                          UText                 *dest,
   1265                          UErrorCode            *status)  {
   1266     RegularExpression *regexp = (RegularExpression*)regexp2;
   1267     if (validateRE(regexp, TRUE, status) == FALSE) {
   1268         return 0;
   1269     }
   1270     if (replacementText == NULL) {
   1271         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1272         return 0;
   1273     }
   1274 
   1275     dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
   1276     return dest;
   1277 }
   1278 
   1279 
   1280 //------------------------------------------------------------------------------
   1281 //
   1282 //    uregex_appendReplacement
   1283 //
   1284 //------------------------------------------------------------------------------
   1285 
   1286 U_NAMESPACE_BEGIN
   1287 //
   1288 //  Dummy class, because these functions need to be friends of class RegexMatcher,
   1289 //               and stand-alone C functions don't work as friends
   1290 //
   1291 class RegexCImpl {
   1292  public:
   1293    inline static  int32_t appendReplacement(RegularExpression    *regexp,
   1294                       const UChar           *replacementText,
   1295                       int32_t                replacementLength,
   1296                       UChar                **destBuf,
   1297                       int32_t               *destCapacity,
   1298                       UErrorCode            *status);
   1299 
   1300    inline static int32_t appendTail(RegularExpression    *regexp,
   1301         UChar                **destBuf,
   1302         int32_t               *destCapacity,
   1303         UErrorCode            *status);
   1304 
   1305     inline static int32_t split(RegularExpression    *regexp,
   1306         UChar                 *destBuf,
   1307         int32_t                destCapacity,
   1308         int32_t               *requiredCapacity,
   1309         UChar                 *destFields[],
   1310         int32_t                destFieldsCapacity,
   1311         UErrorCode            *status);
   1312 };
   1313 
   1314 U_NAMESPACE_END
   1315 
   1316 
   1317 
   1318 static const UChar BACKSLASH  = 0x5c;
   1319 static const UChar DOLLARSIGN = 0x24;
   1320 static const UChar LEFTBRACKET = 0x7b;
   1321 static const UChar RIGHTBRACKET = 0x7d;
   1322 
   1323 //
   1324 //  Move a character to an output buffer, with bounds checking on the index.
   1325 //      Index advances even if capacity is exceeded, for preflight size computations.
   1326 //      This little sequence is used a LOT.
   1327 //
   1328 static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) {
   1329     if (*idx < bufCapacity) {
   1330         buf[*idx] = c;
   1331     }
   1332     (*idx)++;
   1333 }
   1334 
   1335 
   1336 //
   1337 //  appendReplacement, the actual implementation.
   1338 //
   1339 int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
   1340                                       const UChar           *replacementText,
   1341                                       int32_t                replacementLength,
   1342                                       UChar                **destBuf,
   1343                                       int32_t               *destCapacity,
   1344                                       UErrorCode            *status)  {
   1345 
   1346     // If we come in with a buffer overflow error, don't suppress the operation.
   1347     //  A series of appendReplacements, appendTail need to correctly preflight
   1348     //  the buffer size when an overflow happens somewhere in the middle.
   1349     UBool pendingBufferOverflow = FALSE;
   1350     if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
   1351         pendingBufferOverflow = TRUE;
   1352         *status = U_ZERO_ERROR;
   1353     }
   1354 
   1355     //
   1356     // Validate all paramters
   1357     //
   1358     if (validateRE(regexp, TRUE, status) == FALSE) {
   1359         return 0;
   1360     }
   1361     if (replacementText == NULL || replacementLength < -1 ||
   1362         destCapacity == NULL || destBuf == NULL ||
   1363         (*destBuf == NULL && *destCapacity > 0) ||
   1364         *destCapacity < 0) {
   1365         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1366         return 0;
   1367     }
   1368 
   1369     RegexMatcher *m = regexp->fMatcher;
   1370     if (m->fMatch == FALSE) {
   1371         *status = U_REGEX_INVALID_STATE;
   1372         return 0;
   1373     }
   1374 
   1375     UChar    *dest             = *destBuf;
   1376     int32_t   capacity         = *destCapacity;
   1377     int32_t   destIdx          =  0;
   1378     int32_t   i;
   1379 
   1380     // If it wasn't supplied by the caller,  get the length of the replacement text.
   1381     //   TODO:  slightly smarter logic in the copy loop could watch for the NUL on
   1382     //          the fly and avoid this step.
   1383     if (replacementLength == -1) {
   1384         replacementLength = u_strlen(replacementText);
   1385     }
   1386 
   1387     // Copy input string from the end of previous match to start of current match
   1388     if (regexp->fText != NULL) {
   1389         int32_t matchStart;
   1390         int32_t lastMatchEnd;
   1391         if (UTEXT_USES_U16(m->fInputText)) {
   1392             lastMatchEnd = (int32_t)m->fLastMatchEnd;
   1393             matchStart = (int32_t)m->fMatchStart;
   1394         } else {
   1395             // !!!: Would like a better way to do this!
   1396             UErrorCode tempStatus = U_ZERO_ERROR;
   1397             lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &tempStatus);
   1398             tempStatus = U_ZERO_ERROR;
   1399             matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &tempStatus);
   1400         }
   1401         for (i=lastMatchEnd; i<matchStart; i++) {
   1402             appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
   1403         }
   1404     } else {
   1405         UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
   1406         destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
   1407                                  dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity),
   1408                                  &possibleOverflowError);
   1409     }
   1410     U_ASSERT(destIdx >= 0);
   1411 
   1412     // scan the replacement text, looking for substitutions ($n) and \escapes.
   1413     int32_t  replIdx = 0;
   1414     while (replIdx < replacementLength && U_SUCCESS(*status)) {
   1415         UChar  c = replacementText[replIdx];
   1416         replIdx++;
   1417         if (c != DOLLARSIGN && c != BACKSLASH) {
   1418             // Common case, no substitution, no escaping,
   1419             //  just copy the char to the dest buf.
   1420             appendToBuf(c, &destIdx, dest, capacity);
   1421             continue;
   1422         }
   1423 
   1424         if (c == BACKSLASH) {
   1425             // Backslash Escape.  Copy the following char out without further checks.
   1426             //                    Note:  Surrogate pairs don't need any special handling
   1427             //                           The second half wont be a '$' or a '\', and
   1428             //                           will move to the dest normally on the next
   1429             //                           loop iteration.
   1430             if (replIdx >= replacementLength) {
   1431                 break;
   1432             }
   1433             c = replacementText[replIdx];
   1434 
   1435             if (c==0x55/*U*/ || c==0x75/*u*/) {
   1436                 // We have a \udddd or \Udddddddd escape sequence.
   1437                 UChar32 escapedChar =
   1438                     u_unescapeAt(uregex_ucstr_unescape_charAt,
   1439                        &replIdx,                   // Index is updated by unescapeAt
   1440                        replacementLength,          // Length of replacement text
   1441                        (void *)replacementText);
   1442 
   1443                 if (escapedChar != (UChar32)0xFFFFFFFF) {
   1444                     if (escapedChar <= 0xffff) {
   1445                         appendToBuf((UChar)escapedChar, &destIdx, dest, capacity);
   1446                     } else {
   1447                         appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
   1448                         appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
   1449                     }
   1450                     continue;
   1451                 }
   1452                 // Note:  if the \u escape was invalid, just fall through and
   1453                 //        treat it as a plain \<anything> escape.
   1454             }
   1455 
   1456             // Plain backslash escape.  Just put out the escaped character.
   1457             appendToBuf(c, &destIdx, dest, capacity);
   1458 
   1459             replIdx++;
   1460             continue;
   1461         }
   1462 
   1463         // We've got a $.  Pick up the following capture group name or number.
   1464         // For numbers, consume only digits that produce a valid capture group for the pattern.
   1465 
   1466         int32_t groupNum  = 0;
   1467         U_ASSERT(c == DOLLARSIGN);
   1468         UChar32 c32;
   1469         U16_GET(replacementText, 0, replIdx, replacementLength, c32);
   1470         if (u_isdigit(c32)) {
   1471             int32_t numDigits = 0;
   1472             int32_t numCaptureGroups = m->fPattern->fGroupMap->size();
   1473             for (;;) {
   1474                 if (replIdx >= replacementLength) {
   1475                     break;
   1476                 }
   1477                 U16_GET(replacementText, 0, replIdx, replacementLength, c32);
   1478                 if (u_isdigit(c32) == FALSE) {
   1479                     break;
   1480                 }
   1481 
   1482                 int32_t digitVal = u_charDigitValue(c32);
   1483                 if (groupNum * 10 + digitVal <= numCaptureGroups) {
   1484                     groupNum = groupNum * 10 + digitVal;
   1485                     U16_FWD_1(replacementText, replIdx, replacementLength);
   1486                     numDigits++;
   1487                 } else {
   1488                     if (numDigits == 0) {
   1489                         *status = U_INDEX_OUTOFBOUNDS_ERROR;
   1490                     }
   1491                     break;
   1492                 }
   1493             }
   1494         } else if (c32 == LEFTBRACKET) {
   1495             // Scan for Named Capture Group, ${name}.
   1496             UnicodeString groupName;
   1497             U16_FWD_1(replacementText, replIdx, replacementLength);
   1498             while (U_SUCCESS(*status) && c32 != RIGHTBRACKET) {
   1499                 if (replIdx >= replacementLength) {
   1500                     *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
   1501                     break;
   1502                 }
   1503                 U16_NEXT(replacementText, replIdx, replacementLength, c32);
   1504                 if ((c32 >= 0x41 && c32 <= 0x5a) ||           // A..Z
   1505                         (c32 >= 0x61 && c32 <= 0x7a) ||       // a..z
   1506                         (c32 >= 0x31 && c32 <= 0x39)) {       // 0..9
   1507                     groupName.append(c32);
   1508                 } else if (c32 == RIGHTBRACKET) {
   1509                     groupNum = uhash_geti(regexp->fPat->fNamedCaptureMap, &groupName);
   1510                     if (groupNum == 0) {
   1511                         // Name not defined by pattern.
   1512                         *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
   1513                     }
   1514                 } else {
   1515                     // Character was something other than a name char or a closing '}'
   1516                     *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
   1517                 }
   1518             }
   1519         } else {
   1520             // $ not followed by {name} or digits.
   1521             *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
   1522         }
   1523 
   1524 
   1525         // Finally, append the capture group data to the destination.
   1526         if (U_SUCCESS(*status)) {
   1527             destIdx += uregex_group((URegularExpression*)regexp, groupNum,
   1528                                     dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
   1529             if (*status == U_BUFFER_OVERFLOW_ERROR) {
   1530                 // Ignore buffer overflow when extracting the group.  We need to
   1531                 //   continue on to get full size of the untruncated result.  We will
   1532                 //   raise our own buffer overflow error at the end.
   1533                 *status = U_ZERO_ERROR;
   1534             }
   1535         }
   1536 
   1537         if (U_FAILURE(*status)) {
   1538             // bad group number or name.
   1539             break;
   1540         }
   1541     }
   1542 
   1543     //
   1544     //  Nul Terminate the dest buffer if possible.
   1545     //  Set the appropriate buffer overflow or not terminated error, if needed.
   1546     //
   1547     if (destIdx < capacity) {
   1548         dest[destIdx] = 0;
   1549     } else if (U_SUCCESS(*status)) {
   1550         if (destIdx == *destCapacity) {
   1551             *status = U_STRING_NOT_TERMINATED_WARNING;
   1552         } else {
   1553             *status = U_BUFFER_OVERFLOW_ERROR;
   1554         }
   1555     }
   1556 
   1557     //
   1558     // Return an updated dest buffer and capacity to the caller.
   1559     //
   1560     if (destIdx > 0 &&  *destCapacity > 0) {
   1561         if (destIdx < capacity) {
   1562             *destBuf      += destIdx;
   1563             *destCapacity -= destIdx;
   1564         } else {
   1565             *destBuf      += capacity;
   1566             *destCapacity =  0;
   1567         }
   1568     }
   1569 
   1570     // If we came in with a buffer overflow, make sure we go out with one also.
   1571     //   (A zero length match right at the end of the previous match could
   1572     //    make this function succeed even though a previous call had overflowed the buf)
   1573     if (pendingBufferOverflow && U_SUCCESS(*status)) {
   1574         *status = U_BUFFER_OVERFLOW_ERROR;
   1575     }
   1576 
   1577     return destIdx;
   1578 }
   1579 
   1580 //
   1581 //   uregex_appendReplacement   Public C entry point.  Converts the opaque handle and
   1582 //                              hands every argument on to RegexCImpl::appendReplacement.
   1583 U_CAPI int32_t U_EXPORT2
   1584 uregex_appendReplacement(URegularExpression *regexp2,
   1585                          const UChar        *replacementText,
   1586                          int32_t             replacementLength,
   1587                          UChar             **destBuf,
   1588                          int32_t            *destCapacity,
   1589                          UErrorCode         *status)
   1590 {
   1591     RegularExpression *impl = reinterpret_cast<RegularExpression *>(regexp2);
   1592     return RegexCImpl::appendReplacement(impl, replacementText, replacementLength,
   1593                                          destBuf, destCapacity, status);
   1594 }
   1595 
   1596 //   uregex_appendReplacementUText   UText variant.  The handle is first checked with
   1597 //       validateRE(), as uregex_split does; if it is rejected nothing is appended and
   1598 //       fMatcher is never read.  Otherwise the normal C++ RegexMatcher method is used.
   1599 U_CAPI void U_EXPORT2
   1600 uregex_appendReplacementUText(URegularExpression *regexp2, UText *replText,
   1601                               UText *dest, UErrorCode *status) {
   1602     RegularExpression *regexp = (RegularExpression*)regexp2;
   1603     if (validateRE(regexp, TRUE, status)) {
   1604         regexp->fMatcher->appendReplacement(dest, replText, *status);
   1605     }
   1606 }
   1607 
   1608 
   1609 //------------------------------------------------------------------------------
   1610 //    uregex_appendTail     Copies the input text that follows the last match into *destBuf,
   1611 //                          NUL terminates it if there is room, and advances *destBuf and
   1612 //                          *destCapacity.  Returns the tail length, which may exceed the capacity.
   1613 //------------------------------------------------------------------------------
   1614 int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
   1615                                UChar                **destBuf,
   1616                                int32_t               *destCapacity,
   1617                                UErrorCode            *status)
   1618 {
   1619 
   1620     // If we come in with a buffer overflow error, don't suppress the operation.
   1621     //  A series of appendReplacements, appendTail need to correctly preflight
   1622     //  the buffer size when an overflow happens somewhere in the middle.
   1623     UBool pendingBufferOverflow = FALSE;
   1624     if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
   1625         pendingBufferOverflow = TRUE;
   1626         *status = U_ZERO_ERROR;    // Cleared for now; put back just before returning.
   1627     }
   1628 
   1629     if (validateRE(regexp, TRUE, status) == FALSE) {
   1630         return 0;
   1631     }
   1632     // A NULL buffer is accepted only together with a capacity of zero.
   1633     if (destCapacity == NULL || destBuf == NULL ||
   1634         (*destBuf == NULL && *destCapacity > 0) ||
   1635         *destCapacity < 0)
   1636     {
   1637         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1638         return 0;
   1639     }
   1640 
   1641     RegexMatcher *m = regexp->fMatcher;
   1642 
   1643     int32_t  destIdx     = 0;           // Output position; keeps counting past destCap.
   1644     int32_t  destCap     = *destCapacity;
   1645     UChar    *dest       = *destBuf;
   1646     // Two paths: walk the UChar text from setText() directly, or extract from the UText.
   1647     if (regexp->fText != NULL) {
   1648         int32_t srcIdx;
   1649         int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
   1650         if (nativeIdx == -1) {          // Same -1 case as the UText path below: copy from 0.
   1651             srcIdx = 0;
   1652         } else if (UTEXT_USES_U16(m->fInputText)) {
   1653             srcIdx = (int32_t)nativeIdx;
   1654         } else {                        // presumably converts a native index to a UChar index -- confirm
   1655             UErrorCode status = U_ZERO_ERROR;    // NOTE: shadows the status parameter.
   1656             srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status);
   1657         }
   1658 
   1659         for (;;) {
   1660             U_ASSERT(destIdx >= 0);
   1661 
   1662             if (srcIdx == regexp->fTextLength) {
   1663                 break;
   1664             }
   1665             UChar c = regexp->fText[srcIdx];
   1666             if (c == 0 && regexp->fTextLength == -1) {
   1667                 regexp->fTextLength = srcIdx;    // Length was given as -1; record it at the NUL.
   1668                 break;
   1669             }
   1670 
   1671             if (destIdx < destCap) {
   1672                 dest[destIdx] = c;
   1673             } else {
   1674                 // We've overflowed the dest buffer.
   1675                 //  If the total input string length is known, we can
   1676                 //    compute the total buffer size needed without scanning through the string.
   1677                 if (regexp->fTextLength > 0) {
   1678                     destIdx += (regexp->fTextLength - srcIdx);
   1679                     break;
   1680                 }
   1681             }
   1682             srcIdx++;
   1683             destIdx++;                  // Advanced even when nothing could be stored.
   1684         }
   1685     } else {
   1686         int64_t  srcIdx;
   1687         if (m->fMatch) {
   1688             // The most recent call to find() succeeded.
   1689             srcIdx = m->fMatchEnd;
   1690         } else {
   1691             // The last call to find() on this matcher failed().
   1692             //   Look back to the end of the last find() that succeeded for src index.
   1693             srcIdx = m->fLastMatchEnd;
   1694             if (srcIdx == -1)  {
   1695                 // There has been no successful match with this matcher.
   1696                 //   We want to copy the whole string.
   1697                 srcIdx = 0;
   1698             }
   1699         }
   1700 
   1701         destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
   1702     }
   1703 
   1704     //
   1705     //  NUL terminate the output string, if possible, otherwise issue the
   1706     //   appropriate error or warning.
   1707     //
   1708     if (destIdx < destCap) {
   1709         dest[destIdx] = 0;
   1710     } else  if (destIdx == destCap) {
   1711         *status = U_STRING_NOT_TERMINATED_WARNING;    // NOTE(review): no check for an earlier failure in *status.
   1712     } else {
   1713         *status = U_BUFFER_OVERFLOW_ERROR;
   1714     }
   1715 
   1716     //
   1717     // Update the user's buffer ptr and capacity vars to reflect the
   1718     //   amount used.
   1719     //
   1720     if (destIdx < destCap) {
   1721         *destBuf      += destIdx;
   1722         *destCapacity -= destIdx;
   1723     } else if (*destBuf != NULL) {
   1724         *destBuf      += destCap;       // Buffer fully used: leave the pointer at its end.
   1725         *destCapacity  = 0;
   1726     }
   1727     // Re-raise the overflow that was pending on entry, unless a failure is already set.
   1728     if (pendingBufferOverflow && U_SUCCESS(*status)) {
   1729         *status = U_BUFFER_OVERFLOW_ERROR;
   1730     }
   1731 
   1732     return destIdx;
   1733 }
   1734 
   1735 
   1736 //   uregex_appendTail   Public C entry point.  Converts the opaque handle and lets
   1737 //                       RegexCImpl::appendTail do the work; its result is returned as is.
   1738 U_CAPI int32_t U_EXPORT2
   1739 uregex_appendTail(URegularExpression *regexp2,
   1740                   UChar             **destBuf,
   1741                   int32_t            *destCapacity,
   1742                   UErrorCode         *status)
   1743 {
   1744     RegularExpression *impl = reinterpret_cast<RegularExpression *>(regexp2);
   1745     return RegexCImpl::appendTail(impl, destBuf, destCapacity, status);
   1746 }
   1747 
   1748 
   1749 //
   1750 //   uregex_appendTailUText   UText variant.  The handle is first checked with validateRE(),
   1751 //       as uregex_split does; if it is rejected, dest is returned untouched and fMatcher
   1752 //       is never read.  Otherwise the normal C++ RegexMatcher method is used.
   1753 U_CAPI UText * U_EXPORT2
   1754 uregex_appendTailUText(URegularExpression *regexp2, UText *dest, UErrorCode *status) {
   1755     RegularExpression *regexp = (RegularExpression*)regexp2;
   1756     if (validateRE(regexp, TRUE, status) == FALSE) { return dest; }
   1757     return regexp->fMatcher->appendTail(dest, *status);
   1758 }
   1759 
   1760 
   1761 //------------------------------------------------------------------------------
   1762 //
   1763 //    copyString     Internal helper, currently compiled out by the #if 0 below.
   1764 //                   Copies srcLen UChars into destBuffer starting at *destIndex.
   1765 //                   Storing stops once destCapacity is reached, but the index
   1766 //                   keeps counting so it can serve as a preflight size.  A NUL
   1767 //                   is stored when it fits and is always counted in *destIndex.
   1768 //------------------------------------------------------------------------------
   1769 #if 0
   1770 static void copyString(UChar        *destBuffer,    //  Output buffer.
   1771                        int32_t       destCapacity,  //  Size of the output buffer, in UChars.
   1772                        int32_t      *destIndex,     //  In: first output position to use.
   1773                                                     //  Out: position after the NUL, unclipped.
   1774                        const UChar  *srcPtr,        //  Text to copy.
   1775                        int32_t       srcLen)        //  Number of UChars at srcPtr.
   1776 {
   1777     int32_t  outPos = *destIndex;
   1778     int32_t  srcPos = 0;
   1779 
   1780     // Copy while both source text and output space remain.
   1781     while (srcPos < srcLen && outPos < destCapacity) {
   1782         destBuffer[outPos++] = srcPtr[srcPos++];
   1783     }
   1784 
   1785     // Whatever did not fit is only counted, never stored.
   1786     if (srcPos < srcLen) {
   1787         outPos += srcLen - srcPos;
   1788     }
   1789 
   1790     // Terminate if there is room; the NUL is counted either way.
   1791     if (outPos < destCapacity) {
   1792         destBuffer[outPos] = 0;
   1793     }
   1794     outPos++;
   1795     *destIndex = outPos;
   1796 }
   1797 #endif
   1798 
   1799 //------------------------------------------------------------------------------
   1800 //    uregex_split    Splits the input text at each match of the pattern.  Every field is
   1801 //                    copied into destBuf and its start is recorded in destFields[].  Returns
   1802 //                    the field count; *requiredCapacity gets the UChars needed, NULs included.
   1803 //------------------------------------------------------------------------------
   1804 int32_t RegexCImpl::split(RegularExpression     *regexp,
   1805                           UChar                 *destBuf,
   1806                           int32_t                destCapacity,
   1807                           int32_t               *requiredCapacity,
   1808                           UChar                 *destFields[],
   1809                           int32_t                destFieldsCapacity,
   1810                           UErrorCode            *status) {
   1811     //
   1812     // Reset for the input text
   1813     //
   1814     regexp->fMatcher->reset();
   1815     UText *inputText = regexp->fMatcher->fInputText;
   1816     int64_t   nextOutputStringStart = 0;    // Input index where the next field begins.
   1817     int64_t   inputLen = regexp->fMatcher->fInputLength;
   1818     if (inputLen == 0) {
   1819         return 0;                           // Nothing to split; destFields is not touched.
   1820     }
   1821 
   1822     //
   1823     // Loop through the input text, searching for the delimiter pattern
   1824     //
   1825     int32_t   i;             // Index of the field being processed.
   1826     int32_t   destIdx = 0;   // Next available position in destBuf;
   1827     int32_t   numCaptureGroups = regexp->fMatcher->groupCount();
   1828     UErrorCode  tStatus = U_ZERO_ERROR;   // Want to ignore any buffer overflow errors so that the strings are still counted
   1829     for (i=0; ; i++) {
   1830         if (i>=destFieldsCapacity-1) {
   1831             // There are one or zero output strings left.
   1832             // Fill the last output string with whatever is left from the input, then exit the loop.
   1833             //  ( i will be == destFieldsCapacity if we filled the output array while processing
   1834             //    capture groups of the delimiter expression, in which case we will discard the
   1835             //    last capture group saved in favor of the unprocessed remainder of the
   1836             //    input string.)
   1837             if (inputLen > nextOutputStringStart) {
   1838                 if (i != destFieldsCapacity-1) {
   1839                     // No fields are left.  Recycle the last one for holding the trailing part of
   1840                     //   the input string.
   1841                     i = destFieldsCapacity-1;
   1842                     destIdx = (int32_t)(destFields[i] - destFields[0]);    // Rewind to where that field began.
   1843                 }
   1844                 // This extraction reports through status itself, not through tStatus.
   1845                 destFields[i] = &destBuf[destIdx];
   1846                 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
   1847                                              &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
   1848             }
   1849             break;
   1850         }
   1851 
   1852         if (regexp->fMatcher->find()) {
   1853             // We found another delimiter.  Move everything from where we started looking
   1854             //  up until the start of the delimiter into the next output string.
   1855             destFields[i] = &destBuf[destIdx];
   1856 
   1857             destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
   1858                                          &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
   1859             if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
   1860                 tStatus = U_ZERO_ERROR;
   1861             } else {
   1862                 *status = tStatus;    // Any outcome other than overflow replaces *status.
   1863             }
   1864             nextOutputStringStart = regexp->fMatcher->fMatchEnd;
   1865 
   1866             // If the delimiter pattern has capturing parentheses, the captured
   1867             //  text goes out into the next n destination strings.
   1868             int32_t groupNum;
   1869             for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
   1870                 // If we've run out of output string slots, bail out.
   1871                 if (i==destFieldsCapacity-1) {
   1872                     break;
   1873                 }
   1874                 i++;
   1875 
   1876                 // Set up to extract the capture group contents into the dest buffer.
   1877                 destFields[i] = &destBuf[destIdx];
   1878                 tStatus = U_ZERO_ERROR;
   1879                 int32_t t = uregex_group((URegularExpression*)regexp,
   1880                                          groupNum,
   1881                                          destFields[i],
   1882                                          REMAINING_CAPACITY(destIdx, destCapacity),
   1883                                          &tStatus);
   1884                 destIdx += t + 1;    // Record the space used in the output string buffer.
   1885                                      //  +1 for the NUL that terminates the string.
   1886                 if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
   1887                     tStatus = U_ZERO_ERROR;
   1888                 } else {
   1889                     *status = tStatus;    // Same rule as for the field text above.
   1890                 }
   1891             }
   1892 
   1893             if (nextOutputStringStart == inputLen) {
   1894                 // The delimiter was at the end of the string.
   1895                 // Output an empty string, and then we are done.
   1896                 if (destIdx < destCapacity) {
   1897                     destBuf[destIdx] = 0;
   1898                 }
   1899                 if (i < destFieldsCapacity-1) {
   1900                    ++i;
   1901                 }
   1902                 if (destIdx < destCapacity) {
   1903                     destFields[i] = destBuf + destIdx;
   1904                 }
   1905                 ++destIdx;    // The empty field's NUL is counted even if it was not stored.
   1906                 break;
   1907             }
   1908 
   1909         }
   1910         else
   1911         {
   1912             // We ran off the end of the input while looking for the next delimiter.
   1913             // All the remaining text goes into the current output string.
   1914             destFields[i] = &destBuf[destIdx];
   1915             destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
   1916                                          &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
   1917             break;
   1918         }
   1919     }
   1920 
   1921     // Zero out any unused portion of the destFields array
   1922     int j;
   1923     for (j=i+1; j<destFieldsCapacity; j++) {
   1924         destFields[j] = NULL;
   1925     }
   1926     // destIdx has counted every UChar wanted, so it may be larger than destCapacity.
   1927     if (requiredCapacity != NULL) {
   1928         *requiredCapacity = destIdx;
   1929     }
   1930     if (destIdx > destCapacity) {
   1931         *status = U_BUFFER_OVERFLOW_ERROR;
   1932     }
   1933     return i+1;    // i is the index of the last field filled.
   1934 }
   1935 
   1936 //
   1937 //   uregex_split   Public C entry point.  Screens the handle and the caller's
   1938 //                  buffers, then lets RegexCImpl::split do the splitting.
   1939 U_CAPI int32_t U_EXPORT2
   1940 uregex_split(URegularExpression *regexp2,
   1941              UChar              *destBuf,
   1942              int32_t             destCapacity,
   1943              int32_t            *requiredCapacity,
   1944              UChar              *destFields[],
   1945              int32_t             destFieldsCapacity,
   1946              UErrorCode         *status)
   1947 {
   1948     RegularExpression *impl = reinterpret_cast<RegularExpression *>(regexp2);
   1949     if (!validateRE(impl, TRUE, status)) {
   1950         return 0;
   1951     }
   1952     // A NULL buffer is acceptable only together with a zero capacity.
   1953     const UBool missingBuffer = (destBuf == NULL && destCapacity > 0);
   1954     if (missingBuffer || destCapacity < 0 || destFields == NULL || destFieldsCapacity < 1) {
   1955         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1956         return 0;
   1957     }
   1958     return RegexCImpl::split(impl, destBuf, destCapacity, requiredCapacity,
   1959                              destFields, destFieldsCapacity, status);
   1960 }
   1961 
   1962 
   1963 //   uregex_splitUText   UText variant.  The handle is first checked with validateRE(); if it
   1964 //       is rejected 0 is returned, as uregex_split does.  Otherwise uses the normal C++ method.
   1965 U_CAPI int32_t U_EXPORT2
   1966 uregex_splitUText(URegularExpression *regexp2, UText *destFields[],
   1967                   int32_t destFieldsCapacity, UErrorCode *status) {
   1968     RegularExpression *regexp = (RegularExpression*)regexp2;
   1969     if (validateRE(regexp, TRUE, status) == FALSE) {
   1970         return 0;    // fMatcher is never read for a rejected handle.
   1971     }
   1972     return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
   1973 }
   1974 
   1975 
   1976 #endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS
   1977 
   1978