Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2004-2012, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  regex.cpp
      7 */
      8 
      9 #include "unicode/utypes.h"
     10 
     11 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     12 
     13 #include "unicode/regex.h"
     14 #include "unicode/uregex.h"
     15 #include "unicode/unistr.h"
     16 #include "unicode/ustring.h"
     17 #include "unicode/uchar.h"
     18 #include "unicode/uobject.h"
     19 #include "unicode/utf16.h"
     20 #include "umutex.h"
     21 #include "uassert.h"
     22 #include "cmemory.h"
     23 
     24 #include "regextxt.h"
     25 
     26 #include <stdio.h>
     27 
     28 U_NAMESPACE_BEGIN
     29 
     30 #define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0)
     31 
     32 struct RegularExpression: public UMemory {
     33 public:
     34     RegularExpression();
     35     ~RegularExpression();
     36     int32_t           fMagic;
     37     RegexPattern     *fPat;
     38     int32_t          *fPatRefCount;
     39     UChar            *fPatString;
     40     int32_t           fPatStringLen;
     41     RegexMatcher     *fMatcher;
     42     const UChar      *fText;         // Text from setText()
     43     int32_t           fTextLength;   // Length provided by user with setText(), which
     44                                      //  may be -1.
     45     UBool             fOwnsText;
     46 };
     47 
     48 static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
     49 
     50 RegularExpression::RegularExpression() {
     51     fMagic        = REXP_MAGIC;
     52     fPat          = NULL;
     53     fPatRefCount  = NULL;
     54     fPatString    = NULL;
     55     fPatStringLen = 0;
     56     fMatcher      = NULL;
     57     fText         = NULL;
     58     fTextLength   = 0;
     59     fOwnsText     = FALSE;
     60 }
     61 
     62 RegularExpression::~RegularExpression() {
     63     delete fMatcher;
     64     fMatcher = NULL;
     65     if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) {
     66         delete fPat;
     67         uprv_free(fPatString);
     68         uprv_free(fPatRefCount);
     69     }
     70     if (fOwnsText && fText!=NULL) {
     71         uprv_free((void *)fText);
     72     }
     73     fMagic = 0;
     74 }
     75 
     76 U_NAMESPACE_END
     77 
     78 U_NAMESPACE_USE
     79 
     80 //----------------------------------------------------------------------------------------
     81 //
     82 //   validateRE    Do boilerplate style checks on API function parameters.
     83 //                 Return TRUE if they look OK.
     84 //----------------------------------------------------------------------------------------
     85 static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) {
     86     if (U_FAILURE(*status)) {
     87         return FALSE;
     88     }
     89     if (re == NULL || re->fMagic != REXP_MAGIC) {
     90         *status = U_ILLEGAL_ARGUMENT_ERROR;
     91         return FALSE;
     92     }
     93     // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway
     94     if (requiresText && re->fText == NULL && !re->fOwnsText) {
     95         *status = U_REGEX_INVALID_STATE;
     96         return FALSE;
     97     }
     98     return TRUE;
     99 }
    100 
    101 //----------------------------------------------------------------------------------------
    102 //
    103 //    uregex_open
    104 //
    105 //----------------------------------------------------------------------------------------
    106 U_CAPI URegularExpression *  U_EXPORT2
    107 uregex_open( const  UChar          *pattern,
    108                     int32_t         patternLength,
    109                     uint32_t        flags,
    110                     UParseError    *pe,
    111                     UErrorCode     *status) {
    112 
    113     if (U_FAILURE(*status)) {
    114         return NULL;
    115     }
    116     if (pattern == NULL || patternLength < -1 || patternLength == 0) {
    117         *status = U_ILLEGAL_ARGUMENT_ERROR;
    118         return NULL;
    119     }
    120     int32_t actualPatLen = patternLength;
    121     if (actualPatLen == -1) {
    122         actualPatLen = u_strlen(pattern);
    123     }
    124 
    125     RegularExpression *re     = new RegularExpression;
    126     int32_t            *refC   = (int32_t *)uprv_malloc(sizeof(int32_t));
    127     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1));
    128     if (re == NULL || refC == NULL || patBuf == NULL) {
    129         *status = U_MEMORY_ALLOCATION_ERROR;
    130         delete re;
    131         uprv_free(refC);
    132         uprv_free(patBuf);
    133         return NULL;
    134     }
    135     re->fPatRefCount = refC;
    136     *re->fPatRefCount = 1;
    137 
    138     //
    139     // Make a copy of the pattern string, so we can return it later if asked.
    140     //    For compiling the pattern, we will use a UText wrapper around
    141     //    this local copy, to avoid making even more copies.
    142     //
    143     re->fPatString    = patBuf;
    144     re->fPatStringLen = patternLength;
    145     u_memcpy(patBuf, pattern, actualPatLen);
    146     patBuf[actualPatLen] = 0;
    147 
    148     UText patText = UTEXT_INITIALIZER;
    149     utext_openUChars(&patText, patBuf, patternLength, status);
    150 
    151     //
    152     // Compile the pattern
    153     //
    154     if (pe != NULL) {
    155         re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
    156     } else {
    157         re->fPat = RegexPattern::compile(&patText, flags, *status);
    158     }
    159     utext_close(&patText);
    160 
    161     if (U_FAILURE(*status)) {
    162         goto ErrorExit;
    163     }
    164 
    165     //
    166     // Create the matcher object
    167     //
    168     re->fMatcher = re->fPat->matcher(*status);
    169     if (U_SUCCESS(*status)) {
    170         return (URegularExpression*)re;
    171     }
    172 
    173 ErrorExit:
    174     delete re;
    175     return NULL;
    176 
    177 }
    178 
    179 //----------------------------------------------------------------------------------------
    180 //
    181 //    uregex_openUText
    182 //
    183 //----------------------------------------------------------------------------------------
    184 U_CAPI URegularExpression *  U_EXPORT2
    185 uregex_openUText(UText          *pattern,
    186                  uint32_t        flags,
    187                  UParseError    *pe,
    188                  UErrorCode     *status) {
    189 
    190     if (U_FAILURE(*status)) {
    191         return NULL;
    192     }
    193     if (pattern == NULL) {
    194         *status = U_ILLEGAL_ARGUMENT_ERROR;
    195         return NULL;
    196     }
    197 
    198     int64_t patternNativeLength = utext_nativeLength(pattern);
    199 
    200     if (patternNativeLength == 0) {
    201         *status = U_ILLEGAL_ARGUMENT_ERROR;
    202         return NULL;
    203     }
    204 
    205     RegularExpression *re     = new RegularExpression;
    206 
    207     UErrorCode lengthStatus = U_ZERO_ERROR;
    208     int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
    209 
    210     int32_t            *refC   = (int32_t *)uprv_malloc(sizeof(int32_t));
    211     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
    212     if (re == NULL || refC == NULL || patBuf == NULL) {
    213         *status = U_MEMORY_ALLOCATION_ERROR;
    214         delete re;
    215         uprv_free(refC);
    216         uprv_free(patBuf);
    217         return NULL;
    218     }
    219     re->fPatRefCount = refC;
    220     *re->fPatRefCount = 1;
    221 
    222     //
    223     // Make a copy of the pattern string, so we can return it later if asked.
    224     //    For compiling the pattern, we will use a read-only UText wrapper
    225     //    around this local copy, to avoid making even more copies.
    226     //
    227     re->fPatString    = patBuf;
    228     re->fPatStringLen = pattern16Length;
    229     utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
    230 
    231     UText patText = UTEXT_INITIALIZER;
    232     utext_openUChars(&patText, patBuf, pattern16Length, status);
    233 
    234     //
    235     // Compile the pattern
    236     //
    237     if (pe != NULL) {
    238         re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
    239     } else {
    240         re->fPat = RegexPattern::compile(&patText, flags, *status);
    241     }
    242     utext_close(&patText);
    243 
    244     if (U_FAILURE(*status)) {
    245         goto ErrorExit;
    246     }
    247 
    248     //
    249     // Create the matcher object
    250     //
    251     re->fMatcher = re->fPat->matcher(*status);
    252     if (U_SUCCESS(*status)) {
    253         return (URegularExpression*)re;
    254     }
    255 
    256 ErrorExit:
    257     delete re;
    258     return NULL;
    259 
    260 }
    261 
    262 //----------------------------------------------------------------------------------------
    263 //
    264 //    uregex_close
    265 //
    266 //----------------------------------------------------------------------------------------
    267 U_CAPI void  U_EXPORT2
    268 uregex_close(URegularExpression  *re2) {
    269     RegularExpression *re = (RegularExpression*)re2;
    270     UErrorCode  status = U_ZERO_ERROR;
    271     if (validateRE(re, FALSE, &status) == FALSE) {
    272         return;
    273     }
    274     delete re;
    275 }
    276 
    277 
    278 //----------------------------------------------------------------------------------------
    279 //
    280 //    uregex_clone
    281 //
    282 //----------------------------------------------------------------------------------------
    283 U_CAPI URegularExpression * U_EXPORT2
    284 uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
    285     RegularExpression *source = (RegularExpression*)source2;
    286     if (validateRE(source, FALSE, status) == FALSE) {
    287         return NULL;
    288     }
    289 
    290     RegularExpression *clone = new RegularExpression;
    291     if (clone == NULL) {
    292         *status = U_MEMORY_ALLOCATION_ERROR;
    293         return NULL;
    294     }
    295 
    296     clone->fMatcher = source->fPat->matcher(*status);
    297     if (U_FAILURE(*status)) {
    298         delete clone;
    299         return NULL;
    300     }
    301 
    302     clone->fPat          = source->fPat;
    303     clone->fPatRefCount  = source->fPatRefCount;
    304     clone->fPatString    = source->fPatString;
    305     clone->fPatStringLen = source->fPatStringLen;
    306     umtx_atomic_inc(source->fPatRefCount);
    307     // Note:  fText is not cloned.
    308 
    309     return (URegularExpression*)clone;
    310 }
    311 
    312 
    313 
    314 
    315 //------------------------------------------------------------------------------
    316 //
    317 //    uregex_pattern
    318 //
    319 //------------------------------------------------------------------------------
    320 U_CAPI const UChar * U_EXPORT2
    321 uregex_pattern(const  URegularExpression *regexp2,
    322                       int32_t            *patLength,
    323                       UErrorCode         *status)  {
    324     RegularExpression *regexp = (RegularExpression*)regexp2;
    325 
    326     if (validateRE(regexp, FALSE, status) == FALSE) {
    327         return NULL;
    328     }
    329     if (patLength != NULL) {
    330         *patLength = regexp->fPatStringLen;
    331     }
    332     return regexp->fPatString;
    333 }
    334 
    335 
    336 //------------------------------------------------------------------------------
    337 //
    338 //    uregex_patternUText
    339 //
    340 //------------------------------------------------------------------------------
    341 U_CAPI UText * U_EXPORT2
    342 uregex_patternUText(const URegularExpression *regexp2,
    343                           UErrorCode         *status)  {
    344     RegularExpression *regexp = (RegularExpression*)regexp2;
    345     return regexp->fPat->patternText(*status);
    346 }
    347 
    348 
    349 //------------------------------------------------------------------------------
    350 //
    351 //    uregex_flags
    352 //
    353 //------------------------------------------------------------------------------
    354 U_CAPI int32_t U_EXPORT2
    355 uregex_flags(const URegularExpression *regexp2, UErrorCode *status)  {
    356     RegularExpression *regexp = (RegularExpression*)regexp2;
    357     if (validateRE(regexp, FALSE, status) == FALSE) {
    358         return 0;
    359     }
    360     int32_t flags = regexp->fPat->flags();
    361     return flags;
    362 }
    363 
    364 
    365 //------------------------------------------------------------------------------
    366 //
    367 //    uregex_setText
    368 //
    369 //------------------------------------------------------------------------------
    370 U_CAPI void U_EXPORT2
    371 uregex_setText(URegularExpression *regexp2,
    372                const UChar        *text,
    373                int32_t             textLength,
    374                UErrorCode         *status)  {
    375     RegularExpression *regexp = (RegularExpression*)regexp2;
    376     if (validateRE(regexp, FALSE, status) == FALSE) {
    377         return;
    378     }
    379     if (text == NULL || textLength < -1) {
    380         *status = U_ILLEGAL_ARGUMENT_ERROR;
    381         return;
    382     }
    383 
    384     if (regexp->fOwnsText && regexp->fText != NULL) {
    385         uprv_free((void *)regexp->fText);
    386     }
    387 
    388     regexp->fText       = text;
    389     regexp->fTextLength = textLength;
    390     regexp->fOwnsText   = FALSE;
    391 
    392     UText input = UTEXT_INITIALIZER;
    393     utext_openUChars(&input, text, textLength, status);
    394     regexp->fMatcher->reset(&input);
    395     utext_close(&input); // reset() made a shallow clone, so we don't need this copy
    396 }
    397 
    398 
    399 //------------------------------------------------------------------------------
    400 //
    401 //    uregex_setUText
    402 //
    403 //------------------------------------------------------------------------------
    404 U_CAPI void U_EXPORT2
    405 uregex_setUText(URegularExpression *regexp2,
    406                 UText              *text,
    407                 UErrorCode         *status) {
    408     RegularExpression *regexp = (RegularExpression*)regexp2;
    409     if (validateRE(regexp, FALSE, status) == FALSE) {
    410         return;
    411     }
    412     if (text == NULL) {
    413         *status = U_ILLEGAL_ARGUMENT_ERROR;
    414         return;
    415     }
    416 
    417     if (regexp->fOwnsText && regexp->fText != NULL) {
    418         uprv_free((void *)regexp->fText);
    419     }
    420 
    421     regexp->fText       = NULL; // only fill it in on request
    422     regexp->fTextLength = -1;
    423     regexp->fOwnsText   = TRUE;
    424     regexp->fMatcher->reset(text);
    425 }
    426 
    427 
    428 
    429 //------------------------------------------------------------------------------
    430 //
    431 //    uregex_getText
    432 //
    433 //------------------------------------------------------------------------------
    434 U_CAPI const UChar * U_EXPORT2
    435 uregex_getText(URegularExpression *regexp2,
    436                int32_t            *textLength,
    437                UErrorCode         *status)  {
    438     RegularExpression *regexp = (RegularExpression*)regexp2;
    439     if (validateRE(regexp, FALSE, status) == FALSE) {
    440         return NULL;
    441     }
    442 
    443     if (regexp->fText == NULL) {
    444         // need to fill in the text
    445         UText *inputText = regexp->fMatcher->inputText();
    446         int64_t inputNativeLength = utext_nativeLength(inputText);
    447         if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
    448             regexp->fText = inputText->chunkContents;
    449             regexp->fTextLength = (int32_t)inputNativeLength;
    450             regexp->fOwnsText = FALSE; // because the UText owns it
    451         } else {
    452             UErrorCode lengthStatus = U_ZERO_ERROR;
    453             regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error
    454             UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1));
    455 
    456             utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
    457             regexp->fText = inputChars;
    458             regexp->fOwnsText = TRUE; // should already be set but just in case
    459         }
    460     }
    461 
    462     if (textLength != NULL) {
    463         *textLength = regexp->fTextLength;
    464     }
    465     return regexp->fText;
    466 }
    467 
    468 
    469 //------------------------------------------------------------------------------
    470 //
    471 //    uregex_getUText
    472 //
    473 //------------------------------------------------------------------------------
    474 U_CAPI UText * U_EXPORT2
    475 uregex_getUText(URegularExpression *regexp2,
    476                 UText              *dest,
    477                 UErrorCode         *status)  {
    478     RegularExpression *regexp = (RegularExpression*)regexp2;
    479     if (validateRE(regexp, FALSE, status) == FALSE) {
    480         return dest;
    481     }
    482     return regexp->fMatcher->getInput(dest, *status);
    483 }
    484 
    485 
    486 //------------------------------------------------------------------------------
    487 //
    488 //    uregex_refreshUText
    489 //
    490 //------------------------------------------------------------------------------
    491 U_CAPI void U_EXPORT2
    492 uregex_refreshUText(URegularExpression *regexp2,
    493                     UText              *text,
    494                     UErrorCode         *status) {
    495     RegularExpression *regexp = (RegularExpression*)regexp2;
    496     if (validateRE(regexp, FALSE, status) == FALSE) {
    497         return;
    498     }
    499     regexp->fMatcher->refreshInputText(text, *status);
    500 }
    501 
    502 
    503 //------------------------------------------------------------------------------
    504 //
    505 //    uregex_matches
    506 //
    507 //------------------------------------------------------------------------------
    508 U_CAPI UBool U_EXPORT2
    509 uregex_matches(URegularExpression *regexp2,
    510                int32_t            startIndex,
    511                UErrorCode        *status)  {
    512     return uregex_matches64( regexp2, (int64_t)startIndex, status);
    513 }
    514 
    515 U_CAPI UBool U_EXPORT2
    516 uregex_matches64(URegularExpression *regexp2,
    517                  int64_t            startIndex,
    518                  UErrorCode        *status)  {
    519     RegularExpression *regexp = (RegularExpression*)regexp2;
    520     UBool result = FALSE;
    521     if (validateRE(regexp, TRUE, status) == FALSE) {
    522         return result;
    523     }
    524     if (startIndex == -1) {
    525         result = regexp->fMatcher->matches(*status);
    526     } else {
    527         result = regexp->fMatcher->matches(startIndex, *status);
    528     }
    529     return result;
    530 }
    531 
    532 
    533 //------------------------------------------------------------------------------
    534 //
    535 //    uregex_lookingAt
    536 //
    537 //------------------------------------------------------------------------------
    538 U_CAPI UBool U_EXPORT2
    539 uregex_lookingAt(URegularExpression *regexp2,
    540                  int32_t             startIndex,
    541                  UErrorCode         *status)  {
    542     return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
    543 }
    544 
    545 U_CAPI UBool U_EXPORT2
    546 uregex_lookingAt64(URegularExpression *regexp2,
    547                    int64_t             startIndex,
    548                    UErrorCode         *status)  {
    549     RegularExpression *regexp = (RegularExpression*)regexp2;
    550     UBool result = FALSE;
    551     if (validateRE(regexp, TRUE, status) == FALSE) {
    552         return result;
    553     }
    554     if (startIndex == -1) {
    555         result = regexp->fMatcher->lookingAt(*status);
    556     } else {
    557         result = regexp->fMatcher->lookingAt(startIndex, *status);
    558     }
    559     return result;
    560 }
    561 
    562 
    563 
    564 //------------------------------------------------------------------------------
    565 //
    566 //    uregex_find
    567 //
    568 //------------------------------------------------------------------------------
    569 U_CAPI UBool U_EXPORT2
    570 uregex_find(URegularExpression *regexp2,
    571             int32_t             startIndex,
    572             UErrorCode         *status)  {
    573     return uregex_find64( regexp2, (int64_t)startIndex, status);
    574 }
    575 
    576 U_CAPI UBool U_EXPORT2
    577 uregex_find64(URegularExpression *regexp2,
    578               int64_t             startIndex,
    579               UErrorCode         *status)  {
    580     RegularExpression *regexp = (RegularExpression*)regexp2;
    581     UBool result = FALSE;
    582     if (validateRE(regexp, TRUE, status) == FALSE) {
    583         return result;
    584     }
    585     if (startIndex == -1) {
    586         regexp->fMatcher->resetPreserveRegion();
    587         result = regexp->fMatcher->find();
    588     } else {
    589         result = regexp->fMatcher->find(startIndex, *status);
    590     }
    591     return result;
    592 }
    593 
    594 
    595 //------------------------------------------------------------------------------
    596 //
    597 //    uregex_findNext
    598 //
    599 //------------------------------------------------------------------------------
    600 U_CAPI UBool U_EXPORT2
    601 uregex_findNext(URegularExpression *regexp2,
    602                 UErrorCode         *status)  {
    603     RegularExpression *regexp = (RegularExpression*)regexp2;
    604     if (validateRE(regexp, TRUE, status) == FALSE) {
    605         return FALSE;
    606     }
    607     UBool result = regexp->fMatcher->find();
    608     return result;
    609 }
    610 
    611 //------------------------------------------------------------------------------
    612 //
    613 //    uregex_groupCount
    614 //
    615 //------------------------------------------------------------------------------
    616 U_CAPI int32_t U_EXPORT2
    617 uregex_groupCount(URegularExpression *regexp2,
    618                   UErrorCode         *status)  {
    619     RegularExpression *regexp = (RegularExpression*)regexp2;
    620     if (validateRE(regexp, FALSE, status) == FALSE) {
    621         return 0;
    622     }
    623     int32_t  result = regexp->fMatcher->groupCount();
    624     return result;
    625 }
    626 
    627 
    628 //------------------------------------------------------------------------------
    629 //
    630 //    uregex_group
    631 //
    632 //------------------------------------------------------------------------------
    633 U_CAPI int32_t U_EXPORT2
    634 uregex_group(URegularExpression *regexp2,
    635              int32_t             groupNum,
    636              UChar              *dest,
    637              int32_t             destCapacity,
    638              UErrorCode          *status)  {
    639     RegularExpression *regexp = (RegularExpression*)regexp2;
    640     if (validateRE(regexp, TRUE, status) == FALSE) {
    641         return 0;
    642     }
    643     if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
    644         *status = U_ILLEGAL_ARGUMENT_ERROR;
    645         return 0;
    646     }
    647 
    648     if (destCapacity == 0 || regexp->fText != NULL) {
    649         // If preflighting or if we already have the text as UChars,
    650         // this is a little cheaper than going through uregex_groupUTextDeep()
    651 
    652         //
    653         // Pick up the range of characters from the matcher
    654         //
    655         int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
    656         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
    657         if (U_FAILURE(*status)) {
    658             return 0;
    659         }
    660 
    661         //
    662         // Trim length based on buffer capacity
    663         //
    664         int32_t fullLength = endIx - startIx;
    665         int32_t copyLength = fullLength;
    666         if (copyLength < destCapacity) {
    667             dest[copyLength] = 0;
    668         } else if (copyLength == destCapacity) {
    669             *status = U_STRING_NOT_TERMINATED_WARNING;
    670         } else {
    671             copyLength = destCapacity;
    672             *status = U_BUFFER_OVERFLOW_ERROR;
    673         }
    674 
    675         //
    676         // Copy capture group to user's buffer
    677         //
    678         if (copyLength > 0) {
    679             u_memcpy(dest, &regexp->fText[startIx], copyLength);
    680         }
    681         return fullLength;
    682     } else {
    683         UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status);
    684         int32_t result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status);
    685         utext_close(groupText);
    686         return result;
    687     }
    688 }
    689 
    690 
    691 //------------------------------------------------------------------------------
    692 //
    693 //    uregex_groupUText
    694 //
    695 //------------------------------------------------------------------------------
    696 U_CAPI UText * U_EXPORT2
    697 uregex_groupUText(URegularExpression *regexp2,
    698                   int32_t             groupNum,
    699                   UText              *dest,
    700                   int64_t            *groupLength,
    701                   UErrorCode         *status)  {
    702     RegularExpression *regexp = (RegularExpression*)regexp2;
    703     if (validateRE(regexp, TRUE, status) == FALSE) {
    704         UErrorCode emptyTextStatus = U_ZERO_ERROR;
    705         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
    706     }
    707 
    708     return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
    709 }
    710 
    711 //------------------------------------------------------------------------------
    712 //
    713 //    uregex_groupUTextDeep
    714 //
    715 //------------------------------------------------------------------------------
    716 U_CAPI UText * U_EXPORT2
    717 uregex_groupUTextDeep(URegularExpression *regexp2,
    718                   int32_t             groupNum,
    719                   UText              *dest,
    720                   UErrorCode         *status)  {
    721     RegularExpression *regexp = (RegularExpression*)regexp2;
    722     if (validateRE(regexp, TRUE, status) == FALSE) {
    723         UErrorCode emptyTextStatus = U_ZERO_ERROR;
    724         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
    725     }
    726 
    727     if (regexp->fText != NULL) {
    728         //
    729         // Pick up the range of characters from the matcher
    730         // and use our already-extracted characters
    731         //
    732         int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
    733         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
    734         if (U_FAILURE(*status)) {
    735             UErrorCode emptyTextStatus = U_ZERO_ERROR;
    736             return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
    737         }
    738 
    739         if (dest) {
    740             utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status);
    741         } else {
    742             UText groupText = UTEXT_INITIALIZER;
    743             utext_openUChars(&groupText, &regexp->fText[startIx], endIx - startIx, status);
    744             dest = utext_clone(NULL, &groupText, TRUE, FALSE, status);
    745             utext_close(&groupText);
    746         }
    747 
    748         return dest;
    749     } else {
    750         return regexp->fMatcher->group(groupNum, dest, *status);
    751     }
    752 }
    753 
    754 //------------------------------------------------------------------------------
    755 //
    756 //    uregex_start
    757 //
    758 //------------------------------------------------------------------------------
    759 U_CAPI int32_t U_EXPORT2
    760 uregex_start(URegularExpression *regexp2,
    761              int32_t             groupNum,
    762              UErrorCode          *status)  {
    763     return (int32_t)uregex_start64( regexp2, groupNum, status);
    764 }
    765 
    766 U_CAPI int64_t U_EXPORT2
    767 uregex_start64(URegularExpression *regexp2,
    768                int32_t             groupNum,
    769                UErrorCode          *status)  {
    770     RegularExpression *regexp = (RegularExpression*)regexp2;
    771     if (validateRE(regexp, TRUE, status) == FALSE) {
    772         return 0;
    773     }
    774     int32_t result = regexp->fMatcher->start(groupNum, *status);
    775     return result;
    776 }
    777 
    778 //------------------------------------------------------------------------------
    779 //
    780 //    uregex_end
    781 //
    782 //------------------------------------------------------------------------------
    783 U_CAPI int32_t U_EXPORT2
    784 uregex_end(URegularExpression   *regexp2,
    785            int32_t               groupNum,
    786            UErrorCode           *status)  {
    787     return (int32_t)uregex_end64( regexp2, groupNum, status);
    788 }
    789 
    790 U_CAPI int64_t U_EXPORT2
    791 uregex_end64(URegularExpression   *regexp2,
    792              int32_t               groupNum,
    793              UErrorCode           *status)  {
    794     RegularExpression *regexp = (RegularExpression*)regexp2;
    795     if (validateRE(regexp, TRUE, status) == FALSE) {
    796         return 0;
    797     }
    798     int32_t result = regexp->fMatcher->end(groupNum, *status);
    799     return result;
    800 }
    801 
    802 //------------------------------------------------------------------------------
    803 //
    804 //    uregex_reset
    805 //
    806 //------------------------------------------------------------------------------
    807 U_CAPI void U_EXPORT2
    808 uregex_reset(URegularExpression    *regexp2,
    809              int32_t               index,
    810              UErrorCode            *status)  {
    811     uregex_reset64( regexp2, (int64_t)index, status);
    812 }
    813 
    814 U_CAPI void U_EXPORT2
    815 uregex_reset64(URegularExpression    *regexp2,
    816                int64_t               index,
    817                UErrorCode            *status)  {
    818     RegularExpression *regexp = (RegularExpression*)regexp2;
    819     if (validateRE(regexp, TRUE, status) == FALSE) {
    820         return;
    821     }
    822     regexp->fMatcher->reset(index, *status);
    823 }
    824 
    825 
    826 //------------------------------------------------------------------------------
    827 //
    828 //    uregex_setRegion
    829 //
    830 //------------------------------------------------------------------------------
    831 U_CAPI void U_EXPORT2
    832 uregex_setRegion(URegularExpression   *regexp2,
    833                  int32_t               regionStart,
    834                  int32_t               regionLimit,
    835                  UErrorCode           *status)  {
    836     uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
    837 }
    838 
    839 U_CAPI void U_EXPORT2
    840 uregex_setRegion64(URegularExpression   *regexp2,
    841                    int64_t               regionStart,
    842                    int64_t               regionLimit,
    843                    UErrorCode           *status)  {
    844     RegularExpression *regexp = (RegularExpression*)regexp2;
    845     if (validateRE(regexp, TRUE, status) == FALSE) {
    846         return;
    847     }
    848     regexp->fMatcher->region(regionStart, regionLimit, *status);
    849 }
    850 
    851 
    852 //------------------------------------------------------------------------------
    853 //
    854 //    uregex_setRegionAndStart
    855 //
    856 //------------------------------------------------------------------------------
    857 U_CAPI void U_EXPORT2
    858 uregex_setRegionAndStart(URegularExpression   *regexp2,
    859                  int64_t               regionStart,
    860                  int64_t               regionLimit,
    861                  int64_t               startIndex,
    862                  UErrorCode           *status)  {
    863     RegularExpression *regexp = (RegularExpression*)regexp2;
    864     if (validateRE(regexp, TRUE, status) == FALSE) {
    865         return;
    866     }
    867     regexp->fMatcher->region(regionStart, regionLimit, startIndex, *status);
    868 }
    869 
    870 //------------------------------------------------------------------------------
    871 //
    872 //    uregex_regionStart
    873 //
    874 //------------------------------------------------------------------------------
    875 U_CAPI int32_t U_EXPORT2
    876 uregex_regionStart(const  URegularExpression   *regexp2,
    877                           UErrorCode           *status)  {
    878     return (int32_t)uregex_regionStart64(regexp2, status);
    879 }
    880 
    881 U_CAPI int64_t U_EXPORT2
    882 uregex_regionStart64(const  URegularExpression   *regexp2,
    883                             UErrorCode           *status)  {
    884     RegularExpression *regexp = (RegularExpression*)regexp2;
    885     if (validateRE(regexp, TRUE, status) == FALSE) {
    886         return 0;
    887     }
    888     return regexp->fMatcher->regionStart();
    889 }
    890 
    891 
    892 //------------------------------------------------------------------------------
    893 //
    894 //    uregex_regionEnd
    895 //
    896 //------------------------------------------------------------------------------
    897 U_CAPI int32_t U_EXPORT2
    898 uregex_regionEnd(const  URegularExpression   *regexp2,
    899                         UErrorCode           *status)  {
    900     return (int32_t)uregex_regionEnd64(regexp2, status);
    901 }
    902 
    903 U_CAPI int64_t U_EXPORT2
    904 uregex_regionEnd64(const  URegularExpression   *regexp2,
    905                           UErrorCode           *status)  {
    906     RegularExpression *regexp = (RegularExpression*)regexp2;
    907     if (validateRE(regexp, TRUE, status) == FALSE) {
    908         return 0;
    909     }
    910     return regexp->fMatcher->regionEnd();
    911 }
    912 
    913 
    914 //------------------------------------------------------------------------------
    915 //
    916 //    uregex_hasTransparentBounds
    917 //
    918 //------------------------------------------------------------------------------
    919 U_CAPI UBool U_EXPORT2
    920 uregex_hasTransparentBounds(const  URegularExpression   *regexp2,
    921                                    UErrorCode           *status)  {
    922     RegularExpression *regexp = (RegularExpression*)regexp2;
    923     if (validateRE(regexp, FALSE, status) == FALSE) {
    924         return FALSE;
    925     }
    926     return regexp->fMatcher->hasTransparentBounds();
    927 }
    928 
    929 
    930 //------------------------------------------------------------------------------
    931 //
    932 //    uregex_useTransparentBounds
    933 //
    934 //------------------------------------------------------------------------------
    935 U_CAPI void U_EXPORT2
    936 uregex_useTransparentBounds(URegularExpression    *regexp2,
    937                             UBool                  b,
    938                             UErrorCode            *status)  {
    939     RegularExpression *regexp = (RegularExpression*)regexp2;
    940     if (validateRE(regexp, FALSE, status) == FALSE) {
    941         return;
    942     }
    943     regexp->fMatcher->useTransparentBounds(b);
    944 }
    945 
    946 
    947 //------------------------------------------------------------------------------
    948 //
    949 //    uregex_hasAnchoringBounds
    950 //
    951 //------------------------------------------------------------------------------
    952 U_CAPI UBool U_EXPORT2
    953 uregex_hasAnchoringBounds(const  URegularExpression   *regexp2,
    954                                  UErrorCode           *status)  {
    955     RegularExpression *regexp = (RegularExpression*)regexp2;
    956     if (validateRE(regexp, FALSE, status) == FALSE) {
    957         return FALSE;
    958     }
    959     return regexp->fMatcher->hasAnchoringBounds();
    960 }
    961 
    962 
    963 //------------------------------------------------------------------------------
    964 //
    965 //    uregex_useAnchoringBounds
    966 //
    967 //------------------------------------------------------------------------------
    968 U_CAPI void U_EXPORT2
    969 uregex_useAnchoringBounds(URegularExpression    *regexp2,
    970                           UBool                  b,
    971                           UErrorCode            *status)  {
    972     RegularExpression *regexp = (RegularExpression*)regexp2;
    973     if (validateRE(regexp, FALSE, status) == FALSE) {
    974         return;
    975     }
    976     regexp->fMatcher->useAnchoringBounds(b);
    977 }
    978 
    979 
    980 //------------------------------------------------------------------------------
    981 //
    982 //    uregex_hitEnd
    983 //
    984 //------------------------------------------------------------------------------
    985 U_CAPI UBool U_EXPORT2
    986 uregex_hitEnd(const  URegularExpression   *regexp2,
    987                      UErrorCode           *status)  {
    988     RegularExpression *regexp = (RegularExpression*)regexp2;
    989     if (validateRE(regexp, TRUE, status) == FALSE) {
    990         return FALSE;
    991     }
    992     return regexp->fMatcher->hitEnd();
    993 }
    994 
    995 
    996 //------------------------------------------------------------------------------
    997 //
    998 //    uregex_requireEnd
    999 //
   1000 //------------------------------------------------------------------------------
   1001 U_CAPI UBool U_EXPORT2
   1002 uregex_requireEnd(const  URegularExpression   *regexp2,
   1003                          UErrorCode           *status)  {
   1004     RegularExpression *regexp = (RegularExpression*)regexp2;
   1005     if (validateRE(regexp, TRUE, status) == FALSE) {
   1006         return FALSE;
   1007     }
   1008     return regexp->fMatcher->requireEnd();
   1009 }
   1010 
   1011 
   1012 //------------------------------------------------------------------------------
   1013 //
   1014 //    uregex_setTimeLimit
   1015 //
   1016 //------------------------------------------------------------------------------
   1017 U_CAPI void U_EXPORT2
   1018 uregex_setTimeLimit(URegularExpression   *regexp2,
   1019                     int32_t               limit,
   1020                     UErrorCode           *status) {
   1021     RegularExpression *regexp = (RegularExpression*)regexp2;
   1022     if (validateRE(regexp, FALSE, status)) {
   1023         regexp->fMatcher->setTimeLimit(limit, *status);
   1024     }
   1025 }
   1026 
   1027 
   1028 
   1029 //------------------------------------------------------------------------------
   1030 //
   1031 //    uregex_getTimeLimit
   1032 //
   1033 //------------------------------------------------------------------------------
   1034 U_CAPI int32_t U_EXPORT2
   1035 uregex_getTimeLimit(const  URegularExpression   *regexp2,
   1036                            UErrorCode           *status) {
   1037     int32_t retVal = 0;
   1038     RegularExpression *regexp = (RegularExpression*)regexp2;
   1039     if (validateRE(regexp, FALSE, status)) {
   1040         retVal = regexp->fMatcher->getTimeLimit();
   1041     }
   1042     return retVal;
   1043 }
   1044 
   1045 
   1046 
   1047 //------------------------------------------------------------------------------
   1048 //
   1049 //    uregex_setStackLimit
   1050 //
   1051 //------------------------------------------------------------------------------
   1052 U_CAPI void U_EXPORT2
   1053 uregex_setStackLimit(URegularExpression   *regexp2,
   1054                      int32_t               limit,
   1055                      UErrorCode           *status) {
   1056     RegularExpression *regexp = (RegularExpression*)regexp2;
   1057     if (validateRE(regexp, FALSE, status)) {
   1058         regexp->fMatcher->setStackLimit(limit, *status);
   1059     }
   1060 }
   1061 
   1062 
   1063 
   1064 //------------------------------------------------------------------------------
   1065 //
   1066 //    uregex_getStackLimit
   1067 //
   1068 //------------------------------------------------------------------------------
   1069 U_CAPI int32_t U_EXPORT2
   1070 uregex_getStackLimit(const  URegularExpression   *regexp2,
   1071                             UErrorCode           *status) {
   1072     int32_t retVal = 0;
   1073     RegularExpression *regexp = (RegularExpression*)regexp2;
   1074     if (validateRE(regexp, FALSE, status)) {
   1075         retVal = regexp->fMatcher->getStackLimit();
   1076     }
   1077     return retVal;
   1078 }
   1079 
   1080 
   1081 //------------------------------------------------------------------------------
   1082 //
   1083 //    uregex_setMatchCallback
   1084 //
   1085 //------------------------------------------------------------------------------
   1086 U_CAPI void U_EXPORT2
   1087 uregex_setMatchCallback(URegularExpression      *regexp2,
   1088                         URegexMatchCallback     *callback,
   1089                         const void              *context,
   1090                         UErrorCode              *status) {
   1091     RegularExpression *regexp = (RegularExpression*)regexp2;
   1092     if (validateRE(regexp, FALSE, status)) {
   1093         regexp->fMatcher->setMatchCallback(callback, context, *status);
   1094     }
   1095 }
   1096 
   1097 
   1098 //------------------------------------------------------------------------------
   1099 //
   1100 //    uregex_getMatchCallback
   1101 //
   1102 //------------------------------------------------------------------------------
   1103 U_CAPI void U_EXPORT2
   1104 uregex_getMatchCallback(const URegularExpression    *regexp2,
   1105                         URegexMatchCallback        **callback,
   1106                         const void                 **context,
   1107                         UErrorCode                  *status) {
   1108     RegularExpression *regexp = (RegularExpression*)regexp2;
   1109      if (validateRE(regexp, FALSE, status)) {
   1110          regexp->fMatcher->getMatchCallback(*callback, *context, *status);
   1111      }
   1112 }
   1113 
   1114 
   1115 //------------------------------------------------------------------------------
   1116 //
   1117 //    uregex_setFindProgressCallback
   1118 //
   1119 //------------------------------------------------------------------------------
   1120 U_CAPI void U_EXPORT2
   1121 uregex_setFindProgressCallback(URegularExpression              *regexp2,
   1122                                 URegexFindProgressCallback      *callback,
   1123                                 const void                      *context,
   1124                                 UErrorCode                      *status) {
   1125     RegularExpression *regexp = (RegularExpression*)regexp2;
   1126     if (validateRE(regexp, FALSE, status)) {
   1127         regexp->fMatcher->setFindProgressCallback(callback, context, *status);
   1128     }
   1129 }
   1130 
   1131 
   1132 //------------------------------------------------------------------------------
   1133 //
   1134 //    uregex_getFindProgressCallback
   1135 //
   1136 //------------------------------------------------------------------------------
   1137 U_CAPI void U_EXPORT2
   1138 uregex_getFindProgressCallback(const URegularExpression          *regexp2,
   1139                                 URegexFindProgressCallback        **callback,
   1140                                 const void                        **context,
   1141                                 UErrorCode                        *status) {
   1142     RegularExpression *regexp = (RegularExpression*)regexp2;
   1143      if (validateRE(regexp, FALSE, status)) {
   1144          regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
   1145      }
   1146 }
   1147 
   1148 
   1149 //------------------------------------------------------------------------------
   1150 //
   1151 //    uregex_replaceAll
   1152 //
   1153 //------------------------------------------------------------------------------
   1154 U_CAPI int32_t U_EXPORT2
   1155 uregex_replaceAll(URegularExpression    *regexp2,
   1156                   const UChar           *replacementText,
   1157                   int32_t                replacementLength,
   1158                   UChar                 *destBuf,
   1159                   int32_t                destCapacity,
   1160                   UErrorCode            *status)  {
   1161     RegularExpression *regexp = (RegularExpression*)regexp2;
   1162     if (validateRE(regexp, TRUE, status) == FALSE) {
   1163         return 0;
   1164     }
   1165     if (replacementText == NULL || replacementLength < -1 ||
   1166         (destBuf == NULL && destCapacity > 0) ||
   1167         destCapacity < 0) {
   1168         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1169         return 0;
   1170     }
   1171 
   1172     int32_t   len = 0;
   1173 
   1174     uregex_reset(regexp2, 0, status);
   1175 
   1176     // Note: Seperate error code variables for findNext() and appendReplacement()
   1177     //       are used so that destination buffer overflow errors
   1178     //       in appendReplacement won't stop findNext() from working.
   1179     //       appendReplacement() and appendTail() special case incoming buffer
   1180     //       overflow errors, continuing to return the correct length.
   1181     UErrorCode  findStatus = *status;
   1182     while (uregex_findNext(regexp2, &findStatus)) {
   1183         len += uregex_appendReplacement(regexp2, replacementText, replacementLength,
   1184                                         &destBuf, &destCapacity, status);
   1185     }
   1186     len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
   1187 
   1188     if (U_FAILURE(findStatus)) {
   1189         // If anything went wrong with the findNext(), make that error trump
   1190         //   whatever may have happened with the append() operations.
   1191         //   Errors in findNext() are not expected.
   1192         *status = findStatus;
   1193     }
   1194 
   1195     return len;
   1196 }
   1197 
   1198 
   1199 //------------------------------------------------------------------------------
   1200 //
   1201 //    uregex_replaceAllUText
   1202 //
   1203 //------------------------------------------------------------------------------
   1204 U_CAPI UText * U_EXPORT2
   1205 uregex_replaceAllUText(URegularExpression    *regexp2,
   1206                        UText                 *replacementText,
   1207                        UText                 *dest,
   1208                        UErrorCode            *status)  {
   1209     RegularExpression *regexp = (RegularExpression*)regexp2;
   1210     if (validateRE(regexp, TRUE, status) == FALSE) {
   1211         return 0;
   1212     }
   1213     if (replacementText == NULL) {
   1214         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1215         return 0;
   1216     }
   1217 
   1218     dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
   1219     return dest;
   1220 }
   1221 
   1222 
   1223 //------------------------------------------------------------------------------
   1224 //
   1225 //    uregex_replaceFirst
   1226 //
   1227 //------------------------------------------------------------------------------
   1228 U_CAPI int32_t U_EXPORT2
   1229 uregex_replaceFirst(URegularExpression  *regexp2,
   1230                     const UChar         *replacementText,
   1231                     int32_t              replacementLength,
   1232                     UChar               *destBuf,
   1233                     int32_t              destCapacity,
   1234                     UErrorCode          *status)  {
   1235     RegularExpression *regexp = (RegularExpression*)regexp2;
   1236     if (validateRE(regexp, TRUE, status) == FALSE) {
   1237         return 0;
   1238     }
   1239     if (replacementText == NULL || replacementLength < -1 ||
   1240         (destBuf == NULL && destCapacity > 0) ||
   1241         destCapacity < 0) {
   1242         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1243         return 0;
   1244     }
   1245 
   1246     int32_t   len = 0;
   1247     UBool     findSucceeded;
   1248     uregex_reset(regexp2, 0, status);
   1249     findSucceeded = uregex_find(regexp2, 0, status);
   1250     if (findSucceeded) {
   1251         len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
   1252                                        &destBuf, &destCapacity, status);
   1253     }
   1254     len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
   1255 
   1256     return len;
   1257 }
   1258 
   1259 
   1260 //------------------------------------------------------------------------------
   1261 //
   1262 //    uregex_replaceFirstUText
   1263 //
   1264 //------------------------------------------------------------------------------
   1265 U_CAPI UText * U_EXPORT2
   1266 uregex_replaceFirstUText(URegularExpression  *regexp2,
   1267                          UText                 *replacementText,
   1268                          UText                 *dest,
   1269                          UErrorCode            *status)  {
   1270     RegularExpression *regexp = (RegularExpression*)regexp2;
   1271     if (validateRE(regexp, TRUE, status) == FALSE) {
   1272         return 0;
   1273     }
   1274     if (replacementText == NULL) {
   1275         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1276         return 0;
   1277     }
   1278 
   1279     dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
   1280     return dest;
   1281 }
   1282 
   1283 
   1284 //------------------------------------------------------------------------------
   1285 //
   1286 //    uregex_appendReplacement
   1287 //
   1288 //------------------------------------------------------------------------------
   1289 
   1290 U_NAMESPACE_BEGIN
   1291 //
   1292 //  Dummy class, because these functions need to be friends of class RegexMatcher,
   1293 //               and stand-alone C functions don't work as friends
   1294 //
   1295 class RegexCImpl {
   1296  public:
   1297    inline static  int32_t appendReplacement(RegularExpression    *regexp,
   1298                       const UChar           *replacementText,
   1299                       int32_t                replacementLength,
   1300                       UChar                **destBuf,
   1301                       int32_t               *destCapacity,
   1302                       UErrorCode            *status);
   1303 
   1304    inline static int32_t appendTail(RegularExpression    *regexp,
   1305         UChar                **destBuf,
   1306         int32_t               *destCapacity,
   1307         UErrorCode            *status);
   1308 
   1309     inline static int32_t split(RegularExpression    *regexp,
   1310         UChar                 *destBuf,
   1311         int32_t                destCapacity,
   1312         int32_t               *requiredCapacity,
   1313         UChar                 *destFields[],
   1314         int32_t                destFieldsCapacity,
   1315         UErrorCode            *status);
   1316 };
   1317 
   1318 U_NAMESPACE_END
   1319 
   1320 
   1321 
   1322 static const UChar BACKSLASH  = 0x5c;
   1323 static const UChar DOLLARSIGN = 0x24;
   1324 
   1325 //
   1326 //  Move a character to an output buffer, with bounds checking on the index.
   1327 //      Index advances even if capacity is exceeded, for preflight size computations.
   1328 //      This little sequence is used a LOT.
   1329 //
   1330 static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) {
   1331     if (*idx < bufCapacity) {
   1332         buf[*idx] = c;
   1333     }
   1334     (*idx)++;
   1335 }
   1336 
   1337 
   1338 //
   1339 //  appendReplacement, the actual implementation.
   1340 //
   1341 int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
   1342                                       const UChar           *replacementText,
   1343                                       int32_t                replacementLength,
   1344                                       UChar                **destBuf,
   1345                                       int32_t               *destCapacity,
   1346                                       UErrorCode            *status)  {
   1347 
   1348     // If we come in with a buffer overflow error, don't suppress the operation.
   1349     //  A series of appendReplacements, appendTail need to correctly preflight
   1350     //  the buffer size when an overflow happens somewhere in the middle.
   1351     UBool pendingBufferOverflow = FALSE;
   1352     if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
   1353         pendingBufferOverflow = TRUE;
   1354         *status = U_ZERO_ERROR;
   1355     }
   1356 
   1357     //
   1358     // Validate all paramters
   1359     //
   1360     if (validateRE(regexp, TRUE, status) == FALSE) {
   1361         return 0;
   1362     }
   1363     if (replacementText == NULL || replacementLength < -1 ||
   1364         destCapacity == NULL || destBuf == NULL ||
   1365         (*destBuf == NULL && *destCapacity > 0) ||
   1366         *destCapacity < 0) {
   1367         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1368         return 0;
   1369     }
   1370 
   1371     RegexMatcher *m = regexp->fMatcher;
   1372     if (m->fMatch == FALSE) {
   1373         *status = U_REGEX_INVALID_STATE;
   1374         return 0;
   1375     }
   1376 
   1377     UChar    *dest             = *destBuf;
   1378     int32_t   capacity         = *destCapacity;
   1379     int32_t   destIdx          =  0;
   1380     int32_t   i;
   1381 
   1382     // If it wasn't supplied by the caller,  get the length of the replacement text.
   1383     //   TODO:  slightly smarter logic in the copy loop could watch for the NUL on
   1384     //          the fly and avoid this step.
   1385     if (replacementLength == -1) {
   1386         replacementLength = u_strlen(replacementText);
   1387     }
   1388 
   1389     // Copy input string from the end of previous match to start of current match
   1390     if (regexp->fText != NULL) {
   1391         int32_t matchStart;
   1392         int32_t lastMatchEnd;
   1393         if (UTEXT_USES_U16(m->fInputText)) {
   1394             lastMatchEnd = (int32_t)m->fLastMatchEnd;
   1395             matchStart = (int32_t)m->fMatchStart;
   1396         } else {
   1397             // !!!: Would like a better way to do this!
   1398             UErrorCode status = U_ZERO_ERROR;
   1399             lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &status);
   1400             status = U_ZERO_ERROR;
   1401             matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &status);
   1402         }
   1403         for (i=lastMatchEnd; i<matchStart; i++) {
   1404             appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
   1405         }
   1406     } else {
   1407         UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
   1408         destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
   1409                                  dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity),
   1410                                  &possibleOverflowError);
   1411     }
   1412     U_ASSERT(destIdx >= 0);
   1413 
   1414     // scan the replacement text, looking for substitutions ($n) and \escapes.
   1415     int32_t  replIdx = 0;
   1416     while (replIdx < replacementLength) {
   1417         UChar  c = replacementText[replIdx];
   1418         replIdx++;
   1419         if (c != DOLLARSIGN && c != BACKSLASH) {
   1420             // Common case, no substitution, no escaping,
   1421             //  just copy the char to the dest buf.
   1422             appendToBuf(c, &destIdx, dest, capacity);
   1423             continue;
   1424         }
   1425 
   1426         if (c == BACKSLASH) {
   1427             // Backslash Escape.  Copy the following char out without further checks.
   1428             //                    Note:  Surrogate pairs don't need any special handling
   1429             //                           The second half wont be a '$' or a '\', and
   1430             //                           will move to the dest normally on the next
   1431             //                           loop iteration.
   1432             if (replIdx >= replacementLength) {
   1433                 break;
   1434             }
   1435             c = replacementText[replIdx];
   1436 
   1437             if (c==0x55/*U*/ || c==0x75/*u*/) {
   1438                 // We have a \udddd or \Udddddddd escape sequence.
   1439                 UChar32 escapedChar =
   1440                     u_unescapeAt(uregex_ucstr_unescape_charAt,
   1441                        &replIdx,                   // Index is updated by unescapeAt
   1442                        replacementLength,          // Length of replacement text
   1443                        (void *)replacementText);
   1444 
   1445                 if (escapedChar != (UChar32)0xFFFFFFFF) {
   1446                     if (escapedChar <= 0xffff) {
   1447                         appendToBuf((UChar)escapedChar, &destIdx, dest, capacity);
   1448                     } else {
   1449                         appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
   1450                         appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
   1451                     }
   1452                     continue;
   1453                 }
   1454                 // Note:  if the \u escape was invalid, just fall through and
   1455                 //        treat it as a plain \<anything> escape.
   1456             }
   1457 
   1458             // Plain backslash escape.  Just put out the escaped character.
   1459             appendToBuf(c, &destIdx, dest, capacity);
   1460 
   1461             replIdx++;
   1462             continue;
   1463         }
   1464 
   1465 
   1466 
   1467         // We've got a $.  Pick up a capture group number if one follows.
   1468         // Consume at most the number of digits necessary for the largest capture
   1469         // number that is valid for this pattern.
   1470 
   1471         int32_t numDigits = 0;
   1472         int32_t groupNum  = 0;
   1473         UChar32 digitC;
   1474         for (;;) {
   1475             if (replIdx >= replacementLength) {
   1476                 break;
   1477             }
   1478             U16_GET(replacementText, 0, replIdx, replacementLength, digitC);
   1479             if (u_isdigit(digitC) == FALSE) {
   1480                 break;
   1481             }
   1482 
   1483             U16_FWD_1(replacementText, replIdx, replacementLength);
   1484             groupNum=groupNum*10 + u_charDigitValue(digitC);
   1485             numDigits++;
   1486             if (numDigits >= m->fPattern->fMaxCaptureDigits) {
   1487                 break;
   1488             }
   1489         }
   1490 
   1491 
   1492         if (numDigits == 0) {
   1493             // The $ didn't introduce a group number at all.
   1494             // Treat it as just part of the substitution text.
   1495             appendToBuf(DOLLARSIGN, &destIdx, dest, capacity);
   1496             continue;
   1497         }
   1498 
   1499         // Finally, append the capture group data to the destination.
   1500         destIdx += uregex_group((URegularExpression*)regexp, groupNum,
   1501                                 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
   1502         if (*status == U_BUFFER_OVERFLOW_ERROR) {
   1503             // Ignore buffer overflow when extracting the group.  We need to
   1504             //   continue on to get full size of the untruncated result.  We will
   1505             //   raise our own buffer overflow error at the end.
   1506             *status = U_ZERO_ERROR;
   1507         }
   1508 
   1509         if (U_FAILURE(*status)) {
   1510             // Can fail if group number is out of range.
   1511             break;
   1512         }
   1513 
   1514     }
   1515 
   1516     //
   1517     //  Nul Terminate the dest buffer if possible.
   1518     //  Set the appropriate buffer overflow or not terminated error, if needed.
   1519     //
   1520     if (destIdx < capacity) {
   1521         dest[destIdx] = 0;
   1522     } else if (destIdx == *destCapacity) {
   1523         *status = U_STRING_NOT_TERMINATED_WARNING;
   1524     } else {
   1525         *status = U_BUFFER_OVERFLOW_ERROR;
   1526     }
   1527 
   1528     //
   1529     // Return an updated dest buffer and capacity to the caller.
   1530     //
   1531     if (destIdx > 0 &&  *destCapacity > 0) {
   1532         if (destIdx < capacity) {
   1533             *destBuf      += destIdx;
   1534             *destCapacity -= destIdx;
   1535         } else {
   1536             *destBuf      += capacity;
   1537             *destCapacity =  0;
   1538         }
   1539     }
   1540 
   1541     // If we came in with a buffer overflow, make sure we go out with one also.
   1542     //   (A zero length match right at the end of the previous match could
   1543     //    make this function succeed even though a previous call had overflowed the buf)
   1544     if (pendingBufferOverflow && U_SUCCESS(*status)) {
   1545         *status = U_BUFFER_OVERFLOW_ERROR;
   1546     }
   1547 
   1548     return destIdx;
   1549 }
   1550 
   1551 //
   1552 //   appendReplacement   the actual API function,
   1553 //
   1554 U_CAPI int32_t U_EXPORT2
   1555 uregex_appendReplacement(URegularExpression    *regexp2,
   1556                          const UChar           *replacementText,
   1557                          int32_t                replacementLength,
   1558                          UChar                **destBuf,
   1559                          int32_t               *destCapacity,
   1560                          UErrorCode            *status) {
   1561 
   1562     RegularExpression *regexp = (RegularExpression*)regexp2;
   1563     return RegexCImpl::appendReplacement(
   1564         regexp, replacementText, replacementLength,destBuf, destCapacity, status);
   1565 }
   1566 
   1567 //
   1568 //   uregex_appendReplacementUText...can just use the normal C++ method
   1569 //
   1570 U_CAPI void U_EXPORT2
   1571 uregex_appendReplacementUText(URegularExpression    *regexp2,
   1572                               UText                 *replText,
   1573                               UText                 *dest,
   1574                               UErrorCode            *status)  {
   1575     RegularExpression *regexp = (RegularExpression*)regexp2;
   1576     regexp->fMatcher->appendReplacement(dest, replText, *status);
   1577 }
   1578 
   1579 
   1580 //------------------------------------------------------------------------------
   1581 //
   1582 //    uregex_appendTail
   1583 //
   1584 //------------------------------------------------------------------------------
   1585 int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
   1586                                UChar                **destBuf,
   1587                                int32_t               *destCapacity,
   1588                                UErrorCode            *status)
   1589 {
   1590 
   1591     // If we come in with a buffer overflow error, don't suppress the operation.
   1592     //  A series of appendReplacements, appendTail need to correctly preflight
   1593     //  the buffer size when an overflow happens somewhere in the middle.
   1594     UBool pendingBufferOverflow = FALSE;
   1595     if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
   1596         pendingBufferOverflow = TRUE;
   1597         *status = U_ZERO_ERROR;
   1598     }
   1599 
   1600     if (validateRE(regexp, TRUE, status) == FALSE) {
   1601         return 0;
   1602     }
   1603 
   1604     if (destCapacity == NULL || destBuf == NULL ||
   1605         (*destBuf == NULL && *destCapacity > 0) ||
   1606         *destCapacity < 0)
   1607     {
   1608         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1609         return 0;
   1610     }
   1611 
   1612     RegexMatcher *m = regexp->fMatcher;
   1613 
   1614     int32_t  destIdx     = 0;
   1615     int32_t  destCap     = *destCapacity;
   1616     UChar    *dest       = *destBuf;
   1617 
   1618     if (regexp->fText != NULL) {
   1619         int32_t srcIdx;
   1620         int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
   1621         if (nativeIdx == -1) {
   1622             srcIdx = 0;
   1623         } else if (UTEXT_USES_U16(m->fInputText)) {
   1624             srcIdx = (int32_t)nativeIdx;
   1625         } else {
   1626             UErrorCode status = U_ZERO_ERROR;
   1627             srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status);
   1628         }
   1629 
   1630         for (;;) {
   1631             U_ASSERT(destIdx >= 0);
   1632 
   1633             if (srcIdx == regexp->fTextLength) {
   1634                 break;
   1635             }
   1636             UChar c = regexp->fText[srcIdx];
   1637             if (c == 0 && regexp->fTextLength == -1) {
   1638                 regexp->fTextLength = srcIdx;
   1639                 break;
   1640             }
   1641 
   1642             if (destIdx < destCap) {
   1643                 dest[destIdx] = c;
   1644             } else {
   1645                 // We've overflowed the dest buffer.
   1646                 //  If the total input string length is known, we can
   1647                 //    compute the total buffer size needed without scanning through the string.
   1648                 if (regexp->fTextLength > 0) {
   1649                     destIdx += (regexp->fTextLength - srcIdx);
   1650                     break;
   1651                 }
   1652             }
   1653             srcIdx++;
   1654             destIdx++;
   1655         }
   1656     } else {
   1657         int64_t  srcIdx;
   1658         if (m->fMatch) {
   1659             // The most recent call to find() succeeded.
   1660             srcIdx = m->fMatchEnd;
   1661         } else {
   1662             // The last call to find() on this matcher failed().
   1663             //   Look back to the end of the last find() that succeeded for src index.
   1664             srcIdx = m->fLastMatchEnd;
   1665             if (srcIdx == -1)  {
   1666                 // There has been no successful match with this matcher.
   1667                 //   We want to copy the whole string.
   1668                 srcIdx = 0;
   1669             }
   1670         }
   1671 
   1672         destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
   1673     }
   1674 
   1675     //
   1676     //  NUL terminate the output string, if possible, otherwise issue the
   1677     //   appropriate error or warning.
   1678     //
   1679     if (destIdx < destCap) {
   1680         dest[destIdx] = 0;
   1681     } else  if (destIdx == destCap) {
   1682         *status = U_STRING_NOT_TERMINATED_WARNING;
   1683     } else {
   1684         *status = U_BUFFER_OVERFLOW_ERROR;
   1685     }
   1686 
   1687     //
   1688     // Update the user's buffer ptr and capacity vars to reflect the
   1689     //   amount used.
   1690     //
   1691     if (destIdx < destCap) {
   1692         *destBuf      += destIdx;
   1693         *destCapacity -= destIdx;
   1694     } else if (*destBuf != NULL) {
   1695         *destBuf      += destCap;
   1696         *destCapacity  = 0;
   1697     }
   1698 
   1699     if (pendingBufferOverflow && U_SUCCESS(*status)) {
   1700         *status = U_BUFFER_OVERFLOW_ERROR;
   1701     }
   1702 
   1703     return destIdx;
   1704 }
   1705 
   1706 
   1707 //
   1708 //   appendTail   the actual API function
   1709 //
   1710 U_CAPI int32_t U_EXPORT2
   1711 uregex_appendTail(URegularExpression    *regexp2,
   1712                   UChar                **destBuf,
   1713                   int32_t               *destCapacity,
   1714                   UErrorCode            *status)  {
   1715     RegularExpression *regexp = (RegularExpression*)regexp2;
   1716     return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status);
   1717 }
   1718 
   1719 
   1720 //
   1721 //   uregex_appendTailUText...can just use the normal C++ method
   1722 //
   1723 U_CAPI UText * U_EXPORT2
   1724 uregex_appendTailUText(URegularExpression    *regexp2,
   1725                        UText                 *dest,
   1726                        UErrorCode            *status)  {
   1727     RegularExpression *regexp = (RegularExpression*)regexp2;
   1728     return regexp->fMatcher->appendTail(dest, *status);
   1729 }
   1730 
   1731 
   1732 //------------------------------------------------------------------------------
   1733 //
   1734 //    copyString     Internal utility to copy a string to an output buffer,
   1735 //                   while managing buffer overflow and preflight size
   1736 //                   computation.  NUL termination is added to destination,
   1737 //                   and the NUL is counted in the output size.
   1738 //
   1739 //------------------------------------------------------------------------------
   1740 #if 0
   1741 static void copyString(UChar        *destBuffer,    //  Destination buffer.
   1742                        int32_t       destCapacity,  //  Total capacity of dest buffer
   1743                        int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
   1744                                                     //    Update not clipped to destCapacity.
   1745                        const UChar  *srcPtr,        //  Pointer to source string
   1746                        int32_t       srcLen)        //  Source string len.
   1747 {
   1748     int32_t  si;
   1749     int32_t  di = *destIndex;
   1750     UChar    c;
   1751 
   1752     for (si=0; si<srcLen;  si++) {
   1753         c = srcPtr[si];
   1754         if (di < destCapacity) {
   1755             destBuffer[di] = c;
   1756             di++;
   1757         } else {
   1758             di += srcLen - si;
   1759             break;
   1760         }
   1761     }
   1762     if (di<destCapacity) {
   1763         destBuffer[di] = 0;
   1764     }
   1765     di++;
   1766     *destIndex = di;
   1767 }
   1768 #endif
   1769 
   1770 //------------------------------------------------------------------------------
   1771 //
   1772 //    uregex_split
   1773 //
   1774 //------------------------------------------------------------------------------
   1775 int32_t RegexCImpl::split(RegularExpression     *regexp,
   1776                           UChar                 *destBuf,
   1777                           int32_t                destCapacity,
   1778                           int32_t               *requiredCapacity,
   1779                           UChar                 *destFields[],
   1780                           int32_t                destFieldsCapacity,
   1781                           UErrorCode            *status) {
   1782     //
   1783     // Reset for the input text
   1784     //
   1785     regexp->fMatcher->reset();
   1786     UText *inputText = regexp->fMatcher->fInputText;
   1787     int64_t   nextOutputStringStart = 0;
   1788     int64_t   inputLen = regexp->fMatcher->fInputLength;
   1789     if (inputLen == 0) {
   1790         return 0;
   1791     }
   1792 
   1793     //
   1794     // Loop through the input text, searching for the delimiter pattern
   1795     //
   1796     int32_t   i;             // Index of the field being processed.
   1797     int32_t   destIdx = 0;   // Next available position in destBuf;
   1798     int32_t   numCaptureGroups = regexp->fMatcher->groupCount();
   1799     UErrorCode  tStatus = U_ZERO_ERROR;   // Want to ignore any buffer overflow errors so that the strings are still counted
   1800     for (i=0; ; i++) {
   1801         if (i>=destFieldsCapacity-1) {
   1802             // There are one or zero output strings left.
   1803             // Fill the last output string with whatever is left from the input, then exit the loop.
   1804             //  ( i will be == destFieldsCapacity if we filled the output array while processing
   1805             //    capture groups of the delimiter expression, in which case we will discard the
   1806             //    last capture group saved in favor of the unprocessed remainder of the
   1807             //    input string.)
   1808             if (inputLen > nextOutputStringStart) {
   1809                 if (i != destFieldsCapacity-1) {
   1810                     // No fields are left.  Recycle the last one for holding the trailing part of
   1811                     //   the input string.
   1812                     i = destFieldsCapacity-1;
   1813                     destIdx = (int32_t)(destFields[i] - destFields[0]);
   1814                 }
   1815 
   1816                 destFields[i] = &destBuf[destIdx];
   1817                 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
   1818                                              &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
   1819             }
   1820             break;
   1821         }
   1822 
   1823         if (regexp->fMatcher->find()) {
   1824             // We found another delimiter.  Move everything from where we started looking
   1825             //  up until the start of the delimiter into the next output string.
   1826             destFields[i] = &destBuf[destIdx];
   1827 
   1828             destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
   1829                                          &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
   1830             if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
   1831                 tStatus = U_ZERO_ERROR;
   1832             } else {
   1833                 *status = tStatus;
   1834             }
   1835             nextOutputStringStart = regexp->fMatcher->fMatchEnd;
   1836 
   1837             // If the delimiter pattern has capturing parentheses, the captured
   1838             //  text goes out into the next n destination strings.
   1839             int32_t groupNum;
   1840             for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
   1841                 // If we've run out of output string slots, bail out.
   1842                 if (i==destFieldsCapacity-1) {
   1843                     break;
   1844                 }
   1845                 i++;
   1846 
   1847                 // Set up to extract the capture group contents into the dest buffer.
   1848                 destFields[i] = &destBuf[destIdx];
   1849                 tStatus = U_ZERO_ERROR;
   1850                 int32_t t = uregex_group((URegularExpression*)regexp,
   1851                                          groupNum,
   1852                                          destFields[i],
   1853                                          REMAINING_CAPACITY(destIdx, destCapacity),
   1854                                          &tStatus);
   1855                 destIdx += t + 1;    // Record the space used in the output string buffer.
   1856                                      //  +1 for the NUL that terminates the string.
   1857                 if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
   1858                     tStatus = U_ZERO_ERROR;
   1859                 } else {
   1860                     *status = tStatus;
   1861                 }
   1862             }
   1863 
   1864             if (nextOutputStringStart == inputLen) {
   1865                 // The delimiter was at the end of the string.
   1866                 // Output an empty string, and then we are done.
   1867                 if (destIdx < destCapacity) {
   1868                     destBuf[destIdx] = 0;
   1869                 }
   1870                 if (i < destFieldsCapacity-1) {
   1871                    ++i;
   1872                 }
   1873                 if (destIdx < destCapacity) {
   1874                     destFields[i] = destBuf + destIdx;
   1875                 }
   1876                 ++destIdx;
   1877                 break;
   1878             }
   1879 
   1880         }
   1881         else
   1882         {
   1883             // We ran off the end of the input while looking for the next delimiter.
   1884             // All the remaining text goes into the current output string.
   1885             destFields[i] = &destBuf[destIdx];
   1886             destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
   1887                                          &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
   1888             break;
   1889         }
   1890     }
   1891 
   1892     // Zero out any unused portion of the destFields array
   1893     int j;
   1894     for (j=i+1; j<destFieldsCapacity; j++) {
   1895         destFields[j] = NULL;
   1896     }
   1897 
   1898     if (requiredCapacity != NULL) {
   1899         *requiredCapacity = destIdx;
   1900     }
   1901     if (destIdx > destCapacity) {
   1902         *status = U_BUFFER_OVERFLOW_ERROR;
   1903     }
   1904     return i+1;
   1905 }
   1906 
   1907 //
   1908 //   uregex_split   The actual API function
   1909 //
   1910 U_CAPI int32_t U_EXPORT2
   1911 uregex_split(URegularExpression      *regexp2,
   1912              UChar                   *destBuf,
   1913              int32_t                  destCapacity,
   1914              int32_t                 *requiredCapacity,
   1915              UChar                   *destFields[],
   1916              int32_t                  destFieldsCapacity,
   1917              UErrorCode              *status) {
   1918     RegularExpression *regexp = (RegularExpression*)regexp2;
   1919     if (validateRE(regexp, TRUE, status) == FALSE) {
   1920         return 0;
   1921     }
   1922     if ((destBuf == NULL && destCapacity > 0) ||
   1923         destCapacity < 0 ||
   1924         destFields == NULL ||
   1925         destFieldsCapacity < 1 ) {
   1926         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1927         return 0;
   1928     }
   1929 
   1930     return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
   1931 }
   1932 
   1933 
   1934 //
   1935 //   uregex_splitUText...can just use the normal C++ method
   1936 //
   1937 U_CAPI int32_t U_EXPORT2
   1938 uregex_splitUText(URegularExpression    *regexp2,
   1939                   UText                 *destFields[],
   1940                   int32_t                destFieldsCapacity,
   1941                   UErrorCode            *status) {
   1942     RegularExpression *regexp = (RegularExpression*)regexp2;
   1943     return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
   1944 }
   1945 
   1946 
   1947 #endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS
   1948 
   1949