Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2004-2013, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  uregex.cpp
      7 */
      8 
      9 #include "unicode/utypes.h"
     10 
     11 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     12 
     13 #include "unicode/regex.h"
     14 #include "unicode/uregex.h"
     15 #include "unicode/unistr.h"
     16 #include "unicode/ustring.h"
     17 #include "unicode/uchar.h"
     18 #include "unicode/uobject.h"
     19 #include "unicode/utf16.h"
     20 #include "umutex.h"
     21 #include "uassert.h"
     22 #include "cmemory.h"
     23 
     24 #include "regextxt.h"
     25 
     26 #include <stdio.h>
     27 
     28 U_NAMESPACE_BEGIN
     29 
// Evaluates to (len - idx) when that difference is positive, otherwise 0.
// Both arguments are expanded more than once, so pass side-effect-free expressions.
#define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0)
     31 
     32 struct RegularExpression: public UMemory {
     33 public:
     34     RegularExpression();
     35     ~RegularExpression();
     36     int32_t           fMagic;
     37     RegexPattern     *fPat;
     38     u_atomic_int32_t *fPatRefCount;
     39     UChar            *fPatString;
     40     int32_t           fPatStringLen;
     41     RegexMatcher     *fMatcher;
     42     const UChar      *fText;         // Text from setText()
     43     int32_t           fTextLength;   // Length provided by user with setText(), which
     44                                      //  may be -1.
     45     UBool             fOwnsText;
     46 };
     47 
// Signature value stored in RegularExpression::fMagic by the constructor;
// validateRE() rejects any handle whose fMagic differs from it.
static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
     49 
     50 RegularExpression::RegularExpression() {
     51     fMagic        = REXP_MAGIC;
     52     fPat          = NULL;
     53     fPatRefCount  = NULL;
     54     fPatString    = NULL;
     55     fPatStringLen = 0;
     56     fMatcher      = NULL;
     57     fText         = NULL;
     58     fTextLength   = 0;
     59     fOwnsText     = FALSE;
     60 }
     61 
     62 RegularExpression::~RegularExpression() {
     63     delete fMatcher;
     64     fMatcher = NULL;
     65     if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) {
     66         delete fPat;
     67         uprv_free(fPatString);
     68         uprv_free((void *)fPatRefCount);
     69     }
     70     if (fOwnsText && fText!=NULL) {
     71         uprv_free((void *)fText);
     72     }
     73     fMagic = 0;
     74 }
     75 
     76 U_NAMESPACE_END
     77 
     78 U_NAMESPACE_USE
     79 
     80 //----------------------------------------------------------------------------------------
     81 //
     82 //   validateRE    Do boilerplate style checks on API function parameters.
     83 //                 Return TRUE if they look OK.
     84 //----------------------------------------------------------------------------------------
     85 static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) {
     86     if (U_FAILURE(*status)) {
     87         return FALSE;
     88     }
     89     if (re == NULL || re->fMagic != REXP_MAGIC) {
     90         *status = U_ILLEGAL_ARGUMENT_ERROR;
     91         return FALSE;
     92     }
     93     // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway
     94     if (requiresText && re->fText == NULL && !re->fOwnsText) {
     95         *status = U_REGEX_INVALID_STATE;
     96         return FALSE;
     97     }
     98     return TRUE;
     99 }
    100 
    101 //----------------------------------------------------------------------------------------
    102 //
    103 //    uregex_open
    104 //
    105 //----------------------------------------------------------------------------------------
    106 U_CAPI URegularExpression *  U_EXPORT2
    107 uregex_open( const  UChar          *pattern,
    108                     int32_t         patternLength,
    109                     uint32_t        flags,
    110                     UParseError    *pe,
    111                     UErrorCode     *status) {
    112 
    113     if (U_FAILURE(*status)) {
    114         return NULL;
    115     }
    116     if (pattern == NULL || patternLength < -1 || patternLength == 0) {
    117         *status = U_ILLEGAL_ARGUMENT_ERROR;
    118         return NULL;
    119     }
    120     int32_t actualPatLen = patternLength;
    121     if (actualPatLen == -1) {
    122         actualPatLen = u_strlen(pattern);
    123     }
    124 
    125     RegularExpression  *re     = new RegularExpression;
    126     u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
    127     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1));
    128     if (re == NULL || refC == NULL || patBuf == NULL) {
    129         *status = U_MEMORY_ALLOCATION_ERROR;
    130         delete re;
    131         uprv_free((void *)refC);
    132         uprv_free(patBuf);
    133         return NULL;
    134     }
    135     re->fPatRefCount = refC;
    136     *re->fPatRefCount = 1;
    137 
    138     //
    139     // Make a copy of the pattern string, so we can return it later if asked.
    140     //    For compiling the pattern, we will use a UText wrapper around
    141     //    this local copy, to avoid making even more copies.
    142     //
    143     re->fPatString    = patBuf;
    144     re->fPatStringLen = patternLength;
    145     u_memcpy(patBuf, pattern, actualPatLen);
    146     patBuf[actualPatLen] = 0;
    147 
    148     UText patText = UTEXT_INITIALIZER;
    149     utext_openUChars(&patText, patBuf, patternLength, status);
    150 
    151     //
    152     // Compile the pattern
    153     //
    154     if (pe != NULL) {
    155         re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
    156     } else {
    157         re->fPat = RegexPattern::compile(&patText, flags, *status);
    158     }
    159     utext_close(&patText);
    160 
    161     if (U_FAILURE(*status)) {
    162         goto ErrorExit;
    163     }
    164 
    165     //
    166     // Create the matcher object
    167     //
    168     re->fMatcher = re->fPat->matcher(*status);
    169     if (U_SUCCESS(*status)) {
    170         return (URegularExpression*)re;
    171     }
    172 
    173 ErrorExit:
    174     delete re;
    175     return NULL;
    176 
    177 }
    178 
    179 //----------------------------------------------------------------------------------------
    180 //
    181 //    uregex_openUText
    182 //
    183 //----------------------------------------------------------------------------------------
    184 U_CAPI URegularExpression *  U_EXPORT2
    185 uregex_openUText(UText          *pattern,
    186                  uint32_t        flags,
    187                  UParseError    *pe,
    188                  UErrorCode     *status) {
    189 
    190     if (U_FAILURE(*status)) {
    191         return NULL;
    192     }
    193     if (pattern == NULL) {
    194         *status = U_ILLEGAL_ARGUMENT_ERROR;
    195         return NULL;
    196     }
    197 
    198     int64_t patternNativeLength = utext_nativeLength(pattern);
    199 
    200     if (patternNativeLength == 0) {
    201         *status = U_ILLEGAL_ARGUMENT_ERROR;
    202         return NULL;
    203     }
    204 
    205     RegularExpression *re     = new RegularExpression;
    206 
    207     UErrorCode lengthStatus = U_ZERO_ERROR;
    208     int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
    209 
    210     u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
    211     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
    212     if (re == NULL || refC == NULL || patBuf == NULL) {
    213         *status = U_MEMORY_ALLOCATION_ERROR;
    214         delete re;
    215         uprv_free((void *)refC);
    216         uprv_free(patBuf);
    217         return NULL;
    218     }
    219     re->fPatRefCount = refC;
    220     *re->fPatRefCount = 1;
    221 
    222     //
    223     // Make a copy of the pattern string, so we can return it later if asked.
    224     //    For compiling the pattern, we will use a read-only UText wrapper
    225     //    around this local copy, to avoid making even more copies.
    226     //
    227     re->fPatString    = patBuf;
    228     re->fPatStringLen = pattern16Length;
    229     utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
    230 
    231     UText patText = UTEXT_INITIALIZER;
    232     utext_openUChars(&patText, patBuf, pattern16Length, status);
    233 
    234     //
    235     // Compile the pattern
    236     //
    237     if (pe != NULL) {
    238         re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
    239     } else {
    240         re->fPat = RegexPattern::compile(&patText, flags, *status);
    241     }
    242     utext_close(&patText);
    243 
    244     if (U_FAILURE(*status)) {
    245         goto ErrorExit;
    246     }
    247 
    248     //
    249     // Create the matcher object
    250     //
    251     re->fMatcher = re->fPat->matcher(*status);
    252     if (U_SUCCESS(*status)) {
    253         return (URegularExpression*)re;
    254     }
    255 
    256 ErrorExit:
    257     delete re;
    258     return NULL;
    259 
    260 }
    261 
    262 //----------------------------------------------------------------------------------------
    263 //
    264 //    uregex_close
    265 //
    266 //----------------------------------------------------------------------------------------
    267 U_CAPI void  U_EXPORT2
    268 uregex_close(URegularExpression  *re2) {
    269     RegularExpression *re = (RegularExpression*)re2;
    270     UErrorCode  status = U_ZERO_ERROR;
    271     if (validateRE(re, FALSE, &status) == FALSE) {
    272         return;
    273     }
    274     delete re;
    275 }
    276 
    277 
    278 //----------------------------------------------------------------------------------------
    279 //
    280 //    uregex_clone
    281 //
    282 //----------------------------------------------------------------------------------------
    283 U_CAPI URegularExpression * U_EXPORT2
    284 uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
    285     RegularExpression *source = (RegularExpression*)source2;
    286     if (validateRE(source, FALSE, status) == FALSE) {
    287         return NULL;
    288     }
    289 
    290     RegularExpression *clone = new RegularExpression;
    291     if (clone == NULL) {
    292         *status = U_MEMORY_ALLOCATION_ERROR;
    293         return NULL;
    294     }
    295 
    296     clone->fMatcher = source->fPat->matcher(*status);
    297     if (U_FAILURE(*status)) {
    298         delete clone;
    299         return NULL;
    300     }
    301 
    302     clone->fPat          = source->fPat;
    303     clone->fPatRefCount  = source->fPatRefCount;
    304     clone->fPatString    = source->fPatString;
    305     clone->fPatStringLen = source->fPatStringLen;
    306     umtx_atomic_inc(source->fPatRefCount);
    307     // Note:  fText is not cloned.
    308 
    309     return (URegularExpression*)clone;
    310 }
    311 
    312 
    313 
    314 
    315 //------------------------------------------------------------------------------
    316 //
    317 //    uregex_pattern
    318 //
    319 //------------------------------------------------------------------------------
    320 U_CAPI const UChar * U_EXPORT2
    321 uregex_pattern(const  URegularExpression *regexp2,
    322                       int32_t            *patLength,
    323                       UErrorCode         *status)  {
    324     RegularExpression *regexp = (RegularExpression*)regexp2;
    325 
    326     if (validateRE(regexp, FALSE, status) == FALSE) {
    327         return NULL;
    328     }
    329     if (patLength != NULL) {
    330         *patLength = regexp->fPatStringLen;
    331     }
    332     return regexp->fPatString;
    333 }
    334 
    335 
    336 //------------------------------------------------------------------------------
    337 //
    338 //    uregex_patternUText
    339 //
    340 //------------------------------------------------------------------------------
    341 U_CAPI UText * U_EXPORT2
    342 uregex_patternUText(const URegularExpression *regexp2,
    343                           UErrorCode         *status)  {
    344     RegularExpression *regexp = (RegularExpression*)regexp2;
    345     return regexp->fPat->patternText(*status);
    346 }
    347 
    348 
    349 //------------------------------------------------------------------------------
    350 //
    351 //    uregex_flags
    352 //
    353 //------------------------------------------------------------------------------
    354 U_CAPI int32_t U_EXPORT2
    355 uregex_flags(const URegularExpression *regexp2, UErrorCode *status)  {
    356     RegularExpression *regexp = (RegularExpression*)regexp2;
    357     if (validateRE(regexp, FALSE, status) == FALSE) {
    358         return 0;
    359     }
    360     int32_t flags = regexp->fPat->flags();
    361     return flags;
    362 }
    363 
    364 
    365 //------------------------------------------------------------------------------
    366 //
    367 //    uregex_setText
    368 //
    369 //------------------------------------------------------------------------------
    370 U_CAPI void U_EXPORT2
    371 uregex_setText(URegularExpression *regexp2,
    372                const UChar        *text,
    373                int32_t             textLength,
    374                UErrorCode         *status)  {
    375     RegularExpression *regexp = (RegularExpression*)regexp2;
    376     if (validateRE(regexp, FALSE, status) == FALSE) {
    377         return;
    378     }
    379     if (text == NULL || textLength < -1) {
    380         *status = U_ILLEGAL_ARGUMENT_ERROR;
    381         return;
    382     }
    383 
    384     if (regexp->fOwnsText && regexp->fText != NULL) {
    385         uprv_free((void *)regexp->fText);
    386     }
    387 
    388     regexp->fText       = text;
    389     regexp->fTextLength = textLength;
    390     regexp->fOwnsText   = FALSE;
    391 
    392     UText input = UTEXT_INITIALIZER;
    393     utext_openUChars(&input, text, textLength, status);
    394     regexp->fMatcher->reset(&input);
    395     utext_close(&input); // reset() made a shallow clone, so we don't need this copy
    396 }
    397 
    398 
    399 //------------------------------------------------------------------------------
    400 //
    401 //    uregex_setUText
    402 //
    403 //------------------------------------------------------------------------------
    404 U_CAPI void U_EXPORT2
    405 uregex_setUText(URegularExpression *regexp2,
    406                 UText              *text,
    407                 UErrorCode         *status) {
    408     RegularExpression *regexp = (RegularExpression*)regexp2;
    409     if (validateRE(regexp, FALSE, status) == FALSE) {
    410         return;
    411     }
    412     if (text == NULL) {
    413         *status = U_ILLEGAL_ARGUMENT_ERROR;
    414         return;
    415     }
    416 
    417     if (regexp->fOwnsText && regexp->fText != NULL) {
    418         uprv_free((void *)regexp->fText);
    419     }
    420 
    421     regexp->fText       = NULL; // only fill it in on request
    422     regexp->fTextLength = -1;
    423     regexp->fOwnsText   = TRUE;
    424     regexp->fMatcher->reset(text);
    425 }
    426 
    427 
    428 
    429 //------------------------------------------------------------------------------
    430 //
    431 //    uregex_getText
    432 //
    433 //------------------------------------------------------------------------------
    434 U_CAPI const UChar * U_EXPORT2
    435 uregex_getText(URegularExpression *regexp2,
    436                int32_t            *textLength,
    437                UErrorCode         *status)  {
    438     RegularExpression *regexp = (RegularExpression*)regexp2;
    439     if (validateRE(regexp, FALSE, status) == FALSE) {
    440         return NULL;
    441     }
    442 
    443     if (regexp->fText == NULL) {
    444         // need to fill in the text
    445         UText *inputText = regexp->fMatcher->inputText();
    446         int64_t inputNativeLength = utext_nativeLength(inputText);
    447         if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
    448             regexp->fText = inputText->chunkContents;
    449             regexp->fTextLength = (int32_t)inputNativeLength;
    450             regexp->fOwnsText = FALSE; // because the UText owns it
    451         } else {
    452             UErrorCode lengthStatus = U_ZERO_ERROR;
    453             regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error
    454             UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1));
    455 
    456             utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
    457             regexp->fText = inputChars;
    458             regexp->fOwnsText = TRUE; // should already be set but just in case
    459         }
    460     }
    461 
    462     if (textLength != NULL) {
    463         *textLength = regexp->fTextLength;
    464     }
    465     return regexp->fText;
    466 }
    467 
    468 
    469 //------------------------------------------------------------------------------
    470 //
    471 //    uregex_getUText
    472 //
    473 //------------------------------------------------------------------------------
    474 U_CAPI UText * U_EXPORT2
    475 uregex_getUText(URegularExpression *regexp2,
    476                 UText              *dest,
    477                 UErrorCode         *status)  {
    478     RegularExpression *regexp = (RegularExpression*)regexp2;
    479     if (validateRE(regexp, FALSE, status) == FALSE) {
    480         return dest;
    481     }
    482     return regexp->fMatcher->getInput(dest, *status);
    483 }
    484 
    485 
    486 //------------------------------------------------------------------------------
    487 //
    488 //    uregex_refreshUText
    489 //
    490 //------------------------------------------------------------------------------
    491 U_CAPI void U_EXPORT2
    492 uregex_refreshUText(URegularExpression *regexp2,
    493                     UText              *text,
    494                     UErrorCode         *status) {
    495     RegularExpression *regexp = (RegularExpression*)regexp2;
    496     if (validateRE(regexp, FALSE, status) == FALSE) {
    497         return;
    498     }
    499     regexp->fMatcher->refreshInputText(text, *status);
    500 }
    501 
    502 
    503 //------------------------------------------------------------------------------
    504 //
    505 //    uregex_matches
    506 //
    507 //------------------------------------------------------------------------------
    508 U_CAPI UBool U_EXPORT2
    509 uregex_matches(URegularExpression *regexp2,
    510                int32_t            startIndex,
    511                UErrorCode        *status)  {
    512     return uregex_matches64( regexp2, (int64_t)startIndex, status);
    513 }
    514 
    515 U_CAPI UBool U_EXPORT2
    516 uregex_matches64(URegularExpression *regexp2,
    517                  int64_t            startIndex,
    518                  UErrorCode        *status)  {
    519     RegularExpression *regexp = (RegularExpression*)regexp2;
    520     UBool result = FALSE;
    521     if (validateRE(regexp, TRUE, status) == FALSE) {
    522         return result;
    523     }
    524     if (startIndex == -1) {
    525         result = regexp->fMatcher->matches(*status);
    526     } else {
    527         result = regexp->fMatcher->matches(startIndex, *status);
    528     }
    529     return result;
    530 }
    531 
    532 
    533 //------------------------------------------------------------------------------
    534 //
    535 //    uregex_lookingAt
    536 //
    537 //------------------------------------------------------------------------------
    538 U_CAPI UBool U_EXPORT2
    539 uregex_lookingAt(URegularExpression *regexp2,
    540                  int32_t             startIndex,
    541                  UErrorCode         *status)  {
    542     return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
    543 }
    544 
    545 U_CAPI UBool U_EXPORT2
    546 uregex_lookingAt64(URegularExpression *regexp2,
    547                    int64_t             startIndex,
    548                    UErrorCode         *status)  {
    549     RegularExpression *regexp = (RegularExpression*)regexp2;
    550     UBool result = FALSE;
    551     if (validateRE(regexp, TRUE, status) == FALSE) {
    552         return result;
    553     }
    554     if (startIndex == -1) {
    555         result = regexp->fMatcher->lookingAt(*status);
    556     } else {
    557         result = regexp->fMatcher->lookingAt(startIndex, *status);
    558     }
    559     return result;
    560 }
    561 
    562 
    563 
    564 //------------------------------------------------------------------------------
    565 //
    566 //    uregex_find
    567 //
    568 //------------------------------------------------------------------------------
    569 U_CAPI UBool U_EXPORT2
    570 uregex_find(URegularExpression *regexp2,
    571             int32_t             startIndex,
    572             UErrorCode         *status)  {
    573     return uregex_find64( regexp2, (int64_t)startIndex, status);
    574 }
    575 
    576 U_CAPI UBool U_EXPORT2
    577 uregex_find64(URegularExpression *regexp2,
    578               int64_t             startIndex,
    579               UErrorCode         *status)  {
    580     RegularExpression *regexp = (RegularExpression*)regexp2;
    581     UBool result = FALSE;
    582     if (validateRE(regexp, TRUE, status) == FALSE) {
    583         return result;
    584     }
    585     if (startIndex == -1) {
    586         regexp->fMatcher->resetPreserveRegion();
    587         result = regexp->fMatcher->find();
    588     } else {
    589         result = regexp->fMatcher->find(startIndex, *status);
    590     }
    591     return result;
    592 }
    593 
    594 
    595 //------------------------------------------------------------------------------
    596 //
    597 //    uregex_findNext
    598 //
    599 //------------------------------------------------------------------------------
    600 U_CAPI UBool U_EXPORT2
    601 uregex_findNext(URegularExpression *regexp2,
    602                 UErrorCode         *status)  {
    603     RegularExpression *regexp = (RegularExpression*)regexp2;
    604     if (validateRE(regexp, TRUE, status) == FALSE) {
    605         return FALSE;
    606     }
    607     UBool result = regexp->fMatcher->find();
    608     return result;
    609 }
    610 
    611 //------------------------------------------------------------------------------
    612 //
    613 //    uregex_groupCount
    614 //
    615 //------------------------------------------------------------------------------
    616 U_CAPI int32_t U_EXPORT2
    617 uregex_groupCount(URegularExpression *regexp2,
    618                   UErrorCode         *status)  {
    619     RegularExpression *regexp = (RegularExpression*)regexp2;
    620     if (validateRE(regexp, FALSE, status) == FALSE) {
    621         return 0;
    622     }
    623     int32_t  result = regexp->fMatcher->groupCount();
    624     return result;
    625 }
    626 
    627 
    628 //------------------------------------------------------------------------------
    629 //
    630 //    uregex_group
    631 //
    632 //------------------------------------------------------------------------------
    633 U_CAPI int32_t U_EXPORT2
    634 uregex_group(URegularExpression *regexp2,
    635              int32_t             groupNum,
    636              UChar              *dest,
    637              int32_t             destCapacity,
    638              UErrorCode          *status)  {
    639     RegularExpression *regexp = (RegularExpression*)regexp2;
    640     if (validateRE(regexp, TRUE, status) == FALSE) {
    641         return 0;
    642     }
    643     if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
    644         *status = U_ILLEGAL_ARGUMENT_ERROR;
    645         return 0;
    646     }
    647 
    648     if (destCapacity == 0 || regexp->fText != NULL) {
    649         // If preflighting or if we already have the text as UChars,
    650         // this is a little cheaper than going through uregex_groupUTextDeep()
    651 
    652         //
    653         // Pick up the range of characters from the matcher
    654         //
    655         int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
    656         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
    657         if (U_FAILURE(*status)) {
    658             return 0;
    659         }
    660 
    661         //
    662         // Trim length based on buffer capacity
    663         //
    664         int32_t fullLength = endIx - startIx;
    665         int32_t copyLength = fullLength;
    666         if (copyLength < destCapacity) {
    667             dest[copyLength] = 0;
    668         } else if (copyLength == destCapacity) {
    669             *status = U_STRING_NOT_TERMINATED_WARNING;
    670         } else {
    671             copyLength = destCapacity;
    672             *status = U_BUFFER_OVERFLOW_ERROR;
    673         }
    674 
    675         //
    676         // Copy capture group to user's buffer
    677         //
    678         if (copyLength > 0) {
    679             u_memcpy(dest, &regexp->fText[startIx], copyLength);
    680         }
    681         return fullLength;
    682     } else {
    683         int32_t result = 0;
    684         UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status);
    685         if (U_SUCCESS(*status)) {
    686             result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status);
    687         }
    688         utext_close(groupText);
    689         return result;
    690     }
    691 }
    692 
    693 
    694 //------------------------------------------------------------------------------
    695 //
    696 //    uregex_groupUText
    697 //
    698 //------------------------------------------------------------------------------
    699 U_CAPI UText * U_EXPORT2
    700 uregex_groupUText(URegularExpression *regexp2,
    701                   int32_t             groupNum,
    702                   UText              *dest,
    703                   int64_t            *groupLength,
    704                   UErrorCode         *status)  {
    705     RegularExpression *regexp = (RegularExpression*)regexp2;
    706     if (validateRE(regexp, TRUE, status) == FALSE) {
    707         UErrorCode emptyTextStatus = U_ZERO_ERROR;
    708         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
    709     }
    710 
    711     return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
    712 }
    713 
    714 //------------------------------------------------------------------------------
    715 //
    716 //    uregex_groupUTextDeep
    717 //
    718 //------------------------------------------------------------------------------
    719 U_CAPI UText * U_EXPORT2
    720 uregex_groupUTextDeep(URegularExpression *regexp2,
    721                   int32_t             groupNum,
    722                   UText              *dest,
    723                   UErrorCode         *status)  {
    724     RegularExpression *regexp = (RegularExpression*)regexp2;
    725     if (validateRE(regexp, TRUE, status) == FALSE) {
    726         UErrorCode emptyTextStatus = U_ZERO_ERROR;
    727         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
    728     }
    729 
    730     if (regexp->fText != NULL) {
    731         //
    732         // Pick up the range of characters from the matcher
    733         // and use our already-extracted characters
    734         //
    735         int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
    736         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
    737         if (U_FAILURE(*status)) {
    738             UErrorCode emptyTextStatus = U_ZERO_ERROR;
    739             return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
    740         }
    741 
    742         if (dest) {
    743             utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status);
    744         } else {
    745             UText groupText = UTEXT_INITIALIZER;
    746             utext_openUChars(&groupText, &regexp->fText[startIx], endIx - startIx, status);
    747             dest = utext_clone(NULL, &groupText, TRUE, FALSE, status);
    748             utext_close(&groupText);
    749         }
    750 
    751         return dest;
    752     } else {
    753         return regexp->fMatcher->group(groupNum, dest, *status);
    754     }
    755 }
    756 
    757 //------------------------------------------------------------------------------
    758 //
    759 //    uregex_start
    760 //
    761 //------------------------------------------------------------------------------
    762 U_CAPI int32_t U_EXPORT2
    763 uregex_start(URegularExpression *regexp2,
    764              int32_t             groupNum,
    765              UErrorCode          *status)  {
    766     return (int32_t)uregex_start64( regexp2, groupNum, status);
    767 }
    768 
    769 U_CAPI int64_t U_EXPORT2
    770 uregex_start64(URegularExpression *regexp2,
    771                int32_t             groupNum,
    772                UErrorCode          *status)  {
    773     RegularExpression *regexp = (RegularExpression*)regexp2;
    774     if (validateRE(regexp, TRUE, status) == FALSE) {
    775         return 0;
    776     }
    777     int32_t result = regexp->fMatcher->start(groupNum, *status);
    778     return result;
    779 }
    780 
    781 //------------------------------------------------------------------------------
    782 //
    783 //    uregex_end
    784 //
    785 //------------------------------------------------------------------------------
    786 U_CAPI int32_t U_EXPORT2
    787 uregex_end(URegularExpression   *regexp2,
    788            int32_t               groupNum,
    789            UErrorCode           *status)  {
    790     return (int32_t)uregex_end64( regexp2, groupNum, status);
    791 }
    792 
    793 U_CAPI int64_t U_EXPORT2
    794 uregex_end64(URegularExpression   *regexp2,
    795              int32_t               groupNum,
    796              UErrorCode           *status)  {
    797     RegularExpression *regexp = (RegularExpression*)regexp2;
    798     if (validateRE(regexp, TRUE, status) == FALSE) {
    799         return 0;
    800     }
    801     int32_t result = regexp->fMatcher->end(groupNum, *status);
    802     return result;
    803 }
    804 
    805 //------------------------------------------------------------------------------
    806 //
    807 //    uregex_reset
    808 //
    809 //------------------------------------------------------------------------------
    810 U_CAPI void U_EXPORT2
    811 uregex_reset(URegularExpression    *regexp2,
    812              int32_t               index,
    813              UErrorCode            *status)  {
    814     uregex_reset64( regexp2, (int64_t)index, status);
    815 }
    816 
    817 U_CAPI void U_EXPORT2
    818 uregex_reset64(URegularExpression    *regexp2,
    819                int64_t               index,
    820                UErrorCode            *status)  {
    821     RegularExpression *regexp = (RegularExpression*)regexp2;
    822     if (validateRE(regexp, TRUE, status) == FALSE) {
    823         return;
    824     }
    825     regexp->fMatcher->reset(index, *status);
    826 }
    827 
    828 
    829 //------------------------------------------------------------------------------
    830 //
    831 //    uregex_setRegion
    832 //
    833 //------------------------------------------------------------------------------
    834 U_CAPI void U_EXPORT2
    835 uregex_setRegion(URegularExpression   *regexp2,
    836                  int32_t               regionStart,
    837                  int32_t               regionLimit,
    838                  UErrorCode           *status)  {
    839     uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
    840 }
    841 
    842 U_CAPI void U_EXPORT2
    843 uregex_setRegion64(URegularExpression   *regexp2,
    844                    int64_t               regionStart,
    845                    int64_t               regionLimit,
    846                    UErrorCode           *status)  {
    847     RegularExpression *regexp = (RegularExpression*)regexp2;
    848     if (validateRE(regexp, TRUE, status) == FALSE) {
    849         return;
    850     }
    851     regexp->fMatcher->region(regionStart, regionLimit, *status);
    852 }
    853 
    854 
    855 //------------------------------------------------------------------------------
    856 //
    857 //    uregex_setRegionAndStart
    858 //
    859 //------------------------------------------------------------------------------
    860 U_CAPI void U_EXPORT2
    861 uregex_setRegionAndStart(URegularExpression   *regexp2,
    862                  int64_t               regionStart,
    863                  int64_t               regionLimit,
    864                  int64_t               startIndex,
    865                  UErrorCode           *status)  {
    866     RegularExpression *regexp = (RegularExpression*)regexp2;
    867     if (validateRE(regexp, TRUE, status) == FALSE) {
    868         return;
    869     }
    870     regexp->fMatcher->region(regionStart, regionLimit, startIndex, *status);
    871 }
    872 
    873 //------------------------------------------------------------------------------
    874 //
    875 //    uregex_regionStart
    876 //
    877 //------------------------------------------------------------------------------
    878 U_CAPI int32_t U_EXPORT2
    879 uregex_regionStart(const  URegularExpression   *regexp2,
    880                           UErrorCode           *status)  {
    881     return (int32_t)uregex_regionStart64(regexp2, status);
    882 }
    883 
    884 U_CAPI int64_t U_EXPORT2
    885 uregex_regionStart64(const  URegularExpression   *regexp2,
    886                             UErrorCode           *status)  {
    887     RegularExpression *regexp = (RegularExpression*)regexp2;
    888     if (validateRE(regexp, TRUE, status) == FALSE) {
    889         return 0;
    890     }
    891     return regexp->fMatcher->regionStart();
    892 }
    893 
    894 
    895 //------------------------------------------------------------------------------
    896 //
    897 //    uregex_regionEnd
    898 //
    899 //------------------------------------------------------------------------------
    900 U_CAPI int32_t U_EXPORT2
    901 uregex_regionEnd(const  URegularExpression   *regexp2,
    902                         UErrorCode           *status)  {
    903     return (int32_t)uregex_regionEnd64(regexp2, status);
    904 }
    905 
    906 U_CAPI int64_t U_EXPORT2
    907 uregex_regionEnd64(const  URegularExpression   *regexp2,
    908                           UErrorCode           *status)  {
    909     RegularExpression *regexp = (RegularExpression*)regexp2;
    910     if (validateRE(regexp, TRUE, status) == FALSE) {
    911         return 0;
    912     }
    913     return regexp->fMatcher->regionEnd();
    914 }
    915 
    916 
    917 //------------------------------------------------------------------------------
    918 //
    919 //    uregex_hasTransparentBounds
    920 //
    921 //------------------------------------------------------------------------------
    922 U_CAPI UBool U_EXPORT2
    923 uregex_hasTransparentBounds(const  URegularExpression   *regexp2,
    924                                    UErrorCode           *status)  {
    925     RegularExpression *regexp = (RegularExpression*)regexp2;
    926     if (validateRE(regexp, FALSE, status) == FALSE) {
    927         return FALSE;
    928     }
    929     return regexp->fMatcher->hasTransparentBounds();
    930 }
    931 
    932 
    933 //------------------------------------------------------------------------------
    934 //
    935 //    uregex_useTransparentBounds
    936 //
    937 //------------------------------------------------------------------------------
    938 U_CAPI void U_EXPORT2
    939 uregex_useTransparentBounds(URegularExpression    *regexp2,
    940                             UBool                  b,
    941                             UErrorCode            *status)  {
    942     RegularExpression *regexp = (RegularExpression*)regexp2;
    943     if (validateRE(regexp, FALSE, status) == FALSE) {
    944         return;
    945     }
    946     regexp->fMatcher->useTransparentBounds(b);
    947 }
    948 
    949 
    950 //------------------------------------------------------------------------------
    951 //
    952 //    uregex_hasAnchoringBounds
    953 //
    954 //------------------------------------------------------------------------------
    955 U_CAPI UBool U_EXPORT2
    956 uregex_hasAnchoringBounds(const  URegularExpression   *regexp2,
    957                                  UErrorCode           *status)  {
    958     RegularExpression *regexp = (RegularExpression*)regexp2;
    959     if (validateRE(regexp, FALSE, status) == FALSE) {
    960         return FALSE;
    961     }
    962     return regexp->fMatcher->hasAnchoringBounds();
    963 }
    964 
    965 
    966 //------------------------------------------------------------------------------
    967 //
    968 //    uregex_useAnchoringBounds
    969 //
    970 //------------------------------------------------------------------------------
    971 U_CAPI void U_EXPORT2
    972 uregex_useAnchoringBounds(URegularExpression    *regexp2,
    973                           UBool                  b,
    974                           UErrorCode            *status)  {
    975     RegularExpression *regexp = (RegularExpression*)regexp2;
    976     if (validateRE(regexp, FALSE, status) == FALSE) {
    977         return;
    978     }
    979     regexp->fMatcher->useAnchoringBounds(b);
    980 }
    981 
    982 
    983 //------------------------------------------------------------------------------
    984 //
    985 //    uregex_hitEnd
    986 //
    987 //------------------------------------------------------------------------------
    988 U_CAPI UBool U_EXPORT2
    989 uregex_hitEnd(const  URegularExpression   *regexp2,
    990                      UErrorCode           *status)  {
    991     RegularExpression *regexp = (RegularExpression*)regexp2;
    992     if (validateRE(regexp, TRUE, status) == FALSE) {
    993         return FALSE;
    994     }
    995     return regexp->fMatcher->hitEnd();
    996 }
    997 
    998 
    999 //------------------------------------------------------------------------------
   1000 //
   1001 //    uregex_requireEnd
   1002 //
   1003 //------------------------------------------------------------------------------
   1004 U_CAPI UBool U_EXPORT2
   1005 uregex_requireEnd(const  URegularExpression   *regexp2,
   1006                          UErrorCode           *status)  {
   1007     RegularExpression *regexp = (RegularExpression*)regexp2;
   1008     if (validateRE(regexp, TRUE, status) == FALSE) {
   1009         return FALSE;
   1010     }
   1011     return regexp->fMatcher->requireEnd();
   1012 }
   1013 
   1014 
   1015 //------------------------------------------------------------------------------
   1016 //
   1017 //    uregex_setTimeLimit
   1018 //
   1019 //------------------------------------------------------------------------------
   1020 U_CAPI void U_EXPORT2
   1021 uregex_setTimeLimit(URegularExpression   *regexp2,
   1022                     int32_t               limit,
   1023                     UErrorCode           *status) {
   1024     RegularExpression *regexp = (RegularExpression*)regexp2;
   1025     if (validateRE(regexp, FALSE, status)) {
   1026         regexp->fMatcher->setTimeLimit(limit, *status);
   1027     }
   1028 }
   1029 
   1030 
   1031 
   1032 //------------------------------------------------------------------------------
   1033 //
   1034 //    uregex_getTimeLimit
   1035 //
   1036 //------------------------------------------------------------------------------
   1037 U_CAPI int32_t U_EXPORT2
   1038 uregex_getTimeLimit(const  URegularExpression   *regexp2,
   1039                            UErrorCode           *status) {
   1040     int32_t retVal = 0;
   1041     RegularExpression *regexp = (RegularExpression*)regexp2;
   1042     if (validateRE(regexp, FALSE, status)) {
   1043         retVal = regexp->fMatcher->getTimeLimit();
   1044     }
   1045     return retVal;
   1046 }
   1047 
   1048 
   1049 
   1050 //------------------------------------------------------------------------------
   1051 //
   1052 //    uregex_setStackLimit
   1053 //
   1054 //------------------------------------------------------------------------------
   1055 U_CAPI void U_EXPORT2
   1056 uregex_setStackLimit(URegularExpression   *regexp2,
   1057                      int32_t               limit,
   1058                      UErrorCode           *status) {
   1059     RegularExpression *regexp = (RegularExpression*)regexp2;
   1060     if (validateRE(regexp, FALSE, status)) {
   1061         regexp->fMatcher->setStackLimit(limit, *status);
   1062     }
   1063 }
   1064 
   1065 
   1066 
   1067 //------------------------------------------------------------------------------
   1068 //
   1069 //    uregex_getStackLimit
   1070 //
   1071 //------------------------------------------------------------------------------
   1072 U_CAPI int32_t U_EXPORT2
   1073 uregex_getStackLimit(const  URegularExpression   *regexp2,
   1074                             UErrorCode           *status) {
   1075     int32_t retVal = 0;
   1076     RegularExpression *regexp = (RegularExpression*)regexp2;
   1077     if (validateRE(regexp, FALSE, status)) {
   1078         retVal = regexp->fMatcher->getStackLimit();
   1079     }
   1080     return retVal;
   1081 }
   1082 
   1083 
   1084 //------------------------------------------------------------------------------
   1085 //
   1086 //    uregex_setMatchCallback
   1087 //
   1088 //------------------------------------------------------------------------------
   1089 U_CAPI void U_EXPORT2
   1090 uregex_setMatchCallback(URegularExpression      *regexp2,
   1091                         URegexMatchCallback     *callback,
   1092                         const void              *context,
   1093                         UErrorCode              *status) {
   1094     RegularExpression *regexp = (RegularExpression*)regexp2;
   1095     if (validateRE(regexp, FALSE, status)) {
   1096         regexp->fMatcher->setMatchCallback(callback, context, *status);
   1097     }
   1098 }
   1099 
   1100 
   1101 //------------------------------------------------------------------------------
   1102 //
   1103 //    uregex_getMatchCallback
   1104 //
   1105 //------------------------------------------------------------------------------
   1106 U_CAPI void U_EXPORT2
   1107 uregex_getMatchCallback(const URegularExpression    *regexp2,
   1108                         URegexMatchCallback        **callback,
   1109                         const void                 **context,
   1110                         UErrorCode                  *status) {
   1111     RegularExpression *regexp = (RegularExpression*)regexp2;
   1112      if (validateRE(regexp, FALSE, status)) {
   1113          regexp->fMatcher->getMatchCallback(*callback, *context, *status);
   1114      }
   1115 }
   1116 
   1117 
   1118 //------------------------------------------------------------------------------
   1119 //
//    uregex_setFindProgressCallback
   1121 //
   1122 //------------------------------------------------------------------------------
   1123 U_CAPI void U_EXPORT2
   1124 uregex_setFindProgressCallback(URegularExpression              *regexp2,
   1125                                 URegexFindProgressCallback      *callback,
   1126                                 const void                      *context,
   1127                                 UErrorCode                      *status) {
   1128     RegularExpression *regexp = (RegularExpression*)regexp2;
   1129     if (validateRE(regexp, FALSE, status)) {
   1130         regexp->fMatcher->setFindProgressCallback(callback, context, *status);
   1131     }
   1132 }
   1133 
   1134 
   1135 //------------------------------------------------------------------------------
   1136 //
//    uregex_getFindProgressCallback
   1138 //
   1139 //------------------------------------------------------------------------------
   1140 U_CAPI void U_EXPORT2
   1141 uregex_getFindProgressCallback(const URegularExpression          *regexp2,
   1142                                 URegexFindProgressCallback        **callback,
   1143                                 const void                        **context,
   1144                                 UErrorCode                        *status) {
   1145     RegularExpression *regexp = (RegularExpression*)regexp2;
   1146      if (validateRE(regexp, FALSE, status)) {
   1147          regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
   1148      }
   1149 }
   1150 
   1151 
   1152 //------------------------------------------------------------------------------
   1153 //
   1154 //    uregex_replaceAll
   1155 //
   1156 //------------------------------------------------------------------------------
   1157 U_CAPI int32_t U_EXPORT2
   1158 uregex_replaceAll(URegularExpression    *regexp2,
   1159                   const UChar           *replacementText,
   1160                   int32_t                replacementLength,
   1161                   UChar                 *destBuf,
   1162                   int32_t                destCapacity,
   1163                   UErrorCode            *status)  {
   1164     RegularExpression *regexp = (RegularExpression*)regexp2;
   1165     if (validateRE(regexp, TRUE, status) == FALSE) {
   1166         return 0;
   1167     }
   1168     if (replacementText == NULL || replacementLength < -1 ||
   1169         (destBuf == NULL && destCapacity > 0) ||
   1170         destCapacity < 0) {
   1171         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1172         return 0;
   1173     }
   1174 
   1175     int32_t   len = 0;
   1176 
   1177     uregex_reset(regexp2, 0, status);
   1178 
   1179     // Note: Seperate error code variables for findNext() and appendReplacement()
   1180     //       are used so that destination buffer overflow errors
   1181     //       in appendReplacement won't stop findNext() from working.
   1182     //       appendReplacement() and appendTail() special case incoming buffer
   1183     //       overflow errors, continuing to return the correct length.
   1184     UErrorCode  findStatus = *status;
   1185     while (uregex_findNext(regexp2, &findStatus)) {
   1186         len += uregex_appendReplacement(regexp2, replacementText, replacementLength,
   1187                                         &destBuf, &destCapacity, status);
   1188     }
   1189     len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
   1190 
   1191     if (U_FAILURE(findStatus)) {
   1192         // If anything went wrong with the findNext(), make that error trump
   1193         //   whatever may have happened with the append() operations.
   1194         //   Errors in findNext() are not expected.
   1195         *status = findStatus;
   1196     }
   1197 
   1198     return len;
   1199 }
   1200 
   1201 
   1202 //------------------------------------------------------------------------------
   1203 //
   1204 //    uregex_replaceAllUText
   1205 //
   1206 //------------------------------------------------------------------------------
   1207 U_CAPI UText * U_EXPORT2
   1208 uregex_replaceAllUText(URegularExpression    *regexp2,
   1209                        UText                 *replacementText,
   1210                        UText                 *dest,
   1211                        UErrorCode            *status)  {
   1212     RegularExpression *regexp = (RegularExpression*)regexp2;
   1213     if (validateRE(regexp, TRUE, status) == FALSE) {
   1214         return 0;
   1215     }
   1216     if (replacementText == NULL) {
   1217         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1218         return 0;
   1219     }
   1220 
   1221     dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
   1222     return dest;
   1223 }
   1224 
   1225 
   1226 //------------------------------------------------------------------------------
   1227 //
   1228 //    uregex_replaceFirst
   1229 //
   1230 //------------------------------------------------------------------------------
   1231 U_CAPI int32_t U_EXPORT2
   1232 uregex_replaceFirst(URegularExpression  *regexp2,
   1233                     const UChar         *replacementText,
   1234                     int32_t              replacementLength,
   1235                     UChar               *destBuf,
   1236                     int32_t              destCapacity,
   1237                     UErrorCode          *status)  {
   1238     RegularExpression *regexp = (RegularExpression*)regexp2;
   1239     if (validateRE(regexp, TRUE, status) == FALSE) {
   1240         return 0;
   1241     }
   1242     if (replacementText == NULL || replacementLength < -1 ||
   1243         (destBuf == NULL && destCapacity > 0) ||
   1244         destCapacity < 0) {
   1245         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1246         return 0;
   1247     }
   1248 
   1249     int32_t   len = 0;
   1250     UBool     findSucceeded;
   1251     uregex_reset(regexp2, 0, status);
   1252     findSucceeded = uregex_find(regexp2, 0, status);
   1253     if (findSucceeded) {
   1254         len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
   1255                                        &destBuf, &destCapacity, status);
   1256     }
   1257     len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
   1258 
   1259     return len;
   1260 }
   1261 
   1262 
   1263 //------------------------------------------------------------------------------
   1264 //
   1265 //    uregex_replaceFirstUText
   1266 //
   1267 //------------------------------------------------------------------------------
   1268 U_CAPI UText * U_EXPORT2
   1269 uregex_replaceFirstUText(URegularExpression  *regexp2,
   1270                          UText                 *replacementText,
   1271                          UText                 *dest,
   1272                          UErrorCode            *status)  {
   1273     RegularExpression *regexp = (RegularExpression*)regexp2;
   1274     if (validateRE(regexp, TRUE, status) == FALSE) {
   1275         return 0;
   1276     }
   1277     if (replacementText == NULL) {
   1278         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1279         return 0;
   1280     }
   1281 
   1282     dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
   1283     return dest;
   1284 }
   1285 
   1286 
   1287 //------------------------------------------------------------------------------
   1288 //
   1289 //    uregex_appendReplacement
   1290 //
   1291 //------------------------------------------------------------------------------
   1292 
   1293 U_NAMESPACE_BEGIN
   1294 //
   1295 //  Dummy class, because these functions need to be friends of class RegexMatcher,
   1296 //               and stand-alone C functions don't work as friends
   1297 //
   1298 class RegexCImpl {
   1299  public:
   1300    inline static  int32_t appendReplacement(RegularExpression    *regexp,
   1301                       const UChar           *replacementText,
   1302                       int32_t                replacementLength,
   1303                       UChar                **destBuf,
   1304                       int32_t               *destCapacity,
   1305                       UErrorCode            *status);
   1306 
   1307    inline static int32_t appendTail(RegularExpression    *regexp,
   1308         UChar                **destBuf,
   1309         int32_t               *destCapacity,
   1310         UErrorCode            *status);
   1311 
   1312     inline static int32_t split(RegularExpression    *regexp,
   1313         UChar                 *destBuf,
   1314         int32_t                destCapacity,
   1315         int32_t               *requiredCapacity,
   1316         UChar                 *destFields[],
   1317         int32_t                destFieldsCapacity,
   1318         UErrorCode            *status);
   1319 };
   1320 
   1321 U_NAMESPACE_END
   1322 
   1323 
   1324 
   1325 static const UChar BACKSLASH  = 0x5c;
   1326 static const UChar DOLLARSIGN = 0x24;
   1327 
   1328 //
   1329 //  Move a character to an output buffer, with bounds checking on the index.
   1330 //      Index advances even if capacity is exceeded, for preflight size computations.
   1331 //      This little sequence is used a LOT.
   1332 //
   1333 static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) {
   1334     if (*idx < bufCapacity) {
   1335         buf[*idx] = c;
   1336     }
   1337     (*idx)++;
   1338 }
   1339 
   1340 
   1341 //
   1342 //  appendReplacement, the actual implementation.
   1343 //
   1344 int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
   1345                                       const UChar           *replacementText,
   1346                                       int32_t                replacementLength,
   1347                                       UChar                **destBuf,
   1348                                       int32_t               *destCapacity,
   1349                                       UErrorCode            *status)  {
   1350 
   1351     // If we come in with a buffer overflow error, don't suppress the operation.
   1352     //  A series of appendReplacements, appendTail need to correctly preflight
   1353     //  the buffer size when an overflow happens somewhere in the middle.
   1354     UBool pendingBufferOverflow = FALSE;
   1355     if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
   1356         pendingBufferOverflow = TRUE;
   1357         *status = U_ZERO_ERROR;
   1358     }
   1359 
   1360     //
   1361     // Validate all paramters
   1362     //
   1363     if (validateRE(regexp, TRUE, status) == FALSE) {
   1364         return 0;
   1365     }
   1366     if (replacementText == NULL || replacementLength < -1 ||
   1367         destCapacity == NULL || destBuf == NULL ||
   1368         (*destBuf == NULL && *destCapacity > 0) ||
   1369         *destCapacity < 0) {
   1370         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1371         return 0;
   1372     }
   1373 
   1374     RegexMatcher *m = regexp->fMatcher;
   1375     if (m->fMatch == FALSE) {
   1376         *status = U_REGEX_INVALID_STATE;
   1377         return 0;
   1378     }
   1379 
   1380     UChar    *dest             = *destBuf;
   1381     int32_t   capacity         = *destCapacity;
   1382     int32_t   destIdx          =  0;
   1383     int32_t   i;
   1384 
   1385     // If it wasn't supplied by the caller,  get the length of the replacement text.
   1386     //   TODO:  slightly smarter logic in the copy loop could watch for the NUL on
   1387     //          the fly and avoid this step.
   1388     if (replacementLength == -1) {
   1389         replacementLength = u_strlen(replacementText);
   1390     }
   1391 
   1392     // Copy input string from the end of previous match to start of current match
   1393     if (regexp->fText != NULL) {
   1394         int32_t matchStart;
   1395         int32_t lastMatchEnd;
   1396         if (UTEXT_USES_U16(m->fInputText)) {
   1397             lastMatchEnd = (int32_t)m->fLastMatchEnd;
   1398             matchStart = (int32_t)m->fMatchStart;
   1399         } else {
   1400             // !!!: Would like a better way to do this!
   1401             UErrorCode status = U_ZERO_ERROR;
   1402             lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &status);
   1403             status = U_ZERO_ERROR;
   1404             matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &status);
   1405         }
   1406         for (i=lastMatchEnd; i<matchStart; i++) {
   1407             appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
   1408         }
   1409     } else {
   1410         UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
   1411         destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
   1412                                  dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity),
   1413                                  &possibleOverflowError);
   1414     }
   1415     U_ASSERT(destIdx >= 0);
   1416 
   1417     // scan the replacement text, looking for substitutions ($n) and \escapes.
   1418     int32_t  replIdx = 0;
   1419     while (replIdx < replacementLength) {
   1420         UChar  c = replacementText[replIdx];
   1421         replIdx++;
   1422         if (c != DOLLARSIGN && c != BACKSLASH) {
   1423             // Common case, no substitution, no escaping,
   1424             //  just copy the char to the dest buf.
   1425             appendToBuf(c, &destIdx, dest, capacity);
   1426             continue;
   1427         }
   1428 
   1429         if (c == BACKSLASH) {
   1430             // Backslash Escape.  Copy the following char out without further checks.
   1431             //                    Note:  Surrogate pairs don't need any special handling
   1432             //                           The second half wont be a '$' or a '\', and
   1433             //                           will move to the dest normally on the next
   1434             //                           loop iteration.
   1435             if (replIdx >= replacementLength) {
   1436                 break;
   1437             }
   1438             c = replacementText[replIdx];
   1439 
   1440             if (c==0x55/*U*/ || c==0x75/*u*/) {
   1441                 // We have a \udddd or \Udddddddd escape sequence.
   1442                 UChar32 escapedChar =
   1443                     u_unescapeAt(uregex_ucstr_unescape_charAt,
   1444                        &replIdx,                   // Index is updated by unescapeAt
   1445                        replacementLength,          // Length of replacement text
   1446                        (void *)replacementText);
   1447 
   1448                 if (escapedChar != (UChar32)0xFFFFFFFF) {
   1449                     if (escapedChar <= 0xffff) {
   1450                         appendToBuf((UChar)escapedChar, &destIdx, dest, capacity);
   1451                     } else {
   1452                         appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
   1453                         appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
   1454                     }
   1455                     continue;
   1456                 }
   1457                 // Note:  if the \u escape was invalid, just fall through and
   1458                 //        treat it as a plain \<anything> escape.
   1459             }
   1460 
   1461             // Plain backslash escape.  Just put out the escaped character.
   1462             appendToBuf(c, &destIdx, dest, capacity);
   1463 
   1464             replIdx++;
   1465             continue;
   1466         }
   1467 
   1468 
   1469 
   1470         // We've got a $.  Pick up a capture group number if one follows.
   1471         // Consume at most the number of digits necessary for the largest capture
   1472         // number that is valid for this pattern.
   1473 
   1474         int32_t numDigits = 0;
   1475         int32_t groupNum  = 0;
   1476         UChar32 digitC;
   1477         for (;;) {
   1478             if (replIdx >= replacementLength) {
   1479                 break;
   1480             }
   1481             U16_GET(replacementText, 0, replIdx, replacementLength, digitC);
   1482             if (u_isdigit(digitC) == FALSE) {
   1483                 break;
   1484             }
   1485 
   1486             U16_FWD_1(replacementText, replIdx, replacementLength);
   1487             groupNum=groupNum*10 + u_charDigitValue(digitC);
   1488             numDigits++;
   1489             if (numDigits >= m->fPattern->fMaxCaptureDigits) {
   1490                 break;
   1491             }
   1492         }
   1493 
   1494 
   1495         if (numDigits == 0) {
   1496             // The $ didn't introduce a group number at all.
   1497             // Treat it as just part of the substitution text.
   1498             appendToBuf(DOLLARSIGN, &destIdx, dest, capacity);
   1499             continue;
   1500         }
   1501 
   1502         // Finally, append the capture group data to the destination.
   1503         destIdx += uregex_group((URegularExpression*)regexp, groupNum,
   1504                                 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
   1505         if (*status == U_BUFFER_OVERFLOW_ERROR) {
   1506             // Ignore buffer overflow when extracting the group.  We need to
   1507             //   continue on to get full size of the untruncated result.  We will
   1508             //   raise our own buffer overflow error at the end.
   1509             *status = U_ZERO_ERROR;
   1510         }
   1511 
   1512         if (U_FAILURE(*status)) {
   1513             // Can fail if group number is out of range.
   1514             break;
   1515         }
   1516 
   1517     }
   1518 
   1519     //
   1520     //  Nul Terminate the dest buffer if possible.
   1521     //  Set the appropriate buffer overflow or not terminated error, if needed.
   1522     //
   1523     if (destIdx < capacity) {
   1524         dest[destIdx] = 0;
   1525     } else if (destIdx == *destCapacity) {
   1526         *status = U_STRING_NOT_TERMINATED_WARNING;
   1527     } else {
   1528         *status = U_BUFFER_OVERFLOW_ERROR;
   1529     }
   1530 
   1531     //
   1532     // Return an updated dest buffer and capacity to the caller.
   1533     //
   1534     if (destIdx > 0 &&  *destCapacity > 0) {
   1535         if (destIdx < capacity) {
   1536             *destBuf      += destIdx;
   1537             *destCapacity -= destIdx;
   1538         } else {
   1539             *destBuf      += capacity;
   1540             *destCapacity =  0;
   1541         }
   1542     }
   1543 
   1544     // If we came in with a buffer overflow, make sure we go out with one also.
   1545     //   (A zero length match right at the end of the previous match could
   1546     //    make this function succeed even though a previous call had overflowed the buf)
   1547     if (pendingBufferOverflow && U_SUCCESS(*status)) {
   1548         *status = U_BUFFER_OVERFLOW_ERROR;
   1549     }
   1550 
   1551     return destIdx;
   1552 }
   1553 
   1554 //
   1555 //   appendReplacement   the actual API function,
   1556 //
   1557 U_CAPI int32_t U_EXPORT2
   1558 uregex_appendReplacement(URegularExpression    *regexp2,
   1559                          const UChar           *replacementText,
   1560                          int32_t                replacementLength,
   1561                          UChar                **destBuf,
   1562                          int32_t               *destCapacity,
   1563                          UErrorCode            *status) {
   1564 
   1565     RegularExpression *regexp = (RegularExpression*)regexp2;
   1566     return RegexCImpl::appendReplacement(
   1567         regexp, replacementText, replacementLength,destBuf, destCapacity, status);
   1568 }
   1569 
   1570 //
   1571 //   uregex_appendReplacementUText...can just use the normal C++ method
   1572 //
   1573 U_CAPI void U_EXPORT2
   1574 uregex_appendReplacementUText(URegularExpression    *regexp2,
   1575                               UText                 *replText,
   1576                               UText                 *dest,
   1577                               UErrorCode            *status)  {
   1578     RegularExpression *regexp = (RegularExpression*)regexp2;
   1579     regexp->fMatcher->appendReplacement(dest, replText, *status);
   1580 }
   1581 
   1582 
   1583 //------------------------------------------------------------------------------
   1584 //
   1585 //    uregex_appendTail
   1586 //
   1587 //------------------------------------------------------------------------------
   1588 int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
   1589                                UChar                **destBuf,
   1590                                int32_t               *destCapacity,
   1591                                UErrorCode            *status)
   1592 {
   1593 
   1594     // If we come in with a buffer overflow error, don't suppress the operation.
   1595     //  A series of appendReplacements, appendTail need to correctly preflight
   1596     //  the buffer size when an overflow happens somewhere in the middle.
   1597     UBool pendingBufferOverflow = FALSE;
   1598     if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
   1599         pendingBufferOverflow = TRUE;
   1600         *status = U_ZERO_ERROR;
   1601     }
   1602 
   1603     if (validateRE(regexp, TRUE, status) == FALSE) {
   1604         return 0;
   1605     }
   1606 
   1607     if (destCapacity == NULL || destBuf == NULL ||
   1608         (*destBuf == NULL && *destCapacity > 0) ||
   1609         *destCapacity < 0)
   1610     {
   1611         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1612         return 0;
   1613     }
   1614 
   1615     RegexMatcher *m = regexp->fMatcher;
   1616 
   1617     int32_t  destIdx     = 0;
   1618     int32_t  destCap     = *destCapacity;
   1619     UChar    *dest       = *destBuf;
   1620 
   1621     if (regexp->fText != NULL) {
   1622         int32_t srcIdx;
   1623         int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
   1624         if (nativeIdx == -1) {
   1625             srcIdx = 0;
   1626         } else if (UTEXT_USES_U16(m->fInputText)) {
   1627             srcIdx = (int32_t)nativeIdx;
   1628         } else {
   1629             UErrorCode status = U_ZERO_ERROR;
   1630             srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status);
   1631         }
   1632 
   1633         for (;;) {
   1634             U_ASSERT(destIdx >= 0);
   1635 
   1636             if (srcIdx == regexp->fTextLength) {
   1637                 break;
   1638             }
   1639             UChar c = regexp->fText[srcIdx];
   1640             if (c == 0 && regexp->fTextLength == -1) {
   1641                 regexp->fTextLength = srcIdx;
   1642                 break;
   1643             }
   1644 
   1645             if (destIdx < destCap) {
   1646                 dest[destIdx] = c;
   1647             } else {
   1648                 // We've overflowed the dest buffer.
   1649                 //  If the total input string length is known, we can
   1650                 //    compute the total buffer size needed without scanning through the string.
   1651                 if (regexp->fTextLength > 0) {
   1652                     destIdx += (regexp->fTextLength - srcIdx);
   1653                     break;
   1654                 }
   1655             }
   1656             srcIdx++;
   1657             destIdx++;
   1658         }
   1659     } else {
   1660         int64_t  srcIdx;
   1661         if (m->fMatch) {
   1662             // The most recent call to find() succeeded.
   1663             srcIdx = m->fMatchEnd;
   1664         } else {
   1665             // The last call to find() on this matcher failed().
   1666             //   Look back to the end of the last find() that succeeded for src index.
   1667             srcIdx = m->fLastMatchEnd;
   1668             if (srcIdx == -1)  {
   1669                 // There has been no successful match with this matcher.
   1670                 //   We want to copy the whole string.
   1671                 srcIdx = 0;
   1672             }
   1673         }
   1674 
   1675         destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
   1676     }
   1677 
   1678     //
   1679     //  NUL terminate the output string, if possible, otherwise issue the
   1680     //   appropriate error or warning.
   1681     //
   1682     if (destIdx < destCap) {
   1683         dest[destIdx] = 0;
   1684     } else  if (destIdx == destCap) {
   1685         *status = U_STRING_NOT_TERMINATED_WARNING;
   1686     } else {
   1687         *status = U_BUFFER_OVERFLOW_ERROR;
   1688     }
   1689 
   1690     //
   1691     // Update the user's buffer ptr and capacity vars to reflect the
   1692     //   amount used.
   1693     //
   1694     if (destIdx < destCap) {
   1695         *destBuf      += destIdx;
   1696         *destCapacity -= destIdx;
   1697     } else if (*destBuf != NULL) {
   1698         *destBuf      += destCap;
   1699         *destCapacity  = 0;
   1700     }
   1701 
   1702     if (pendingBufferOverflow && U_SUCCESS(*status)) {
   1703         *status = U_BUFFER_OVERFLOW_ERROR;
   1704     }
   1705 
   1706     return destIdx;
   1707 }
   1708 
   1709 
   1710 //
   1711 //   appendTail   the actual API function
   1712 //
   1713 U_CAPI int32_t U_EXPORT2
   1714 uregex_appendTail(URegularExpression    *regexp2,
   1715                   UChar                **destBuf,
   1716                   int32_t               *destCapacity,
   1717                   UErrorCode            *status)  {
   1718     RegularExpression *regexp = (RegularExpression*)regexp2;
   1719     return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status);
   1720 }
   1721 
   1722 
   1723 //
   1724 //   uregex_appendTailUText...can just use the normal C++ method
   1725 //
   1726 U_CAPI UText * U_EXPORT2
   1727 uregex_appendTailUText(URegularExpression    *regexp2,
   1728                        UText                 *dest,
   1729                        UErrorCode            *status)  {
   1730     RegularExpression *regexp = (RegularExpression*)regexp2;
   1731     return regexp->fMatcher->appendTail(dest, *status);
   1732 }
   1733 
   1734 
   1735 //------------------------------------------------------------------------------
   1736 //
   1737 //    copyString     Internal utility to copy a string to an output buffer,
   1738 //                   while managing buffer overflow and preflight size
   1739 //                   computation.  NUL termination is added to destination,
   1740 //                   and the NUL is counted in the output size.
   1741 //
   1742 //------------------------------------------------------------------------------
   1743 #if 0
   1744 static void copyString(UChar        *destBuffer,    //  Destination buffer.
   1745                        int32_t       destCapacity,  //  Total capacity of dest buffer
   1746                        int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
   1747                                                     //    Update not clipped to destCapacity.
   1748                        const UChar  *srcPtr,        //  Pointer to source string
   1749                        int32_t       srcLen)        //  Source string len.
   1750 {
   1751     int32_t  si;
   1752     int32_t  di = *destIndex;
   1753     UChar    c;
   1754 
   1755     for (si=0; si<srcLen;  si++) {
   1756         c = srcPtr[si];
   1757         if (di < destCapacity) {
   1758             destBuffer[di] = c;
   1759             di++;
   1760         } else {
   1761             di += srcLen - si;
   1762             break;
   1763         }
   1764     }
   1765     if (di<destCapacity) {
   1766         destBuffer[di] = 0;
   1767     }
   1768     di++;
   1769     *destIndex = di;
   1770 }
   1771 #endif
   1772 
   1773 //------------------------------------------------------------------------------
   1774 //
   1775 //    uregex_split
   1776 //
        //    RegexCImpl::split() is the worker called by uregex_split(), which
        //    validates the arguments before calling here.
        //
        //    The matcher is reset and then used to find successive delimiters in
        //    the input.  The text between delimiters, and the text of each of the
        //    delimiter's capture groups, is written into destBuf as consecutive
        //    fields, and a pointer to the start of each field is stored in
        //    destFields[].
        //
        //      regexp              The regular expression; its fMatcher supplies
        //                          the input text and finds the delimiters.
        //      destBuf             Buffer that receives the field text.
        //      destCapacity        Capacity of destBuf.
        //      requiredCapacity    If not NULL, receives the final value of destIdx,
        //                          which has been advanced by each field's length
        //                          plus one.
        //      destFields          Receives a pointer into destBuf for each field;
        //                          entries after the last field are set to NULL.
        //      destFieldsCapacity  Number of entries in destFields.
        //      status              Set to U_BUFFER_OVERFLOW_ERROR if destIdx ends
        //                          up larger than destCapacity.
        //
        //    Returns the number of fields (index of the last field used, plus one),
        //    or 0 when the input is empty.
        //
   1777 //------------------------------------------------------------------------------
   1778 int32_t RegexCImpl::split(RegularExpression     *regexp,
   1779                           UChar                 *destBuf,
   1780                           int32_t                destCapacity,
   1781                           int32_t               *requiredCapacity,
   1782                           UChar                 *destFields[],
   1783                           int32_t                destFieldsCapacity,
   1784                           UErrorCode            *status) {
   1785     //
   1786     // Reset for the input text
   1787     //
   1788     regexp->fMatcher->reset();
   1789     UText *inputText = regexp->fMatcher->fInputText;
   1790     int64_t   nextOutputStringStart = 0;
   1791     int64_t   inputLen = regexp->fMatcher->fInputLength;
            // Empty input produces no fields.  Note that destFields[] and
            //  *requiredCapacity are not written on this early return.
   1792     if (inputLen == 0) {
   1793         return 0;
   1794     }
   1795 
   1796     //
   1797     // Loop through the input text, searching for the delimiter pattern
   1798     //
   1799     int32_t   i;             // Index of the field being processed.
   1800     int32_t   destIdx = 0;   // Next available position in destBuf;
                                     //  It is allowed to run past destCapacity;
                                     //  REMAINING_CAPACITY() then yields 0.
   1801     int32_t   numCaptureGroups = regexp->fMatcher->groupCount();
   1802     UErrorCode  tStatus = U_ZERO_ERROR;   // Want to ignore any buffer overflow errors so that the strings are still counted
   1803     for (i=0; ; i++) {
   1804         if (i>=destFieldsCapacity-1) {
   1805             // There are one or zero output strings left.
   1806             // Fill the last output string with whatever is left from the input, then exit the loop.
   1807             //  ( i will be == destFieldsCapacity if we filled the output array while processing
   1808             //    capture groups of the delimiter expression, in which case we will discard the
   1809             //    last capture group saved in favor of the unprocessed remainder of the
   1810             //    input string.)
   1811             if (inputLen > nextOutputStringStart) {
   1812                 if (i != destFieldsCapacity-1) {
   1813                     // No fields are left.  Recycle the last one for holding the trailing part of
   1814                     //   the input string.
   1815                     i = destFieldsCapacity-1;
                            // Rewind destIdx to the offset at which the recycled field
                            //  began.  destFields[0] was set to &destBuf[0] on the first
                            //  pass, so the pointer difference is that offset.
   1816                     destIdx = (int32_t)(destFields[i] - destFields[0]);
   1817                 }
   1818 
   1819                 destFields[i] = &destBuf[destIdx];
                        // This extract reports straight into the caller's status,
                        //  unlike the ones below that go through tStatus.
   1820                 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
   1821                                              &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
   1822             }
   1823             break;
   1824         }
   1825 
   1826         if (regexp->fMatcher->find()) {
   1827             // We found another delimiter.  Move everything from where we started looking
   1828             //  up until the start of the delimiter into the next output string.
   1829             destFields[i] = &destBuf[destIdx];
   1830 
   1831             destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
   1832                                          &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
                    // A buffer overflow is cleared here and reported once at the
                    //  end of the function.  Any other value of tStatus, success
                    //  included, is copied into *status.
   1833             if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
   1834                 tStatus = U_ZERO_ERROR;
   1835             } else {
   1836                 *status = tStatus;
   1837             }
   1838             nextOutputStringStart = regexp->fMatcher->fMatchEnd;
   1839 
   1840             // If the delimiter pattern has capturing parentheses, the captured
   1841             //  text goes out into the next n destination strings.
   1842             int32_t groupNum;
   1843             for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
   1844                 // If we've run out of output string slots, bail out.
   1845                 if (i==destFieldsCapacity-1) {
   1846                     break;
   1847                 }
   1848                 i++;
   1849 
   1850                 // Set up to extract the capture group contents into the dest buffer.
   1851                 destFields[i] = &destBuf[destIdx];
   1852                 tStatus = U_ZERO_ERROR;
   1853                 int32_t t = uregex_group((URegularExpression*)regexp,
   1854                                          groupNum,
   1855                                          destFields[i],
   1856                                          REMAINING_CAPACITY(destIdx, destCapacity),
   1857                                          &tStatus);
   1858                 destIdx += t + 1;    // Record the space used in the output string buffer.
   1859                                      //  +1 for the NUL that terminates the string.
                        // Same status handling as for the field text above.
   1860                 if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
   1861                     tStatus = U_ZERO_ERROR;
   1862                 } else {
   1863                     *status = tStatus;
   1864                 }
   1865             }
   1866 
   1867             if (nextOutputStringStart == inputLen) {
   1868                 // The delimiter was at the end of the string.
   1869                 // Output an empty string, and then we are done.
   1870                 if (destIdx < destCapacity) {
   1871                     destBuf[destIdx] = 0;
   1872                 }
                        // Use a fresh slot for the empty string if one is left;
                        //  otherwise the last slot already used is reused for it.
   1873                 if (i < destFieldsCapacity-1) {
   1874                    ++i;
   1875                 }
                        // The field pointer is only stored when it lies inside the
                        //  buffer; otherwise destFields[i] is left as it was.
   1876                 if (destIdx < destCapacity) {
   1877                     destFields[i] = destBuf + destIdx;
   1878                 }
   1879                 ++destIdx;
   1880                 break;
   1881             }
   1882 
   1883         }
   1884         else
   1885         {
   1886             // We ran off the end of the input while looking for the next delimiter.
   1887             // All the remaining text goes into the current output string.
   1888             destFields[i] = &destBuf[destIdx];
   1889             destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
   1890                                          &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
   1891             break;
   1892         }
   1893     }
   1894 
   1895     // Zero out any unused portion of the destFields array
   1896     int j;
   1897     for (j=i+1; j<destFieldsCapacity; j++) {
   1898         destFields[j] = NULL;
   1899     }
   1900 
   1901     if (requiredCapacity != NULL) {
   1902         *requiredCapacity = destIdx;
   1903     }
            // destIdx was advanced for every field whether or not the field fit,
            //  so a value beyond destCapacity is reported as an overflow.
   1904     if (destIdx > destCapacity) {
   1905         *status = U_BUFFER_OVERFLOW_ERROR;
   1906     }
   1907     return i+1;
   1908 }
   1909 
   1910 //
   1911 //   uregex_split   The actual API function
   1912 //
   1913 U_CAPI int32_t U_EXPORT2
   1914 uregex_split(URegularExpression      *regexp2,
   1915              UChar                   *destBuf,
   1916              int32_t                  destCapacity,
   1917              int32_t                 *requiredCapacity,
   1918              UChar                   *destFields[],
   1919              int32_t                  destFieldsCapacity,
   1920              UErrorCode              *status) {
   1921     RegularExpression *regexp = (RegularExpression*)regexp2;
   1922     if (validateRE(regexp, TRUE, status) == FALSE) {
   1923         return 0;
   1924     }
   1925     if ((destBuf == NULL && destCapacity > 0) ||
   1926         destCapacity < 0 ||
   1927         destFields == NULL ||
   1928         destFieldsCapacity < 1 ) {
   1929         *status = U_ILLEGAL_ARGUMENT_ERROR;
   1930         return 0;
   1931     }
   1932 
   1933     return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
   1934 }
   1935 
   1936 
   1937 //
   1938 //   uregex_splitUText...can just use the normal C++ method
   1939 //
   1940 U_CAPI int32_t U_EXPORT2
   1941 uregex_splitUText(URegularExpression    *regexp2,
   1942                   UText                 *destFields[],
   1943                   int32_t                destFieldsCapacity,
   1944                   UErrorCode            *status) {
   1945     RegularExpression *regexp = (RegularExpression*)regexp2;
   1946     return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
   1947 }
   1948 
   1949 
   1950 #endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS
   1951 
   1952