Home | History | Annotate | Download | only in i18n
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 //
      4 //  file:  repattrn.cpp
      5 //
      6 /*
      7 ***************************************************************************
      8 *   Copyright (C) 2002-2016 International Business Machines Corporation
      9 *   and others. All rights reserved.
     10 ***************************************************************************
     11 */
     12 
     13 #include "unicode/utypes.h"
     14 
     15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     16 
     17 #include "unicode/regex.h"
     18 #include "unicode/uclean.h"
     19 #include "cmemory.h"
     20 #include "cstr.h"
     21 #include "uassert.h"
     22 #include "uhash.h"
     23 #include "uvector.h"
     24 #include "uvectr32.h"
     25 #include "uvectr64.h"
     26 #include "regexcmp.h"
     27 #include "regeximp.h"
     28 #include "regexst.h"
     29 
     30 U_NAMESPACE_BEGIN
     31 
     32 //--------------------------------------------------------------------------
     33 //
     34 //    RegexPattern    Default Constructor
     35 //
     36 //--------------------------------------------------------------------------
     37 RegexPattern::RegexPattern() {
     38     // Init all of this instances data.
     39     init();
     40 }
     41 
     42 
     43 //--------------------------------------------------------------------------
     44 //
     45 //   Copy Constructor        Note:  This is a rather inefficient implementation,
     46 //                                  but it probably doesn't matter.
     47 //
     48 //--------------------------------------------------------------------------
     49 RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
     50     init();
     51     *this = other;
     52 }
     53 
     54 
     55 
     56 //--------------------------------------------------------------------------
     57 //
     58 //    Assignment Operator
     59 //
     60 //--------------------------------------------------------------------------
     61 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
     62     if (this == &other) {
     63         // Source and destination are the same.  Don't do anything.
     64         return *this;
     65     }
     66 
     67     // Clean out any previous contents of object being assigned to.
     68     zap();
     69 
     70     // Give target object a default initialization
     71     init();
     72 
     73     // Copy simple fields
     74     fDeferredStatus   = other.fDeferredStatus;
     75 
     76     if (U_FAILURE(fDeferredStatus)) {
     77         return *this;
     78     }
     79 
     80     if (other.fPatternString == NULL) {
     81         fPatternString = NULL;
     82         fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
     83     } else {
     84         fPatternString = new UnicodeString(*(other.fPatternString));
     85         if (fPatternString == NULL) {
     86             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
     87         } else {
     88             fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
     89         }
     90     }
     91     if (U_FAILURE(fDeferredStatus)) {
     92         return *this;
     93     }
     94 
     95     fFlags            = other.fFlags;
     96     fLiteralText      = other.fLiteralText;
     97     fMinMatchLen      = other.fMinMatchLen;
     98     fFrameSize        = other.fFrameSize;
     99     fDataSize         = other.fDataSize;
    100     fStaticSets       = other.fStaticSets;
    101     fStaticSets8      = other.fStaticSets8;
    102 
    103     fStartType        = other.fStartType;
    104     fInitialStringIdx = other.fInitialStringIdx;
    105     fInitialStringLen = other.fInitialStringLen;
    106     *fInitialChars    = *other.fInitialChars;
    107     fInitialChar      = other.fInitialChar;
    108     *fInitialChars8   = *other.fInitialChars8;
    109     fNeedsAltInput    = other.fNeedsAltInput;
    110 
    111     //  Copy the pattern.  It's just values, nothing deep to copy.
    112     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    113     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
    114 
    115     //  Copy the Unicode Sets.
    116     //    Could be made more efficient if the sets were reference counted and shared,
    117     //    but I doubt that pattern copying will be particularly common.
    118     //    Note:  init() already added an empty element zero to fSets
    119     int32_t i;
    120     int32_t  numSets = other.fSets->size();
    121     fSets8 = new Regex8BitSet[numSets];
    122     if (fSets8 == NULL) {
    123     	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    124     	return *this;
    125     }
    126     for (i=1; i<numSets; i++) {
    127         if (U_FAILURE(fDeferredStatus)) {
    128             return *this;
    129         }
    130         UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
    131         UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
    132         if (newSet == NULL) {
    133             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    134             break;
    135         }
    136         fSets->addElement(newSet, fDeferredStatus);
    137         fSets8[i] = other.fSets8[i];
    138     }
    139 
    140     // Copy the named capture group hash map.
    141     int32_t hashPos = UHASH_FIRST;
    142     while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
    143         if (U_FAILURE(fDeferredStatus)) {
    144             break;
    145         }
    146         const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
    147         UnicodeString *key = new UnicodeString(*name);
    148         int32_t val = hashEl->value.integer;
    149         if (key == NULL) {
    150             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    151         } else {
    152             uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
    153         }
    154     }
    155     return *this;
    156 }
    157 
    158 
    159 //--------------------------------------------------------------------------
    160 //
    161 //    init        Shared initialization for use by constructors.
    162 //                Bring an uninitialized RegexPattern up to a default state.
    163 //
    164 //--------------------------------------------------------------------------
    165 void RegexPattern::init() {
    166     fFlags            = 0;
    167     fCompiledPat      = 0;
    168     fLiteralText.remove();
    169     fSets             = NULL;
    170     fSets8            = NULL;
    171     fDeferredStatus   = U_ZERO_ERROR;
    172     fMinMatchLen      = 0;
    173     fFrameSize        = 0;
    174     fDataSize         = 0;
    175     fGroupMap         = NULL;
    176     fStaticSets       = NULL;
    177     fStaticSets8      = NULL;
    178     fStartType        = START_NO_INFO;
    179     fInitialStringIdx = 0;
    180     fInitialStringLen = 0;
    181     fInitialChars     = NULL;
    182     fInitialChar      = 0;
    183     fInitialChars8    = NULL;
    184     fNeedsAltInput    = FALSE;
    185     fNamedCaptureMap  = NULL;
    186 
    187     fPattern          = NULL; // will be set later
    188     fPatternString    = NULL; // may be set later
    189     fCompiledPat      = new UVector64(fDeferredStatus);
    190     fGroupMap         = new UVector32(fDeferredStatus);
    191     fSets             = new UVector(fDeferredStatus);
    192     fInitialChars     = new UnicodeSet;
    193     fInitialChars8    = new Regex8BitSet;
    194     fNamedCaptureMap  = uhash_open(uhash_hashUnicodeString,     // Key hash function
    195                                    uhash_compareUnicodeString,  // Key comparator function
    196                                    uhash_compareLong,           // Value comparator function
    197                                    &fDeferredStatus);
    198     if (U_FAILURE(fDeferredStatus)) {
    199         return;
    200     }
    201     if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
    202             fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) {
    203         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    204         return;
    205     }
    206 
    207     // Slot zero of the vector of sets is reserved.  Fill it here.
    208     fSets->addElement((int32_t)0, fDeferredStatus);
    209 
    210     // fNamedCaptureMap owns its key strings, type (UnicodeString *)
    211     uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
    212 }
    213 
    214 
    215 //--------------------------------------------------------------------------
    216 //
    217 //   zap            Delete everything owned by this RegexPattern.
    218 //
    219 //--------------------------------------------------------------------------
    220 void RegexPattern::zap() {
    221     delete fCompiledPat;
    222     fCompiledPat = NULL;
    223     int i;
    224     for (i=1; i<fSets->size(); i++) {
    225         UnicodeSet *s;
    226         s = (UnicodeSet *)fSets->elementAt(i);
    227         if (s != NULL) {
    228             delete s;
    229         }
    230     }
    231     delete fSets;
    232     fSets = NULL;
    233     delete[] fSets8;
    234     fSets8 = NULL;
    235     delete fGroupMap;
    236     fGroupMap = NULL;
    237     delete fInitialChars;
    238     fInitialChars = NULL;
    239     delete fInitialChars8;
    240     fInitialChars8 = NULL;
    241     if (fPattern != NULL) {
    242         utext_close(fPattern);
    243         fPattern = NULL;
    244     }
    245     if (fPatternString != NULL) {
    246         delete fPatternString;
    247         fPatternString = NULL;
    248     }
    249     uhash_close(fNamedCaptureMap);
    250     fNamedCaptureMap = NULL;
    251 }
    252 
    253 
    254 //--------------------------------------------------------------------------
    255 //
    256 //   Destructor
    257 //
    258 //--------------------------------------------------------------------------
    259 RegexPattern::~RegexPattern() {
    260     zap();
    261 }
    262 
    263 
    264 //--------------------------------------------------------------------------
    265 //
    266 //   Clone
    267 //
    268 //--------------------------------------------------------------------------
    269 RegexPattern  *RegexPattern::clone() const {
    270     RegexPattern  *copy = new RegexPattern(*this);
    271     return copy;
    272 }
    273 
    274 
    275 //--------------------------------------------------------------------------
    276 //
    277 //   operator ==   (comparison)    Consider two patterns to be == if the
    278 //                                 pattern strings and the flags are the same.
    279 //                                 Note that pattern strings with the same
    280 //                                 characters can still be considered different.
    281 //
    282 //--------------------------------------------------------------------------
    283 UBool   RegexPattern::operator ==(const RegexPattern &other) const {
    284     if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
    285         if (this->fPatternString != NULL && other.fPatternString != NULL) {
    286             return *(this->fPatternString) == *(other.fPatternString);
    287         } else if (this->fPattern == NULL) {
    288             if (other.fPattern == NULL) {
    289                 return TRUE;
    290             }
    291         } else if (other.fPattern != NULL) {
    292             UTEXT_SETNATIVEINDEX(this->fPattern, 0);
    293             UTEXT_SETNATIVEINDEX(other.fPattern, 0);
    294             return utext_equals(this->fPattern, other.fPattern);
    295         }
    296     }
    297     return FALSE;
    298 }
    299 
    300 //---------------------------------------------------------------------
    301 //
    302 //   compile
    303 //
    304 //---------------------------------------------------------------------
    305 RegexPattern * U_EXPORT2
    306 RegexPattern::compile(const UnicodeString &regex,
    307                       uint32_t             flags,
    308                       UParseError          &pe,
    309                       UErrorCode           &status)
    310 {
    311     if (U_FAILURE(status)) {
    312         return NULL;
    313     }
    314 
    315     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
    316     UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
    317     UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
    318 
    319     if ((flags & ~allFlags) != 0) {
    320         status = U_REGEX_INVALID_FLAG;
    321         return NULL;
    322     }
    323 
    324     if ((flags & UREGEX_CANON_EQ) != 0) {
    325         status = U_REGEX_UNIMPLEMENTED;
    326         return NULL;
    327     }
    328 
    329     RegexPattern *This = new RegexPattern;
    330     if (This == NULL) {
    331         status = U_MEMORY_ALLOCATION_ERROR;
    332         return NULL;
    333     }
    334     if (U_FAILURE(This->fDeferredStatus)) {
    335         status = This->fDeferredStatus;
    336         delete This;
    337         return NULL;
    338     }
    339     This->fFlags = flags;
    340 
    341     RegexCompile     compiler(This, status);
    342     compiler.compile(regex, pe, status);
    343 
    344     if (U_FAILURE(status)) {
    345         delete This;
    346         This = NULL;
    347     }
    348 
    349     return This;
    350 }
    351 
    352 
    353 //
    354 //   compile, UText mode
    355 //
    356 RegexPattern * U_EXPORT2
    357 RegexPattern::compile(UText                *regex,
    358                       uint32_t             flags,
    359                       UParseError          &pe,
    360                       UErrorCode           &status)
    361 {
    362     if (U_FAILURE(status)) {
    363         return NULL;
    364     }
    365 
    366     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
    367                               UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
    368                               UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
    369 
    370     if ((flags & ~allFlags) != 0) {
    371         status = U_REGEX_INVALID_FLAG;
    372         return NULL;
    373     }
    374 
    375     if ((flags & UREGEX_CANON_EQ) != 0) {
    376         status = U_REGEX_UNIMPLEMENTED;
    377         return NULL;
    378     }
    379 
    380     RegexPattern *This = new RegexPattern;
    381     if (This == NULL) {
    382         status = U_MEMORY_ALLOCATION_ERROR;
    383         return NULL;
    384     }
    385     if (U_FAILURE(This->fDeferredStatus)) {
    386         status = This->fDeferredStatus;
    387         delete This;
    388         return NULL;
    389     }
    390     This->fFlags = flags;
    391 
    392     RegexCompile     compiler(This, status);
    393     compiler.compile(regex, pe, status);
    394 
    395     if (U_FAILURE(status)) {
    396         delete This;
    397         This = NULL;
    398     }
    399 
    400     return This;
    401 }
    402 
    403 //
    404 //   compile with default flags.
    405 //
    406 RegexPattern * U_EXPORT2
    407 RegexPattern::compile(const UnicodeString &regex,
    408                       UParseError         &pe,
    409                       UErrorCode          &err)
    410 {
    411     return compile(regex, 0, pe, err);
    412 }
    413 
    414 
    415 //
    416 //   compile with default flags, UText mode
    417 //
    418 RegexPattern * U_EXPORT2
    419 RegexPattern::compile(UText               *regex,
    420                       UParseError         &pe,
    421                       UErrorCode          &err)
    422 {
    423     return compile(regex, 0, pe, err);
    424 }
    425 
    426 
    427 //
    428 //   compile with no UParseErr parameter.
    429 //
    430 RegexPattern * U_EXPORT2
    431 RegexPattern::compile(const UnicodeString &regex,
    432                       uint32_t             flags,
    433                       UErrorCode          &err)
    434 {
    435     UParseError pe;
    436     return compile(regex, flags, pe, err);
    437 }
    438 
    439 
    440 //
    441 //   compile with no UParseErr parameter, UText mode
    442 //
    443 RegexPattern * U_EXPORT2
    444 RegexPattern::compile(UText                *regex,
    445                       uint32_t             flags,
    446                       UErrorCode           &err)
    447 {
    448     UParseError pe;
    449     return compile(regex, flags, pe, err);
    450 }
    451 
    452 
    453 //---------------------------------------------------------------------
    454 //
    455 //   flags
    456 //
    457 //---------------------------------------------------------------------
    458 uint32_t RegexPattern::flags() const {
    459     return fFlags;
    460 }
    461 
    462 
    463 //---------------------------------------------------------------------
    464 //
    465 //   matcher(UnicodeString, err)
    466 //
    467 //---------------------------------------------------------------------
    468 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
    469                                     UErrorCode          &status)  const {
    470     RegexMatcher    *retMatcher = matcher(status);
    471     if (retMatcher != NULL) {
    472         retMatcher->fDeferredStatus = status;
    473         retMatcher->reset(input);
    474     }
    475     return retMatcher;
    476 }
    477 
    478 
    479 //---------------------------------------------------------------------
    480 //
    481 //   matcher(status)
    482 //
    483 //---------------------------------------------------------------------
    484 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
    485     RegexMatcher    *retMatcher = NULL;
    486 
    487     if (U_FAILURE(status)) {
    488         return NULL;
    489     }
    490     if (U_FAILURE(fDeferredStatus)) {
    491         status = fDeferredStatus;
    492         return NULL;
    493     }
    494 
    495     retMatcher = new RegexMatcher(this);
    496     if (retMatcher == NULL) {
    497         status = U_MEMORY_ALLOCATION_ERROR;
    498         return NULL;
    499     }
    500     return retMatcher;
    501 }
    502 
    503 
    504 
    505 //---------------------------------------------------------------------
    506 //
    507 //   matches        Convenience function to test for a match, starting
    508 //                  with a pattern string and a data string.
    509 //
    510 //---------------------------------------------------------------------
    511 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
    512               const UnicodeString   &input,
    513                     UParseError     &pe,
    514                     UErrorCode      &status) {
    515 
    516     if (U_FAILURE(status)) {return FALSE;}
    517 
    518     UBool         retVal;
    519     RegexPattern *pat     = NULL;
    520     RegexMatcher *matcher = NULL;
    521 
    522     pat     = RegexPattern::compile(regex, 0, pe, status);
    523     matcher = pat->matcher(input, status);
    524     retVal  = matcher->matches(status);
    525 
    526     delete matcher;
    527     delete pat;
    528     return retVal;
    529 }
    530 
    531 
    532 //
    533 //   matches, UText mode
    534 //
    535 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
    536                     UText           *input,
    537                     UParseError     &pe,
    538                     UErrorCode      &status) {
    539 
    540     if (U_FAILURE(status)) {return FALSE;}
    541 
    542     UBool         retVal  = FALSE;
    543     RegexPattern *pat     = NULL;
    544     RegexMatcher *matcher = NULL;
    545 
    546     pat     = RegexPattern::compile(regex, 0, pe, status);
    547     matcher = pat->matcher(status);
    548     if (U_SUCCESS(status)) {
    549         matcher->reset(input);
    550         retVal  = matcher->matches(status);
    551     }
    552 
    553     delete matcher;
    554     delete pat;
    555     return retVal;
    556 }
    557 
    558 
    559 
    560 
    561 
    562 //---------------------------------------------------------------------
    563 //
    564 //   pattern
    565 //
    566 //---------------------------------------------------------------------
    567 UnicodeString RegexPattern::pattern() const {
    568     if (fPatternString != NULL) {
    569         return *fPatternString;
    570     } else if (fPattern == NULL) {
    571         return UnicodeString();
    572     } else {
    573         UErrorCode status = U_ZERO_ERROR;
    574         int64_t nativeLen = utext_nativeLength(fPattern);
    575         int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
    576         UnicodeString result;
    577 
    578         status = U_ZERO_ERROR;
    579         UChar *resultChars = result.getBuffer(len16);
    580         utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
    581         result.releaseBuffer(len16);
    582 
    583         return result;
    584     }
    585 }
    586 
    587 
    588 
    589 
    590 //---------------------------------------------------------------------
    591 //
    592 //   patternText
    593 //
    594 //---------------------------------------------------------------------
    595 UText *RegexPattern::patternText(UErrorCode      &status) const {
    596     if (U_FAILURE(status)) {return NULL;}
    597     status = U_ZERO_ERROR;
    598 
    599     if (fPattern != NULL) {
    600         return fPattern;
    601     } else {
    602         RegexStaticSets::initGlobals(&status);
    603         return RegexStaticSets::gStaticSets->fEmptyText;
    604     }
    605 }
    606 
    607 
    608 //--------------------------------------------------------------------------------
    609 //
    610 //  groupNumberFromName()
    611 //
    612 //--------------------------------------------------------------------------------
    613 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
    614     if (U_FAILURE(status)) {
    615         return 0;
    616     }
    617 
    618     // No need to explicitly check for syntactically valid names.
    619     // Invalid ones will never be in the map, and the lookup will fail.
    620 
    621     int32_t number = uhash_geti(fNamedCaptureMap, &groupName);
    622     if (number == 0) {
    623         status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
    624     }
    625     return number;
    626 }
    627 
    628 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
    629     if (U_FAILURE(status)) {
    630         return 0;
    631     }
    632     UnicodeString name(groupName, nameLength, US_INV);
    633     return groupNumberFromName(name, status);
    634 }
    635 
    636 
    637 //---------------------------------------------------------------------
    638 //
    639 //   split
    640 //
    641 //---------------------------------------------------------------------
    642 int32_t  RegexPattern::split(const UnicodeString &input,
    643         UnicodeString    dest[],
    644         int32_t          destCapacity,
    645         UErrorCode      &status) const
    646 {
    647     if (U_FAILURE(status)) {
    648         return 0;
    649     };
    650 
    651     RegexMatcher  m(this);
    652     int32_t r = 0;
    653     // Check m's status to make sure all is ok.
    654     if (U_SUCCESS(m.fDeferredStatus)) {
    655     	r = m.split(input, dest, destCapacity, status);
    656     }
    657     return r;
    658 }
    659 
    660 //
    661 //   split, UText mode
    662 //
    663 int32_t  RegexPattern::split(UText *input,
    664         UText           *dest[],
    665         int32_t          destCapacity,
    666         UErrorCode      &status) const
    667 {
    668     if (U_FAILURE(status)) {
    669         return 0;
    670     };
    671 
    672     RegexMatcher  m(this);
    673     int32_t r = 0;
    674     // Check m's status to make sure all is ok.
    675     if (U_SUCCESS(m.fDeferredStatus)) {
    676     	r = m.split(input, dest, destCapacity, status);
    677     }
    678     return r;
    679 }
    680 
    681 
    682 //---------------------------------------------------------------------
    683 //
    684 //   dump    Output the compiled form of the pattern.
    685 //           Debugging function only.
    686 //
    687 //---------------------------------------------------------------------
    688 void   RegexPattern::dumpOp(int32_t index) const {
    689     (void)index;  // Suppress warnings in non-debug build.
    690 #if defined(REGEX_DEBUG)
    691     static const char * const opNames[] = {URX_OPCODE_NAMES};
    692     int32_t op          = fCompiledPat->elementAti(index);
    693     int32_t val         = URX_VAL(op);
    694     int32_t type        = URX_TYPE(op);
    695     int32_t pinnedType  = type;
    696     if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) {
    697         pinnedType = 0;
    698     }
    699 
    700     printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
    701     switch (type) {
    702     case URX_NOP:
    703     case URX_DOTANY:
    704     case URX_DOTANY_ALL:
    705     case URX_FAIL:
    706     case URX_CARET:
    707     case URX_DOLLAR:
    708     case URX_BACKSLASH_G:
    709     case URX_BACKSLASH_X:
    710     case URX_END:
    711     case URX_DOLLAR_M:
    712     case URX_CARET_M:
    713         // Types with no operand field of interest.
    714         break;
    715 
    716     case URX_RESERVED_OP:
    717     case URX_START_CAPTURE:
    718     case URX_END_CAPTURE:
    719     case URX_STATE_SAVE:
    720     case URX_JMP:
    721     case URX_JMP_SAV:
    722     case URX_JMP_SAV_X:
    723     case URX_BACKSLASH_B:
    724     case URX_BACKSLASH_BU:
    725     case URX_BACKSLASH_D:
    726     case URX_BACKSLASH_Z:
    727     case URX_STRING_LEN:
    728     case URX_CTR_INIT:
    729     case URX_CTR_INIT_NG:
    730     case URX_CTR_LOOP:
    731     case URX_CTR_LOOP_NG:
    732     case URX_RELOC_OPRND:
    733     case URX_STO_SP:
    734     case URX_LD_SP:
    735     case URX_BACKREF:
    736     case URX_STO_INP_LOC:
    737     case URX_JMPX:
    738     case URX_LA_START:
    739     case URX_LA_END:
    740     case URX_BACKREF_I:
    741     case URX_LB_START:
    742     case URX_LB_CONT:
    743     case URX_LB_END:
    744     case URX_LBN_CONT:
    745     case URX_LBN_END:
    746     case URX_LOOP_C:
    747     case URX_LOOP_DOT_I:
    748     case URX_BACKSLASH_H:
    749     case URX_BACKSLASH_R:
    750     case URX_BACKSLASH_V:
    751         // types with an integer operand field.
    752         printf("%d", val);
    753         break;
    754 
    755     case URX_ONECHAR:
    756     case URX_ONECHAR_I:
    757         if (val < 0x20) {
    758             printf("%#x", val);
    759         } else {
    760             printf("'%s'", CStr(UnicodeString(val))());
    761         }
    762         break;
    763 
    764     case URX_STRING:
    765     case URX_STRING_I:
    766         {
    767             int32_t lengthOp       = fCompiledPat->elementAti(index+1);
    768             U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
    769             int32_t length = URX_VAL(lengthOp);
    770             UnicodeString str(fLiteralText, val, length);
    771             printf("%s", CStr(str)());
    772         }
    773         break;
    774 
    775     case URX_SETREF:
    776     case URX_LOOP_SR_I:
    777         {
    778             UnicodeString s;
    779             UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
    780             set->toPattern(s, TRUE);
    781             printf("%s", CStr(s)());
    782         }
    783         break;
    784 
    785     case URX_STATIC_SETREF:
    786     case URX_STAT_SETREF_N:
    787         {
    788             UnicodeString s;
    789             if (val & URX_NEG_SET) {
    790                 printf("NOT ");
    791                 val &= ~URX_NEG_SET;
    792             }
    793             UnicodeSet *set = fStaticSets[val];
    794             set->toPattern(s, TRUE);
    795             printf("%s", CStr(s)());
    796         }
    797         break;
    798 
    799 
    800     default:
    801         printf("??????");
    802         break;
    803     }
    804     printf("\n");
    805 #endif
    806 }
    807 
    808 
    809 void RegexPattern::dumpPattern() const {
    810 #if defined(REGEX_DEBUG)
    811     int      index;
    812 
    813     UnicodeString patStr;
    814     for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) {
    815         patStr.append(c);
    816     }
    817     printf("Original Pattern:  \"%s\"\n", CStr(patStr)());
    818     printf("   Min Match Length:  %d\n", fMinMatchLen);
    819     printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));
    820     if (fStartType == START_STRING) {
    821         UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
    822         printf("   Initial match string: \"%s\"\n", CStr(initialString)());
    823     } else if (fStartType == START_SET) {
    824         UnicodeString s;
    825         fInitialChars->toPattern(s, TRUE);
    826         printf("    Match First Chars: %s\n", CStr(s)());
    827 
    828     } else if (fStartType == START_CHAR) {
    829         printf("    First char of Match: ");
    830         if (fInitialChar > 0x20) {
    831                 printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
    832             } else {
    833                 printf("%#x\n", fInitialChar);
    834             }
    835     }
    836 
    837     printf("Named Capture Groups:\n");
    838     if (uhash_count(fNamedCaptureMap) == 0) {
    839         printf("   None\n");
    840     } else {
    841         int32_t pos = UHASH_FIRST;
    842         const UHashElement *el = NULL;
    843         while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
    844             const UnicodeString *name = (const UnicodeString *)el->key.pointer;
    845             int32_t number = el->value.integer;
    846             printf("   %d\t%s\n", number, CStr(*name)());
    847         }
    848     }
    849 
    850     printf("\nIndex   Binary     Type             Operand\n" \
    851            "-------------------------------------------\n");
    852     for (index = 0; index<fCompiledPat->size(); index++) {
    853         dumpOp(index);
    854     }
    855     printf("\n\n");
    856 #endif
    857 }
    858 
    859 
    860 
    861 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
    862 
    863 U_NAMESPACE_END
    864 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
    865