Home | History | Annotate | Download | only in i18n
      1 //
      2 //  file:  repattrn.cpp
      3 //
      4 /*
      5 ***************************************************************************
      6 *   Copyright (C) 2002-2015 International Business Machines Corporation   *
      7 *   and others. All rights reserved.                                      *
      8 ***************************************************************************
      9 */
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     14 
     15 #include "unicode/regex.h"
     16 #include "unicode/uclean.h"
     17 #include "uassert.h"
     18 #include "uhash.h"
     19 #include "uvector.h"
     20 #include "uvectr32.h"
     21 #include "uvectr64.h"
     22 #include "regexcmp.h"
     23 #include "regeximp.h"
     24 #include "regexst.h"
     25 
     26 U_NAMESPACE_BEGIN
     27 
     28 //--------------------------------------------------------------------------
     29 //
     30 //    RegexPattern    Default Constructor
     31 //
     32 //--------------------------------------------------------------------------
     33 RegexPattern::RegexPattern() {
     34     // Init all of this instances data.
     35     init();
     36 }
     37 
     38 
     39 //--------------------------------------------------------------------------
     40 //
     41 //   Copy Constructor        Note:  This is a rather inefficient implementation,
     42 //                                  but it probably doesn't matter.
     43 //
     44 //--------------------------------------------------------------------------
     45 RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
     46     init();
     47     *this = other;
     48 }
     49 
     50 
     51 
     52 //--------------------------------------------------------------------------
     53 //
     54 //    Assignment Operator
     55 //
     56 //--------------------------------------------------------------------------
     57 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
     58     if (this == &other) {
     59         // Source and destination are the same.  Don't do anything.
     60         return *this;
     61     }
     62 
     63     // Clean out any previous contents of object being assigned to.
     64     zap();
     65 
     66     // Give target object a default initialization
     67     init();
     68 
     69     // Copy simple fields
     70     fDeferredStatus   = other.fDeferredStatus;
     71 
     72     if (U_FAILURE(fDeferredStatus)) {
     73         return *this;
     74     }
     75 
     76     if (other.fPatternString == NULL) {
     77         fPatternString = NULL;
     78         fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
     79     } else {
     80         fPatternString = new UnicodeString(*(other.fPatternString));
     81         if (fPatternString == NULL) {
     82             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
     83         } else {
     84             fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
     85         }
     86     }
     87     if (U_FAILURE(fDeferredStatus)) {
     88         return *this;
     89     }
     90 
     91     fFlags            = other.fFlags;
     92     fLiteralText      = other.fLiteralText;
     93     fMinMatchLen      = other.fMinMatchLen;
     94     fFrameSize        = other.fFrameSize;
     95     fDataSize         = other.fDataSize;
     96     fStaticSets       = other.fStaticSets;
     97     fStaticSets8      = other.fStaticSets8;
     98 
     99     fStartType        = other.fStartType;
    100     fInitialStringIdx = other.fInitialStringIdx;
    101     fInitialStringLen = other.fInitialStringLen;
    102     *fInitialChars    = *other.fInitialChars;
    103     fInitialChar      = other.fInitialChar;
    104     *fInitialChars8   = *other.fInitialChars8;
    105     fNeedsAltInput    = other.fNeedsAltInput;
    106 
    107     //  Copy the pattern.  It's just values, nothing deep to copy.
    108     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    109     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
    110 
    111     //  Copy the Unicode Sets.
    112     //    Could be made more efficient if the sets were reference counted and shared,
    113     //    but I doubt that pattern copying will be particularly common.
    114     //    Note:  init() already added an empty element zero to fSets
    115     int32_t i;
    116     int32_t  numSets = other.fSets->size();
    117     fSets8 = new Regex8BitSet[numSets];
    118     if (fSets8 == NULL) {
    119     	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    120     	return *this;
    121     }
    122     for (i=1; i<numSets; i++) {
    123         if (U_FAILURE(fDeferredStatus)) {
    124             return *this;
    125         }
    126         UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
    127         UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
    128         if (newSet == NULL) {
    129             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    130             break;
    131         }
    132         fSets->addElement(newSet, fDeferredStatus);
    133         fSets8[i] = other.fSets8[i];
    134     }
    135 
    136     // Copy the named capture group hash map.
    137     int32_t hashPos = UHASH_FIRST;
    138     while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
    139         if (U_FAILURE(fDeferredStatus)) {
    140             break;
    141         }
    142         const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
    143         UnicodeString *key = new UnicodeString(*name);
    144         int32_t val = hashEl->value.integer;
    145         if (key == NULL) {
    146             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    147         } else {
    148             uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
    149         }
    150     }
    151     return *this;
    152 }
    153 
    154 
    155 //--------------------------------------------------------------------------
    156 //
    157 //    init        Shared initialization for use by constructors.
    158 //                Bring an uninitialized RegexPattern up to a default state.
    159 //
    160 //--------------------------------------------------------------------------
    161 void RegexPattern::init() {
    162     fFlags            = 0;
    163     fCompiledPat      = 0;
    164     fLiteralText.remove();
    165     fSets             = NULL;
    166     fSets8            = NULL;
    167     fDeferredStatus   = U_ZERO_ERROR;
    168     fMinMatchLen      = 0;
    169     fFrameSize        = 0;
    170     fDataSize         = 0;
    171     fGroupMap         = NULL;
    172     fStaticSets       = NULL;
    173     fStaticSets8      = NULL;
    174     fStartType        = START_NO_INFO;
    175     fInitialStringIdx = 0;
    176     fInitialStringLen = 0;
    177     fInitialChars     = NULL;
    178     fInitialChar      = 0;
    179     fInitialChars8    = NULL;
    180     fNeedsAltInput    = FALSE;
    181     fNamedCaptureMap  = NULL;
    182 
    183     fPattern          = NULL; // will be set later
    184     fPatternString    = NULL; // may be set later
    185     fCompiledPat      = new UVector64(fDeferredStatus);
    186     fGroupMap         = new UVector32(fDeferredStatus);
    187     fSets             = new UVector(fDeferredStatus);
    188     fInitialChars     = new UnicodeSet;
    189     fInitialChars8    = new Regex8BitSet;
    190     fNamedCaptureMap  = uhash_open(uhash_hashUnicodeString,     // Key hash function
    191                                    uhash_compareUnicodeString,  // Key comparator function
    192                                    uhash_compareLong,           // Value comparator function
    193                                    &fDeferredStatus);
    194     if (U_FAILURE(fDeferredStatus)) {
    195         return;
    196     }
    197     if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
    198             fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) {
    199         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    200         return;
    201     }
    202 
    203     // Slot zero of the vector of sets is reserved.  Fill it here.
    204     fSets->addElement((int32_t)0, fDeferredStatus);
    205 
    206     // fNamedCaptureMap owns its key strings, type (UnicodeString *)
    207     uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
    208 }
    209 
    210 
    211 //--------------------------------------------------------------------------
    212 //
    213 //   zap            Delete everything owned by this RegexPattern.
    214 //
    215 //--------------------------------------------------------------------------
    216 void RegexPattern::zap() {
    217     delete fCompiledPat;
    218     fCompiledPat = NULL;
    219     int i;
    220     for (i=1; i<fSets->size(); i++) {
    221         UnicodeSet *s;
    222         s = (UnicodeSet *)fSets->elementAt(i);
    223         if (s != NULL) {
    224             delete s;
    225         }
    226     }
    227     delete fSets;
    228     fSets = NULL;
    229     delete[] fSets8;
    230     fSets8 = NULL;
    231     delete fGroupMap;
    232     fGroupMap = NULL;
    233     delete fInitialChars;
    234     fInitialChars = NULL;
    235     delete fInitialChars8;
    236     fInitialChars8 = NULL;
    237     if (fPattern != NULL) {
    238         utext_close(fPattern);
    239         fPattern = NULL;
    240     }
    241     if (fPatternString != NULL) {
    242         delete fPatternString;
    243         fPatternString = NULL;
    244     }
    245     uhash_close(fNamedCaptureMap);
    246     fNamedCaptureMap = NULL;
    247 }
    248 
    249 
    250 //--------------------------------------------------------------------------
    251 //
    252 //   Destructor
    253 //
    254 //--------------------------------------------------------------------------
    255 RegexPattern::~RegexPattern() {
    256     zap();
    257 }
    258 
    259 
    260 //--------------------------------------------------------------------------
    261 //
    262 //   Clone
    263 //
    264 //--------------------------------------------------------------------------
    265 RegexPattern  *RegexPattern::clone() const {
    266     RegexPattern  *copy = new RegexPattern(*this);
    267     return copy;
    268 }
    269 
    270 
    271 //--------------------------------------------------------------------------
    272 //
    273 //   operator ==   (comparison)    Consider to patterns to be == if the
    274 //                                 pattern strings and the flags are the same.
    275 //                                 Note that pattern strings with the same
    276 //                                 characters can still be considered different.
    277 //
    278 //--------------------------------------------------------------------------
    279 UBool   RegexPattern::operator ==(const RegexPattern &other) const {
    280     if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
    281         if (this->fPatternString != NULL && other.fPatternString != NULL) {
    282             return *(this->fPatternString) == *(other.fPatternString);
    283         } else if (this->fPattern == NULL) {
    284             if (other.fPattern == NULL) {
    285                 return TRUE;
    286             }
    287         } else if (other.fPattern != NULL) {
    288             UTEXT_SETNATIVEINDEX(this->fPattern, 0);
    289             UTEXT_SETNATIVEINDEX(other.fPattern, 0);
    290             return utext_equals(this->fPattern, other.fPattern);
    291         }
    292     }
    293     return FALSE;
    294 }
    295 
    296 //---------------------------------------------------------------------
    297 //
    298 //   compile
    299 //
    300 //---------------------------------------------------------------------
    301 RegexPattern * U_EXPORT2
    302 RegexPattern::compile(const UnicodeString &regex,
    303                       uint32_t             flags,
    304                       UParseError          &pe,
    305                       UErrorCode           &status)
    306 {
    307     if (U_FAILURE(status)) {
    308         return NULL;
    309     }
    310 
    311     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
    312     UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
    313     UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
    314 
    315     if ((flags & ~allFlags) != 0) {
    316         status = U_REGEX_INVALID_FLAG;
    317         return NULL;
    318     }
    319 
    320     if ((flags & UREGEX_CANON_EQ) != 0) {
    321         status = U_REGEX_UNIMPLEMENTED;
    322         return NULL;
    323     }
    324 
    325     RegexPattern *This = new RegexPattern;
    326     if (This == NULL) {
    327         status = U_MEMORY_ALLOCATION_ERROR;
    328         return NULL;
    329     }
    330     if (U_FAILURE(This->fDeferredStatus)) {
    331         status = This->fDeferredStatus;
    332         delete This;
    333         return NULL;
    334     }
    335     This->fFlags = flags;
    336 
    337     RegexCompile     compiler(This, status);
    338     compiler.compile(regex, pe, status);
    339 
    340     if (U_FAILURE(status)) {
    341         delete This;
    342         This = NULL;
    343     }
    344 
    345     return This;
    346 }
    347 
    348 
    349 //
    350 //   compile, UText mode
    351 //
    352 RegexPattern * U_EXPORT2
    353 RegexPattern::compile(UText                *regex,
    354                       uint32_t             flags,
    355                       UParseError          &pe,
    356                       UErrorCode           &status)
    357 {
    358     if (U_FAILURE(status)) {
    359         return NULL;
    360     }
    361 
    362     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
    363                               UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
    364                               UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
    365 
    366     if ((flags & ~allFlags) != 0) {
    367         status = U_REGEX_INVALID_FLAG;
    368         return NULL;
    369     }
    370 
    371     if ((flags & UREGEX_CANON_EQ) != 0) {
    372         status = U_REGEX_UNIMPLEMENTED;
    373         return NULL;
    374     }
    375 
    376     RegexPattern *This = new RegexPattern;
    377     if (This == NULL) {
    378         status = U_MEMORY_ALLOCATION_ERROR;
    379         return NULL;
    380     }
    381     if (U_FAILURE(This->fDeferredStatus)) {
    382         status = This->fDeferredStatus;
    383         delete This;
    384         return NULL;
    385     }
    386     This->fFlags = flags;
    387 
    388     RegexCompile     compiler(This, status);
    389     compiler.compile(regex, pe, status);
    390 
    391     if (U_FAILURE(status)) {
    392         delete This;
    393         This = NULL;
    394     }
    395 
    396     return This;
    397 }
    398 
    399 //
    400 //   compile with default flags.
    401 //
    402 RegexPattern * U_EXPORT2
    403 RegexPattern::compile(const UnicodeString &regex,
    404                       UParseError         &pe,
    405                       UErrorCode          &err)
    406 {
    407     return compile(regex, 0, pe, err);
    408 }
    409 
    410 
    411 //
    412 //   compile with default flags, UText mode
    413 //
    414 RegexPattern * U_EXPORT2
    415 RegexPattern::compile(UText               *regex,
    416                       UParseError         &pe,
    417                       UErrorCode          &err)
    418 {
    419     return compile(regex, 0, pe, err);
    420 }
    421 
    422 
    423 //
    424 //   compile with no UParseErr parameter.
    425 //
    426 RegexPattern * U_EXPORT2
    427 RegexPattern::compile(const UnicodeString &regex,
    428                       uint32_t             flags,
    429                       UErrorCode          &err)
    430 {
    431     UParseError pe;
    432     return compile(regex, flags, pe, err);
    433 }
    434 
    435 
    436 //
    437 //   compile with no UParseErr parameter, UText mode
    438 //
    439 RegexPattern * U_EXPORT2
    440 RegexPattern::compile(UText                *regex,
    441                       uint32_t             flags,
    442                       UErrorCode           &err)
    443 {
    444     UParseError pe;
    445     return compile(regex, flags, pe, err);
    446 }
    447 
    448 
    449 //---------------------------------------------------------------------
    450 //
    451 //   flags
    452 //
    453 //---------------------------------------------------------------------
    454 uint32_t RegexPattern::flags() const {
    455     return fFlags;
    456 }
    457 
    458 
    459 //---------------------------------------------------------------------
    460 //
    461 //   matcher(UnicodeString, err)
    462 //
    463 //---------------------------------------------------------------------
    464 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
    465                                     UErrorCode          &status)  const {
    466     RegexMatcher    *retMatcher = matcher(status);
    467     if (retMatcher != NULL) {
    468         retMatcher->fDeferredStatus = status;
    469         retMatcher->reset(input);
    470     }
    471     return retMatcher;
    472 }
    473 
    474 
    475 //---------------------------------------------------------------------
    476 //
    477 //   matcher(status)
    478 //
    479 //---------------------------------------------------------------------
    480 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
    481     RegexMatcher    *retMatcher = NULL;
    482 
    483     if (U_FAILURE(status)) {
    484         return NULL;
    485     }
    486     if (U_FAILURE(fDeferredStatus)) {
    487         status = fDeferredStatus;
    488         return NULL;
    489     }
    490 
    491     retMatcher = new RegexMatcher(this);
    492     if (retMatcher == NULL) {
    493         status = U_MEMORY_ALLOCATION_ERROR;
    494         return NULL;
    495     }
    496     return retMatcher;
    497 }
    498 
    499 
    500 
    501 //---------------------------------------------------------------------
    502 //
    503 //   matches        Convenience function to test for a match, starting
    504 //                  with a pattern string and a data string.
    505 //
    506 //---------------------------------------------------------------------
    507 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
    508               const UnicodeString   &input,
    509                     UParseError     &pe,
    510                     UErrorCode      &status) {
    511 
    512     if (U_FAILURE(status)) {return FALSE;}
    513 
    514     UBool         retVal;
    515     RegexPattern *pat     = NULL;
    516     RegexMatcher *matcher = NULL;
    517 
    518     pat     = RegexPattern::compile(regex, 0, pe, status);
    519     matcher = pat->matcher(input, status);
    520     retVal  = matcher->matches(status);
    521 
    522     delete matcher;
    523     delete pat;
    524     return retVal;
    525 }
    526 
    527 
    528 //
    529 //   matches, UText mode
    530 //
    531 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
    532                     UText           *input,
    533                     UParseError     &pe,
    534                     UErrorCode      &status) {
    535 
    536     if (U_FAILURE(status)) {return FALSE;}
    537 
    538     UBool         retVal  = FALSE;
    539     RegexPattern *pat     = NULL;
    540     RegexMatcher *matcher = NULL;
    541 
    542     pat     = RegexPattern::compile(regex, 0, pe, status);
    543     matcher = pat->matcher(status);
    544     if (U_SUCCESS(status)) {
    545         matcher->reset(input);
    546         retVal  = matcher->matches(status);
    547     }
    548 
    549     delete matcher;
    550     delete pat;
    551     return retVal;
    552 }
    553 
    554 
    555 
    556 
    557 
    558 //---------------------------------------------------------------------
    559 //
    560 //   pattern
    561 //
    562 //---------------------------------------------------------------------
    563 UnicodeString RegexPattern::pattern() const {
    564     if (fPatternString != NULL) {
    565         return *fPatternString;
    566     } else if (fPattern == NULL) {
    567         return UnicodeString();
    568     } else {
    569         UErrorCode status = U_ZERO_ERROR;
    570         int64_t nativeLen = utext_nativeLength(fPattern);
    571         int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
    572         UnicodeString result;
    573 
    574         status = U_ZERO_ERROR;
    575         UChar *resultChars = result.getBuffer(len16);
    576         utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
    577         result.releaseBuffer(len16);
    578 
    579         return result;
    580     }
    581 }
    582 
    583 
    584 
    585 
    586 //---------------------------------------------------------------------
    587 //
    588 //   patternText
    589 //
    590 //---------------------------------------------------------------------
    591 UText *RegexPattern::patternText(UErrorCode      &status) const {
    592     if (U_FAILURE(status)) {return NULL;}
    593     status = U_ZERO_ERROR;
    594 
    595     if (fPattern != NULL) {
    596         return fPattern;
    597     } else {
    598         RegexStaticSets::initGlobals(&status);
    599         return RegexStaticSets::gStaticSets->fEmptyText;
    600     }
    601 }
    602 
    603 
    604 //--------------------------------------------------------------------------------
    605 //
    606 //  groupNumberFromName()
    607 //
    608 //--------------------------------------------------------------------------------
    609 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
    610     if (U_FAILURE(status)) {
    611         return 0;
    612     }
    613 
    614     // No need to explicitly check for syntactically valid names.
    615     // Invalid ones will never be in the map, and the lookup will fail.
    616 
    617     int32_t number = uhash_geti(fNamedCaptureMap, &groupName);
    618     if (number == 0) {
    619         status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
    620     }
    621     return number;
    622 }
    623 
    624 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
    625     if (U_FAILURE(status)) {
    626         return 0;
    627     }
    628     UnicodeString name(groupName, nameLength, US_INV);
    629     return groupNumberFromName(name, status);
    630 }
    631 
    632 
    633 //---------------------------------------------------------------------
    634 //
    635 //   split
    636 //
    637 //---------------------------------------------------------------------
    638 int32_t  RegexPattern::split(const UnicodeString &input,
    639         UnicodeString    dest[],
    640         int32_t          destCapacity,
    641         UErrorCode      &status) const
    642 {
    643     if (U_FAILURE(status)) {
    644         return 0;
    645     };
    646 
    647     RegexMatcher  m(this);
    648     int32_t r = 0;
    649     // Check m's status to make sure all is ok.
    650     if (U_SUCCESS(m.fDeferredStatus)) {
    651     	r = m.split(input, dest, destCapacity, status);
    652     }
    653     return r;
    654 }
    655 
    656 //
    657 //   split, UText mode
    658 //
    659 int32_t  RegexPattern::split(UText *input,
    660         UText           *dest[],
    661         int32_t          destCapacity,
    662         UErrorCode      &status) const
    663 {
    664     if (U_FAILURE(status)) {
    665         return 0;
    666     };
    667 
    668     RegexMatcher  m(this);
    669     int32_t r = 0;
    670     // Check m's status to make sure all is ok.
    671     if (U_SUCCESS(m.fDeferredStatus)) {
    672     	r = m.split(input, dest, destCapacity, status);
    673     }
    674     return r;
    675 }
    676 
    677 
    678 
    679 //---------------------------------------------------------------------
    680 //
    681 //   dump    Output the compiled form of the pattern.
    682 //           Debugging function only.
    683 //
    684 //---------------------------------------------------------------------
    685 void   RegexPattern::dumpOp(int32_t index) const {
    686     (void)index;  // Suppress warnings in non-debug build.
    687 #if defined(REGEX_DEBUG)
    688     static const char * const opNames[] = {URX_OPCODE_NAMES};
    689     int32_t op          = fCompiledPat->elementAti(index);
    690     int32_t val         = URX_VAL(op);
    691     int32_t type        = URX_TYPE(op);
    692     int32_t pinnedType  = type;
    693     if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
    694         pinnedType = 0;
    695     }
    696 
    697     printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
    698     switch (type) {
    699     case URX_NOP:
    700     case URX_DOTANY:
    701     case URX_DOTANY_ALL:
    702     case URX_FAIL:
    703     case URX_CARET:
    704     case URX_DOLLAR:
    705     case URX_BACKSLASH_G:
    706     case URX_BACKSLASH_X:
    707     case URX_END:
    708     case URX_DOLLAR_M:
    709     case URX_CARET_M:
    710         // Types with no operand field of interest.
    711         break;
    712 
    713     case URX_RESERVED_OP:
    714     case URX_START_CAPTURE:
    715     case URX_END_CAPTURE:
    716     case URX_STATE_SAVE:
    717     case URX_JMP:
    718     case URX_JMP_SAV:
    719     case URX_JMP_SAV_X:
    720     case URX_BACKSLASH_B:
    721     case URX_BACKSLASH_BU:
    722     case URX_BACKSLASH_D:
    723     case URX_BACKSLASH_Z:
    724     case URX_STRING_LEN:
    725     case URX_CTR_INIT:
    726     case URX_CTR_INIT_NG:
    727     case URX_CTR_LOOP:
    728     case URX_CTR_LOOP_NG:
    729     case URX_RELOC_OPRND:
    730     case URX_STO_SP:
    731     case URX_LD_SP:
    732     case URX_BACKREF:
    733     case URX_STO_INP_LOC:
    734     case URX_JMPX:
    735     case URX_LA_START:
    736     case URX_LA_END:
    737     case URX_BACKREF_I:
    738     case URX_LB_START:
    739     case URX_LB_CONT:
    740     case URX_LB_END:
    741     case URX_LBN_CONT:
    742     case URX_LBN_END:
    743     case URX_LOOP_C:
    744     case URX_LOOP_DOT_I:
    745     case URX_BACKSLASH_H:
    746     case URX_BACKSLASH_R:
    747     case URX_BACKSLASH_V:
    748         // types with an integer operand field.
    749         printf("%d", val);
    750         break;
    751 
    752     case URX_ONECHAR:
    753     case URX_ONECHAR_I:
    754         printf("%c", val<256?val:'?');
    755         break;
    756 
    757     case URX_STRING:
    758     case URX_STRING_I:
    759         {
    760             int32_t lengthOp       = fCompiledPat->elementAti(index+1);
    761             U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
    762             int32_t length = URX_VAL(lengthOp);
    763             int32_t i;
    764             for (i=val; i<val+length; i++) {
    765                 UChar c = fLiteralText[i];
    766                 if (c < 32 || c >= 256) {c = '.';}
    767                 printf("%c", c);
    768             }
    769         }
    770         break;
    771 
    772     case URX_SETREF:
    773     case URX_LOOP_SR_I:
    774         {
    775             UnicodeString s;
    776             UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
    777             set->toPattern(s, TRUE);
    778             for (int32_t i=0; i<s.length(); i++) {
    779                 printf("%c", s.charAt(i));
    780             }
    781         }
    782         break;
    783 
    784     case URX_STATIC_SETREF:
    785     case URX_STAT_SETREF_N:
    786         {
    787             UnicodeString s;
    788             if (val & URX_NEG_SET) {
    789                 printf("NOT ");
    790                 val &= ~URX_NEG_SET;
    791             }
    792             UnicodeSet *set = fStaticSets[val];
    793             set->toPattern(s, TRUE);
    794             for (int32_t i=0; i<s.length(); i++) {
    795                 printf("%c", s.charAt(i));
    796             }
    797         }
    798         break;
    799 
    800 
    801     default:
    802         printf("??????");
    803         break;
    804     }
    805     printf("\n");
    806 #endif
    807 }
    808 
    809 
    810 void RegexPattern::dumpPattern() const {
    811 #if defined(REGEX_DEBUG)
    812     // TODO: This function assumes an ASCII based charset.
    813     int      index;
    814     int      i;
    815 
    816     printf("Original Pattern:  ");
    817     UChar32 c = utext_next32From(fPattern, 0);
    818     while (c != U_SENTINEL) {
    819         if (c<32 || c>256) {
    820             c = '.';
    821         }
    822         printf("%c", c);
    823 
    824         c = UTEXT_NEXT32(fPattern);
    825     }
    826     printf("\n");
    827     printf("   Min Match Length:  %d\n", fMinMatchLen);
    828     printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));
    829     if (fStartType == START_STRING) {
    830         printf("    Initial match string: \"");
    831         for (i=fInitialStringIdx; i<fInitialStringIdx+fInitialStringLen; i++) {
    832             printf("%c", fLiteralText[i]);   // TODO:  non-printables, surrogates.
    833         }
    834         printf("\"\n");
    835 
    836     } else if (fStartType == START_SET) {
    837         int32_t numSetChars = fInitialChars->size();
    838         if (numSetChars > 20) {
    839             numSetChars = 20;
    840         }
    841         printf("     Match First Chars : ");
    842         for (i=0; i<numSetChars; i++) {
    843             UChar32 c = fInitialChars->charAt(i);
    844             if (0x20<c && c <0x7e) {
    845                 printf("%c ", c);
    846             } else {
    847                 printf("%#x ", c);
    848             }
    849         }
    850         if (numSetChars < fInitialChars->size()) {
    851             printf(" ...");
    852         }
    853         printf("\n");
    854 
    855     } else if (fStartType == START_CHAR) {
    856         printf("    First char of Match : ");
    857         if (0x20 < fInitialChar && fInitialChar<0x7e) {
    858                 printf("%c\n", fInitialChar);
    859             } else {
    860                 printf("%#x\n", fInitialChar);
    861             }
    862     }
    863 
    864     printf("Named Capture Groups:\n");
    865     if (uhash_count(fNamedCaptureMap) == 0) {
    866         printf("   None\n");
    867     } else {
    868         int32_t pos = UHASH_FIRST;
    869         const UHashElement *el = NULL;
    870         while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
    871             const UnicodeString *name = (const UnicodeString *)el->key.pointer;
    872             char s[100];
    873             name->extract(0, 99, s, sizeof(s), US_INV);  // capture group names are invariant.
    874             int32_t number = el->value.integer;
    875             printf("   %d\t%s\n", number, s);
    876         }
    877     }
    878 
    879     printf("\nIndex   Binary     Type             Operand\n" \
    880            "-------------------------------------------\n");
    881     for (index = 0; index<fCompiledPat->size(); index++) {
    882         dumpOp(index);
    883     }
    884     printf("\n\n");
    885 #endif
    886 }
    887 
    888 
    889 
    890 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
    891 
    892 U_NAMESPACE_END
    893 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
    894