Home | History | Annotate | Download | only in i18n
      1 //
      2 //  file:  repattrn.cpp
      3 //
      4 /*
      5 ***************************************************************************
      6 *   Copyright (C) 2002-2010 International Business Machines Corporation   *
      7 *   and others. All rights reserved.                                      *
      8 ***************************************************************************
      9 */
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     14 
     15 #include "unicode/regex.h"
     16 #include "unicode/uclean.h"
     17 #include "uassert.h"
     18 #include "uvector.h"
     19 #include "uvectr32.h"
     20 #include "uvectr64.h"
     21 #include "regexcmp.h"
     22 #include "regeximp.h"
     23 #include "regexst.h"
     24 
     25 U_NAMESPACE_BEGIN
     26 
     27 //--------------------------------------------------------------------------
     28 //
     29 //    RegexPattern    Default Constructor
     30 //
     31 //--------------------------------------------------------------------------
     32 RegexPattern::RegexPattern() {
     33     UErrorCode status = U_ZERO_ERROR;
     34     u_init(&status);
     35 
     36     // Init all of this instances data.
     37     init();
     38 }
     39 
     40 
     41 //--------------------------------------------------------------------------
     42 //
     43 //   Copy Constructor        Note:  This is a rather inefficient implementation,
     44 //                                  but it probably doesn't matter.
     45 //
     46 //--------------------------------------------------------------------------
     47 RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
     48     init();
     49     *this = other;
     50 }
     51 
     52 
     53 
     54 //--------------------------------------------------------------------------
     55 //
     56 //    Assignment Operator
     57 //
     58 //--------------------------------------------------------------------------
     59 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
     60     if (this == &other) {
     61         // Source and destination are the same.  Don't do anything.
     62         return *this;
     63     }
     64 
     65     // Clean out any previous contents of object being assigned to.
     66     zap();
     67 
     68     // Give target object a default initialization
     69     init();
     70 
     71     // Copy simple fields
     72     if ( other.fPatternString == NULL ) {
     73         fPatternString = NULL;
     74         fPattern      = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
     75     } else {
     76         fPatternString = new UnicodeString(*(other.fPatternString));
     77         UErrorCode status = U_ZERO_ERROR;
     78         fPattern      = utext_openConstUnicodeString(NULL, fPatternString, &status);
     79         if (U_FAILURE(status)) {
     80             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
     81             return *this;
     82         }
     83     }
     84     fFlags            = other.fFlags;
     85     fLiteralText      = other.fLiteralText;
     86     fDeferredStatus   = other.fDeferredStatus;
     87     fMinMatchLen      = other.fMinMatchLen;
     88     fFrameSize        = other.fFrameSize;
     89     fDataSize         = other.fDataSize;
     90     fMaxCaptureDigits = other.fMaxCaptureDigits;
     91     fStaticSets       = other.fStaticSets;
     92     fStaticSets8      = other.fStaticSets8;
     93 
     94     fStartType        = other.fStartType;
     95     fInitialStringIdx = other.fInitialStringIdx;
     96     fInitialStringLen = other.fInitialStringLen;
     97     *fInitialChars    = *other.fInitialChars;
     98     fInitialChar      = other.fInitialChar;
     99     *fInitialChars8   = *other.fInitialChars8;
    100     fNeedsAltInput    = other.fNeedsAltInput;
    101 
    102     //  Copy the pattern.  It's just values, nothing deep to copy.
    103     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    104     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
    105 
    106     //  Copy the Unicode Sets.
    107     //    Could be made more efficient if the sets were reference counted and shared,
    108     //    but I doubt that pattern copying will be particularly common.
    109     //    Note:  init() already added an empty element zero to fSets
    110     int32_t i;
    111     int32_t  numSets = other.fSets->size();
    112     fSets8 = new Regex8BitSet[numSets];
    113     if (fSets8 == NULL) {
    114     	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    115     	return *this;
    116     }
    117     for (i=1; i<numSets; i++) {
    118         if (U_FAILURE(fDeferredStatus)) {
    119             return *this;
    120         }
    121         UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
    122         UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
    123         if (newSet == NULL) {
    124             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    125             break;
    126         }
    127         fSets->addElement(newSet, fDeferredStatus);
    128         fSets8[i] = other.fSets8[i];
    129     }
    130 
    131     return *this;
    132 }
    133 
    134 
    135 //--------------------------------------------------------------------------
    136 //
    137 //    init        Shared initialization for use by constructors.
    138 //                Bring an uninitialized RegexPattern up to a default state.
    139 //
    140 //--------------------------------------------------------------------------
    141 void RegexPattern::init() {
    142     fFlags            = 0;
    143     fCompiledPat      = 0;
    144     fLiteralText.remove();
    145     fSets             = NULL;
    146     fSets8            = NULL;
    147     fDeferredStatus   = U_ZERO_ERROR;
    148     fMinMatchLen      = 0;
    149     fFrameSize        = 0;
    150     fDataSize         = 0;
    151     fGroupMap         = NULL;
    152     fMaxCaptureDigits = 1;
    153     fStaticSets       = NULL;
    154     fStaticSets8      = NULL;
    155     fStartType        = START_NO_INFO;
    156     fInitialStringIdx = 0;
    157     fInitialStringLen = 0;
    158     fInitialChars     = NULL;
    159     fInitialChar      = 0;
    160     fInitialChars8    = NULL;
    161     fNeedsAltInput    = FALSE;
    162 
    163     fPattern          = NULL; // will be set later
    164     fPatternString    = NULL; // may be set later
    165     fCompiledPat      = new UVector64(fDeferredStatus);
    166     fGroupMap         = new UVector32(fDeferredStatus);
    167     fSets             = new UVector(fDeferredStatus);
    168     fInitialChars     = new UnicodeSet;
    169     fInitialChars8    = new Regex8BitSet;
    170     if (U_FAILURE(fDeferredStatus)) {
    171         return;
    172     }
    173     if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
    174         fInitialChars == NULL || fInitialChars8 == NULL) {
    175         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    176         return;
    177     }
    178 
    179     // Slot zero of the vector of sets is reserved.  Fill it here.
    180     fSets->addElement((int32_t)0, fDeferredStatus);
    181 }
    182 
    183 
    184 //--------------------------------------------------------------------------
    185 //
    186 //   zap            Delete everything owned by this RegexPattern.
    187 //
    188 //--------------------------------------------------------------------------
    189 void RegexPattern::zap() {
    190     delete fCompiledPat;
    191     fCompiledPat = NULL;
    192     int i;
    193     for (i=1; i<fSets->size(); i++) {
    194         UnicodeSet *s;
    195         s = (UnicodeSet *)fSets->elementAt(i);
    196         if (s != NULL) {
    197             delete s;
    198         }
    199     }
    200     delete fSets;
    201     fSets = NULL;
    202     delete[] fSets8;
    203     fSets8 = NULL;
    204     delete fGroupMap;
    205     fGroupMap = NULL;
    206     delete fInitialChars;
    207     fInitialChars = NULL;
    208     delete fInitialChars8;
    209     fInitialChars8 = NULL;
    210     if (fPattern != NULL) {
    211         utext_close(fPattern);
    212         fPattern = NULL;
    213     }
    214     if (fPatternString != NULL) {
    215         delete fPatternString;
    216         fPatternString = NULL;
    217     }
    218 }
    219 
    220 
    221 //--------------------------------------------------------------------------
    222 //
    223 //   Destructor
    224 //
    225 //--------------------------------------------------------------------------
    226 RegexPattern::~RegexPattern() {
    227     zap();
    228 }
    229 
    230 
    231 //--------------------------------------------------------------------------
    232 //
    233 //   Clone
    234 //
    235 //--------------------------------------------------------------------------
    236 RegexPattern  *RegexPattern::clone() const {
    237     RegexPattern  *copy = new RegexPattern(*this);
    238     return copy;
    239 }
    240 
    241 
    242 //--------------------------------------------------------------------------
    243 //
    244 //   operator ==   (comparison)    Consider to patterns to be == if the
    245 //                                 pattern strings and the flags are the same.
    246 //                                 Note that pattern strings with the same
    247 //                                 characters can still be considered different.
    248 //
    249 //--------------------------------------------------------------------------
    250 UBool   RegexPattern::operator ==(const RegexPattern &other) const {
    251     if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
    252         if (this->fPatternString != NULL && other.fPatternString != NULL) {
    253             return *(this->fPatternString) == *(other.fPatternString);
    254         } else if (this->fPattern == NULL) {
    255             if (other.fPattern == NULL) {
    256                 return TRUE;
    257             }
    258         } else if (other.fPattern != NULL) {
    259             UTEXT_SETNATIVEINDEX(this->fPattern, 0);
    260             UTEXT_SETNATIVEINDEX(other.fPattern, 0);
    261             return utext_equals(this->fPattern, other.fPattern);
    262         }
    263     }
    264     return FALSE;
    265 }
    266 
    267 //---------------------------------------------------------------------
    268 //
    269 //   compile
    270 //
    271 //---------------------------------------------------------------------
    272 RegexPattern * U_EXPORT2
    273 RegexPattern::compile(const UnicodeString &regex,
    274                       uint32_t             flags,
    275                       UParseError          &pe,
    276                       UErrorCode           &status)
    277 {
    278     if (U_FAILURE(status)) {
    279         return NULL;
    280     }
    281 
    282     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
    283     UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
    284     UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
    285 
    286     if ((flags & ~allFlags) != 0) {
    287         status = U_REGEX_INVALID_FLAG;
    288         return NULL;
    289     }
    290 
    291     if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
    292         status = U_REGEX_UNIMPLEMENTED;
    293         return NULL;
    294     }
    295 
    296     RegexPattern *This = new RegexPattern;
    297     if (This == NULL) {
    298         status = U_MEMORY_ALLOCATION_ERROR;
    299         return NULL;
    300     }
    301     if (U_FAILURE(This->fDeferredStatus)) {
    302         status = This->fDeferredStatus;
    303         delete This;
    304         return NULL;
    305     }
    306     This->fFlags = flags;
    307 
    308     RegexCompile     compiler(This, status);
    309     compiler.compile(regex, pe, status);
    310 
    311     if (U_FAILURE(status)) {
    312         delete This;
    313         This = NULL;
    314     }
    315 
    316     return This;
    317 }
    318 
    319 
    320 //
    321 //   compile, UText mode
    322 //
    323 RegexPattern * U_EXPORT2
    324 RegexPattern::compile(UText                *regex,
    325                       uint32_t             flags,
    326                       UParseError          &pe,
    327                       UErrorCode           &status)
    328 {
    329     if (U_FAILURE(status)) {
    330         return NULL;
    331     }
    332 
    333     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
    334                               UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
    335                               UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
    336 
    337     if ((flags & ~allFlags) != 0) {
    338         status = U_REGEX_INVALID_FLAG;
    339         return NULL;
    340     }
    341 
    342     if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
    343         status = U_REGEX_UNIMPLEMENTED;
    344         return NULL;
    345     }
    346 
    347     RegexPattern *This = new RegexPattern;
    348     if (This == NULL) {
    349         status = U_MEMORY_ALLOCATION_ERROR;
    350         return NULL;
    351     }
    352     if (U_FAILURE(This->fDeferredStatus)) {
    353         status = This->fDeferredStatus;
    354         delete This;
    355         return NULL;
    356     }
    357     This->fFlags = flags;
    358 
    359     RegexCompile     compiler(This, status);
    360     compiler.compile(regex, pe, status);
    361 
    362     if (U_FAILURE(status)) {
    363         delete This;
    364         This = NULL;
    365     }
    366 
    367     return This;
    368 }
    369 
    370 //
    371 //   compile with default flags.
    372 //
    373 RegexPattern * U_EXPORT2
    374 RegexPattern::compile(const UnicodeString &regex,
    375                       UParseError         &pe,
    376                       UErrorCode          &err)
    377 {
    378     return compile(regex, 0, pe, err);
    379 }
    380 
    381 
    382 //
    383 //   compile with default flags, UText mode
    384 //
    385 RegexPattern * U_EXPORT2
    386 RegexPattern::compile(UText               *regex,
    387                       UParseError         &pe,
    388                       UErrorCode          &err)
    389 {
    390     return compile(regex, 0, pe, err);
    391 }
    392 
    393 
    394 //
    395 //   compile with no UParseErr parameter.
    396 //
    397 RegexPattern * U_EXPORT2
    398 RegexPattern::compile(const UnicodeString &regex,
    399                       uint32_t             flags,
    400                       UErrorCode          &err)
    401 {
    402     UParseError pe;
    403     return compile(regex, flags, pe, err);
    404 }
    405 
    406 
    407 //
    408 //   compile with no UParseErr parameter, UText mode
    409 //
    410 RegexPattern * U_EXPORT2
    411 RegexPattern::compile(UText                *regex,
    412                       uint32_t             flags,
    413                       UErrorCode           &err)
    414 {
    415     UParseError pe;
    416     return compile(regex, flags, pe, err);
    417 }
    418 
    419 
    420 //---------------------------------------------------------------------
    421 //
    422 //   flags
    423 //
    424 //---------------------------------------------------------------------
    425 uint32_t RegexPattern::flags() const {
    426     return fFlags;
    427 }
    428 
    429 
    430 //---------------------------------------------------------------------
    431 //
    432 //   matcher(UnicodeString, err)
    433 //
    434 //---------------------------------------------------------------------
    435 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
    436                                     UErrorCode          &status)  const {
    437     RegexMatcher    *retMatcher = matcher(status);
    438     if (retMatcher != NULL) {
    439         retMatcher->fDeferredStatus = status;
    440         retMatcher->reset(input);
    441     }
    442     return retMatcher;
    443 }
    444 
    445 //
    446 //   matcher, UText mode
    447 //
    448 RegexMatcher *RegexPattern::matcher(UText               *input,
    449                                     PatternIsUTextFlag  /*flag*/,
    450                                     UErrorCode          &status)  const {
    451     RegexMatcher    *retMatcher = matcher(status);
    452     if (retMatcher != NULL) {
    453         retMatcher->fDeferredStatus = status;
    454         retMatcher->reset(input);
    455     }
    456     return retMatcher;
    457 }
    458 
    459 #if 0
    460 RegexMatcher *RegexPattern::matcher(const UChar * /*input*/,
    461                                     UErrorCode          &status)  const
    462 {
    463     /* This should never get called. The API with UnicodeString should be called instead. */
    464     if (U_SUCCESS(status)) {
    465         status = U_UNSUPPORTED_ERROR;
    466     }
    467     return NULL;
    468 }
    469 #endif
    470 
    471 //---------------------------------------------------------------------
    472 //
    473 //   matcher(status)
    474 //
    475 //---------------------------------------------------------------------
    476 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
    477     RegexMatcher    *retMatcher = NULL;
    478 
    479     if (U_FAILURE(status)) {
    480         return NULL;
    481     }
    482     if (U_FAILURE(fDeferredStatus)) {
    483         status = fDeferredStatus;
    484         return NULL;
    485     }
    486 
    487     retMatcher = new RegexMatcher(this);
    488     if (retMatcher == NULL) {
    489         status = U_MEMORY_ALLOCATION_ERROR;
    490         return NULL;
    491     }
    492     return retMatcher;
    493 }
    494 
    495 
    496 
    497 //---------------------------------------------------------------------
    498 //
    499 //   matches        Convenience function to test for a match, starting
    500 //                  with a pattern string and a data string.
    501 //
    502 //---------------------------------------------------------------------
    503 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
    504               const UnicodeString   &input,
    505                     UParseError     &pe,
    506                     UErrorCode      &status) {
    507 
    508     if (U_FAILURE(status)) {return FALSE;}
    509 
    510     UBool         retVal;
    511     RegexPattern *pat     = NULL;
    512     RegexMatcher *matcher = NULL;
    513 
    514     pat     = RegexPattern::compile(regex, 0, pe, status);
    515     matcher = pat->matcher(input, status);
    516     retVal  = matcher->matches(status);
    517 
    518     delete matcher;
    519     delete pat;
    520     return retVal;
    521 }
    522 
    523 
    524 //
    525 //   matches, UText mode
    526 //
    527 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
    528                     UText           *input,
    529                     UParseError     &pe,
    530                     UErrorCode      &status) {
    531 
    532     if (U_FAILURE(status)) {return FALSE;}
    533 
    534     UBool         retVal;
    535     RegexPattern *pat     = NULL;
    536     RegexMatcher *matcher = NULL;
    537 
    538     pat     = RegexPattern::compile(regex, 0, pe, status);
    539     matcher = pat->matcher(input, PATTERN_IS_UTEXT, status);
    540     retVal  = matcher->matches(status);
    541 
    542     delete matcher;
    543     delete pat;
    544     return retVal;
    545 }
    546 
    547 
    548 
    549 
    550 
    551 //---------------------------------------------------------------------
    552 //
    553 //   pattern
    554 //
    555 //---------------------------------------------------------------------
    556 UnicodeString RegexPattern::pattern() const {
    557     if (fPatternString != NULL) {
    558         return *fPatternString;
    559     } else if (fPattern == NULL) {
    560         return UnicodeString();
    561     } else {
    562         UErrorCode status = U_ZERO_ERROR;
    563         int64_t nativeLen = utext_nativeLength(fPattern);
    564         int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
    565         UnicodeString result;
    566 
    567         status = U_ZERO_ERROR;
    568         UChar *resultChars = result.getBuffer(len16);
    569         utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
    570         result.releaseBuffer(len16);
    571 
    572         return result;
    573     }
    574 }
    575 
    576 
    577 
    578 
    579 //---------------------------------------------------------------------
    580 //
    581 //   patternText
    582 //
    583 //---------------------------------------------------------------------
    584 UText *RegexPattern::patternText(UErrorCode      &status) const {
    585     if (U_FAILURE(status)) {return NULL;}
    586     status = U_ZERO_ERROR;
    587 
    588     if (fPattern != NULL) {
    589         return fPattern;
    590     } else {
    591         RegexStaticSets::initGlobals(&status);
    592         return RegexStaticSets::gStaticSets->fEmptyText;
    593     }
    594 }
    595 
    596 
    597 
    598 //---------------------------------------------------------------------
    599 //
    600 //   split
    601 //
    602 //---------------------------------------------------------------------
    603 int32_t  RegexPattern::split(const UnicodeString &input,
    604         UnicodeString    dest[],
    605         int32_t          destCapacity,
    606         UErrorCode      &status) const
    607 {
    608     if (U_FAILURE(status)) {
    609         return 0;
    610     };
    611 
    612     RegexMatcher  m(this);
    613     int32_t r = 0;
    614     // Check m's status to make sure all is ok.
    615     if (U_SUCCESS(m.fDeferredStatus)) {
    616     	r = m.split(input, dest, destCapacity, status);
    617     }
    618     return r;
    619 }
    620 
    621 //
    622 //   split, UText mode
    623 //
    624 int32_t  RegexPattern::split(UText *input,
    625         UText           *dest[],
    626         int32_t          destCapacity,
    627         UErrorCode      &status) const
    628 {
    629     if (U_FAILURE(status)) {
    630         return 0;
    631     };
    632 
    633     RegexMatcher  m(this);
    634     int32_t r = 0;
    635     // Check m's status to make sure all is ok.
    636     if (U_SUCCESS(m.fDeferredStatus)) {
    637     	r = m.split(input, dest, destCapacity, status);
    638     }
    639     return r;
    640 }
    641 
    642 
    643 
    644 //---------------------------------------------------------------------
    645 //
    646 //   dump    Output the compiled form of the pattern.
    647 //           Debugging function only.
    648 //
    649 //---------------------------------------------------------------------
    650 #if defined(REGEX_DEBUG)
    651 void   RegexPattern::dumpOp(int32_t index) const {
    652     static const char * const opNames[] = {URX_OPCODE_NAMES};
    653     int32_t op          = fCompiledPat->elementAti(index);
    654     int32_t val         = URX_VAL(op);
    655     int32_t type        = URX_TYPE(op);
    656     int32_t pinnedType  = type;
    657     if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
    658         pinnedType = 0;
    659     }
    660 
    661     REGEX_DUMP_DEBUG_PRINTF(("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]));
    662     switch (type) {
    663     case URX_NOP:
    664     case URX_DOTANY:
    665     case URX_DOTANY_ALL:
    666     case URX_FAIL:
    667     case URX_CARET:
    668     case URX_DOLLAR:
    669     case URX_BACKSLASH_G:
    670     case URX_BACKSLASH_X:
    671     case URX_END:
    672     case URX_DOLLAR_M:
    673     case URX_CARET_M:
    674         // Types with no operand field of interest.
    675         break;
    676 
    677     case URX_RESERVED_OP:
    678     case URX_START_CAPTURE:
    679     case URX_END_CAPTURE:
    680     case URX_STATE_SAVE:
    681     case URX_JMP:
    682     case URX_JMP_SAV:
    683     case URX_JMP_SAV_X:
    684     case URX_BACKSLASH_B:
    685     case URX_BACKSLASH_BU:
    686     case URX_BACKSLASH_D:
    687     case URX_BACKSLASH_Z:
    688     case URX_STRING_LEN:
    689     case URX_CTR_INIT:
    690     case URX_CTR_INIT_NG:
    691     case URX_CTR_LOOP:
    692     case URX_CTR_LOOP_NG:
    693     case URX_RELOC_OPRND:
    694     case URX_STO_SP:
    695     case URX_LD_SP:
    696     case URX_BACKREF:
    697     case URX_STO_INP_LOC:
    698     case URX_JMPX:
    699     case URX_LA_START:
    700     case URX_LA_END:
    701     case URX_BACKREF_I:
    702     case URX_LB_START:
    703     case URX_LB_CONT:
    704     case URX_LB_END:
    705     case URX_LBN_CONT:
    706     case URX_LBN_END:
    707     case URX_LOOP_C:
    708     case URX_LOOP_DOT_I:
    709         // types with an integer operand field.
    710         REGEX_DUMP_DEBUG_PRINTF(("%d", val));
    711         break;
    712 
    713     case URX_ONECHAR:
    714     case URX_ONECHAR_I:
    715         REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?'));
    716         break;
    717 
    718     case URX_STRING:
    719     case URX_STRING_I:
    720         {
    721             int32_t lengthOp       = fCompiledPat->elementAti(index+1);
    722             U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
    723             int32_t length = URX_VAL(lengthOp);
    724             int32_t i;
    725             for (i=val; i<val+length; i++) {
    726                 UChar c = fLiteralText[i];
    727                 if (c < 32 || c >= 256) {c = '.';}
    728                 REGEX_DUMP_DEBUG_PRINTF(("%c", c));
    729             }
    730         }
    731         break;
    732 
    733     case URX_SETREF:
    734     case URX_LOOP_SR_I:
    735         {
    736             UnicodeString s;
    737             UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
    738             set->toPattern(s, TRUE);
    739             for (int32_t i=0; i<s.length(); i++) {
    740                 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
    741             }
    742         }
    743         break;
    744 
    745     case URX_STATIC_SETREF:
    746     case URX_STAT_SETREF_N:
    747         {
    748             UnicodeString s;
    749             if (val & URX_NEG_SET) {
    750                 REGEX_DUMP_DEBUG_PRINTF(("NOT "));
    751                 val &= ~URX_NEG_SET;
    752             }
    753             UnicodeSet *set = fStaticSets[val];
    754             set->toPattern(s, TRUE);
    755             for (int32_t i=0; i<s.length(); i++) {
    756                 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
    757             }
    758         }
    759         break;
    760 
    761 
    762     default:
    763         REGEX_DUMP_DEBUG_PRINTF(("??????"));
    764         break;
    765     }
    766     REGEX_DUMP_DEBUG_PRINTF(("\n"));
    767 }
    768 #endif
    769 
    770 
    771 #if defined(REGEX_DEBUG)
    772 U_CAPI void  U_EXPORT2
    773 RegexPatternDump(const RegexPattern *This) {
    774     int      index;
    775     int      i;
    776 
    777     REGEX_DUMP_DEBUG_PRINTF(("Original Pattern:  "));
    778     UChar32 c = utext_next32From(This->fPattern, 0);
    779     while (c != U_SENTINEL) {
    780         if (c<32 || c>256) {
    781             c = '.';
    782         }
    783         REGEX_DUMP_DEBUG_PRINTF(("%c", c));
    784 
    785         c = UTEXT_NEXT32(This->fPattern);
    786     }
    787     REGEX_DUMP_DEBUG_PRINTF(("\n"));
    788     REGEX_DUMP_DEBUG_PRINTF(("   Min Match Length:  %d\n", This->fMinMatchLen));
    789     REGEX_DUMP_DEBUG_PRINTF(("   Match Start Type:  %s\n", START_OF_MATCH_STR(This->fStartType)));
    790     if (This->fStartType == START_STRING) {
    791         REGEX_DUMP_DEBUG_PRINTF(("    Initial match string: \""));
    792         for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
    793             REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i]));   // TODO:  non-printables, surrogates.
    794         }
    795         REGEX_DUMP_DEBUG_PRINTF(("\"\n"));
    796 
    797     } else if (This->fStartType == START_SET) {
    798         int32_t numSetChars = This->fInitialChars->size();
    799         if (numSetChars > 20) {
    800             numSetChars = 20;
    801         }
    802         REGEX_DUMP_DEBUG_PRINTF(("     Match First Chars : "));
    803         for (i=0; i<numSetChars; i++) {
    804             UChar32 c = This->fInitialChars->charAt(i);
    805             if (0x20<c && c <0x7e) {
    806                 REGEX_DUMP_DEBUG_PRINTF(("%c ", c));
    807             } else {
    808                 REGEX_DUMP_DEBUG_PRINTF(("%#x ", c));
    809             }
    810         }
    811         if (numSetChars < This->fInitialChars->size()) {
    812             REGEX_DUMP_DEBUG_PRINTF((" ..."));
    813         }
    814         REGEX_DUMP_DEBUG_PRINTF(("\n"));
    815 
    816     } else if (This->fStartType == START_CHAR) {
    817         REGEX_DUMP_DEBUG_PRINTF(("    First char of Match : "));
    818         if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
    819                 REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
    820             } else {
    821                 REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
    822             }
    823     }
    824 
    825     REGEX_DUMP_DEBUG_PRINTF(("\nIndex   Binary     Type             Operand\n" \
    826            "-------------------------------------------\n"));
    827     for (index = 0; index<This->fCompiledPat->size(); index++) {
    828         This->dumpOp(index);
    829     }
    830     REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
    831 }
    832 #endif
    833 
    834 
    835 
    836 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
    837 
    838 U_NAMESPACE_END
    839 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
    840