Home | History | Annotate | Download | only in i18n
      1 //
      2 //  file:  repattrn.cpp
      3 //
      4 /*
      5 ***************************************************************************
      6 *   Copyright (C) 2002-2013 International Business Machines Corporation   *
      7 *   and others. All rights reserved.                                      *
      8 ***************************************************************************
      9 */
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     14 
     15 #include "unicode/regex.h"
     16 #include "unicode/uclean.h"
     17 #include "uassert.h"
     18 #include "uvector.h"
     19 #include "uvectr32.h"
     20 #include "uvectr64.h"
     21 #include "regexcmp.h"
     22 #include "regeximp.h"
     23 #include "regexst.h"
     24 
     25 U_NAMESPACE_BEGIN
     26 
     27 //--------------------------------------------------------------------------
     28 //
     29 //    RegexPattern    Default Constructor
     30 //
     31 //--------------------------------------------------------------------------
     32 RegexPattern::RegexPattern() {
     33     // Init all of this instances data.
     34     init();
     35 }
     36 
     37 
     38 //--------------------------------------------------------------------------
     39 //
     40 //   Copy Constructor        Note:  This is a rather inefficient implementation,
     41 //                                  but it probably doesn't matter.
     42 //
     43 //--------------------------------------------------------------------------
     44 RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
     45     init();
     46     *this = other;
     47 }
     48 
     49 
     50 
     51 //--------------------------------------------------------------------------
     52 //
     53 //    Assignment Operator
     54 //
     55 //--------------------------------------------------------------------------
     56 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
     57     if (this == &other) {
     58         // Source and destination are the same.  Don't do anything.
     59         return *this;
     60     }
     61 
     62     // Clean out any previous contents of object being assigned to.
     63     zap();
     64 
     65     // Give target object a default initialization
     66     init();
     67 
     68     // Copy simple fields
     69     if ( other.fPatternString == NULL ) {
     70         fPatternString = NULL;
     71         fPattern      = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
     72     } else {
     73         fPatternString = new UnicodeString(*(other.fPatternString));
     74         UErrorCode status = U_ZERO_ERROR;
     75         fPattern      = utext_openConstUnicodeString(NULL, fPatternString, &status);
     76         if (U_FAILURE(status)) {
     77             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
     78             return *this;
     79         }
     80     }
     81     fFlags            = other.fFlags;
     82     fLiteralText      = other.fLiteralText;
     83     fDeferredStatus   = other.fDeferredStatus;
     84     fMinMatchLen      = other.fMinMatchLen;
     85     fFrameSize        = other.fFrameSize;
     86     fDataSize         = other.fDataSize;
     87     fMaxCaptureDigits = other.fMaxCaptureDigits;
     88     fStaticSets       = other.fStaticSets;
     89     fStaticSets8      = other.fStaticSets8;
     90 
     91     fStartType        = other.fStartType;
     92     fInitialStringIdx = other.fInitialStringIdx;
     93     fInitialStringLen = other.fInitialStringLen;
     94     *fInitialChars    = *other.fInitialChars;
     95     fInitialChar      = other.fInitialChar;
     96     *fInitialChars8   = *other.fInitialChars8;
     97     fNeedsAltInput    = other.fNeedsAltInput;
     98 
     99     //  Copy the pattern.  It's just values, nothing deep to copy.
    100     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    101     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
    102 
    103     //  Copy the Unicode Sets.
    104     //    Could be made more efficient if the sets were reference counted and shared,
    105     //    but I doubt that pattern copying will be particularly common.
    106     //    Note:  init() already added an empty element zero to fSets
    107     int32_t i;
    108     int32_t  numSets = other.fSets->size();
    109     fSets8 = new Regex8BitSet[numSets];
    110     if (fSets8 == NULL) {
    111     	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    112     	return *this;
    113     }
    114     for (i=1; i<numSets; i++) {
    115         if (U_FAILURE(fDeferredStatus)) {
    116             return *this;
    117         }
    118         UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
    119         UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
    120         if (newSet == NULL) {
    121             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    122             break;
    123         }
    124         fSets->addElement(newSet, fDeferredStatus);
    125         fSets8[i] = other.fSets8[i];
    126     }
    127 
    128     return *this;
    129 }
    130 
    131 
    132 //--------------------------------------------------------------------------
    133 //
    134 //    init        Shared initialization for use by constructors.
    135 //                Bring an uninitialized RegexPattern up to a default state.
    136 //
    137 //--------------------------------------------------------------------------
    138 void RegexPattern::init() {
    139     fFlags            = 0;
    140     fCompiledPat      = 0;
    141     fLiteralText.remove();
    142     fSets             = NULL;
    143     fSets8            = NULL;
    144     fDeferredStatus   = U_ZERO_ERROR;
    145     fMinMatchLen      = 0;
    146     fFrameSize        = 0;
    147     fDataSize         = 0;
    148     fGroupMap         = NULL;
    149     fMaxCaptureDigits = 1;
    150     fStaticSets       = NULL;
    151     fStaticSets8      = NULL;
    152     fStartType        = START_NO_INFO;
    153     fInitialStringIdx = 0;
    154     fInitialStringLen = 0;
    155     fInitialChars     = NULL;
    156     fInitialChar      = 0;
    157     fInitialChars8    = NULL;
    158     fNeedsAltInput    = FALSE;
    159 
    160     fPattern          = NULL; // will be set later
    161     fPatternString    = NULL; // may be set later
    162     fCompiledPat      = new UVector64(fDeferredStatus);
    163     fGroupMap         = new UVector32(fDeferredStatus);
    164     fSets             = new UVector(fDeferredStatus);
    165     fInitialChars     = new UnicodeSet;
    166     fInitialChars8    = new Regex8BitSet;
    167     if (U_FAILURE(fDeferredStatus)) {
    168         return;
    169     }
    170     if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
    171         fInitialChars == NULL || fInitialChars8 == NULL) {
    172         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    173         return;
    174     }
    175 
    176     // Slot zero of the vector of sets is reserved.  Fill it here.
    177     fSets->addElement((int32_t)0, fDeferredStatus);
    178 }
    179 
    180 
    181 //--------------------------------------------------------------------------
    182 //
    183 //   zap            Delete everything owned by this RegexPattern.
    184 //
    185 //--------------------------------------------------------------------------
    186 void RegexPattern::zap() {
    187     delete fCompiledPat;
    188     fCompiledPat = NULL;
    189     int i;
    190     for (i=1; i<fSets->size(); i++) {
    191         UnicodeSet *s;
    192         s = (UnicodeSet *)fSets->elementAt(i);
    193         if (s != NULL) {
    194             delete s;
    195         }
    196     }
    197     delete fSets;
    198     fSets = NULL;
    199     delete[] fSets8;
    200     fSets8 = NULL;
    201     delete fGroupMap;
    202     fGroupMap = NULL;
    203     delete fInitialChars;
    204     fInitialChars = NULL;
    205     delete fInitialChars8;
    206     fInitialChars8 = NULL;
    207     if (fPattern != NULL) {
    208         utext_close(fPattern);
    209         fPattern = NULL;
    210     }
    211     if (fPatternString != NULL) {
    212         delete fPatternString;
    213         fPatternString = NULL;
    214     }
    215 }
    216 
    217 
    218 //--------------------------------------------------------------------------
    219 //
    220 //   Destructor
    221 //
    222 //--------------------------------------------------------------------------
    223 RegexPattern::~RegexPattern() {
    224     zap();
    225 }
    226 
    227 
    228 //--------------------------------------------------------------------------
    229 //
    230 //   Clone
    231 //
    232 //--------------------------------------------------------------------------
    233 RegexPattern  *RegexPattern::clone() const {
    234     RegexPattern  *copy = new RegexPattern(*this);
    235     return copy;
    236 }
    237 
    238 
    239 //--------------------------------------------------------------------------
    240 //
    241 //   operator ==   (comparison)    Consider to patterns to be == if the
    242 //                                 pattern strings and the flags are the same.
    243 //                                 Note that pattern strings with the same
    244 //                                 characters can still be considered different.
    245 //
    246 //--------------------------------------------------------------------------
    247 UBool   RegexPattern::operator ==(const RegexPattern &other) const {
    248     if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
    249         if (this->fPatternString != NULL && other.fPatternString != NULL) {
    250             return *(this->fPatternString) == *(other.fPatternString);
    251         } else if (this->fPattern == NULL) {
    252             if (other.fPattern == NULL) {
    253                 return TRUE;
    254             }
    255         } else if (other.fPattern != NULL) {
    256             UTEXT_SETNATIVEINDEX(this->fPattern, 0);
    257             UTEXT_SETNATIVEINDEX(other.fPattern, 0);
    258             return utext_equals(this->fPattern, other.fPattern);
    259         }
    260     }
    261     return FALSE;
    262 }
    263 
    264 //---------------------------------------------------------------------
    265 //
    266 //   compile
    267 //
    268 //---------------------------------------------------------------------
    269 RegexPattern * U_EXPORT2
    270 RegexPattern::compile(const UnicodeString &regex,
    271                       uint32_t             flags,
    272                       UParseError          &pe,
    273                       UErrorCode           &status)
    274 {
    275     if (U_FAILURE(status)) {
    276         return NULL;
    277     }
    278 
    279     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
    280     UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
    281     UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
    282 
    283     if ((flags & ~allFlags) != 0) {
    284         status = U_REGEX_INVALID_FLAG;
    285         return NULL;
    286     }
    287 
    288     if ((flags & UREGEX_CANON_EQ) != 0) {
    289         status = U_REGEX_UNIMPLEMENTED;
    290         return NULL;
    291     }
    292 
    293     RegexPattern *This = new RegexPattern;
    294     if (This == NULL) {
    295         status = U_MEMORY_ALLOCATION_ERROR;
    296         return NULL;
    297     }
    298     if (U_FAILURE(This->fDeferredStatus)) {
    299         status = This->fDeferredStatus;
    300         delete This;
    301         return NULL;
    302     }
    303     This->fFlags = flags;
    304 
    305     RegexCompile     compiler(This, status);
    306     compiler.compile(regex, pe, status);
    307 
    308     if (U_FAILURE(status)) {
    309         delete This;
    310         This = NULL;
    311     }
    312 
    313     return This;
    314 }
    315 
    316 
    317 //
    318 //   compile, UText mode
    319 //
    320 RegexPattern * U_EXPORT2
    321 RegexPattern::compile(UText                *regex,
    322                       uint32_t             flags,
    323                       UParseError          &pe,
    324                       UErrorCode           &status)
    325 {
    326     if (U_FAILURE(status)) {
    327         return NULL;
    328     }
    329 
    330     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
    331                               UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
    332                               UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
    333 
    334     if ((flags & ~allFlags) != 0) {
    335         status = U_REGEX_INVALID_FLAG;
    336         return NULL;
    337     }
    338 
    339     if ((flags & UREGEX_CANON_EQ) != 0) {
    340         status = U_REGEX_UNIMPLEMENTED;
    341         return NULL;
    342     }
    343 
    344     RegexPattern *This = new RegexPattern;
    345     if (This == NULL) {
    346         status = U_MEMORY_ALLOCATION_ERROR;
    347         return NULL;
    348     }
    349     if (U_FAILURE(This->fDeferredStatus)) {
    350         status = This->fDeferredStatus;
    351         delete This;
    352         return NULL;
    353     }
    354     This->fFlags = flags;
    355 
    356     RegexCompile     compiler(This, status);
    357     compiler.compile(regex, pe, status);
    358 
    359     if (U_FAILURE(status)) {
    360         delete This;
    361         This = NULL;
    362     }
    363 
    364     return This;
    365 }
    366 
    367 //
    368 //   compile with default flags.
    369 //
    370 RegexPattern * U_EXPORT2
    371 RegexPattern::compile(const UnicodeString &regex,
    372                       UParseError         &pe,
    373                       UErrorCode          &err)
    374 {
    375     return compile(regex, 0, pe, err);
    376 }
    377 
    378 
    379 //
    380 //   compile with default flags, UText mode
    381 //
    382 RegexPattern * U_EXPORT2
    383 RegexPattern::compile(UText               *regex,
    384                       UParseError         &pe,
    385                       UErrorCode          &err)
    386 {
    387     return compile(regex, 0, pe, err);
    388 }
    389 
    390 
    391 //
    392 //   compile with no UParseErr parameter.
    393 //
    394 RegexPattern * U_EXPORT2
    395 RegexPattern::compile(const UnicodeString &regex,
    396                       uint32_t             flags,
    397                       UErrorCode          &err)
    398 {
    399     UParseError pe;
    400     return compile(regex, flags, pe, err);
    401 }
    402 
    403 
    404 //
    405 //   compile with no UParseErr parameter, UText mode
    406 //
    407 RegexPattern * U_EXPORT2
    408 RegexPattern::compile(UText                *regex,
    409                       uint32_t             flags,
    410                       UErrorCode           &err)
    411 {
    412     UParseError pe;
    413     return compile(regex, flags, pe, err);
    414 }
    415 
    416 
    417 //---------------------------------------------------------------------
    418 //
    419 //   flags
    420 //
    421 //---------------------------------------------------------------------
    422 uint32_t RegexPattern::flags() const {
    423     return fFlags;
    424 }
    425 
    426 
    427 //---------------------------------------------------------------------
    428 //
    429 //   matcher(UnicodeString, err)
    430 //
    431 //---------------------------------------------------------------------
    432 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
    433                                     UErrorCode          &status)  const {
    434     RegexMatcher    *retMatcher = matcher(status);
    435     if (retMatcher != NULL) {
    436         retMatcher->fDeferredStatus = status;
    437         retMatcher->reset(input);
    438     }
    439     return retMatcher;
    440 }
    441 
    442 
    443 //---------------------------------------------------------------------
    444 //
    445 //   matcher(status)
    446 //
    447 //---------------------------------------------------------------------
    448 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
    449     RegexMatcher    *retMatcher = NULL;
    450 
    451     if (U_FAILURE(status)) {
    452         return NULL;
    453     }
    454     if (U_FAILURE(fDeferredStatus)) {
    455         status = fDeferredStatus;
    456         return NULL;
    457     }
    458 
    459     retMatcher = new RegexMatcher(this);
    460     if (retMatcher == NULL) {
    461         status = U_MEMORY_ALLOCATION_ERROR;
    462         return NULL;
    463     }
    464     return retMatcher;
    465 }
    466 
    467 
    468 
    469 //---------------------------------------------------------------------
    470 //
    471 //   matches        Convenience function to test for a match, starting
    472 //                  with a pattern string and a data string.
    473 //
    474 //---------------------------------------------------------------------
    475 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
    476               const UnicodeString   &input,
    477                     UParseError     &pe,
    478                     UErrorCode      &status) {
    479 
    480     if (U_FAILURE(status)) {return FALSE;}
    481 
    482     UBool         retVal;
    483     RegexPattern *pat     = NULL;
    484     RegexMatcher *matcher = NULL;
    485 
    486     pat     = RegexPattern::compile(regex, 0, pe, status);
    487     matcher = pat->matcher(input, status);
    488     retVal  = matcher->matches(status);
    489 
    490     delete matcher;
    491     delete pat;
    492     return retVal;
    493 }
    494 
    495 
    496 //
    497 //   matches, UText mode
    498 //
    499 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
    500                     UText           *input,
    501                     UParseError     &pe,
    502                     UErrorCode      &status) {
    503 
    504     if (U_FAILURE(status)) {return FALSE;}
    505 
    506     UBool         retVal  = FALSE;
    507     RegexPattern *pat     = NULL;
    508     RegexMatcher *matcher = NULL;
    509 
    510     pat     = RegexPattern::compile(regex, 0, pe, status);
    511     matcher = pat->matcher(status);
    512     if (U_SUCCESS(status)) {
    513         matcher->reset(input);
    514         retVal  = matcher->matches(status);
    515     }
    516 
    517     delete matcher;
    518     delete pat;
    519     return retVal;
    520 }
    521 
    522 
    523 
    524 
    525 
    526 //---------------------------------------------------------------------
    527 //
    528 //   pattern
    529 //
    530 //---------------------------------------------------------------------
    531 UnicodeString RegexPattern::pattern() const {
    532     if (fPatternString != NULL) {
    533         return *fPatternString;
    534     } else if (fPattern == NULL) {
    535         return UnicodeString();
    536     } else {
    537         UErrorCode status = U_ZERO_ERROR;
    538         int64_t nativeLen = utext_nativeLength(fPattern);
    539         int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
    540         UnicodeString result;
    541 
    542         status = U_ZERO_ERROR;
    543         UChar *resultChars = result.getBuffer(len16);
    544         utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
    545         result.releaseBuffer(len16);
    546 
    547         return result;
    548     }
    549 }
    550 
    551 
    552 
    553 
    554 //---------------------------------------------------------------------
    555 //
    556 //   patternText
    557 //
    558 //---------------------------------------------------------------------
    559 UText *RegexPattern::patternText(UErrorCode      &status) const {
    560     if (U_FAILURE(status)) {return NULL;}
    561     status = U_ZERO_ERROR;
    562 
    563     if (fPattern != NULL) {
    564         return fPattern;
    565     } else {
    566         RegexStaticSets::initGlobals(&status);
    567         return RegexStaticSets::gStaticSets->fEmptyText;
    568     }
    569 }
    570 
    571 
    572 
    573 //---------------------------------------------------------------------
    574 //
    575 //   split
    576 //
    577 //---------------------------------------------------------------------
    578 int32_t  RegexPattern::split(const UnicodeString &input,
    579         UnicodeString    dest[],
    580         int32_t          destCapacity,
    581         UErrorCode      &status) const
    582 {
    583     if (U_FAILURE(status)) {
    584         return 0;
    585     };
    586 
    587     RegexMatcher  m(this);
    588     int32_t r = 0;
    589     // Check m's status to make sure all is ok.
    590     if (U_SUCCESS(m.fDeferredStatus)) {
    591     	r = m.split(input, dest, destCapacity, status);
    592     }
    593     return r;
    594 }
    595 
    596 //
    597 //   split, UText mode
    598 //
    599 int32_t  RegexPattern::split(UText *input,
    600         UText           *dest[],
    601         int32_t          destCapacity,
    602         UErrorCode      &status) const
    603 {
    604     if (U_FAILURE(status)) {
    605         return 0;
    606     };
    607 
    608     RegexMatcher  m(this);
    609     int32_t r = 0;
    610     // Check m's status to make sure all is ok.
    611     if (U_SUCCESS(m.fDeferredStatus)) {
    612     	r = m.split(input, dest, destCapacity, status);
    613     }
    614     return r;
    615 }
    616 
    617 
    618 
    619 //---------------------------------------------------------------------
    620 //
    621 //   dump    Output the compiled form of the pattern.
    622 //           Debugging function only.
    623 //
    624 //---------------------------------------------------------------------
    625 void   RegexPattern::dumpOp(int32_t index) const {
    626     (void)index;  // Suppress warnings in non-debug build.
    627 #if defined(REGEX_DEBUG)
    628     static const char * const opNames[] = {URX_OPCODE_NAMES};
    629     int32_t op          = fCompiledPat->elementAti(index);
    630     int32_t val         = URX_VAL(op);
    631     int32_t type        = URX_TYPE(op);
    632     int32_t pinnedType  = type;
    633     if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
    634         pinnedType = 0;
    635     }
    636 
    637     printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
    638     switch (type) {
    639     case URX_NOP:
    640     case URX_DOTANY:
    641     case URX_DOTANY_ALL:
    642     case URX_FAIL:
    643     case URX_CARET:
    644     case URX_DOLLAR:
    645     case URX_BACKSLASH_G:
    646     case URX_BACKSLASH_X:
    647     case URX_END:
    648     case URX_DOLLAR_M:
    649     case URX_CARET_M:
    650         // Types with no operand field of interest.
    651         break;
    652 
    653     case URX_RESERVED_OP:
    654     case URX_START_CAPTURE:
    655     case URX_END_CAPTURE:
    656     case URX_STATE_SAVE:
    657     case URX_JMP:
    658     case URX_JMP_SAV:
    659     case URX_JMP_SAV_X:
    660     case URX_BACKSLASH_B:
    661     case URX_BACKSLASH_BU:
    662     case URX_BACKSLASH_D:
    663     case URX_BACKSLASH_Z:
    664     case URX_STRING_LEN:
    665     case URX_CTR_INIT:
    666     case URX_CTR_INIT_NG:
    667     case URX_CTR_LOOP:
    668     case URX_CTR_LOOP_NG:
    669     case URX_RELOC_OPRND:
    670     case URX_STO_SP:
    671     case URX_LD_SP:
    672     case URX_BACKREF:
    673     case URX_STO_INP_LOC:
    674     case URX_JMPX:
    675     case URX_LA_START:
    676     case URX_LA_END:
    677     case URX_BACKREF_I:
    678     case URX_LB_START:
    679     case URX_LB_CONT:
    680     case URX_LB_END:
    681     case URX_LBN_CONT:
    682     case URX_LBN_END:
    683     case URX_LOOP_C:
    684     case URX_LOOP_DOT_I:
    685         // types with an integer operand field.
    686         printf("%d", val);
    687         break;
    688 
    689     case URX_ONECHAR:
    690     case URX_ONECHAR_I:
    691         printf("%c", val<256?val:'?');
    692         break;
    693 
    694     case URX_STRING:
    695     case URX_STRING_I:
    696         {
    697             int32_t lengthOp       = fCompiledPat->elementAti(index+1);
    698             U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
    699             int32_t length = URX_VAL(lengthOp);
    700             int32_t i;
    701             for (i=val; i<val+length; i++) {
    702                 UChar c = fLiteralText[i];
    703                 if (c < 32 || c >= 256) {c = '.';}
    704                 printf("%c", c);
    705             }
    706         }
    707         break;
    708 
    709     case URX_SETREF:
    710     case URX_LOOP_SR_I:
    711         {
    712             UnicodeString s;
    713             UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
    714             set->toPattern(s, TRUE);
    715             for (int32_t i=0; i<s.length(); i++) {
    716                 printf("%c", s.charAt(i));
    717             }
    718         }
    719         break;
    720 
    721     case URX_STATIC_SETREF:
    722     case URX_STAT_SETREF_N:
    723         {
    724             UnicodeString s;
    725             if (val & URX_NEG_SET) {
    726                 printf("NOT ");
    727                 val &= ~URX_NEG_SET;
    728             }
    729             UnicodeSet *set = fStaticSets[val];
    730             set->toPattern(s, TRUE);
    731             for (int32_t i=0; i<s.length(); i++) {
    732                 printf("%c", s.charAt(i));
    733             }
    734         }
    735         break;
    736 
    737 
    738     default:
    739         printf("??????");
    740         break;
    741     }
    742     printf("\n");
    743 #endif
    744 }
    745 
    746 
    747 void RegexPattern::dumpPattern() const {
    748 #if defined(REGEX_DEBUG)
    749     int      index;
    750     int      i;
    751 
    752     printf("Original Pattern:  ");
    753     UChar32 c = utext_next32From(fPattern, 0);
    754     while (c != U_SENTINEL) {
    755         if (c<32 || c>256) {
    756             c = '.';
    757         }
    758         printf("%c", c);
    759 
    760         c = UTEXT_NEXT32(fPattern);
    761     }
    762     printf("\n");
    763     printf("   Min Match Length:  %d\n", fMinMatchLen);
    764     printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));
    765     if (fStartType == START_STRING) {
    766         printf("    Initial match string: \"");
    767         for (i=fInitialStringIdx; i<fInitialStringIdx+fInitialStringLen; i++) {
    768             printf("%c", fLiteralText[i]);   // TODO:  non-printables, surrogates.
    769         }
    770         printf("\"\n");
    771 
    772     } else if (fStartType == START_SET) {
    773         int32_t numSetChars = fInitialChars->size();
    774         if (numSetChars > 20) {
    775             numSetChars = 20;
    776         }
    777         printf("     Match First Chars : ");
    778         for (i=0; i<numSetChars; i++) {
    779             UChar32 c = fInitialChars->charAt(i);
    780             if (0x20<c && c <0x7e) {
    781                 printf("%c ", c);
    782             } else {
    783                 printf("%#x ", c);
    784             }
    785         }
    786         if (numSetChars < fInitialChars->size()) {
    787             printf(" ...");
    788         }
    789         printf("\n");
    790 
    791     } else if (fStartType == START_CHAR) {
    792         printf("    First char of Match : ");
    793         if (0x20 < fInitialChar && fInitialChar<0x7e) {
    794                 printf("%c\n", fInitialChar);
    795             } else {
    796                 printf("%#x\n", fInitialChar);
    797             }
    798     }
    799 
    800     printf("\nIndex   Binary     Type             Operand\n" \
    801            "-------------------------------------------\n");
    802     for (index = 0; index<fCompiledPat->size(); index++) {
    803         dumpOp(index);
    804     }
    805     printf("\n\n");
    806 #endif
    807 }
    808 
    809 
    810 
    811 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
    812 
    813 U_NAMESPACE_END
    814 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
    815