Home | History | Annotate | Download | only in i18n
      1 //
      2 //  file:  repattrn.cpp
      3 //
      4 /*
      5 ***************************************************************************
      6 *   Copyright (C) 2002-2011 International Business Machines Corporation   *
      7 *   and others. All rights reserved.                                      *
      8 ***************************************************************************
      9 */
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     14 
     15 #include "unicode/regex.h"
     16 #include "unicode/uclean.h"
     17 #include "uassert.h"
     18 #include "uvector.h"
     19 #include "uvectr32.h"
     20 #include "uvectr64.h"
     21 #include "regexcmp.h"
     22 #include "regeximp.h"
     23 #include "regexst.h"
     24 
     25 U_NAMESPACE_BEGIN
     26 
     27 //--------------------------------------------------------------------------
     28 //
     29 //    RegexPattern    Default Constructor
     30 //
     31 //--------------------------------------------------------------------------
     32 RegexPattern::RegexPattern() {
     33     UErrorCode status = U_ZERO_ERROR;
     34     u_init(&status);
     35 
     36     // Init all of this instances data.
     37     init();
     38 }
     39 
     40 
     41 //--------------------------------------------------------------------------
     42 //
     43 //   Copy Constructor        Note:  This is a rather inefficient implementation,
     44 //                                  but it probably doesn't matter.
     45 //
     46 //--------------------------------------------------------------------------
     47 RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
     48     init();
     49     *this = other;
     50 }
     51 
     52 
     53 
     54 //--------------------------------------------------------------------------
     55 //
     56 //    Assignment Operator
     57 //
     58 //--------------------------------------------------------------------------
     59 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
     60     if (this == &other) {
     61         // Source and destination are the same.  Don't do anything.
     62         return *this;
     63     }
     64 
     65     // Clean out any previous contents of object being assigned to.
     66     zap();
     67 
     68     // Give target object a default initialization
     69     init();
     70 
     71     // Copy simple fields
     72     if ( other.fPatternString == NULL ) {
     73         fPatternString = NULL;
     74         fPattern      = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
     75     } else {
     76         fPatternString = new UnicodeString(*(other.fPatternString));
     77         UErrorCode status = U_ZERO_ERROR;
     78         fPattern      = utext_openConstUnicodeString(NULL, fPatternString, &status);
     79         if (U_FAILURE(status)) {
     80             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
     81             return *this;
     82         }
     83     }
     84     fFlags            = other.fFlags;
     85     fLiteralText      = other.fLiteralText;
     86     fDeferredStatus   = other.fDeferredStatus;
     87     fMinMatchLen      = other.fMinMatchLen;
     88     fFrameSize        = other.fFrameSize;
     89     fDataSize         = other.fDataSize;
     90     fMaxCaptureDigits = other.fMaxCaptureDigits;
     91     fStaticSets       = other.fStaticSets;
     92     fStaticSets8      = other.fStaticSets8;
     93 
     94     fStartType        = other.fStartType;
     95     fInitialStringIdx = other.fInitialStringIdx;
     96     fInitialStringLen = other.fInitialStringLen;
     97     *fInitialChars    = *other.fInitialChars;
     98     fInitialChar      = other.fInitialChar;
     99     *fInitialChars8   = *other.fInitialChars8;
    100     fNeedsAltInput    = other.fNeedsAltInput;
    101 
    102     //  Copy the pattern.  It's just values, nothing deep to copy.
    103     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    104     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
    105 
    106     //  Copy the Unicode Sets.
    107     //    Could be made more efficient if the sets were reference counted and shared,
    108     //    but I doubt that pattern copying will be particularly common.
    109     //    Note:  init() already added an empty element zero to fSets
    110     int32_t i;
    111     int32_t  numSets = other.fSets->size();
    112     fSets8 = new Regex8BitSet[numSets];
    113     if (fSets8 == NULL) {
    114     	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    115     	return *this;
    116     }
    117     for (i=1; i<numSets; i++) {
    118         if (U_FAILURE(fDeferredStatus)) {
    119             return *this;
    120         }
    121         UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
    122         UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
    123         if (newSet == NULL) {
    124             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    125             break;
    126         }
    127         fSets->addElement(newSet, fDeferredStatus);
    128         fSets8[i] = other.fSets8[i];
    129     }
    130 
    131     return *this;
    132 }
    133 
    134 
    135 //--------------------------------------------------------------------------
    136 //
    137 //    init        Shared initialization for use by constructors.
    138 //                Bring an uninitialized RegexPattern up to a default state.
    139 //
    140 //--------------------------------------------------------------------------
    141 void RegexPattern::init() {
    142     fFlags            = 0;
    143     fCompiledPat      = 0;
    144     fLiteralText.remove();
    145     fSets             = NULL;
    146     fSets8            = NULL;
    147     fDeferredStatus   = U_ZERO_ERROR;
    148     fMinMatchLen      = 0;
    149     fFrameSize        = 0;
    150     fDataSize         = 0;
    151     fGroupMap         = NULL;
    152     fMaxCaptureDigits = 1;
    153     fStaticSets       = NULL;
    154     fStaticSets8      = NULL;
    155     fStartType        = START_NO_INFO;
    156     fInitialStringIdx = 0;
    157     fInitialStringLen = 0;
    158     fInitialChars     = NULL;
    159     fInitialChar      = 0;
    160     fInitialChars8    = NULL;
    161     fNeedsAltInput    = FALSE;
    162 
    163     fPattern          = NULL; // will be set later
    164     fPatternString    = NULL; // may be set later
    165     fCompiledPat      = new UVector64(fDeferredStatus);
    166     fGroupMap         = new UVector32(fDeferredStatus);
    167     fSets             = new UVector(fDeferredStatus);
    168     fInitialChars     = new UnicodeSet;
    169     fInitialChars8    = new Regex8BitSet;
    170     if (U_FAILURE(fDeferredStatus)) {
    171         return;
    172     }
    173     if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
    174         fInitialChars == NULL || fInitialChars8 == NULL) {
    175         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    176         return;
    177     }
    178 
    179     // Slot zero of the vector of sets is reserved.  Fill it here.
    180     fSets->addElement((int32_t)0, fDeferredStatus);
    181 }
    182 
    183 
    184 //--------------------------------------------------------------------------
    185 //
    186 //   zap            Delete everything owned by this RegexPattern.
    187 //
    188 //--------------------------------------------------------------------------
    189 void RegexPattern::zap() {
    190     delete fCompiledPat;
    191     fCompiledPat = NULL;
    192     int i;
    193     for (i=1; i<fSets->size(); i++) {
    194         UnicodeSet *s;
    195         s = (UnicodeSet *)fSets->elementAt(i);
    196         if (s != NULL) {
    197             delete s;
    198         }
    199     }
    200     delete fSets;
    201     fSets = NULL;
    202     delete[] fSets8;
    203     fSets8 = NULL;
    204     delete fGroupMap;
    205     fGroupMap = NULL;
    206     delete fInitialChars;
    207     fInitialChars = NULL;
    208     delete fInitialChars8;
    209     fInitialChars8 = NULL;
    210     if (fPattern != NULL) {
    211         utext_close(fPattern);
    212         fPattern = NULL;
    213     }
    214     if (fPatternString != NULL) {
    215         delete fPatternString;
    216         fPatternString = NULL;
    217     }
    218 }
    219 
    220 
    221 //--------------------------------------------------------------------------
    222 //
    223 //   Destructor
    224 //
    225 //--------------------------------------------------------------------------
    226 RegexPattern::~RegexPattern() {
    227     zap();
    228 }
    229 
    230 
    231 //--------------------------------------------------------------------------
    232 //
    233 //   Clone
    234 //
    235 //--------------------------------------------------------------------------
    236 RegexPattern  *RegexPattern::clone() const {
    237     RegexPattern  *copy = new RegexPattern(*this);
    238     return copy;
    239 }
    240 
    241 
    242 //--------------------------------------------------------------------------
    243 //
    244 //   operator ==   (comparison)    Consider to patterns to be == if the
    245 //                                 pattern strings and the flags are the same.
    246 //                                 Note that pattern strings with the same
    247 //                                 characters can still be considered different.
    248 //
    249 //--------------------------------------------------------------------------
    250 UBool   RegexPattern::operator ==(const RegexPattern &other) const {
    251     if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
    252         if (this->fPatternString != NULL && other.fPatternString != NULL) {
    253             return *(this->fPatternString) == *(other.fPatternString);
    254         } else if (this->fPattern == NULL) {
    255             if (other.fPattern == NULL) {
    256                 return TRUE;
    257             }
    258         } else if (other.fPattern != NULL) {
    259             UTEXT_SETNATIVEINDEX(this->fPattern, 0);
    260             UTEXT_SETNATIVEINDEX(other.fPattern, 0);
    261             return utext_equals(this->fPattern, other.fPattern);
    262         }
    263     }
    264     return FALSE;
    265 }
    266 
    267 //---------------------------------------------------------------------
    268 //
    269 //   compile
    270 //
    271 //---------------------------------------------------------------------
    272 RegexPattern * U_EXPORT2
    273 RegexPattern::compile(const UnicodeString &regex,
    274                       uint32_t             flags,
    275                       UParseError          &pe,
    276                       UErrorCode           &status)
    277 {
    278     if (U_FAILURE(status)) {
    279         return NULL;
    280     }
    281 
    282     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
    283     UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
    284     UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
    285 
    286     if ((flags & ~allFlags) != 0) {
    287         status = U_REGEX_INVALID_FLAG;
    288         return NULL;
    289     }
    290 
    291     if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
    292         status = U_REGEX_UNIMPLEMENTED;
    293         return NULL;
    294     }
    295 
    296     RegexPattern *This = new RegexPattern;
    297     if (This == NULL) {
    298         status = U_MEMORY_ALLOCATION_ERROR;
    299         return NULL;
    300     }
    301     if (U_FAILURE(This->fDeferredStatus)) {
    302         status = This->fDeferredStatus;
    303         delete This;
    304         return NULL;
    305     }
    306     This->fFlags = flags;
    307 
    308     RegexCompile     compiler(This, status);
    309     compiler.compile(regex, pe, status);
    310 
    311     if (U_FAILURE(status)) {
    312         delete This;
    313         This = NULL;
    314     }
    315 
    316     return This;
    317 }
    318 
    319 
    320 //
    321 //   compile, UText mode
    322 //
    323 RegexPattern * U_EXPORT2
    324 RegexPattern::compile(UText                *regex,
    325                       uint32_t             flags,
    326                       UParseError          &pe,
    327                       UErrorCode           &status)
    328 {
    329     if (U_FAILURE(status)) {
    330         return NULL;
    331     }
    332 
    333     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
    334                               UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
    335                               UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
    336 
    337     if ((flags & ~allFlags) != 0) {
    338         status = U_REGEX_INVALID_FLAG;
    339         return NULL;
    340     }
    341 
    342     if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
    343         status = U_REGEX_UNIMPLEMENTED;
    344         return NULL;
    345     }
    346 
    347     RegexPattern *This = new RegexPattern;
    348     if (This == NULL) {
    349         status = U_MEMORY_ALLOCATION_ERROR;
    350         return NULL;
    351     }
    352     if (U_FAILURE(This->fDeferredStatus)) {
    353         status = This->fDeferredStatus;
    354         delete This;
    355         return NULL;
    356     }
    357     This->fFlags = flags;
    358 
    359     RegexCompile     compiler(This, status);
    360     compiler.compile(regex, pe, status);
    361 
    362     if (U_FAILURE(status)) {
    363         delete This;
    364         This = NULL;
    365     }
    366 
    367     return This;
    368 }
    369 
    370 //
    371 //   compile with default flags.
    372 //
    373 RegexPattern * U_EXPORT2
    374 RegexPattern::compile(const UnicodeString &regex,
    375                       UParseError         &pe,
    376                       UErrorCode          &err)
    377 {
    378     return compile(regex, 0, pe, err);
    379 }
    380 
    381 
    382 //
    383 //   compile with default flags, UText mode
    384 //
    385 RegexPattern * U_EXPORT2
    386 RegexPattern::compile(UText               *regex,
    387                       UParseError         &pe,
    388                       UErrorCode          &err)
    389 {
    390     return compile(regex, 0, pe, err);
    391 }
    392 
    393 
    394 //
    395 //   compile with no UParseErr parameter.
    396 //
    397 RegexPattern * U_EXPORT2
    398 RegexPattern::compile(const UnicodeString &regex,
    399                       uint32_t             flags,
    400                       UErrorCode          &err)
    401 {
    402     UParseError pe;
    403     return compile(regex, flags, pe, err);
    404 }
    405 
    406 
    407 //
    408 //   compile with no UParseErr parameter, UText mode
    409 //
    410 RegexPattern * U_EXPORT2
    411 RegexPattern::compile(UText                *regex,
    412                       uint32_t             flags,
    413                       UErrorCode           &err)
    414 {
    415     UParseError pe;
    416     return compile(regex, flags, pe, err);
    417 }
    418 
    419 
    420 //---------------------------------------------------------------------
    421 //
    422 //   flags
    423 //
    424 //---------------------------------------------------------------------
    425 uint32_t RegexPattern::flags() const {
    426     return fFlags;
    427 }
    428 
    429 
    430 //---------------------------------------------------------------------
    431 //
    432 //   matcher(UnicodeString, err)
    433 //
    434 //---------------------------------------------------------------------
    435 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
    436                                     UErrorCode          &status)  const {
    437     RegexMatcher    *retMatcher = matcher(status);
    438     if (retMatcher != NULL) {
    439         retMatcher->fDeferredStatus = status;
    440         retMatcher->reset(input);
    441     }
    442     return retMatcher;
    443 }
    444 
    445 
    446 //---------------------------------------------------------------------
    447 //
    448 //   matcher(status)
    449 //
    450 //---------------------------------------------------------------------
    451 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
    452     RegexMatcher    *retMatcher = NULL;
    453 
    454     if (U_FAILURE(status)) {
    455         return NULL;
    456     }
    457     if (U_FAILURE(fDeferredStatus)) {
    458         status = fDeferredStatus;
    459         return NULL;
    460     }
    461 
    462     retMatcher = new RegexMatcher(this);
    463     if (retMatcher == NULL) {
    464         status = U_MEMORY_ALLOCATION_ERROR;
    465         return NULL;
    466     }
    467     return retMatcher;
    468 }
    469 
    470 
    471 
    472 //---------------------------------------------------------------------
    473 //
    474 //   matches        Convenience function to test for a match, starting
    475 //                  with a pattern string and a data string.
    476 //
    477 //---------------------------------------------------------------------
    478 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
    479               const UnicodeString   &input,
    480                     UParseError     &pe,
    481                     UErrorCode      &status) {
    482 
    483     if (U_FAILURE(status)) {return FALSE;}
    484 
    485     UBool         retVal;
    486     RegexPattern *pat     = NULL;
    487     RegexMatcher *matcher = NULL;
    488 
    489     pat     = RegexPattern::compile(regex, 0, pe, status);
    490     matcher = pat->matcher(input, status);
    491     retVal  = matcher->matches(status);
    492 
    493     delete matcher;
    494     delete pat;
    495     return retVal;
    496 }
    497 
    498 
    499 //
    500 //   matches, UText mode
    501 //
    502 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
    503                     UText           *input,
    504                     UParseError     &pe,
    505                     UErrorCode      &status) {
    506 
    507     if (U_FAILURE(status)) {return FALSE;}
    508 
    509     UBool         retVal  = FALSE;
    510     RegexPattern *pat     = NULL;
    511     RegexMatcher *matcher = NULL;
    512 
    513     pat     = RegexPattern::compile(regex, 0, pe, status);
    514     matcher = pat->matcher(status);
    515     if (U_SUCCESS(status)) {
    516         matcher->reset(input);
    517         retVal  = matcher->matches(status);
    518     }
    519 
    520     delete matcher;
    521     delete pat;
    522     return retVal;
    523 }
    524 
    525 
    526 
    527 
    528 
    529 //---------------------------------------------------------------------
    530 //
    531 //   pattern
    532 //
    533 //---------------------------------------------------------------------
    534 UnicodeString RegexPattern::pattern() const {
    535     if (fPatternString != NULL) {
    536         return *fPatternString;
    537     } else if (fPattern == NULL) {
    538         return UnicodeString();
    539     } else {
    540         UErrorCode status = U_ZERO_ERROR;
    541         int64_t nativeLen = utext_nativeLength(fPattern);
    542         int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
    543         UnicodeString result;
    544 
    545         status = U_ZERO_ERROR;
    546         UChar *resultChars = result.getBuffer(len16);
    547         utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
    548         result.releaseBuffer(len16);
    549 
    550         return result;
    551     }
    552 }
    553 
    554 
    555 
    556 
    557 //---------------------------------------------------------------------
    558 //
    559 //   patternText
    560 //
    561 //---------------------------------------------------------------------
    562 UText *RegexPattern::patternText(UErrorCode      &status) const {
    563     if (U_FAILURE(status)) {return NULL;}
    564     status = U_ZERO_ERROR;
    565 
    566     if (fPattern != NULL) {
    567         return fPattern;
    568     } else {
    569         RegexStaticSets::initGlobals(&status);
    570         return RegexStaticSets::gStaticSets->fEmptyText;
    571     }
    572 }
    573 
    574 
    575 
    576 //---------------------------------------------------------------------
    577 //
    578 //   split
    579 //
    580 //---------------------------------------------------------------------
    581 int32_t  RegexPattern::split(const UnicodeString &input,
    582         UnicodeString    dest[],
    583         int32_t          destCapacity,
    584         UErrorCode      &status) const
    585 {
    586     if (U_FAILURE(status)) {
    587         return 0;
    588     };
    589 
    590     RegexMatcher  m(this);
    591     int32_t r = 0;
    592     // Check m's status to make sure all is ok.
    593     if (U_SUCCESS(m.fDeferredStatus)) {
    594     	r = m.split(input, dest, destCapacity, status);
    595     }
    596     return r;
    597 }
    598 
    599 //
    600 //   split, UText mode
    601 //
    602 int32_t  RegexPattern::split(UText *input,
    603         UText           *dest[],
    604         int32_t          destCapacity,
    605         UErrorCode      &status) const
    606 {
    607     if (U_FAILURE(status)) {
    608         return 0;
    609     };
    610 
    611     RegexMatcher  m(this);
    612     int32_t r = 0;
    613     // Check m's status to make sure all is ok.
    614     if (U_SUCCESS(m.fDeferredStatus)) {
    615     	r = m.split(input, dest, destCapacity, status);
    616     }
    617     return r;
    618 }
    619 
    620 
    621 
    622 //---------------------------------------------------------------------
    623 //
    624 //   dump    Output the compiled form of the pattern.
    625 //           Debugging function only.
    626 //
    627 //---------------------------------------------------------------------
    628 #if defined(REGEX_DEBUG)
    629 void   RegexPattern::dumpOp(int32_t index) const {
    630     static const char * const opNames[] = {URX_OPCODE_NAMES};
    631     int32_t op          = fCompiledPat->elementAti(index);
    632     int32_t val         = URX_VAL(op);
    633     int32_t type        = URX_TYPE(op);
    634     int32_t pinnedType  = type;
    635     if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
    636         pinnedType = 0;
    637     }
    638 
    639     REGEX_DUMP_DEBUG_PRINTF(("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]));
    640     switch (type) {
    641     case URX_NOP:
    642     case URX_DOTANY:
    643     case URX_DOTANY_ALL:
    644     case URX_FAIL:
    645     case URX_CARET:
    646     case URX_DOLLAR:
    647     case URX_BACKSLASH_G:
    648     case URX_BACKSLASH_X:
    649     case URX_END:
    650     case URX_DOLLAR_M:
    651     case URX_CARET_M:
    652         // Types with no operand field of interest.
    653         break;
    654 
    655     case URX_RESERVED_OP:
    656     case URX_START_CAPTURE:
    657     case URX_END_CAPTURE:
    658     case URX_STATE_SAVE:
    659     case URX_JMP:
    660     case URX_JMP_SAV:
    661     case URX_JMP_SAV_X:
    662     case URX_BACKSLASH_B:
    663     case URX_BACKSLASH_BU:
    664     case URX_BACKSLASH_D:
    665     case URX_BACKSLASH_Z:
    666     case URX_STRING_LEN:
    667     case URX_CTR_INIT:
    668     case URX_CTR_INIT_NG:
    669     case URX_CTR_LOOP:
    670     case URX_CTR_LOOP_NG:
    671     case URX_RELOC_OPRND:
    672     case URX_STO_SP:
    673     case URX_LD_SP:
    674     case URX_BACKREF:
    675     case URX_STO_INP_LOC:
    676     case URX_JMPX:
    677     case URX_LA_START:
    678     case URX_LA_END:
    679     case URX_BACKREF_I:
    680     case URX_LB_START:
    681     case URX_LB_CONT:
    682     case URX_LB_END:
    683     case URX_LBN_CONT:
    684     case URX_LBN_END:
    685     case URX_LOOP_C:
    686     case URX_LOOP_DOT_I:
    687         // types with an integer operand field.
    688         REGEX_DUMP_DEBUG_PRINTF(("%d", val));
    689         break;
    690 
    691     case URX_ONECHAR:
    692     case URX_ONECHAR_I:
    693         REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?'));
    694         break;
    695 
    696     case URX_STRING:
    697     case URX_STRING_I:
    698         {
    699             int32_t lengthOp       = fCompiledPat->elementAti(index+1);
    700             U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
    701             int32_t length = URX_VAL(lengthOp);
    702             int32_t i;
    703             for (i=val; i<val+length; i++) {
    704                 UChar c = fLiteralText[i];
    705                 if (c < 32 || c >= 256) {c = '.';}
    706                 REGEX_DUMP_DEBUG_PRINTF(("%c", c));
    707             }
    708         }
    709         break;
    710 
    711     case URX_SETREF:
    712     case URX_LOOP_SR_I:
    713         {
    714             UnicodeString s;
    715             UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
    716             set->toPattern(s, TRUE);
    717             for (int32_t i=0; i<s.length(); i++) {
    718                 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
    719             }
    720         }
    721         break;
    722 
    723     case URX_STATIC_SETREF:
    724     case URX_STAT_SETREF_N:
    725         {
    726             UnicodeString s;
    727             if (val & URX_NEG_SET) {
    728                 REGEX_DUMP_DEBUG_PRINTF(("NOT "));
    729                 val &= ~URX_NEG_SET;
    730             }
    731             UnicodeSet *set = fStaticSets[val];
    732             set->toPattern(s, TRUE);
    733             for (int32_t i=0; i<s.length(); i++) {
    734                 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
    735             }
    736         }
    737         break;
    738 
    739 
    740     default:
    741         REGEX_DUMP_DEBUG_PRINTF(("??????"));
    742         break;
    743     }
    744     REGEX_DUMP_DEBUG_PRINTF(("\n"));
    745 }
    746 #endif
    747 
    748 
    749 #if defined(REGEX_DEBUG)
    750 U_CAPI void  U_EXPORT2
    751 RegexPatternDump(const RegexPattern *This) {
    752     int      index;
    753     int      i;
    754 
    755     REGEX_DUMP_DEBUG_PRINTF(("Original Pattern:  "));
    756     UChar32 c = utext_next32From(This->fPattern, 0);
    757     while (c != U_SENTINEL) {
    758         if (c<32 || c>256) {
    759             c = '.';
    760         }
    761         REGEX_DUMP_DEBUG_PRINTF(("%c", c));
    762 
    763         c = UTEXT_NEXT32(This->fPattern);
    764     }
    765     REGEX_DUMP_DEBUG_PRINTF(("\n"));
    766     REGEX_DUMP_DEBUG_PRINTF(("   Min Match Length:  %d\n", This->fMinMatchLen));
    767     REGEX_DUMP_DEBUG_PRINTF(("   Match Start Type:  %s\n", START_OF_MATCH_STR(This->fStartType)));
    768     if (This->fStartType == START_STRING) {
    769         REGEX_DUMP_DEBUG_PRINTF(("    Initial match string: \""));
    770         for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
    771             REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i]));   // TODO:  non-printables, surrogates.
    772         }
    773         REGEX_DUMP_DEBUG_PRINTF(("\"\n"));
    774 
    775     } else if (This->fStartType == START_SET) {
    776         int32_t numSetChars = This->fInitialChars->size();
    777         if (numSetChars > 20) {
    778             numSetChars = 20;
    779         }
    780         REGEX_DUMP_DEBUG_PRINTF(("     Match First Chars : "));
    781         for (i=0; i<numSetChars; i++) {
    782             UChar32 c = This->fInitialChars->charAt(i);
    783             if (0x20<c && c <0x7e) {
    784                 REGEX_DUMP_DEBUG_PRINTF(("%c ", c));
    785             } else {
    786                 REGEX_DUMP_DEBUG_PRINTF(("%#x ", c));
    787             }
    788         }
    789         if (numSetChars < This->fInitialChars->size()) {
    790             REGEX_DUMP_DEBUG_PRINTF((" ..."));
    791         }
    792         REGEX_DUMP_DEBUG_PRINTF(("\n"));
    793 
    794     } else if (This->fStartType == START_CHAR) {
    795         REGEX_DUMP_DEBUG_PRINTF(("    First char of Match : "));
    796         if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
    797                 REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
    798             } else {
    799                 REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
    800             }
    801     }
    802 
    803     REGEX_DUMP_DEBUG_PRINTF(("\nIndex   Binary     Type             Operand\n" \
    804            "-------------------------------------------\n"));
    805     for (index = 0; index<This->fCompiledPat->size(); index++) {
    806         This->dumpOp(index);
    807     }
    808     REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
    809 }
    810 #endif
    811 
    812 
    813 
    814 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
    815 
    816 U_NAMESPACE_END
    817 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
    818