1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 // 4 // file: repattrn.cpp 5 // 6 /* 7 *************************************************************************** 8 * Copyright (C) 2002-2016 International Business Machines Corporation 9 * and others. All rights reserved. 10 *************************************************************************** 11 */ 12 13 #include "unicode/utypes.h" 14 15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 16 17 #include "unicode/regex.h" 18 #include "unicode/uclean.h" 19 #include "cmemory.h" 20 #include "cstr.h" 21 #include "uassert.h" 22 #include "uhash.h" 23 #include "uvector.h" 24 #include "uvectr32.h" 25 #include "uvectr64.h" 26 #include "regexcmp.h" 27 #include "regeximp.h" 28 #include "regexst.h" 29 30 U_NAMESPACE_BEGIN 31 32 //-------------------------------------------------------------------------- 33 // 34 // RegexPattern Default Constructor 35 // 36 //-------------------------------------------------------------------------- 37 RegexPattern::RegexPattern() { 38 // Init all of this instances data. 39 init(); 40 } 41 42 43 //-------------------------------------------------------------------------- 44 // 45 // Copy Constructor Note: This is a rather inefficient implementation, 46 // but it probably doesn't matter. 47 // 48 //-------------------------------------------------------------------------- 49 RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) { 50 init(); 51 *this = other; 52 } 53 54 55 56 //-------------------------------------------------------------------------- 57 // 58 // Assignment Operator 59 // 60 //-------------------------------------------------------------------------- 61 RegexPattern &RegexPattern::operator = (const RegexPattern &other) { 62 if (this == &other) { 63 // Source and destination are the same. Don't do anything. 64 return *this; 65 } 66 67 // Clean out any previous contents of object being assigned to. 68 zap(); 69 70 // Give target object a default initialization 71 init(); 72 73 // Copy simple fields 74 fDeferredStatus = other.fDeferredStatus; 75 76 if (U_FAILURE(fDeferredStatus)) { 77 return *this; 78 } 79 80 if (other.fPatternString == NULL) { 81 fPatternString = NULL; 82 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus); 83 } else { 84 fPatternString = new UnicodeString(*(other.fPatternString)); 85 if (fPatternString == NULL) { 86 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 87 } else { 88 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus); 89 } 90 } 91 if (U_FAILURE(fDeferredStatus)) { 92 return *this; 93 } 94 95 fFlags = other.fFlags; 96 fLiteralText = other.fLiteralText; 97 fMinMatchLen = other.fMinMatchLen; 98 fFrameSize = other.fFrameSize; 99 fDataSize = other.fDataSize; 100 fStaticSets = other.fStaticSets; 101 fStaticSets8 = other.fStaticSets8; 102 103 fStartType = other.fStartType; 104 fInitialStringIdx = other.fInitialStringIdx; 105 fInitialStringLen = other.fInitialStringLen; 106 *fInitialChars = *other.fInitialChars; 107 fInitialChar = other.fInitialChar; 108 *fInitialChars8 = *other.fInitialChars8; 109 fNeedsAltInput = other.fNeedsAltInput; 110 111 // Copy the pattern. It's just values, nothing deep to copy. 112 fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus); 113 fGroupMap->assign(*other.fGroupMap, fDeferredStatus); 114 115 // Copy the Unicode Sets. 116 // Could be made more efficient if the sets were reference counted and shared, 117 // but I doubt that pattern copying will be particularly common. 118 // Note: init() already added an empty element zero to fSets 119 int32_t i; 120 int32_t numSets = other.fSets->size(); 121 fSets8 = new Regex8BitSet[numSets]; 122 if (fSets8 == NULL) { 123 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 124 return *this; 125 } 126 for (i=1; i<numSets; i++) { 127 if (U_FAILURE(fDeferredStatus)) { 128 return *this; 129 } 130 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); 131 UnicodeSet *newSet = new UnicodeSet(*sourceSet); 132 if (newSet == NULL) { 133 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 134 break; 135 } 136 fSets->addElement(newSet, fDeferredStatus); 137 fSets8[i] = other.fSets8[i]; 138 } 139 140 // Copy the named capture group hash map. 141 int32_t hashPos = UHASH_FIRST; 142 while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) { 143 if (U_FAILURE(fDeferredStatus)) { 144 break; 145 } 146 const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer; 147 UnicodeString *key = new UnicodeString(*name); 148 int32_t val = hashEl->value.integer; 149 if (key == NULL) { 150 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 151 } else { 152 uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus); 153 } 154 } 155 return *this; 156 } 157 158 159 //-------------------------------------------------------------------------- 160 // 161 // init Shared initialization for use by constructors. 162 // Bring an uninitialized RegexPattern up to a default state. 163 // 164 //-------------------------------------------------------------------------- 165 void RegexPattern::init() { 166 fFlags = 0; 167 fCompiledPat = 0; 168 fLiteralText.remove(); 169 fSets = NULL; 170 fSets8 = NULL; 171 fDeferredStatus = U_ZERO_ERROR; 172 fMinMatchLen = 0; 173 fFrameSize = 0; 174 fDataSize = 0; 175 fGroupMap = NULL; 176 fStaticSets = NULL; 177 fStaticSets8 = NULL; 178 fStartType = START_NO_INFO; 179 fInitialStringIdx = 0; 180 fInitialStringLen = 0; 181 fInitialChars = NULL; 182 fInitialChar = 0; 183 fInitialChars8 = NULL; 184 fNeedsAltInput = FALSE; 185 fNamedCaptureMap = NULL; 186 187 fPattern = NULL; // will be set later 188 fPatternString = NULL; // may be set later 189 fCompiledPat = new UVector64(fDeferredStatus); 190 fGroupMap = new UVector32(fDeferredStatus); 191 fSets = new UVector(fDeferredStatus); 192 fInitialChars = new UnicodeSet; 193 fInitialChars8 = new Regex8BitSet; 194 fNamedCaptureMap = uhash_open(uhash_hashUnicodeString, // Key hash function 195 uhash_compareUnicodeString, // Key comparator function 196 uhash_compareLong, // Value comparator function 197 &fDeferredStatus); 198 if (U_FAILURE(fDeferredStatus)) { 199 return; 200 } 201 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || 202 fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) { 203 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 204 return; 205 } 206 207 // Slot zero of the vector of sets is reserved. Fill it here. 208 fSets->addElement((int32_t)0, fDeferredStatus); 209 210 // fNamedCaptureMap owns its key strings, type (UnicodeString *) 211 uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject); 212 } 213 214 215 //-------------------------------------------------------------------------- 216 // 217 // zap Delete everything owned by this RegexPattern. 218 // 219 //-------------------------------------------------------------------------- 220 void RegexPattern::zap() { 221 delete fCompiledPat; 222 fCompiledPat = NULL; 223 int i; 224 for (i=1; i<fSets->size(); i++) { 225 UnicodeSet *s; 226 s = (UnicodeSet *)fSets->elementAt(i); 227 if (s != NULL) { 228 delete s; 229 } 230 } 231 delete fSets; 232 fSets = NULL; 233 delete[] fSets8; 234 fSets8 = NULL; 235 delete fGroupMap; 236 fGroupMap = NULL; 237 delete fInitialChars; 238 fInitialChars = NULL; 239 delete fInitialChars8; 240 fInitialChars8 = NULL; 241 if (fPattern != NULL) { 242 utext_close(fPattern); 243 fPattern = NULL; 244 } 245 if (fPatternString != NULL) { 246 delete fPatternString; 247 fPatternString = NULL; 248 } 249 uhash_close(fNamedCaptureMap); 250 fNamedCaptureMap = NULL; 251 } 252 253 254 //-------------------------------------------------------------------------- 255 // 256 // Destructor 257 // 258 //-------------------------------------------------------------------------- 259 RegexPattern::~RegexPattern() { 260 zap(); 261 } 262 263 264 //-------------------------------------------------------------------------- 265 // 266 // Clone 267 // 268 //-------------------------------------------------------------------------- 269 RegexPattern *RegexPattern::clone() const { 270 RegexPattern *copy = new RegexPattern(*this); 271 return copy; 272 } 273 274 275 //-------------------------------------------------------------------------- 276 // 277 // operator == (comparison) Consider to patterns to be == if the 278 // pattern strings and the flags are the same. 279 // Note that pattern strings with the same 280 // characters can still be considered different. 281 // 282 //-------------------------------------------------------------------------- 283 UBool RegexPattern::operator ==(const RegexPattern &other) const { 284 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) { 285 if (this->fPatternString != NULL && other.fPatternString != NULL) { 286 return *(this->fPatternString) == *(other.fPatternString); 287 } else if (this->fPattern == NULL) { 288 if (other.fPattern == NULL) { 289 return TRUE; 290 } 291 } else if (other.fPattern != NULL) { 292 UTEXT_SETNATIVEINDEX(this->fPattern, 0); 293 UTEXT_SETNATIVEINDEX(other.fPattern, 0); 294 return utext_equals(this->fPattern, other.fPattern); 295 } 296 } 297 return FALSE; 298 } 299 300 //--------------------------------------------------------------------- 301 // 302 // compile 303 // 304 //--------------------------------------------------------------------- 305 RegexPattern * U_EXPORT2 306 RegexPattern::compile(const UnicodeString ®ex, 307 uint32_t flags, 308 UParseError &pe, 309 UErrorCode &status) 310 { 311 if (U_FAILURE(status)) { 312 return NULL; 313 } 314 315 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 316 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 317 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 318 319 if ((flags & ~allFlags) != 0) { 320 status = U_REGEX_INVALID_FLAG; 321 return NULL; 322 } 323 324 if ((flags & UREGEX_CANON_EQ) != 0) { 325 status = U_REGEX_UNIMPLEMENTED; 326 return NULL; 327 } 328 329 RegexPattern *This = new RegexPattern; 330 if (This == NULL) { 331 status = U_MEMORY_ALLOCATION_ERROR; 332 return NULL; 333 } 334 if (U_FAILURE(This->fDeferredStatus)) { 335 status = This->fDeferredStatus; 336 delete This; 337 return NULL; 338 } 339 This->fFlags = flags; 340 341 RegexCompile compiler(This, status); 342 compiler.compile(regex, pe, status); 343 344 if (U_FAILURE(status)) { 345 delete This; 346 This = NULL; 347 } 348 349 return This; 350 } 351 352 353 // 354 // compile, UText mode 355 // 356 RegexPattern * U_EXPORT2 357 RegexPattern::compile(UText *regex, 358 uint32_t flags, 359 UParseError &pe, 360 UErrorCode &status) 361 { 362 if (U_FAILURE(status)) { 363 return NULL; 364 } 365 366 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 367 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 368 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 369 370 if ((flags & ~allFlags) != 0) { 371 status = U_REGEX_INVALID_FLAG; 372 return NULL; 373 } 374 375 if ((flags & UREGEX_CANON_EQ) != 0) { 376 status = U_REGEX_UNIMPLEMENTED; 377 return NULL; 378 } 379 380 RegexPattern *This = new RegexPattern; 381 if (This == NULL) { 382 status = U_MEMORY_ALLOCATION_ERROR; 383 return NULL; 384 } 385 if (U_FAILURE(This->fDeferredStatus)) { 386 status = This->fDeferredStatus; 387 delete This; 388 return NULL; 389 } 390 This->fFlags = flags; 391 392 RegexCompile compiler(This, status); 393 compiler.compile(regex, pe, status); 394 395 if (U_FAILURE(status)) { 396 delete This; 397 This = NULL; 398 } 399 400 return This; 401 } 402 403 // 404 // compile with default flags. 405 // 406 RegexPattern * U_EXPORT2 407 RegexPattern::compile(const UnicodeString ®ex, 408 UParseError &pe, 409 UErrorCode &err) 410 { 411 return compile(regex, 0, pe, err); 412 } 413 414 415 // 416 // compile with default flags, UText mode 417 // 418 RegexPattern * U_EXPORT2 419 RegexPattern::compile(UText *regex, 420 UParseError &pe, 421 UErrorCode &err) 422 { 423 return compile(regex, 0, pe, err); 424 } 425 426 427 // 428 // compile with no UParseErr parameter. 429 // 430 RegexPattern * U_EXPORT2 431 RegexPattern::compile(const UnicodeString ®ex, 432 uint32_t flags, 433 UErrorCode &err) 434 { 435 UParseError pe; 436 return compile(regex, flags, pe, err); 437 } 438 439 440 // 441 // compile with no UParseErr parameter, UText mode 442 // 443 RegexPattern * U_EXPORT2 444 RegexPattern::compile(UText *regex, 445 uint32_t flags, 446 UErrorCode &err) 447 { 448 UParseError pe; 449 return compile(regex, flags, pe, err); 450 } 451 452 453 //--------------------------------------------------------------------- 454 // 455 // flags 456 // 457 //--------------------------------------------------------------------- 458 uint32_t RegexPattern::flags() const { 459 return fFlags; 460 } 461 462 463 //--------------------------------------------------------------------- 464 // 465 // matcher(UnicodeString, err) 466 // 467 //--------------------------------------------------------------------- 468 RegexMatcher *RegexPattern::matcher(const UnicodeString &input, 469 UErrorCode &status) const { 470 RegexMatcher *retMatcher = matcher(status); 471 if (retMatcher != NULL) { 472 retMatcher->fDeferredStatus = status; 473 retMatcher->reset(input); 474 } 475 return retMatcher; 476 } 477 478 479 //--------------------------------------------------------------------- 480 // 481 // matcher(status) 482 // 483 //--------------------------------------------------------------------- 484 RegexMatcher *RegexPattern::matcher(UErrorCode &status) const { 485 RegexMatcher *retMatcher = NULL; 486 487 if (U_FAILURE(status)) { 488 return NULL; 489 } 490 if (U_FAILURE(fDeferredStatus)) { 491 status = fDeferredStatus; 492 return NULL; 493 } 494 495 retMatcher = new RegexMatcher(this); 496 if (retMatcher == NULL) { 497 status = U_MEMORY_ALLOCATION_ERROR; 498 return NULL; 499 } 500 return retMatcher; 501 } 502 503 504 505 //--------------------------------------------------------------------- 506 // 507 // matches Convenience function to test for a match, starting 508 // with a pattern string and a data string. 509 // 510 //--------------------------------------------------------------------- 511 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex, 512 const UnicodeString &input, 513 UParseError &pe, 514 UErrorCode &status) { 515 516 if (U_FAILURE(status)) {return FALSE;} 517 518 UBool retVal; 519 RegexPattern *pat = NULL; 520 RegexMatcher *matcher = NULL; 521 522 pat = RegexPattern::compile(regex, 0, pe, status); 523 matcher = pat->matcher(input, status); 524 retVal = matcher->matches(status); 525 526 delete matcher; 527 delete pat; 528 return retVal; 529 } 530 531 532 // 533 // matches, UText mode 534 // 535 UBool U_EXPORT2 RegexPattern::matches(UText *regex, 536 UText *input, 537 UParseError &pe, 538 UErrorCode &status) { 539 540 if (U_FAILURE(status)) {return FALSE;} 541 542 UBool retVal = FALSE; 543 RegexPattern *pat = NULL; 544 RegexMatcher *matcher = NULL; 545 546 pat = RegexPattern::compile(regex, 0, pe, status); 547 matcher = pat->matcher(status); 548 if (U_SUCCESS(status)) { 549 matcher->reset(input); 550 retVal = matcher->matches(status); 551 } 552 553 delete matcher; 554 delete pat; 555 return retVal; 556 } 557 558 559 560 561 562 //--------------------------------------------------------------------- 563 // 564 // pattern 565 // 566 //--------------------------------------------------------------------- 567 UnicodeString RegexPattern::pattern() const { 568 if (fPatternString != NULL) { 569 return *fPatternString; 570 } else if (fPattern == NULL) { 571 return UnicodeString(); 572 } else { 573 UErrorCode status = U_ZERO_ERROR; 574 int64_t nativeLen = utext_nativeLength(fPattern); 575 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error 576 UnicodeString result; 577 578 status = U_ZERO_ERROR; 579 UChar *resultChars = result.getBuffer(len16); 580 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning 581 result.releaseBuffer(len16); 582 583 return result; 584 } 585 } 586 587 588 589 590 //--------------------------------------------------------------------- 591 // 592 // patternText 593 // 594 //--------------------------------------------------------------------- 595 UText *RegexPattern::patternText(UErrorCode &status) const { 596 if (U_FAILURE(status)) {return NULL;} 597 status = U_ZERO_ERROR; 598 599 if (fPattern != NULL) { 600 return fPattern; 601 } else { 602 RegexStaticSets::initGlobals(&status); 603 return RegexStaticSets::gStaticSets->fEmptyText; 604 } 605 } 606 607 608 //-------------------------------------------------------------------------------- 609 // 610 // groupNumberFromName() 611 // 612 //-------------------------------------------------------------------------------- 613 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const { 614 if (U_FAILURE(status)) { 615 return 0; 616 } 617 618 // No need to explicitly check for syntactically valid names. 619 // Invalid ones will never be in the map, and the lookup will fail. 620 621 int32_t number = uhash_geti(fNamedCaptureMap, &groupName); 622 if (number == 0) { 623 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; 624 } 625 return number; 626 } 627 628 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const { 629 if (U_FAILURE(status)) { 630 return 0; 631 } 632 UnicodeString name(groupName, nameLength, US_INV); 633 return groupNumberFromName(name, status); 634 } 635 636 637 //--------------------------------------------------------------------- 638 // 639 // split 640 // 641 //--------------------------------------------------------------------- 642 int32_t RegexPattern::split(const UnicodeString &input, 643 UnicodeString dest[], 644 int32_t destCapacity, 645 UErrorCode &status) const 646 { 647 if (U_FAILURE(status)) { 648 return 0; 649 }; 650 651 RegexMatcher m(this); 652 int32_t r = 0; 653 // Check m's status to make sure all is ok. 654 if (U_SUCCESS(m.fDeferredStatus)) { 655 r = m.split(input, dest, destCapacity, status); 656 } 657 return r; 658 } 659 660 // 661 // split, UText mode 662 // 663 int32_t RegexPattern::split(UText *input, 664 UText *dest[], 665 int32_t destCapacity, 666 UErrorCode &status) const 667 { 668 if (U_FAILURE(status)) { 669 return 0; 670 }; 671 672 RegexMatcher m(this); 673 int32_t r = 0; 674 // Check m's status to make sure all is ok. 675 if (U_SUCCESS(m.fDeferredStatus)) { 676 r = m.split(input, dest, destCapacity, status); 677 } 678 return r; 679 } 680 681 682 //--------------------------------------------------------------------- 683 // 684 // dump Output the compiled form of the pattern. 685 // Debugging function only. 686 // 687 //--------------------------------------------------------------------- 688 void RegexPattern::dumpOp(int32_t index) const { 689 (void)index; // Suppress warnings in non-debug build. 690 #if defined(REGEX_DEBUG) 691 static const char * const opNames[] = {URX_OPCODE_NAMES}; 692 int32_t op = fCompiledPat->elementAti(index); 693 int32_t val = URX_VAL(op); 694 int32_t type = URX_TYPE(op); 695 int32_t pinnedType = type; 696 if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) { 697 pinnedType = 0; 698 } 699 700 printf("%4d %08x %-15s ", index, op, opNames[pinnedType]); 701 switch (type) { 702 case URX_NOP: 703 case URX_DOTANY: 704 case URX_DOTANY_ALL: 705 case URX_FAIL: 706 case URX_CARET: 707 case URX_DOLLAR: 708 case URX_BACKSLASH_G: 709 case URX_BACKSLASH_X: 710 case URX_END: 711 case URX_DOLLAR_M: 712 case URX_CARET_M: 713 // Types with no operand field of interest. 714 break; 715 716 case URX_RESERVED_OP: 717 case URX_START_CAPTURE: 718 case URX_END_CAPTURE: 719 case URX_STATE_SAVE: 720 case URX_JMP: 721 case URX_JMP_SAV: 722 case URX_JMP_SAV_X: 723 case URX_BACKSLASH_B: 724 case URX_BACKSLASH_BU: 725 case URX_BACKSLASH_D: 726 case URX_BACKSLASH_Z: 727 case URX_STRING_LEN: 728 case URX_CTR_INIT: 729 case URX_CTR_INIT_NG: 730 case URX_CTR_LOOP: 731 case URX_CTR_LOOP_NG: 732 case URX_RELOC_OPRND: 733 case URX_STO_SP: 734 case URX_LD_SP: 735 case URX_BACKREF: 736 case URX_STO_INP_LOC: 737 case URX_JMPX: 738 case URX_LA_START: 739 case URX_LA_END: 740 case URX_BACKREF_I: 741 case URX_LB_START: 742 case URX_LB_CONT: 743 case URX_LB_END: 744 case URX_LBN_CONT: 745 case URX_LBN_END: 746 case URX_LOOP_C: 747 case URX_LOOP_DOT_I: 748 case URX_BACKSLASH_H: 749 case URX_BACKSLASH_R: 750 case URX_BACKSLASH_V: 751 // types with an integer operand field. 752 printf("%d", val); 753 break; 754 755 case URX_ONECHAR: 756 case URX_ONECHAR_I: 757 if (val < 0x20) { 758 printf("%#x", val); 759 } else { 760 printf("'%s'", CStr(UnicodeString(val))()); 761 } 762 break; 763 764 case URX_STRING: 765 case URX_STRING_I: 766 { 767 int32_t lengthOp = fCompiledPat->elementAti(index+1); 768 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN); 769 int32_t length = URX_VAL(lengthOp); 770 UnicodeString str(fLiteralText, val, length); 771 printf("%s", CStr(str)()); 772 } 773 break; 774 775 case URX_SETREF: 776 case URX_LOOP_SR_I: 777 { 778 UnicodeString s; 779 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val); 780 set->toPattern(s, TRUE); 781 printf("%s", CStr(s)()); 782 } 783 break; 784 785 case URX_STATIC_SETREF: 786 case URX_STAT_SETREF_N: 787 { 788 UnicodeString s; 789 if (val & URX_NEG_SET) { 790 printf("NOT "); 791 val &= ~URX_NEG_SET; 792 } 793 UnicodeSet *set = fStaticSets[val]; 794 set->toPattern(s, TRUE); 795 printf("%s", CStr(s)()); 796 } 797 break; 798 799 800 default: 801 printf("??????"); 802 break; 803 } 804 printf("\n"); 805 #endif 806 } 807 808 809 void RegexPattern::dumpPattern() const { 810 #if defined(REGEX_DEBUG) 811 int index; 812 813 UnicodeString patStr; 814 for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) { 815 patStr.append(c); 816 } 817 printf("Original Pattern: \"%s\"\n", CStr(patStr)()); 818 printf(" Min Match Length: %d\n", fMinMatchLen); 819 printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType)); 820 if (fStartType == START_STRING) { 821 UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen); 822 printf(" Initial match string: \"%s\"\n", CStr(initialString)()); 823 } else if (fStartType == START_SET) { 824 UnicodeString s; 825 fInitialChars->toPattern(s, TRUE); 826 printf(" Match First Chars: %s\n", CStr(s)()); 827 828 } else if (fStartType == START_CHAR) { 829 printf(" First char of Match: "); 830 if (fInitialChar > 0x20) { 831 printf("'%s'\n", CStr(UnicodeString(fInitialChar))()); 832 } else { 833 printf("%#x\n", fInitialChar); 834 } 835 } 836 837 printf("Named Capture Groups:\n"); 838 if (uhash_count(fNamedCaptureMap) == 0) { 839 printf(" None\n"); 840 } else { 841 int32_t pos = UHASH_FIRST; 842 const UHashElement *el = NULL; 843 while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) { 844 const UnicodeString *name = (const UnicodeString *)el->key.pointer; 845 int32_t number = el->value.integer; 846 printf(" %d\t%s\n", number, CStr(*name)()); 847 } 848 } 849 850 printf("\nIndex Binary Type Operand\n" \ 851 "-------------------------------------------\n"); 852 for (index = 0; index<fCompiledPat->size(); index++) { 853 dumpOp(index); 854 } 855 printf("\n\n"); 856 #endif 857 } 858 859 860 861 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) 862 863 U_NAMESPACE_END 864 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 865