1 /********************************************************************** 2 regcomp.c - Oniguruma (regular expression library) 3 **********************************************************************/ 4 /*- 5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 6 * All rights reserved. 7 * 8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR> 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include "regparse.h" 33 34 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; 35 36 extern OnigCaseFoldType 37 onig_get_default_case_fold_flag(void) 38 { 39 return OnigDefaultCaseFoldFlag; 40 } 41 42 extern int 43 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) 44 { 45 OnigDefaultCaseFoldFlag = case_fold_flag; 46 return 0; 47 } 48 49 50 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 51 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; 52 #endif 53 54 static UChar* 55 str_dup(UChar* s, UChar* end) 56 { 57 int len = (int)(end - s); 58 59 if (len > 0) { 60 UChar* r = (UChar* )xmalloc(len + 1); 61 CHECK_NULL_RETURN(r); 62 xmemcpy(r, s, len); 63 r[len] = (UChar )0; 64 return r; 65 } 66 else return NULL; 67 } 68 69 static void 70 swap_node(Node* a, Node* b) 71 { 72 Node c; 73 CopyMem (&c, a, sizeof (Node)); 74 CopyMem (a, b, sizeof (Node)); 75 CopyMem (b, &c, sizeof (Node)); 76 77 if (NTYPE(a) == NT_STR) { 78 StrNode* sn = NSTR(a); 79 if (sn->capa == 0) { 80 int len = (int)(sn->end - sn->s); 81 sn->s = sn->buf; 82 sn->end = sn->s + len; 83 } 84 } 85 86 if (NTYPE(b) == NT_STR) { 87 StrNode* sn = NSTR(b); 88 if (sn->capa == 0) { 89 int len = (int)(sn->end - sn->s); 90 sn->s = sn->buf; 91 sn->end = sn->s + len; 92 } 93 } 94 } 95 96 static OnigDistance 97 distance_add(OnigDistance d1, OnigDistance d2) 98 { 99 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) 100 return ONIG_INFINITE_DISTANCE; 101 else { 102 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; 103 else return ONIG_INFINITE_DISTANCE; 104 } 105 } 106 107 static OnigDistance 108 distance_multiply(OnigDistance d, int m) 109 { 110 if (m == 0) return 0; 111 112 if (d < ONIG_INFINITE_DISTANCE / m) 113 return d * m; 114 else 115 return ONIG_INFINITE_DISTANCE; 116 } 117 118 static int 119 bitset_is_empty(BitSetRef bs) 120 { 121 int i; 122 for (i = 0; i < (int )BITSET_SIZE; i++) { 123 if (bs[i] != 0) return 0; 124 } 125 return 1; 126 } 127 128 #ifdef ONIG_DEBUG 129 static int 130 bitset_on_num(BitSetRef bs) 131 { 132 int i, n; 133 134 n = 0; 135 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 136 if (BITSET_AT(bs, i)) n++; 137 } 138 return n; 139 } 140 #endif 141 142 extern int 143 onig_bbuf_init(BBuf* buf, int size) 144 { 145 if (size <= 0) { 146 size = 0; 147 buf->p = NULL; 148 } 149 else { 150 buf->p = (UChar* )xmalloc(size); 151 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); 152 } 153 154 buf->alloc = size; 155 buf->used = 0; 156 return 0; 157 } 158 159 160 #ifdef USE_SUBEXP_CALL 161 162 static int 163 unset_addr_list_init(UnsetAddrList* uslist, int size) 164 { 165 UnsetAddr* p; 166 167 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); 168 CHECK_NULL_RETURN_MEMERR(p); 169 uslist->num = 0; 170 uslist->alloc = size; 171 uslist->us = p; 172 return 0; 173 } 174 175 static void 176 unset_addr_list_end(UnsetAddrList* uslist) 177 { 178 if (IS_NOT_NULL(uslist->us)) 179 xfree(uslist->us); 180 } 181 182 static int 183 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) 184 { 185 UnsetAddr* p; 186 int size; 187 188 if (uslist->num >= uslist->alloc) { 189 size = uslist->alloc * 2; 190 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr) * uslist->alloc); 191 CHECK_NULL_RETURN_MEMERR(p); 192 uslist->alloc = size; 193 uslist->us = p; 194 } 195 196 uslist->us[uslist->num].offset = offset; 197 uslist->us[uslist->num].target = node; 198 uslist->num++; 199 return 0; 200 } 201 #endif /* USE_SUBEXP_CALL */ 202 203 204 static int 205 add_opcode(regex_t* reg, int opcode) 206 { 207 BBUF_ADD1(reg, ((unsigned char)opcode)); 208 return 0; 209 } 210 211 #ifdef USE_COMBINATION_EXPLOSION_CHECK 212 static int 213 add_state_check_num(regex_t* reg, int num) 214 { 215 StateCheckNumType n = (StateCheckNumType )num; 216 217 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); 218 return 0; 219 } 220 #endif 221 222 static int 223 add_rel_addr(regex_t* reg, int addr) 224 { 225 RelAddrType ra = (RelAddrType )addr; 226 227 BBUF_ADD(reg, &ra, SIZE_RELADDR); 228 return 0; 229 } 230 231 static int 232 add_abs_addr(regex_t* reg, int addr) 233 { 234 AbsAddrType ra = (AbsAddrType )addr; 235 236 BBUF_ADD(reg, &ra, SIZE_ABSADDR); 237 return 0; 238 } 239 240 static int 241 add_length(regex_t* reg, int len) 242 { 243 LengthType l = (LengthType )len; 244 245 BBUF_ADD(reg, &l, SIZE_LENGTH); 246 return 0; 247 } 248 249 static int 250 add_mem_num(regex_t* reg, int num) 251 { 252 MemNumType n = (MemNumType )num; 253 254 BBUF_ADD(reg, &n, SIZE_MEMNUM); 255 return 0; 256 } 257 258 static int 259 add_pointer(regex_t* reg, void* addr) 260 { 261 PointerType ptr = (PointerType )addr; 262 263 BBUF_ADD(reg, &ptr, SIZE_POINTER); 264 return 0; 265 } 266 267 static int 268 add_option(regex_t* reg, OnigOptionType option) 269 { 270 BBUF_ADD(reg, &option, SIZE_OPTION); 271 return 0; 272 } 273 274 static int 275 add_opcode_rel_addr(regex_t* reg, int opcode, int addr) 276 { 277 int r; 278 279 r = add_opcode(reg, opcode); 280 if (r) return r; 281 r = add_rel_addr(reg, addr); 282 return r; 283 } 284 285 static int 286 add_bytes(regex_t* reg, UChar* bytes, int len) 287 { 288 BBUF_ADD(reg, bytes, len); 289 return 0; 290 } 291 292 static int 293 add_bitset(regex_t* reg, BitSetRef bs) 294 { 295 BBUF_ADD(reg, bs, SIZE_BITSET); 296 return 0; 297 } 298 299 static int 300 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) 301 { 302 int r; 303 304 r = add_opcode(reg, opcode); 305 if (r) return r; 306 r = add_option(reg, option); 307 return r; 308 } 309 310 static int compile_length_tree(Node* node, regex_t* reg); 311 static int compile_tree(Node* node, regex_t* reg); 312 313 314 #define IS_NEED_STR_LEN_OP_EXACT(op) \ 315 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\ 316 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC) 317 318 static int 319 select_str_opcode(int mb_len, int str_len, int ignore_case) 320 { 321 int op; 322 323 if (ignore_case) { 324 switch (str_len) { 325 case 1: op = OP_EXACT1_IC; break; 326 default: op = OP_EXACTN_IC; break; 327 } 328 } 329 else { 330 switch (mb_len) { 331 case 1: 332 switch (str_len) { 333 case 1: op = OP_EXACT1; break; 334 case 2: op = OP_EXACT2; break; 335 case 3: op = OP_EXACT3; break; 336 case 4: op = OP_EXACT4; break; 337 case 5: op = OP_EXACT5; break; 338 default: op = OP_EXACTN; break; 339 } 340 break; 341 342 case 2: 343 switch (str_len) { 344 case 1: op = OP_EXACTMB2N1; break; 345 case 2: op = OP_EXACTMB2N2; break; 346 case 3: op = OP_EXACTMB2N3; break; 347 default: op = OP_EXACTMB2N; break; 348 } 349 break; 350 351 case 3: 352 op = OP_EXACTMB3N; 353 break; 354 355 default: 356 op = OP_EXACTMBN; 357 break; 358 } 359 } 360 return op; 361 } 362 363 static int 364 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) 365 { 366 int r; 367 int saved_num_null_check = reg->num_null_check; 368 369 if (empty_info != 0) { 370 r = add_opcode(reg, OP_NULL_CHECK_START); 371 if (r) return r; 372 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ 373 if (r) return r; 374 reg->num_null_check++; 375 } 376 377 r = compile_tree(node, reg); 378 if (r) return r; 379 380 if (empty_info != 0) { 381 if (empty_info == NQ_TARGET_IS_EMPTY) 382 r = add_opcode(reg, OP_NULL_CHECK_END); 383 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) 384 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); 385 else if (empty_info == NQ_TARGET_IS_EMPTY_REC) 386 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); 387 388 if (r) return r; 389 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ 390 } 391 return r; 392 } 393 394 #ifdef USE_SUBEXP_CALL 395 static int 396 compile_call(CallNode* node, regex_t* reg) 397 { 398 int r; 399 400 r = add_opcode(reg, OP_CALL); 401 if (r) return r; 402 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg), 403 node->target); 404 if (r) return r; 405 r = add_abs_addr(reg, 0 /*dummy addr.*/); 406 return r; 407 } 408 #endif 409 410 static int 411 compile_tree_n_times(Node* node, int n, regex_t* reg) 412 { 413 int i, r; 414 415 for (i = 0; i < n; i++) { 416 r = compile_tree(node, reg); 417 if (r) return r; 418 } 419 return 0; 420 } 421 422 static int 423 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len, 424 regex_t* reg ARG_UNUSED, int ignore_case) 425 { 426 int len; 427 int op = select_str_opcode(mb_len, str_len, ignore_case); 428 429 len = SIZE_OPCODE; 430 431 if (op == OP_EXACTMBN) len += SIZE_LENGTH; 432 if (IS_NEED_STR_LEN_OP_EXACT(op)) 433 len += SIZE_LENGTH; 434 435 len += mb_len * str_len; 436 return len; 437 } 438 439 static int 440 add_compile_string(UChar* s, int mb_len, int str_len, 441 regex_t* reg, int ignore_case) 442 { 443 int op = select_str_opcode(mb_len, str_len, ignore_case); 444 add_opcode(reg, op); 445 446 if (op == OP_EXACTMBN) 447 add_length(reg, mb_len); 448 449 if (IS_NEED_STR_LEN_OP_EXACT(op)) { 450 if (op == OP_EXACTN_IC) 451 add_length(reg, mb_len * str_len); 452 else 453 add_length(reg, str_len); 454 } 455 456 add_bytes(reg, s, mb_len * str_len); 457 return 0; 458 } 459 460 461 static int 462 compile_length_string_node(Node* node, regex_t* reg) 463 { 464 int rlen, r, len, prev_len, slen, ambig; 465 OnigEncoding enc = reg->enc; 466 UChar *p, *prev; 467 StrNode* sn; 468 469 sn = NSTR(node); 470 if (sn->end <= sn->s) 471 return 0; 472 473 ambig = NSTRING_IS_AMBIG(node); 474 475 p = prev = sn->s; 476 prev_len = enclen(enc, p); 477 p += prev_len; 478 slen = 1; 479 rlen = 0; 480 481 for (; p < sn->end; ) { 482 len = enclen(enc, p); 483 if (len == prev_len) { 484 slen++; 485 } 486 else { 487 r = add_compile_string_length(prev, prev_len, slen, reg, ambig); 488 rlen += r; 489 prev = p; 490 slen = 1; 491 prev_len = len; 492 } 493 p += len; 494 } 495 r = add_compile_string_length(prev, prev_len, slen, reg, ambig); 496 rlen += r; 497 return rlen; 498 } 499 500 static int 501 compile_length_string_raw_node(StrNode* sn, regex_t* reg) 502 { 503 if (sn->end <= sn->s) 504 return 0; 505 506 return add_compile_string_length(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0); 507 } 508 509 static int 510 compile_string_node(Node* node, regex_t* reg) 511 { 512 int r, len, prev_len, slen, ambig; 513 OnigEncoding enc = reg->enc; 514 UChar *p, *prev, *end; 515 StrNode* sn; 516 517 sn = NSTR(node); 518 if (sn->end <= sn->s) 519 return 0; 520 521 end = sn->end; 522 ambig = NSTRING_IS_AMBIG(node); 523 524 p = prev = sn->s; 525 prev_len = enclen(enc, p); 526 p += prev_len; 527 slen = 1; 528 529 for (; p < end; ) { 530 len = enclen(enc, p); 531 if (len == prev_len) { 532 slen++; 533 } 534 else { 535 r = add_compile_string(prev, prev_len, slen, reg, ambig); 536 if (r) return r; 537 538 prev = p; 539 slen = 1; 540 prev_len = len; 541 } 542 543 p += len; 544 } 545 return add_compile_string(prev, prev_len, slen, reg, ambig); 546 } 547 548 static int 549 compile_string_raw_node(StrNode* sn, regex_t* reg) 550 { 551 if (sn->end <= sn->s) 552 return 0; 553 554 return add_compile_string(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0); 555 } 556 557 static int 558 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) 559 { 560 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 561 add_length(reg, mbuf->used); 562 return add_bytes(reg, mbuf->p, mbuf->used); 563 #else 564 int r, pad_size; 565 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; 566 567 GET_ALIGNMENT_PAD_SIZE(p, pad_size); 568 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); 569 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); 570 571 r = add_bytes(reg, mbuf->p, mbuf->used); 572 573 /* padding for return value from compile_length_cclass_node() to be fix. */ 574 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; 575 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); 576 return r; 577 #endif 578 } 579 580 static int 581 compile_length_cclass_node(CClassNode* cc, regex_t* reg) 582 { 583 int len; 584 585 if (IS_NCCLASS_SHARE(cc)) { 586 len = SIZE_OPCODE + SIZE_POINTER; 587 return len; 588 } 589 590 if (IS_NULL(cc->mbuf)) { 591 len = SIZE_OPCODE + SIZE_BITSET; 592 } 593 else { 594 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { 595 len = SIZE_OPCODE; 596 } 597 else { 598 len = SIZE_OPCODE + SIZE_BITSET; 599 } 600 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 601 len += SIZE_LENGTH + cc->mbuf->used; 602 #else 603 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); 604 #endif 605 } 606 607 return len; 608 } 609 610 static int 611 compile_cclass_node(CClassNode* cc, regex_t* reg) 612 { 613 int r; 614 615 if (IS_NCCLASS_SHARE(cc)) { 616 add_opcode(reg, OP_CCLASS_NODE); 617 r = add_pointer(reg, cc); 618 return r; 619 } 620 621 if (IS_NULL(cc->mbuf)) { 622 if (IS_NCCLASS_NOT(cc)) 623 add_opcode(reg, OP_CCLASS_NOT); 624 else 625 add_opcode(reg, OP_CCLASS); 626 627 r = add_bitset(reg, cc->bs); 628 } 629 else { 630 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { 631 if (IS_NCCLASS_NOT(cc)) 632 add_opcode(reg, OP_CCLASS_MB_NOT); 633 else 634 add_opcode(reg, OP_CCLASS_MB); 635 636 r = add_multi_byte_cclass(cc->mbuf, reg); 637 } 638 else { 639 if (IS_NCCLASS_NOT(cc)) 640 add_opcode(reg, OP_CCLASS_MIX_NOT); 641 else 642 add_opcode(reg, OP_CCLASS_MIX); 643 644 r = add_bitset(reg, cc->bs); 645 if (r) return r; 646 r = add_multi_byte_cclass(cc->mbuf, reg); 647 } 648 } 649 650 return r; 651 } 652 653 static int 654 entry_repeat_range(regex_t* reg, int id, int lower, int upper) 655 { 656 #define REPEAT_RANGE_ALLOC 4 657 658 OnigRepeatRange* p; 659 660 if (reg->repeat_range_alloc == 0) { 661 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); 662 CHECK_NULL_RETURN_MEMERR(p); 663 reg->repeat_range = p; 664 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; 665 } 666 else if (reg->repeat_range_alloc <= id) { 667 int n; 668 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; 669 p = (OnigRepeatRange* )xrealloc(reg->repeat_range, 670 sizeof(OnigRepeatRange) * n, 671 sizeof(OnigRepeatRange) * reg->repeat_range_alloc); 672 CHECK_NULL_RETURN_MEMERR(p); 673 reg->repeat_range = p; 674 reg->repeat_range_alloc = n; 675 } 676 else { 677 p = reg->repeat_range; 678 } 679 680 p[id].lower = lower; 681 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper); 682 return 0; 683 } 684 685 static int 686 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, 687 regex_t* reg) 688 { 689 int r; 690 int num_repeat = reg->num_repeat; 691 692 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG); 693 if (r) return r; 694 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ 695 reg->num_repeat++; 696 if (r) return r; 697 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); 698 if (r) return r; 699 700 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); 701 if (r) return r; 702 703 r = compile_tree_empty_check(qn->target, reg, empty_info); 704 if (r) return r; 705 706 if ( 707 #ifdef USE_SUBEXP_CALL 708 reg->num_call > 0 || 709 #endif 710 IS_QUANTIFIER_IN_REPEAT(qn)) { 711 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); 712 } 713 else { 714 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG); 715 } 716 if (r) return r; 717 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ 718 return r; 719 } 720 721 static int 722 is_anychar_star_quantifier(QtfrNode* qn) 723 { 724 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && 725 NTYPE(qn->target) == NT_CANY) 726 return 1; 727 else 728 return 0; 729 } 730 731 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50 732 #define CKN_ON (ckn > 0) 733 734 #ifdef USE_COMBINATION_EXPLOSION_CHECK 735 736 static int 737 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) 738 { 739 int len, mod_tlen, cklen; 740 int ckn; 741 int infinite = IS_REPEAT_INFINITE(qn->upper); 742 int empty_info = qn->target_empty_info; 743 int tlen = compile_length_tree(qn->target, reg); 744 745 if (tlen < 0) return tlen; 746 747 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); 748 749 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); 750 751 /* anychar repeat */ 752 if (NTYPE(qn->target) == NT_CANY) { 753 if (qn->greedy && infinite) { 754 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) 755 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; 756 else 757 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; 758 } 759 } 760 761 if (empty_info != 0) 762 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 763 else 764 mod_tlen = tlen; 765 766 if (infinite && qn->lower <= 1) { 767 if (qn->greedy) { 768 if (qn->lower == 1) 769 len = SIZE_OP_JUMP; 770 else 771 len = 0; 772 773 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; 774 } 775 else { 776 if (qn->lower == 0) 777 len = SIZE_OP_JUMP; 778 else 779 len = 0; 780 781 len += mod_tlen + SIZE_OP_PUSH + cklen; 782 } 783 } 784 else if (qn->upper == 0) { 785 if (qn->is_refered != 0) /* /(?<n>..){0}/ */ 786 len = SIZE_OP_JUMP + tlen; 787 else 788 len = 0; 789 } 790 else if (qn->upper == 1 && qn->greedy) { 791 if (qn->lower == 0) { 792 if (CKN_ON) { 793 len = SIZE_OP_STATE_CHECK_PUSH + tlen; 794 } 795 else { 796 len = SIZE_OP_PUSH + tlen; 797 } 798 } 799 else { 800 len = tlen; 801 } 802 } 803 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 804 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; 805 } 806 else { 807 len = SIZE_OP_REPEAT_INC 808 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; 809 if (CKN_ON) 810 len += SIZE_OP_STATE_CHECK; 811 } 812 813 return len; 814 } 815 816 static int 817 compile_quantifier_node(QtfrNode* qn, regex_t* reg) 818 { 819 int r, mod_tlen; 820 int ckn; 821 int infinite = IS_REPEAT_INFINITE(qn->upper); 822 int empty_info = qn->target_empty_info; 823 int tlen = compile_length_tree(qn->target, reg); 824 825 if (tlen < 0) return tlen; 826 827 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); 828 829 if (is_anychar_star_quantifier(qn)) { 830 r = compile_tree_n_times(qn->target, qn->lower, reg); 831 if (r) return r; 832 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { 833 if (IS_MULTILINE(reg->options)) 834 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); 835 else 836 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); 837 if (r) return r; 838 if (CKN_ON) { 839 r = add_state_check_num(reg, ckn); 840 if (r) return r; 841 } 842 843 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 844 } 845 else { 846 if (IS_MULTILINE(reg->options)) { 847 r = add_opcode(reg, (CKN_ON ? 848 OP_STATE_CHECK_ANYCHAR_ML_STAR 849 : OP_ANYCHAR_ML_STAR)); 850 } 851 else { 852 r = add_opcode(reg, (CKN_ON ? 853 OP_STATE_CHECK_ANYCHAR_STAR 854 : OP_ANYCHAR_STAR)); 855 } 856 if (r) return r; 857 if (CKN_ON) 858 r = add_state_check_num(reg, ckn); 859 860 return r; 861 } 862 } 863 864 if (empty_info != 0) 865 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 866 else 867 mod_tlen = tlen; 868 869 if (infinite && qn->lower <= 1) { 870 if (qn->greedy) { 871 if (qn->lower == 1) { 872 r = add_opcode_rel_addr(reg, OP_JUMP, 873 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); 874 if (r) return r; 875 } 876 877 if (CKN_ON) { 878 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 879 if (r) return r; 880 r = add_state_check_num(reg, ckn); 881 if (r) return r; 882 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); 883 } 884 else { 885 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); 886 } 887 if (r) return r; 888 r = compile_tree_empty_check(qn->target, reg, empty_info); 889 if (r) return r; 890 r = add_opcode_rel_addr(reg, OP_JUMP, 891 -(mod_tlen + (int )SIZE_OP_JUMP 892 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); 893 } 894 else { 895 if (qn->lower == 0) { 896 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); 897 if (r) return r; 898 } 899 r = compile_tree_empty_check(qn->target, reg, empty_info); 900 if (r) return r; 901 if (CKN_ON) { 902 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); 903 if (r) return r; 904 r = add_state_check_num(reg, ckn); 905 if (r) return r; 906 r = add_rel_addr(reg, 907 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); 908 } 909 else 910 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); 911 } 912 } 913 else if (qn->upper == 0) { 914 if (qn->is_refered != 0) { /* /(?<n>..){0}/ */ 915 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 916 if (r) return r; 917 r = compile_tree(qn->target, reg); 918 } 919 else 920 r = 0; 921 } 922 else if (qn->upper == 1 && qn->greedy) { 923 if (qn->lower == 0) { 924 if (CKN_ON) { 925 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 926 if (r) return r; 927 r = add_state_check_num(reg, ckn); 928 if (r) return r; 929 r = add_rel_addr(reg, tlen); 930 } 931 else { 932 r = add_opcode_rel_addr(reg, OP_PUSH, tlen); 933 } 934 if (r) return r; 935 } 936 937 r = compile_tree(qn->target, reg); 938 } 939 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 940 if (CKN_ON) { 941 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 942 if (r) return r; 943 r = add_state_check_num(reg, ckn); 944 if (r) return r; 945 r = add_rel_addr(reg, SIZE_OP_JUMP); 946 } 947 else { 948 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); 949 } 950 951 if (r) return r; 952 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 953 if (r) return r; 954 r = compile_tree(qn->target, reg); 955 } 956 else { 957 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); 958 if (CKN_ON) { 959 if (r) return r; 960 r = add_opcode(reg, OP_STATE_CHECK); 961 if (r) return r; 962 r = add_state_check_num(reg, ckn); 963 } 964 } 965 return r; 966 } 967 968 #else /* USE_COMBINATION_EXPLOSION_CHECK */ 969 970 static int 971 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) 972 { 973 int len, mod_tlen; 974 int infinite = IS_REPEAT_INFINITE(qn->upper); 975 int empty_info = qn->target_empty_info; 976 int tlen = compile_length_tree(qn->target, reg); 977 978 if (tlen < 0) return tlen; 979 980 /* anychar repeat */ 981 if (NTYPE(qn->target) == NT_CANY) { 982 if (qn->greedy && infinite) { 983 if (IS_NOT_NULL(qn->next_head_exact)) 984 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; 985 else 986 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; 987 } 988 } 989 990 if (empty_info != 0) 991 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 992 else 993 mod_tlen = tlen; 994 995 if (infinite && 996 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 997 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { 998 len = SIZE_OP_JUMP; 999 } 1000 else { 1001 len = tlen * qn->lower; 1002 } 1003 1004 if (qn->greedy) { 1005 if (IS_NOT_NULL(qn->head_exact)) 1006 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; 1007 else if (IS_NOT_NULL(qn->next_head_exact)) 1008 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; 1009 else 1010 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; 1011 } 1012 else 1013 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; 1014 } 1015 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ 1016 len = SIZE_OP_JUMP + tlen; 1017 } 1018 else if (!infinite && qn->greedy && 1019 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper 1020 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 1021 len = tlen * qn->lower; 1022 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); 1023 } 1024 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 1025 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; 1026 } 1027 else { 1028 len = SIZE_OP_REPEAT_INC 1029 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; 1030 } 1031 1032 return len; 1033 } 1034 1035 static int 1036 compile_quantifier_node(QtfrNode* qn, regex_t* reg) 1037 { 1038 int i, r, mod_tlen; 1039 int infinite = IS_REPEAT_INFINITE(qn->upper); 1040 int empty_info = qn->target_empty_info; 1041 int tlen = compile_length_tree(qn->target, reg); 1042 1043 if (tlen < 0) return tlen; 1044 1045 if (is_anychar_star_quantifier(qn)) { 1046 r = compile_tree_n_times(qn->target, qn->lower, reg); 1047 if (r) return r; 1048 if (IS_NOT_NULL(qn->next_head_exact)) { 1049 if (IS_MULTILINE(reg->options)) 1050 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); 1051 else 1052 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); 1053 if (r) return r; 1054 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 1055 } 1056 else { 1057 if (IS_MULTILINE(reg->options)) 1058 return add_opcode(reg, OP_ANYCHAR_ML_STAR); 1059 else 1060 return add_opcode(reg, OP_ANYCHAR_STAR); 1061 } 1062 } 1063 1064 if (empty_info != 0) 1065 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 1066 else 1067 mod_tlen = tlen; 1068 1069 if (infinite && 1070 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 1071 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { 1072 if (qn->greedy) { 1073 if (IS_NOT_NULL(qn->head_exact)) 1074 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); 1075 else if (IS_NOT_NULL(qn->next_head_exact)) 1076 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); 1077 else 1078 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); 1079 } 1080 else { 1081 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); 1082 } 1083 if (r) return r; 1084 } 1085 else { 1086 r = compile_tree_n_times(qn->target, qn->lower, reg); 1087 if (r) return r; 1088 } 1089 1090 if (qn->greedy) { 1091 if (IS_NOT_NULL(qn->head_exact)) { 1092 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, 1093 mod_tlen + SIZE_OP_JUMP); 1094 if (r) return r; 1095 add_bytes(reg, NSTR(qn->head_exact)->s, 1); 1096 r = compile_tree_empty_check(qn->target, reg, empty_info); 1097 if (r) return r; 1098 r = add_opcode_rel_addr(reg, OP_JUMP, 1099 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); 1100 } 1101 else if (IS_NOT_NULL(qn->next_head_exact)) { 1102 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, 1103 mod_tlen + SIZE_OP_JUMP); 1104 if (r) return r; 1105 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 1106 r = compile_tree_empty_check(qn->target, reg, empty_info); 1107 if (r) return r; 1108 r = add_opcode_rel_addr(reg, OP_JUMP, 1109 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); 1110 } 1111 else { 1112 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); 1113 if (r) return r; 1114 r = compile_tree_empty_check(qn->target, reg, empty_info); 1115 if (r) return r; 1116 r = add_opcode_rel_addr(reg, OP_JUMP, 1117 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); 1118 } 1119 } 1120 else { 1121 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); 1122 if (r) return r; 1123 r = compile_tree_empty_check(qn->target, reg, empty_info); 1124 if (r) return r; 1125 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); 1126 } 1127 } 1128 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ 1129 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 1130 if (r) return r; 1131 r = compile_tree(qn->target, reg); 1132 } 1133 else if (!infinite && qn->greedy && 1134 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper 1135 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 1136 int n = qn->upper - qn->lower; 1137 1138 r = compile_tree_n_times(qn->target, qn->lower, reg); 1139 if (r) return r; 1140 1141 for (i = 0; i < n; i++) { 1142 r = add_opcode_rel_addr(reg, OP_PUSH, 1143 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); 1144 if (r) return r; 1145 r = compile_tree(qn->target, reg); 1146 if (r) return r; 1147 } 1148 } 1149 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 1150 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); 1151 if (r) return r; 1152 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 1153 if (r) return r; 1154 r = compile_tree(qn->target, reg); 1155 } 1156 else { 1157 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); 1158 } 1159 return r; 1160 } 1161 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 1162 1163 static int 1164 compile_length_option_node(EncloseNode* node, regex_t* reg) 1165 { 1166 int tlen; 1167 OnigOptionType prev = reg->options; 1168 1169 reg->options = node->option; 1170 tlen = compile_length_tree(node->target, reg); 1171 reg->options = prev; 1172 1173 if (tlen < 0) return tlen; 1174 1175 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 1176 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL 1177 + tlen + SIZE_OP_SET_OPTION; 1178 } 1179 else 1180 return tlen; 1181 } 1182 1183 static int 1184 compile_option_node(EncloseNode* node, regex_t* reg) 1185 { 1186 int r; 1187 OnigOptionType prev = reg->options; 1188 1189 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 1190 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); 1191 if (r) return r; 1192 r = add_opcode_option(reg, OP_SET_OPTION, prev); 1193 if (r) return r; 1194 r = add_opcode(reg, OP_FAIL); 1195 if (r) return r; 1196 } 1197 1198 reg->options = node->option; 1199 r = compile_tree(node->target, reg); 1200 reg->options = prev; 1201 1202 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 1203 if (r) return r; 1204 r = add_opcode_option(reg, OP_SET_OPTION, prev); 1205 } 1206 return r; 1207 } 1208 1209 static int 1210 compile_length_enclose_node(EncloseNode* node, regex_t* reg) 1211 { 1212 int len; 1213 int tlen; 1214 QtfrNode* qn; 1215 1216 qn = NULL; 1217 1218 if (node->type == ENCLOSE_OPTION) 1219 return compile_length_option_node(node, reg); 1220 1221 if (node->target) { 1222 tlen = compile_length_tree(node->target, reg); 1223 if (tlen < 0) return tlen; 1224 } 1225 else 1226 tlen = 0; 1227 1228 switch (node->type) { 1229 case ENCLOSE_MEMORY: 1230 #ifdef USE_SUBEXP_CALL 1231 if (IS_ENCLOSE_CALLED(node)) { 1232 len = SIZE_OP_MEMORY_START_PUSH + tlen 1233 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; 1234 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 1235 len += (IS_ENCLOSE_RECURSION(node) 1236 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); 1237 else 1238 len += (IS_ENCLOSE_RECURSION(node) 1239 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); 1240 } 1241 else 1242 #endif 1243 { 1244 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) 1245 len = SIZE_OP_MEMORY_START_PUSH; 1246 else 1247 len = SIZE_OP_MEMORY_START; 1248 1249 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) 1250 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); 1251 } 1252 break; 1253 1254 case ENCLOSE_STOP_BACKTRACK: 1255 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { 1256 if (node->target == NULL) { 1257 CHECK_NULL_RETURN_MEMERR(node->target); 1258 } 1259 qn = NQTFR(node->target); 1260 tlen = compile_length_tree(qn->target, reg); 1261 if (tlen < 0) return tlen; 1262 1263 len = tlen * qn->lower 1264 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; 1265 } 1266 else { 1267 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; 1268 } 1269 break; 1270 1271 default: 1272 return ONIGERR_TYPE_BUG; 1273 break; 1274 } 1275 1276 return len; 1277 } 1278 1279 static int get_char_length_tree(Node* node, regex_t* reg, int* len); 1280 1281 static int 1282 compile_enclose_node(EncloseNode* node, regex_t* reg) 1283 { 1284 int r, len; 1285 1286 if (node->type == ENCLOSE_OPTION) 1287 return compile_option_node(node, reg); 1288 1289 switch (node->type) { 1290 case ENCLOSE_MEMORY: 1291 #ifdef USE_SUBEXP_CALL 1292 if (IS_ENCLOSE_CALLED(node)) { 1293 r = add_opcode(reg, OP_CALL); 1294 if (r) return r; 1295 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; 1296 node->state |= NST_ADDR_FIXED; 1297 r = add_abs_addr(reg, (int )node->call_addr); 1298 if (r) return r; 1299 len = compile_length_tree(node->target, reg); 1300 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); 1301 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 1302 len += (IS_ENCLOSE_RECURSION(node) 1303 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); 1304 else 1305 len += (IS_ENCLOSE_RECURSION(node) 1306 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); 1307 1308 r = add_opcode_rel_addr(reg, OP_JUMP, len); 1309 if (r) return r; 1310 } 1311 #endif 1312 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) 1313 r = add_opcode(reg, OP_MEMORY_START_PUSH); 1314 else 1315 r = add_opcode(reg, OP_MEMORY_START); 1316 if (r) return r; 1317 r = add_mem_num(reg, node->regnum); 1318 if (r) return r; 1319 r = compile_tree(node->target, reg); 1320 if (r) return r; 1321 #ifdef USE_SUBEXP_CALL 1322 if (IS_ENCLOSE_CALLED(node)) { 1323 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 1324 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) 1325 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); 1326 else 1327 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) 1328 ? OP_MEMORY_END_REC : OP_MEMORY_END)); 1329 1330 if (r) return r; 1331 r = add_mem_num(reg, node->regnum); 1332 if (r) return r; 1333 r = add_opcode(reg, OP_RETURN); 1334 } 1335 else 1336 #endif 1337 { 1338 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 1339 r = add_opcode(reg, OP_MEMORY_END_PUSH); 1340 else 1341 r = add_opcode(reg, OP_MEMORY_END); 1342 if (r) return r; 1343 r = add_mem_num(reg, node->regnum); 1344 } 1345 break; 1346 1347 case ENCLOSE_STOP_BACKTRACK: 1348 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { 1349 QtfrNode* qn = NQTFR(node->target); 1350 r = compile_tree_n_times(qn->target, qn->lower, reg); 1351 if (r) return r; 1352 1353 len = compile_length_tree(qn->target, reg); 1354 if (len < 0) return len; 1355 1356 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); 1357 if (r) return r; 1358 r = compile_tree(qn->target, reg); 1359 if (r) return r; 1360 r = add_opcode(reg, OP_POP); 1361 if (r) return r; 1362 r = add_opcode_rel_addr(reg, OP_JUMP, 1363 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); 1364 } 1365 else { 1366 r = add_opcode(reg, OP_PUSH_STOP_BT); 1367 if (r) return r; 1368 r = compile_tree(node->target, reg); 1369 if (r) return r; 1370 r = add_opcode(reg, OP_POP_STOP_BT); 1371 } 1372 break; 1373 1374 default: 1375 return ONIGERR_TYPE_BUG; 1376 break; 1377 } 1378 1379 return r; 1380 } 1381 1382 static int 1383 compile_length_anchor_node(AnchorNode* node, regex_t* reg) 1384 { 1385 int len; 1386 int tlen = 0; 1387 1388 if (node->target) { 1389 tlen = compile_length_tree(node->target, reg); 1390 if (tlen < 0) return tlen; 1391 } 1392 1393 switch (node->type) { 1394 case ANCHOR_PREC_READ: 1395 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; 1396 break; 1397 case ANCHOR_PREC_READ_NOT: 1398 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; 1399 break; 1400 case ANCHOR_LOOK_BEHIND: 1401 len = SIZE_OP_LOOK_BEHIND + tlen; 1402 break; 1403 case ANCHOR_LOOK_BEHIND_NOT: 1404 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; 1405 break; 1406 1407 default: 1408 len = SIZE_OPCODE; 1409 break; 1410 } 1411 1412 return len; 1413 } 1414 1415 static int 1416 compile_anchor_node(AnchorNode* node, regex_t* reg) 1417 { 1418 int r, len; 1419 1420 switch (node->type) { 1421 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; 1422 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; 1423 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; 1424 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; 1425 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; 1426 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; 1427 1428 case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break; 1429 case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break; 1430 #ifdef USE_WORD_BEGIN_END 1431 case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break; 1432 case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break; 1433 #endif 1434 1435 case ANCHOR_PREC_READ: 1436 r = add_opcode(reg, OP_PUSH_POS); 1437 if (r) return r; 1438 r = compile_tree(node->target, reg); 1439 if (r) return r; 1440 r = add_opcode(reg, OP_POP_POS); 1441 break; 1442 1443 case ANCHOR_PREC_READ_NOT: 1444 len = compile_length_tree(node->target, reg); 1445 if (len < 0) return len; 1446 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); 1447 if (r) return r; 1448 r = compile_tree(node->target, reg); 1449 if (r) return r; 1450 r = add_opcode(reg, OP_FAIL_POS); 1451 break; 1452 1453 case ANCHOR_LOOK_BEHIND: 1454 { 1455 int n; 1456 r = add_opcode(reg, OP_LOOK_BEHIND); 1457 if (r) return r; 1458 if (node->char_len < 0) { 1459 r = get_char_length_tree(node->target, reg, &n); 1460 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 1461 } 1462 else 1463 n = node->char_len; 1464 r = add_length(reg, n); 1465 if (r) return r; 1466 r = compile_tree(node->target, reg); 1467 } 1468 break; 1469 1470 case ANCHOR_LOOK_BEHIND_NOT: 1471 { 1472 int n; 1473 len = compile_length_tree(node->target, reg); 1474 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, 1475 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); 1476 if (r) return r; 1477 if (node->char_len < 0) { 1478 r = get_char_length_tree(node->target, reg, &n); 1479 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 1480 } 1481 else 1482 n = node->char_len; 1483 r = add_length(reg, n); 1484 if (r) return r; 1485 r = compile_tree(node->target, reg); 1486 if (r) return r; 1487 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); 1488 } 1489 break; 1490 1491 default: 1492 return ONIGERR_TYPE_BUG; 1493 break; 1494 } 1495 1496 return r; 1497 } 1498 1499 static int 1500 compile_length_tree(Node* node, regex_t* reg) 1501 { 1502 int len, type, r; 1503 1504 type = NTYPE(node); 1505 switch (type) { 1506 case NT_LIST: 1507 len = 0; 1508 do { 1509 r = compile_length_tree(NCAR(node), reg); 1510 if (r < 0) return r; 1511 len += r; 1512 } while (IS_NOT_NULL(node = NCDR(node))); 1513 r = len; 1514 break; 1515 1516 case NT_ALT: 1517 { 1518 int n; 1519 1520 n = r = 0; 1521 do { 1522 r += compile_length_tree(NCAR(node), reg); 1523 n++; 1524 } while (IS_NOT_NULL(node = NCDR(node))); 1525 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); 1526 } 1527 break; 1528 1529 case NT_STR: 1530 if (NSTRING_IS_RAW(node)) 1531 r = compile_length_string_raw_node(NSTR(node), reg); 1532 else 1533 r = compile_length_string_node(node, reg); 1534 break; 1535 1536 case NT_CCLASS: 1537 r = compile_length_cclass_node(NCCLASS(node), reg); 1538 break; 1539 1540 case NT_CTYPE: 1541 case NT_CANY: 1542 r = SIZE_OPCODE; 1543 break; 1544 1545 case NT_BREF: 1546 { 1547 BRefNode* br = NBREF(node); 1548 1549 #ifdef USE_BACKREF_WITH_LEVEL 1550 if (IS_BACKREF_NEST_LEVEL(br)) { 1551 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + 1552 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); 1553 } 1554 else 1555 #endif 1556 if (br->back_num == 1) { 1557 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) 1558 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); 1559 } 1560 else { 1561 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); 1562 } 1563 } 1564 break; 1565 1566 #ifdef USE_SUBEXP_CALL 1567 case NT_CALL: 1568 r = SIZE_OP_CALL; 1569 break; 1570 #endif 1571 1572 case NT_QTFR: 1573 r = compile_length_quantifier_node(NQTFR(node), reg); 1574 break; 1575 1576 case NT_ENCLOSE: 1577 r = compile_length_enclose_node(NENCLOSE(node), reg); 1578 break; 1579 1580 case NT_ANCHOR: 1581 r = compile_length_anchor_node(NANCHOR(node), reg); 1582 break; 1583 1584 default: 1585 return ONIGERR_TYPE_BUG; 1586 break; 1587 } 1588 1589 return r; 1590 } 1591 1592 static int 1593 compile_tree(Node* node, regex_t* reg) 1594 { 1595 int n, type, len, pos, r = 0; 1596 1597 type = NTYPE(node); 1598 switch (type) { 1599 case NT_LIST: 1600 do { 1601 r = compile_tree(NCAR(node), reg); 1602 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 1603 break; 1604 1605 case NT_ALT: 1606 { 1607 Node* x = node; 1608 len = 0; 1609 do { 1610 len += compile_length_tree(NCAR(x), reg); 1611 if (NCDR(x) != NULL) { 1612 len += SIZE_OP_PUSH + SIZE_OP_JUMP; 1613 } 1614 } while (IS_NOT_NULL(x = NCDR(x))); 1615 pos = reg->used + len; /* goal position */ 1616 1617 do { 1618 len = compile_length_tree(NCAR(node), reg); 1619 if (IS_NOT_NULL(NCDR(node))) { 1620 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); 1621 if (r) break; 1622 } 1623 r = compile_tree(NCAR(node), reg); 1624 if (r) break; 1625 if (IS_NOT_NULL(NCDR(node))) { 1626 len = pos - (reg->used + SIZE_OP_JUMP); 1627 r = add_opcode_rel_addr(reg, OP_JUMP, len); 1628 if (r) break; 1629 } 1630 } while (IS_NOT_NULL(node = NCDR(node))); 1631 } 1632 break; 1633 1634 case NT_STR: 1635 if (NSTRING_IS_RAW(node)) 1636 r = compile_string_raw_node(NSTR(node), reg); 1637 else 1638 r = compile_string_node(node, reg); 1639 break; 1640 1641 case NT_CCLASS: 1642 r = compile_cclass_node(NCCLASS(node), reg); 1643 break; 1644 1645 case NT_CTYPE: 1646 { 1647 int op; 1648 1649 switch (NCTYPE(node)->ctype) { 1650 case ONIGENC_CTYPE_WORD: 1651 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; 1652 else op = OP_WORD; 1653 break; 1654 default: 1655 return ONIGERR_TYPE_BUG; 1656 break; 1657 } 1658 r = add_opcode(reg, op); 1659 } 1660 break; 1661 1662 case NT_CANY: 1663 if (IS_MULTILINE(reg->options)) 1664 r = add_opcode(reg, OP_ANYCHAR_ML); 1665 else 1666 r = add_opcode(reg, OP_ANYCHAR); 1667 break; 1668 1669 case NT_BREF: 1670 { 1671 BRefNode* br = NBREF(node); 1672 1673 #ifdef USE_BACKREF_WITH_LEVEL 1674 if (IS_BACKREF_NEST_LEVEL(br)) { 1675 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); 1676 if (r) return r; 1677 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); 1678 if (r) return r; 1679 r = add_length(reg, br->nest_level); 1680 if (r) return r; 1681 1682 goto add_bacref_mems; 1683 } 1684 else 1685 #endif 1686 if (br->back_num == 1) { 1687 n = br->back_static[0]; 1688 if (IS_IGNORECASE(reg->options)) { 1689 r = add_opcode(reg, OP_BACKREFN_IC); 1690 if (r) return r; 1691 r = add_mem_num(reg, n); 1692 } 1693 else { 1694 switch (n) { 1695 case 1: r = add_opcode(reg, OP_BACKREF1); break; 1696 case 2: r = add_opcode(reg, OP_BACKREF2); break; 1697 default: 1698 r = add_opcode(reg, OP_BACKREFN); 1699 if (r) return r; 1700 r = add_mem_num(reg, n); 1701 break; 1702 } 1703 } 1704 } 1705 else { 1706 int i; 1707 int* p; 1708 1709 if (IS_IGNORECASE(reg->options)) { 1710 r = add_opcode(reg, OP_BACKREF_MULTI_IC); 1711 } 1712 else { 1713 r = add_opcode(reg, OP_BACKREF_MULTI); 1714 } 1715 if (r) return r; 1716 1717 #ifdef USE_BACKREF_WITH_LEVEL 1718 add_bacref_mems: 1719 #endif 1720 r = add_length(reg, br->back_num); 1721 if (r) return r; 1722 p = BACKREFS_P(br); 1723 for (i = br->back_num - 1; i >= 0; i--) { 1724 r = add_mem_num(reg, p[i]); 1725 if (r) return r; 1726 } 1727 } 1728 } 1729 break; 1730 1731 #ifdef USE_SUBEXP_CALL 1732 case NT_CALL: 1733 r = compile_call(NCALL(node), reg); 1734 break; 1735 #endif 1736 1737 case NT_QTFR: 1738 r = compile_quantifier_node(NQTFR(node), reg); 1739 break; 1740 1741 case NT_ENCLOSE: 1742 r = compile_enclose_node(NENCLOSE(node), reg); 1743 break; 1744 1745 case NT_ANCHOR: 1746 r = compile_anchor_node(NANCHOR(node), reg); 1747 break; 1748 1749 default: 1750 #ifdef ONIG_DEBUG 1751 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); 1752 #endif 1753 break; 1754 } 1755 1756 return r; 1757 } 1758 1759 #ifdef USE_NAMED_GROUP 1760 1761 static int 1762 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) 1763 { 1764 int r = 0; 1765 Node* node = *plink; 1766 1767 switch (NTYPE(node)) { 1768 case NT_LIST: 1769 case NT_ALT: 1770 do { 1771 r = noname_disable_map(&(NCAR(node)), map, counter); 1772 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 1773 break; 1774 1775 case NT_QTFR: 1776 { 1777 Node** ptarget = &(NQTFR(node)->target); 1778 Node* old = *ptarget; 1779 r = noname_disable_map(ptarget, map, counter); 1780 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { 1781 onig_reduce_nested_quantifier(node, *ptarget); 1782 } 1783 } 1784 break; 1785 1786 case NT_ENCLOSE: 1787 { 1788 EncloseNode* en = NENCLOSE(node); 1789 if (en->type == ENCLOSE_MEMORY) { 1790 if (IS_ENCLOSE_NAMED_GROUP(en)) { 1791 (*counter)++; 1792 map[en->regnum].new_val = *counter; 1793 en->regnum = *counter; 1794 r = noname_disable_map(&(en->target), map, counter); 1795 } 1796 else { 1797 *plink = en->target; 1798 en->target = NULL_NODE; 1799 onig_node_free(node); 1800 r = noname_disable_map(plink, map, counter); 1801 } 1802 } 1803 else 1804 r = noname_disable_map(&(en->target), map, counter); 1805 } 1806 break; 1807 1808 default: 1809 break; 1810 } 1811 1812 return r; 1813 } 1814 1815 static int 1816 renumber_node_backref(Node* node, GroupNumRemap* map) 1817 { 1818 int i, pos, n, old_num; 1819 int *backs; 1820 BRefNode* bn = NBREF(node); 1821 1822 if (! IS_BACKREF_NAME_REF(bn)) 1823 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 1824 1825 old_num = bn->back_num; 1826 if (IS_NULL(bn->back_dynamic)) 1827 backs = bn->back_static; 1828 else 1829 backs = bn->back_dynamic; 1830 1831 for (i = 0, pos = 0; i < old_num; i++) { 1832 n = map[backs[i]].new_val; 1833 if (n > 0) { 1834 backs[pos] = n; 1835 pos++; 1836 } 1837 } 1838 1839 bn->back_num = pos; 1840 return 0; 1841 } 1842 1843 static int 1844 renumber_by_map(Node* node, GroupNumRemap* map) 1845 { 1846 int r = 0; 1847 1848 switch (NTYPE(node)) { 1849 case NT_LIST: 1850 case NT_ALT: 1851 do { 1852 r = renumber_by_map(NCAR(node), map); 1853 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 1854 break; 1855 case NT_QTFR: 1856 r = renumber_by_map(NQTFR(node)->target, map); 1857 break; 1858 case NT_ENCLOSE: 1859 r = renumber_by_map(NENCLOSE(node)->target, map); 1860 break; 1861 1862 case NT_BREF: 1863 r = renumber_node_backref(node, map); 1864 break; 1865 1866 default: 1867 break; 1868 } 1869 1870 return r; 1871 } 1872 1873 static int 1874 numbered_ref_check(Node* node) 1875 { 1876 int r = 0; 1877 1878 switch (NTYPE(node)) { 1879 case NT_LIST: 1880 case NT_ALT: 1881 do { 1882 r = numbered_ref_check(NCAR(node)); 1883 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 1884 break; 1885 case NT_QTFR: 1886 r = numbered_ref_check(NQTFR(node)->target); 1887 break; 1888 case NT_ENCLOSE: 1889 r = numbered_ref_check(NENCLOSE(node)->target); 1890 break; 1891 1892 case NT_BREF: 1893 if (! IS_BACKREF_NAME_REF(NBREF(node))) 1894 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 1895 break; 1896 1897 default: 1898 break; 1899 } 1900 1901 return r; 1902 } 1903 1904 static int 1905 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) 1906 { 1907 int r, i, pos, counter; 1908 int Result; 1909 BitStatusType loc; 1910 GroupNumRemap* map; 1911 1912 map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1)); 1913 CHECK_NULL_RETURN_MEMERR(map); 1914 for (i = 1; i <= env->num_mem; i++) { 1915 map[i].new_val = 0; 1916 } 1917 counter = 0; 1918 r = noname_disable_map(root, map, &counter); 1919 if (r != 0) return r; 1920 1921 r = renumber_by_map(*root, map); 1922 if (r != 0) return r; 1923 1924 for (i = 1, pos = 1; i <= env->num_mem; i++) { 1925 if (map[i].new_val > 0) { 1926 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; 1927 pos++; 1928 } 1929 } 1930 1931 loc = env->capture_history; 1932 BIT_STATUS_CLEAR(env->capture_history); 1933 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { 1934 if (BIT_STATUS_AT(loc, i)) { 1935 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); 1936 } 1937 } 1938 1939 env->num_mem = env->num_named; 1940 reg->num_mem = env->num_named; 1941 1942 Result = onig_renumber_name_table(reg, map); 1943 xfree(map); 1944 return Result; 1945 } 1946 #endif /* USE_NAMED_GROUP */ 1947 1948 #ifdef USE_SUBEXP_CALL 1949 static int 1950 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) 1951 { 1952 int i, offset; 1953 EncloseNode* en; 1954 AbsAddrType addr; 1955 1956 for (i = 0; i < uslist->num; i++) { 1957 en = NENCLOSE(uslist->us[i].target); 1958 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; 1959 addr = en->call_addr; 1960 offset = uslist->us[i].offset; 1961 1962 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); 1963 } 1964 return 0; 1965 } 1966 #endif 1967 1968 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 1969 static int 1970 quantifiers_memory_node_info(Node* node) 1971 { 1972 int r = 0; 1973 1974 switch (NTYPE(node)) { 1975 case NT_LIST: 1976 case NT_ALT: 1977 { 1978 int v; 1979 do { 1980 v = quantifiers_memory_node_info(NCAR(node)); 1981 if (v > r) r = v; 1982 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); 1983 } 1984 break; 1985 1986 #ifdef USE_SUBEXP_CALL 1987 case NT_CALL: 1988 if (IS_CALL_RECURSION(NCALL(node))) { 1989 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ 1990 } 1991 else 1992 r = quantifiers_memory_node_info(NCALL(node)->target); 1993 break; 1994 #endif 1995 1996 case NT_QTFR: 1997 { 1998 QtfrNode* qn = NQTFR(node); 1999 if (qn->upper != 0) { 2000 r = quantifiers_memory_node_info(qn->target); 2001 } 2002 } 2003 break; 2004 2005 case NT_ENCLOSE: 2006 { 2007 EncloseNode* en = NENCLOSE(node); 2008 switch (en->type) { 2009 case ENCLOSE_MEMORY: 2010 return NQ_TARGET_IS_EMPTY_MEM; 2011 break; 2012 2013 case ENCLOSE_OPTION: 2014 case ENCLOSE_STOP_BACKTRACK: 2015 r = quantifiers_memory_node_info(en->target); 2016 break; 2017 default: 2018 break; 2019 } 2020 } 2021 break; 2022 2023 case NT_BREF: 2024 case NT_STR: 2025 case NT_CTYPE: 2026 case NT_CCLASS: 2027 case NT_CANY: 2028 case NT_ANCHOR: 2029 default: 2030 break; 2031 } 2032 2033 return r; 2034 } 2035 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ 2036 2037 static int 2038 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) 2039 { 2040 OnigDistance tmin; 2041 int r = 0; 2042 2043 *min = 0; 2044 switch (NTYPE(node)) { 2045 case NT_BREF: 2046 { 2047 int i; 2048 int* backs; 2049 Node** nodes = SCANENV_MEM_NODES(env); 2050 BRefNode* br = NBREF(node); 2051 if (br->state & NST_RECURSION) break; 2052 2053 backs = BACKREFS_P(br); 2054 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; 2055 r = get_min_match_length(nodes[backs[0]], min, env); 2056 if (r != 0) break; 2057 for (i = 1; i < br->back_num; i++) { 2058 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 2059 r = get_min_match_length(nodes[backs[i]], &tmin, env); 2060 if (r != 0) break; 2061 if (*min > tmin) *min = tmin; 2062 } 2063 } 2064 break; 2065 2066 #ifdef USE_SUBEXP_CALL 2067 case NT_CALL: 2068 if (IS_CALL_RECURSION(NCALL(node))) { 2069 EncloseNode* en = NENCLOSE(NCALL(node)->target); 2070 if (IS_ENCLOSE_MIN_FIXED(en)) 2071 *min = en->min_len; 2072 } 2073 else 2074 r = get_min_match_length(NCALL(node)->target, min, env); 2075 break; 2076 #endif 2077 2078 case NT_LIST: 2079 do { 2080 r = get_min_match_length(NCAR(node), &tmin, env); 2081 if (r == 0) *min += tmin; 2082 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 2083 break; 2084 2085 case NT_ALT: 2086 { 2087 Node *x, *y; 2088 y = node; 2089 do { 2090 x = NCAR(y); 2091 r = get_min_match_length(x, &tmin, env); 2092 if (r != 0) break; 2093 if (y == node) *min = tmin; 2094 else if (*min > tmin) *min = tmin; 2095 } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); 2096 } 2097 break; 2098 2099 case NT_STR: 2100 { 2101 StrNode* sn = NSTR(node); 2102 *min = (OnigDistance)(sn->end - sn->s); 2103 } 2104 break; 2105 2106 case NT_CTYPE: 2107 *min = 1; 2108 break; 2109 2110 case NT_CCLASS: 2111 case NT_CANY: 2112 *min = 1; 2113 break; 2114 2115 case NT_QTFR: 2116 { 2117 QtfrNode* qn = NQTFR(node); 2118 2119 if (qn->lower > 0) { 2120 r = get_min_match_length(qn->target, min, env); 2121 if (r == 0) 2122 *min = distance_multiply(*min, qn->lower); 2123 } 2124 } 2125 break; 2126 2127 case NT_ENCLOSE: 2128 { 2129 EncloseNode* en = NENCLOSE(node); 2130 switch (en->type) { 2131 case ENCLOSE_MEMORY: 2132 #ifdef USE_SUBEXP_CALL 2133 if (IS_ENCLOSE_MIN_FIXED(en)) 2134 *min = en->min_len; 2135 else { 2136 r = get_min_match_length(en->target, min, env); 2137 if (r == 0) { 2138 en->min_len = *min; 2139 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); 2140 } 2141 } 2142 break; 2143 #endif 2144 case ENCLOSE_OPTION: 2145 case ENCLOSE_STOP_BACKTRACK: 2146 r = get_min_match_length(en->target, min, env); 2147 break; 2148 } 2149 } 2150 break; 2151 2152 case NT_ANCHOR: 2153 default: 2154 break; 2155 } 2156 2157 return r; 2158 } 2159 2160 static int 2161 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) 2162 { 2163 OnigDistance tmax; 2164 int r = 0; 2165 2166 *max = 0; 2167 switch (NTYPE(node)) { 2168 case NT_LIST: 2169 do { 2170 r = get_max_match_length(NCAR(node), &tmax, env); 2171 if (r == 0) 2172 *max = distance_add(*max, tmax); 2173 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 2174 break; 2175 2176 case NT_ALT: 2177 do { 2178 r = get_max_match_length(NCAR(node), &tmax, env); 2179 if (r == 0 && *max < tmax) *max = tmax; 2180 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 2181 break; 2182 2183 case NT_STR: 2184 { 2185 StrNode* sn = NSTR(node); 2186 *max = (OnigDistance)(sn->end - sn->s); 2187 } 2188 break; 2189 2190 case NT_CTYPE: 2191 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 2192 break; 2193 2194 case NT_CCLASS: 2195 case NT_CANY: 2196 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 2197 break; 2198 2199 case NT_BREF: 2200 { 2201 int i; 2202 int* backs; 2203 Node** nodes = SCANENV_MEM_NODES(env); 2204 BRefNode* br = NBREF(node); 2205 if (br->state & NST_RECURSION) { 2206 *max = ONIG_INFINITE_DISTANCE; 2207 break; 2208 } 2209 backs = BACKREFS_P(br); 2210 for (i = 0; i < br->back_num; i++) { 2211 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 2212 r = get_max_match_length(nodes[backs[i]], &tmax, env); 2213 if (r != 0) break; 2214 if (*max < tmax) *max = tmax; 2215 } 2216 } 2217 break; 2218 2219 #ifdef USE_SUBEXP_CALL 2220 case NT_CALL: 2221 if (! IS_CALL_RECURSION(NCALL(node))) 2222 r = get_max_match_length(NCALL(node)->target, max, env); 2223 else 2224 *max = ONIG_INFINITE_DISTANCE; 2225 break; 2226 #endif 2227 2228 case NT_QTFR: 2229 { 2230 QtfrNode* qn = NQTFR(node); 2231 2232 if (qn->upper != 0) { 2233 r = get_max_match_length(qn->target, max, env); 2234 if (r == 0 && *max != 0) { 2235 if (! IS_REPEAT_INFINITE(qn->upper)) 2236 *max = distance_multiply(*max, qn->upper); 2237 else 2238 *max = ONIG_INFINITE_DISTANCE; 2239 } 2240 } 2241 } 2242 break; 2243 2244 case NT_ENCLOSE: 2245 { 2246 EncloseNode* en = NENCLOSE(node); 2247 switch (en->type) { 2248 case ENCLOSE_MEMORY: 2249 #ifdef USE_SUBEXP_CALL 2250 if (IS_ENCLOSE_MAX_FIXED(en)) 2251 *max = en->max_len; 2252 else { 2253 r = get_max_match_length(en->target, max, env); 2254 if (r == 0) { 2255 en->max_len = *max; 2256 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); 2257 } 2258 } 2259 break; 2260 #endif 2261 case ENCLOSE_OPTION: 2262 case ENCLOSE_STOP_BACKTRACK: 2263 r = get_max_match_length(en->target, max, env); 2264 break; 2265 } 2266 } 2267 break; 2268 2269 case NT_ANCHOR: 2270 default: 2271 break; 2272 } 2273 2274 return r; 2275 } 2276 2277 #define GET_CHAR_LEN_VARLEN -1 2278 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2 2279 2280 /* fixed size pattern node only */ 2281 static int 2282 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) 2283 { 2284 int tlen; 2285 int r = 0; 2286 2287 level++; 2288 *len = 0; 2289 switch (NTYPE(node)) { 2290 case NT_LIST: 2291 do { 2292 r = get_char_length_tree1(NCAR(node), reg, &tlen, level); 2293 if (r == 0) 2294 *len = distance_add(*len, tlen); 2295 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 2296 break; 2297 2298 case NT_ALT: 2299 { 2300 int tlen2; 2301 int varlen = 0; 2302 2303 r = get_char_length_tree1(NCAR(node), reg, &tlen, level); 2304 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { 2305 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); 2306 if (r == 0) { 2307 if (tlen != tlen2) 2308 varlen = 1; 2309 } 2310 } 2311 if (r == 0) { 2312 if (varlen != 0) { 2313 if (level == 1) 2314 r = GET_CHAR_LEN_TOP_ALT_VARLEN; 2315 else 2316 r = GET_CHAR_LEN_VARLEN; 2317 } 2318 else 2319 *len = tlen; 2320 } 2321 } 2322 break; 2323 2324 case NT_STR: 2325 { 2326 StrNode* sn = NSTR(node); 2327 UChar *s = sn->s; 2328 while (s < sn->end) { 2329 s += enclen(reg->enc, s); 2330 (*len)++; 2331 } 2332 } 2333 break; 2334 2335 case NT_QTFR: 2336 { 2337 QtfrNode* qn = NQTFR(node); 2338 if (qn->lower == qn->upper) { 2339 r = get_char_length_tree1(qn->target, reg, &tlen, level); 2340 if (r == 0) 2341 *len = distance_multiply(tlen, qn->lower); 2342 } 2343 else 2344 r = GET_CHAR_LEN_VARLEN; 2345 } 2346 break; 2347 2348 #ifdef USE_SUBEXP_CALL 2349 case NT_CALL: 2350 if (! IS_CALL_RECURSION(NCALL(node))) 2351 r = get_char_length_tree1(NCALL(node)->target, reg, len, level); 2352 else 2353 r = GET_CHAR_LEN_VARLEN; 2354 break; 2355 #endif 2356 2357 case NT_CTYPE: 2358 *len = 1; 2359 break; 2360 2361 case NT_CCLASS: 2362 case NT_CANY: 2363 *len = 1; 2364 break; 2365 2366 case NT_ENCLOSE: 2367 { 2368 EncloseNode* en = NENCLOSE(node); 2369 switch (en->type) { 2370 case ENCLOSE_MEMORY: 2371 #ifdef USE_SUBEXP_CALL 2372 if (IS_ENCLOSE_CLEN_FIXED(en)) 2373 *len = en->char_len; 2374 else { 2375 r = get_char_length_tree1(en->target, reg, len, level); 2376 if (r == 0) { 2377 en->char_len = *len; 2378 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); 2379 } 2380 } 2381 break; 2382 #endif 2383 case ENCLOSE_OPTION: 2384 case ENCLOSE_STOP_BACKTRACK: 2385 r = get_char_length_tree1(en->target, reg, len, level); 2386 break; 2387 default: 2388 break; 2389 } 2390 } 2391 break; 2392 2393 case NT_ANCHOR: 2394 break; 2395 2396 default: 2397 r = GET_CHAR_LEN_VARLEN; 2398 break; 2399 } 2400 2401 return r; 2402 } 2403 2404 static int 2405 get_char_length_tree(Node* node, regex_t* reg, int* len) 2406 { 2407 return get_char_length_tree1(node, reg, len, 0); 2408 } 2409 2410 /* x is not included y ==> 1 : 0 */ 2411 static int 2412 is_not_included(Node* x, Node* y, regex_t* reg) 2413 { 2414 int i, len; 2415 OnigCodePoint code; 2416 UChar *p; 2417 int ytype; 2418 2419 retry: 2420 ytype = NTYPE(y); 2421 switch (NTYPE(x)) { 2422 case NT_CTYPE: 2423 { 2424 switch (ytype) { 2425 case NT_CTYPE: 2426 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && 2427 NCTYPE(y)->not != NCTYPE(x)->not) 2428 return 1; 2429 else 2430 return 0; 2431 break; 2432 2433 case NT_CCLASS: 2434 swap: 2435 { 2436 Node* tmp; 2437 tmp = x; x = y; y = tmp; 2438 goto retry; 2439 } 2440 break; 2441 2442 case NT_STR: 2443 goto swap; 2444 break; 2445 2446 default: 2447 break; 2448 } 2449 } 2450 break; 2451 2452 case NT_CCLASS: 2453 { 2454 CClassNode* xc = NCCLASS(x); 2455 switch (ytype) { 2456 case NT_CTYPE: 2457 switch (NCTYPE(y)->ctype) { 2458 case ONIGENC_CTYPE_WORD: 2459 if (NCTYPE(y)->not == 0) { 2460 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { 2461 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 2462 if (BITSET_AT(xc->bs, i)) { 2463 if (IS_CODE_SB_WORD(reg->enc, i)) return 0; 2464 } 2465 } 2466 return 1; 2467 } 2468 return 0; 2469 } 2470 else { 2471 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 2472 if (! IS_CODE_SB_WORD(reg->enc, i)) { 2473 if (!IS_NCCLASS_NOT(xc)) { 2474 if (BITSET_AT(xc->bs, i)) 2475 return 0; 2476 } 2477 else { 2478 if (! BITSET_AT(xc->bs, i)) 2479 return 0; 2480 } 2481 } 2482 } 2483 return 1; 2484 } 2485 break; 2486 2487 default: 2488 break; 2489 } 2490 break; 2491 2492 case NT_CCLASS: 2493 { 2494 int v; 2495 CClassNode* yc = NCCLASS(y); 2496 2497 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 2498 v = BITSET_AT(xc->bs, i); 2499 if ((v != 0 && !IS_NCCLASS_NOT(xc)) || 2500 (v == 0 && IS_NCCLASS_NOT(xc))) { 2501 v = BITSET_AT(yc->bs, i); 2502 if ((v != 0 && !IS_NCCLASS_NOT(yc)) || 2503 (v == 0 && IS_NCCLASS_NOT(yc))) 2504 return 0; 2505 } 2506 } 2507 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || 2508 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) 2509 return 1; 2510 return 0; 2511 } 2512 break; 2513 2514 case NT_STR: 2515 goto swap; 2516 break; 2517 2518 default: 2519 break; 2520 } 2521 } 2522 break; 2523 2524 case NT_STR: 2525 { 2526 StrNode* xs = NSTR(x); 2527 if (NSTRING_LEN(x) == 0) 2528 break; 2529 2530 //c = *(xs->s); 2531 switch (ytype) { 2532 case NT_CTYPE: 2533 switch (NCTYPE(y)->ctype) { 2534 case ONIGENC_CTYPE_WORD: 2535 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) 2536 return NCTYPE(y)->not; 2537 else 2538 return !(NCTYPE(y)->not); 2539 break; 2540 default: 2541 break; 2542 } 2543 break; 2544 2545 case NT_CCLASS: 2546 { 2547 CClassNode* cc = NCCLASS(y); 2548 2549 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, 2550 xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); 2551 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1); 2552 } 2553 break; 2554 2555 case NT_STR: 2556 { 2557 UChar *q; 2558 StrNode* ys = NSTR(y); 2559 len = NSTRING_LEN(x); 2560 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); 2561 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { 2562 /* tiny version */ 2563 return 0; 2564 } 2565 else { 2566 for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) { 2567 if (*p != *q) return 1; 2568 } 2569 } 2570 } 2571 break; 2572 2573 default: 2574 break; 2575 } 2576 } 2577 break; 2578 2579 default: 2580 break; 2581 } 2582 2583 return 0; 2584 } 2585 2586 static Node* 2587 get_head_value_node(Node* node, int exact, regex_t* reg) 2588 { 2589 Node* n = NULL_NODE; 2590 2591 switch (NTYPE(node)) { 2592 case NT_BREF: 2593 case NT_ALT: 2594 case NT_CANY: 2595 #ifdef USE_SUBEXP_CALL 2596 case NT_CALL: 2597 #endif 2598 break; 2599 2600 case NT_CTYPE: 2601 case NT_CCLASS: 2602 if (exact == 0) { 2603 n = node; 2604 } 2605 break; 2606 2607 case NT_LIST: 2608 n = get_head_value_node(NCAR(node), exact, reg); 2609 break; 2610 2611 case NT_STR: 2612 { 2613 StrNode* sn = NSTR(node); 2614 2615 if (sn->end <= sn->s) 2616 break; 2617 2618 if (exact != 0 && 2619 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { 2620 } 2621 else { 2622 n = node; 2623 } 2624 } 2625 break; 2626 2627 case NT_QTFR: 2628 { 2629 QtfrNode* qn = NQTFR(node); 2630 if (qn->lower > 0) { 2631 if (IS_NOT_NULL(qn->head_exact)) 2632 n = qn->head_exact; 2633 else 2634 n = get_head_value_node(qn->target, exact, reg); 2635 } 2636 } 2637 break; 2638 2639 case NT_ENCLOSE: 2640 { 2641 EncloseNode* en = NENCLOSE(node); 2642 switch (en->type) { 2643 case ENCLOSE_OPTION: 2644 { 2645 OnigOptionType options = reg->options; 2646 2647 reg->options = NENCLOSE(node)->option; 2648 n = get_head_value_node(NENCLOSE(node)->target, exact, reg); 2649 reg->options = options; 2650 } 2651 break; 2652 2653 case ENCLOSE_MEMORY: 2654 case ENCLOSE_STOP_BACKTRACK: 2655 n = get_head_value_node(en->target, exact, reg); 2656 break; 2657 } 2658 } 2659 break; 2660 2661 case NT_ANCHOR: 2662 if (NANCHOR(node)->type == ANCHOR_PREC_READ) 2663 n = get_head_value_node(NANCHOR(node)->target, exact, reg); 2664 break; 2665 2666 default: 2667 break; 2668 } 2669 2670 return n; 2671 } 2672 2673 static int 2674 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) 2675 { 2676 int type, r = 0; 2677 2678 type = NTYPE(node); 2679 if ((NTYPE2BIT(type) & type_mask) == 0) 2680 return 1; 2681 2682 switch (type) { 2683 case NT_LIST: 2684 case NT_ALT: 2685 do { 2686 r = check_type_tree(NCAR(node), type_mask, enclose_mask, 2687 anchor_mask); 2688 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 2689 break; 2690 2691 case NT_QTFR: 2692 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, 2693 anchor_mask); 2694 break; 2695 2696 case NT_ENCLOSE: 2697 { 2698 EncloseNode* en = NENCLOSE(node); 2699 if ((en->type & enclose_mask) == 0) 2700 return 1; 2701 2702 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); 2703 } 2704 break; 2705 2706 case NT_ANCHOR: 2707 type = NANCHOR(node)->type; 2708 if ((type & anchor_mask) == 0) 2709 return 1; 2710 2711 if (NANCHOR(node)->target) 2712 r = check_type_tree(NANCHOR(node)->target, 2713 type_mask, enclose_mask, anchor_mask); 2714 break; 2715 2716 default: 2717 break; 2718 } 2719 return r; 2720 } 2721 2722 #ifdef USE_SUBEXP_CALL 2723 2724 #define RECURSION_EXIST 1 2725 #define RECURSION_INFINITE 2 2726 2727 static int 2728 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) 2729 { 2730 int type; 2731 int r = 0; 2732 2733 type = NTYPE(node); 2734 switch (type) { 2735 case NT_LIST: 2736 { 2737 Node *x; 2738 OnigDistance min; 2739 int ret; 2740 2741 x = node; 2742 do { 2743 ret = subexp_inf_recursive_check(NCAR(x), env, head); 2744 if (ret < 0 || ret == RECURSION_INFINITE) return ret; 2745 r |= ret; 2746 if (head) { 2747 ret = get_min_match_length(NCAR(x), &min, env); 2748 if (ret != 0) return ret; 2749 if (min != 0) head = 0; 2750 } 2751 } while (IS_NOT_NULL(x = NCDR(x))); 2752 } 2753 break; 2754 2755 case NT_ALT: 2756 { 2757 int ret; 2758 r = RECURSION_EXIST; 2759 do { 2760 ret = subexp_inf_recursive_check(NCAR(node), env, head); 2761 if (ret < 0 || ret == RECURSION_INFINITE) return ret; 2762 r &= ret; 2763 } while (IS_NOT_NULL(node = NCDR(node))); 2764 } 2765 break; 2766 2767 case NT_QTFR: 2768 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); 2769 if (r == RECURSION_EXIST) { 2770 if (NQTFR(node)->lower == 0) r = 0; 2771 } 2772 break; 2773 2774 case NT_ANCHOR: 2775 { 2776 AnchorNode* an = NANCHOR(node); 2777 switch (an->type) { 2778 case ANCHOR_PREC_READ: 2779 case ANCHOR_PREC_READ_NOT: 2780 case ANCHOR_LOOK_BEHIND: 2781 case ANCHOR_LOOK_BEHIND_NOT: 2782 r = subexp_inf_recursive_check(an->target, env, head); 2783 break; 2784 } 2785 } 2786 break; 2787 2788 case NT_CALL: 2789 r = subexp_inf_recursive_check(NCALL(node)->target, env, head); 2790 break; 2791 2792 case NT_ENCLOSE: 2793 if (IS_ENCLOSE_MARK2(NENCLOSE(node))) 2794 return 0; 2795 else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) 2796 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); 2797 else { 2798 SET_ENCLOSE_STATUS(node, NST_MARK2); 2799 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); 2800 CLEAR_ENCLOSE_STATUS(node, NST_MARK2); 2801 } 2802 break; 2803 2804 default: 2805 break; 2806 } 2807 2808 return r; 2809 } 2810 2811 static int 2812 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) 2813 { 2814 int type; 2815 int r = 0; 2816 2817 type = NTYPE(node); 2818 switch (type) { 2819 case NT_LIST: 2820 case NT_ALT: 2821 do { 2822 r = subexp_inf_recursive_check_trav(NCAR(node), env); 2823 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 2824 break; 2825 2826 case NT_QTFR: 2827 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); 2828 break; 2829 2830 case NT_ANCHOR: 2831 { 2832 AnchorNode* an = NANCHOR(node); 2833 switch (an->type) { 2834 case ANCHOR_PREC_READ: 2835 case ANCHOR_PREC_READ_NOT: 2836 case ANCHOR_LOOK_BEHIND: 2837 case ANCHOR_LOOK_BEHIND_NOT: 2838 r = subexp_inf_recursive_check_trav(an->target, env); 2839 break; 2840 } 2841 } 2842 break; 2843 2844 case NT_ENCLOSE: 2845 { 2846 EncloseNode* en = NENCLOSE(node); 2847 2848 if (IS_ENCLOSE_RECURSION(en)) { 2849 SET_ENCLOSE_STATUS(node, NST_MARK1); 2850 r = subexp_inf_recursive_check(en->target, env, 1); 2851 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; 2852 CLEAR_ENCLOSE_STATUS(node, NST_MARK1); 2853 } 2854 r = subexp_inf_recursive_check_trav(en->target, env); 2855 } 2856 2857 break; 2858 2859 default: 2860 break; 2861 } 2862 2863 return r; 2864 } 2865 2866 static int 2867 subexp_recursive_check(Node* node) 2868 { 2869 int r = 0; 2870 2871 switch (NTYPE(node)) { 2872 case NT_LIST: 2873 case NT_ALT: 2874 do { 2875 r |= subexp_recursive_check(NCAR(node)); 2876 } while (IS_NOT_NULL(node = NCDR(node))); 2877 break; 2878 2879 case NT_QTFR: 2880 r = subexp_recursive_check(NQTFR(node)->target); 2881 break; 2882 2883 case NT_ANCHOR: 2884 { 2885 AnchorNode* an = NANCHOR(node); 2886 switch (an->type) { 2887 case ANCHOR_PREC_READ: 2888 case ANCHOR_PREC_READ_NOT: 2889 case ANCHOR_LOOK_BEHIND: 2890 case ANCHOR_LOOK_BEHIND_NOT: 2891 r = subexp_recursive_check(an->target); 2892 break; 2893 } 2894 } 2895 break; 2896 2897 case NT_CALL: 2898 r = subexp_recursive_check(NCALL(node)->target); 2899 if (r != 0) SET_CALL_RECURSION(node); 2900 break; 2901 2902 case NT_ENCLOSE: 2903 if (IS_ENCLOSE_MARK2(NENCLOSE(node))) 2904 return 0; 2905 else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) 2906 return 1; /* recursion */ 2907 else { 2908 SET_ENCLOSE_STATUS(node, NST_MARK2); 2909 r = subexp_recursive_check(NENCLOSE(node)->target); 2910 CLEAR_ENCLOSE_STATUS(node, NST_MARK2); 2911 } 2912 break; 2913 2914 default: 2915 break; 2916 } 2917 2918 return r; 2919 } 2920 2921 2922 static int 2923 subexp_recursive_check_trav(Node* node, ScanEnv* env) 2924 { 2925 #define FOUND_CALLED_NODE 1 2926 2927 int type; 2928 int r = 0; 2929 2930 type = NTYPE(node); 2931 switch (type) { 2932 case NT_LIST: 2933 case NT_ALT: 2934 { 2935 int ret; 2936 do { 2937 ret = subexp_recursive_check_trav(NCAR(node), env); 2938 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; 2939 else if (ret < 0) return ret; 2940 } while (IS_NOT_NULL(node = NCDR(node))); 2941 } 2942 break; 2943 2944 case NT_QTFR: 2945 r = subexp_recursive_check_trav(NQTFR(node)->target, env); 2946 if (NQTFR(node)->upper == 0) { 2947 if (r == FOUND_CALLED_NODE) 2948 NQTFR(node)->is_refered = 1; 2949 } 2950 break; 2951 2952 case NT_ANCHOR: 2953 { 2954 AnchorNode* an = NANCHOR(node); 2955 switch (an->type) { 2956 case ANCHOR_PREC_READ: 2957 case ANCHOR_PREC_READ_NOT: 2958 case ANCHOR_LOOK_BEHIND: 2959 case ANCHOR_LOOK_BEHIND_NOT: 2960 r = subexp_recursive_check_trav(an->target, env); 2961 break; 2962 } 2963 } 2964 break; 2965 2966 case NT_ENCLOSE: 2967 { 2968 EncloseNode* en = NENCLOSE(node); 2969 2970 if (! IS_ENCLOSE_RECURSION(en)) { 2971 if (IS_ENCLOSE_CALLED(en)) { 2972 SET_ENCLOSE_STATUS(node, NST_MARK1); 2973 r = subexp_recursive_check(en->target); 2974 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); 2975 CLEAR_ENCLOSE_STATUS(node, NST_MARK1); 2976 } 2977 } 2978 r = subexp_recursive_check_trav(en->target, env); 2979 if (IS_ENCLOSE_CALLED(en)) 2980 r |= FOUND_CALLED_NODE; 2981 } 2982 break; 2983 2984 default: 2985 break; 2986 } 2987 2988 return r; 2989 } 2990 2991 static int 2992 setup_subexp_call(Node* node, ScanEnv* env) 2993 { 2994 int type; 2995 int r = 0; 2996 2997 type = NTYPE(node); 2998 switch (type) { 2999 case NT_LIST: 3000 do { 3001 r = setup_subexp_call(NCAR(node), env); 3002 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 3003 break; 3004 3005 case NT_ALT: 3006 do { 3007 r = setup_subexp_call(NCAR(node), env); 3008 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 3009 break; 3010 3011 case NT_QTFR: 3012 r = setup_subexp_call(NQTFR(node)->target, env); 3013 break; 3014 case NT_ENCLOSE: 3015 r = setup_subexp_call(NENCLOSE(node)->target, env); 3016 break; 3017 3018 case NT_CALL: 3019 { 3020 CallNode* cn = NCALL(node); 3021 Node** nodes = SCANENV_MEM_NODES(env); 3022 3023 if (cn->group_num != 0) { 3024 int gnum = cn->group_num; 3025 3026 #ifdef USE_NAMED_GROUP 3027 if (env->num_named > 0 && 3028 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && 3029 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { 3030 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 3031 } 3032 #endif 3033 if (gnum > env->num_mem) { 3034 onig_scan_env_set_error_string(env, 3035 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); 3036 return ONIGERR_UNDEFINED_GROUP_REFERENCE; 3037 } 3038 3039 #ifdef USE_NAMED_GROUP 3040 set_call_attr: 3041 #endif 3042 cn->target = nodes[cn->group_num]; 3043 if (IS_NULL(cn->target)) { 3044 onig_scan_env_set_error_string(env, 3045 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); 3046 return ONIGERR_UNDEFINED_NAME_REFERENCE; 3047 } 3048 SET_ENCLOSE_STATUS(cn->target, NST_CALLED); 3049 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); 3050 cn->unset_addr_list = env->unset_addr_list; 3051 } 3052 #ifdef USE_NAMED_GROUP 3053 else { 3054 int *refs; 3055 3056 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, 3057 &refs); 3058 if (n <= 0) { 3059 onig_scan_env_set_error_string(env, 3060 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); 3061 return ONIGERR_UNDEFINED_NAME_REFERENCE; 3062 } 3063 else if (n > 1) { 3064 onig_scan_env_set_error_string(env, 3065 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); 3066 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; 3067 } 3068 else { 3069 cn->group_num = refs[0]; 3070 goto set_call_attr; 3071 } 3072 } 3073 #endif 3074 } 3075 break; 3076 3077 case NT_ANCHOR: 3078 { 3079 AnchorNode* an = NANCHOR(node); 3080 3081 switch (an->type) { 3082 case ANCHOR_PREC_READ: 3083 case ANCHOR_PREC_READ_NOT: 3084 case ANCHOR_LOOK_BEHIND: 3085 case ANCHOR_LOOK_BEHIND_NOT: 3086 r = setup_subexp_call(an->target, env); 3087 break; 3088 } 3089 } 3090 break; 3091 3092 default: 3093 break; 3094 } 3095 3096 return r; 3097 } 3098 #endif 3099 3100 /* divide different length alternatives in look-behind. 3101 (?<=A|B) ==> (?<=A)|(?<=B) 3102 (?<!A|B) ==> (?<!A)(?<!B) 3103 */ 3104 static int 3105 divide_look_behind_alternatives(Node* node) 3106 { 3107 Node *head, *np, *insert_node; 3108 AnchorNode* an = NANCHOR(node); 3109 int anc_type = an->type; 3110 3111 head = an->target; 3112 np = NCAR(head); 3113 swap_node(node, head); 3114 NCAR(node) = head; 3115 NANCHOR(head)->target = np; 3116 3117 np = node; 3118 while ((np = NCDR(np)) != NULL_NODE) { 3119 insert_node = onig_node_new_anchor(anc_type); 3120 CHECK_NULL_RETURN_MEMERR(insert_node); 3121 NANCHOR(insert_node)->target = NCAR(np); 3122 NCAR(np) = insert_node; 3123 } 3124 3125 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { 3126 np = node; 3127 do { 3128 SET_NTYPE(np, NT_LIST); /* alt -> list */ 3129 } while ((np = NCDR(np)) != NULL_NODE); 3130 } 3131 return 0; 3132 } 3133 3134 static int 3135 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) 3136 { 3137 int r, len; 3138 AnchorNode* an = NANCHOR(node); 3139 3140 r = get_char_length_tree(an->target, reg, &len); 3141 if (r == 0) 3142 an->char_len = len; 3143 else if (r == GET_CHAR_LEN_VARLEN) 3144 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 3145 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { 3146 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) 3147 r = divide_look_behind_alternatives(node); 3148 else 3149 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 3150 } 3151 3152 return r; 3153 } 3154 3155 static int 3156 next_setup(Node* node, Node* next_node, regex_t* reg) 3157 { 3158 int type; 3159 3160 retry: 3161 type = NTYPE(node); 3162 if (type == NT_QTFR) { 3163 QtfrNode* qn = NQTFR(node); 3164 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { 3165 #ifdef USE_QTFR_PEEK_NEXT 3166 Node* n = get_head_value_node(next_node, 1, reg); 3167 /* '\0': for UTF-16BE etc... */ 3168 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { 3169 qn->next_head_exact = n; 3170 } 3171 #endif 3172 /* automatic posseivation a*b ==> (?>a*)b */ 3173 if (qn->lower <= 1) { 3174 int ttype = NTYPE(qn->target); 3175 if (IS_NODE_TYPE_SIMPLE(ttype)) { 3176 Node *x, *y; 3177 x = get_head_value_node(qn->target, 0, reg); 3178 if (IS_NOT_NULL(x)) { 3179 y = get_head_value_node(next_node, 0, reg); 3180 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { 3181 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); 3182 CHECK_NULL_RETURN_MEMERR(en); 3183 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); 3184 swap_node(node, en); 3185 NENCLOSE(node)->target = en; 3186 } 3187 } 3188 } 3189 } 3190 } 3191 } 3192 else if (type == NT_ENCLOSE) { 3193 EncloseNode* en = NENCLOSE(node); 3194 if (en->type == ENCLOSE_MEMORY) { 3195 node = en->target; 3196 goto retry; 3197 } 3198 } 3199 return 0; 3200 } 3201 3202 3203 static int 3204 update_string_node_case_fold(regex_t* reg, Node *node) 3205 { 3206 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 3207 UChar *sbuf, *ebuf, *sp; 3208 int r, i, len, sbuf_size; 3209 StrNode* sn = NSTR(node); 3210 3211 end = sn->end; 3212 sbuf_size = (int)(end - sn->s) * 2; 3213 sbuf = (UChar* )xmalloc(sbuf_size); 3214 CHECK_NULL_RETURN_MEMERR(sbuf); 3215 ebuf = sbuf + sbuf_size; 3216 3217 sp = sbuf; 3218 p = sn->s; 3219 while (p < end) { 3220 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); 3221 for (i = 0; i < len; i++) { 3222 if (sp >= ebuf) { 3223 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size); 3224 CHECK_NULL_RETURN_MEMERR(sbuf); 3225 sp = sbuf + sbuf_size; 3226 sbuf_size *= 2; 3227 ebuf = sbuf + sbuf_size; 3228 } 3229 3230 *sp++ = buf[i]; 3231 } 3232 } 3233 3234 r = onig_node_str_set(node, sbuf, sp); 3235 if (r != 0) { 3236 xfree(sbuf); 3237 return r; 3238 } 3239 3240 xfree(sbuf); 3241 return 0; 3242 } 3243 3244 static int 3245 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, 3246 regex_t* reg) 3247 { 3248 int r; 3249 Node *node; 3250 3251 node = onig_node_new_str(s, end); 3252 if (IS_NULL(node)) return ONIGERR_MEMORY; 3253 3254 r = update_string_node_case_fold(reg, node); 3255 if (r != 0) { 3256 onig_node_free(node); 3257 return r; 3258 } 3259 3260 NSTRING_SET_AMBIG(node); 3261 NSTRING_SET_DONT_GET_OPT_INFO(node); 3262 *rnode = node; 3263 return 0; 3264 } 3265 3266 static int 3267 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], 3268 UChar *p, int slen, UChar *end, 3269 regex_t* reg, Node **rnode) 3270 { 3271 int r, i, j, len, varlen; 3272 Node *anode, *var_anode, *snode, *xnode, *an; 3273 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; 3274 xnode = NULL_NODE; 3275 3276 *rnode = var_anode = NULL_NODE; 3277 3278 varlen = 0; 3279 for (i = 0; i < item_num; i++) { 3280 if (items[i].byte_len != slen) { 3281 varlen = 1; 3282 break; 3283 } 3284 } 3285 3286 if (varlen != 0) { 3287 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 3288 if (IS_NULL(var_anode)) return ONIGERR_MEMORY; 3289 3290 xnode = onig_node_new_list(NULL, NULL); 3291 if (IS_NULL(xnode)) goto mem_err; 3292 NCAR(var_anode) = xnode; 3293 3294 anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 3295 if (IS_NULL(anode)) goto mem_err; 3296 NCAR(xnode) = anode; 3297 } 3298 else { 3299 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 3300 if (IS_NULL(anode)) return ONIGERR_MEMORY; 3301 } 3302 3303 snode = onig_node_new_str(p, p + slen); 3304 if (IS_NULL(snode)) goto mem_err; 3305 3306 NCAR(anode) = snode; 3307 3308 for (i = 0; i < item_num; i++) { 3309 snode = onig_node_new_str(NULL, NULL); 3310 if (IS_NULL(snode)) goto mem_err; 3311 3312 for (j = 0; j < items[i].code_len; j++) { 3313 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); 3314 if (len < 0) { 3315 r = len; 3316 goto mem_err2; 3317 } 3318 3319 r = onig_node_str_cat(snode, buf, buf + len); 3320 if (r != 0) goto mem_err2; 3321 } 3322 3323 an = onig_node_new_alt(NULL_NODE, NULL_NODE); 3324 if (IS_NULL(an)) { 3325 goto mem_err2; 3326 } 3327 3328 if (items[i].byte_len != slen) { 3329 Node *rem = NULL_NODE; 3330 UChar *q = p + items[i].byte_len; 3331 3332 if (q < end) { 3333 r = expand_case_fold_make_rem_string(&rem, q, end, reg); 3334 if (r != 0) { 3335 onig_node_free(an); 3336 goto mem_err2; 3337 } 3338 3339 xnode = onig_node_list_add(NULL_NODE, snode); 3340 if (IS_NULL(xnode)) { 3341 onig_node_free(an); 3342 onig_node_free(rem); 3343 goto mem_err2; 3344 } 3345 if (IS_NULL(onig_node_list_add(xnode, rem))) { 3346 onig_node_free(an); 3347 onig_node_free(xnode); 3348 onig_node_free(rem); 3349 goto mem_err; 3350 } 3351 3352 NCAR(an) = xnode; 3353 } 3354 else { 3355 NCAR(an) = snode; 3356 } 3357 3358 if (var_anode == NULL) { 3359 onig_node_free(an); 3360 onig_node_free(xnode); 3361 onig_node_free(rem); 3362 goto mem_err2; 3363 } 3364 NCDR(var_anode) = an; 3365 var_anode = an; 3366 } 3367 else { 3368 NCAR(an) = snode; 3369 NCDR(anode) = an; 3370 anode = an; 3371 } 3372 } 3373 3374 return varlen; 3375 3376 mem_err2: 3377 onig_node_free(snode); 3378 3379 mem_err: 3380 onig_node_free(*rnode); 3381 3382 return ONIGERR_MEMORY; 3383 } 3384 3385 static int 3386 expand_case_fold_string(Node* node, regex_t* reg) 3387 { 3388 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8 3389 3390 int r, n, len, alt_num; 3391 UChar *start, *end, *p; 3392 Node *top_root, *root, *snode, *prev_node; 3393 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; 3394 StrNode* sn = NSTR(node); 3395 3396 if (NSTRING_IS_AMBIG(node)) return 0; 3397 3398 start = sn->s; 3399 end = sn->end; 3400 if (start >= end) return 0; 3401 3402 r = 0; 3403 top_root = root = prev_node = snode = NULL_NODE; 3404 alt_num = 1; 3405 p = start; 3406 while (p < end) { 3407 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, 3408 p, end, items); 3409 if (n < 0) { 3410 r = n; 3411 goto err; 3412 } 3413 3414 len = enclen(reg->enc, p); 3415 3416 if (n == 0) { 3417 if (IS_NULL(snode)) { 3418 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { 3419 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 3420 if (IS_NULL(root)) { 3421 onig_node_free(prev_node); 3422 goto mem_err; 3423 } 3424 } 3425 3426 prev_node = snode = onig_node_new_str(NULL, NULL); 3427 if (IS_NULL(snode)) goto mem_err; 3428 if (IS_NOT_NULL(root)) { 3429 if (IS_NULL(onig_node_list_add(root, snode))) { 3430 onig_node_free(snode); 3431 goto mem_err; 3432 } 3433 } 3434 } 3435 3436 r = onig_node_str_cat(snode, p, p + len); 3437 if (r != 0) goto err; 3438 } 3439 else { 3440 alt_num *= (n + 1); 3441 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; 3442 3443 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { 3444 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 3445 if (IS_NULL(root)) { 3446 onig_node_free(prev_node); 3447 goto mem_err; 3448 } 3449 } 3450 3451 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); 3452 if (r < 0) goto mem_err; 3453 if (r == 1) { 3454 if (IS_NULL(root)) { 3455 top_root = prev_node; 3456 } 3457 else { 3458 if (IS_NULL(onig_node_list_add(root, prev_node))) { 3459 onig_node_free(prev_node); 3460 goto mem_err; 3461 } 3462 } 3463 3464 root = NCAR(prev_node); 3465 } 3466 else { /* r == 0 */ 3467 if (IS_NOT_NULL(root)) { 3468 if (IS_NULL(onig_node_list_add(root, prev_node))) { 3469 onig_node_free(prev_node); 3470 goto mem_err; 3471 } 3472 } 3473 } 3474 3475 snode = NULL_NODE; 3476 } 3477 3478 p += len; 3479 } 3480 3481 if (p < end) { 3482 Node *srem; 3483 3484 r = expand_case_fold_make_rem_string(&srem, p, end, reg); 3485 if (r != 0) goto mem_err; 3486 3487 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { 3488 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 3489 if (IS_NULL(root)) { 3490 onig_node_free(srem); 3491 onig_node_free(prev_node); 3492 goto mem_err; 3493 } 3494 } 3495 3496 if (IS_NULL(root)) { 3497 prev_node = srem; 3498 } 3499 else { 3500 if (IS_NULL(onig_node_list_add(root, srem))) { 3501 onig_node_free(srem); 3502 goto mem_err; 3503 } 3504 } 3505 } 3506 3507 /* ending */ 3508 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node); 3509 swap_node(node, top_root); 3510 onig_node_free(top_root); 3511 return 0; 3512 3513 mem_err: 3514 r = ONIGERR_MEMORY; 3515 3516 err: 3517 onig_node_free(top_root); 3518 return r; 3519 } 3520 3521 3522 #ifdef USE_COMBINATION_EXPLOSION_CHECK 3523 3524 #define CEC_THRES_NUM_BIG_REPEAT 512 3525 #define CEC_INFINITE_NUM 0x7fffffff 3526 3527 #define CEC_IN_INFINITE_REPEAT (1<<0) 3528 #define CEC_IN_FINITE_REPEAT (1<<1) 3529 #define CEC_CONT_BIG_REPEAT (1<<2) 3530 3531 static int 3532 setup_comb_exp_check(Node* node, int state, ScanEnv* env) 3533 { 3534 int type; 3535 int r = state; 3536 3537 type = NTYPE(node); 3538 switch (type) { 3539 case NT_LIST: 3540 { 3541 Node* prev = NULL_NODE; 3542 do { 3543 r = setup_comb_exp_check(NCAR(node), r, env); 3544 prev = NCAR(node); 3545 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); 3546 } 3547 break; 3548 3549 case NT_ALT: 3550 { 3551 int ret; 3552 do { 3553 ret = setup_comb_exp_check(NCAR(node), state, env); 3554 r |= ret; 3555 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); 3556 } 3557 break; 3558 3559 case NT_QTFR: 3560 { 3561 int child_state = state; 3562 int add_state = 0; 3563 QtfrNode* qn = NQTFR(node); 3564 Node* target = qn->target; 3565 int var_num; 3566 3567 if (! IS_REPEAT_INFINITE(qn->upper)) { 3568 if (qn->upper > 1) { 3569 /* {0,1}, {1,1} are allowed */ 3570 child_state |= CEC_IN_FINITE_REPEAT; 3571 3572 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ 3573 if (env->backrefed_mem == 0) { 3574 if (NTYPE(qn->target) == NT_ENCLOSE) { 3575 EncloseNode* en = NENCLOSE(qn->target); 3576 if (en->type == ENCLOSE_MEMORY) { 3577 if (NTYPE(en->target) == NT_QTFR) { 3578 QtfrNode* q = NQTFR(en->target); 3579 if (IS_REPEAT_INFINITE(q->upper) 3580 && q->greedy == qn->greedy) { 3581 qn->upper = (qn->lower == 0 ? 1 : qn->lower); 3582 if (qn->upper == 1) 3583 child_state = state; 3584 } 3585 } 3586 } 3587 } 3588 } 3589 } 3590 } 3591 3592 if (state & CEC_IN_FINITE_REPEAT) { 3593 qn->comb_exp_check_num = -1; 3594 } 3595 else { 3596 if (IS_REPEAT_INFINITE(qn->upper)) { 3597 var_num = CEC_INFINITE_NUM; 3598 child_state |= CEC_IN_INFINITE_REPEAT; 3599 } 3600 else { 3601 var_num = qn->upper - qn->lower; 3602 } 3603 3604 if (var_num >= CEC_THRES_NUM_BIG_REPEAT) 3605 add_state |= CEC_CONT_BIG_REPEAT; 3606 3607 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || 3608 ((state & CEC_CONT_BIG_REPEAT) != 0 && 3609 var_num >= CEC_THRES_NUM_BIG_REPEAT)) { 3610 if (qn->comb_exp_check_num == 0) { 3611 env->num_comb_exp_check++; 3612 qn->comb_exp_check_num = env->num_comb_exp_check; 3613 if (env->curr_max_regnum > env->comb_exp_max_regnum) 3614 env->comb_exp_max_regnum = env->curr_max_regnum; 3615 } 3616 } 3617 } 3618 3619 r = setup_comb_exp_check(target, child_state, env); 3620 r |= add_state; 3621 } 3622 break; 3623 3624 case NT_ENCLOSE: 3625 { 3626 EncloseNode* en = NENCLOSE(node); 3627 3628 switch (en->type) { 3629 case ENCLOSE_MEMORY: 3630 { 3631 if (env->curr_max_regnum < en->regnum) 3632 env->curr_max_regnum = en->regnum; 3633 3634 r = setup_comb_exp_check(en->target, state, env); 3635 } 3636 break; 3637 3638 default: 3639 r = setup_comb_exp_check(en->target, state, env); 3640 break; 3641 } 3642 } 3643 break; 3644 3645 #ifdef USE_SUBEXP_CALL 3646 case NT_CALL: 3647 if (IS_CALL_RECURSION(NCALL(node))) 3648 env->has_recursion = 1; 3649 else 3650 r = setup_comb_exp_check(NCALL(node)->target, state, env); 3651 break; 3652 #endif 3653 3654 default: 3655 break; 3656 } 3657 3658 return r; 3659 } 3660 #endif 3661 3662 #define IN_ALT (1<<0) 3663 #define IN_NOT (1<<1) 3664 #define IN_REPEAT (1<<2) 3665 #define IN_VAR_REPEAT (1<<3) 3666 3667 /* setup_tree does the following work. 3668 1. check empty loop. (set qn->target_empty_info) 3669 2. expand ignore-case in char class. 3670 3. set memory status bit flags. (reg->mem_stats) 3671 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. 3672 5. find invalid patterns in look-behind. 3673 6. expand repeated string. 3674 */ 3675 static int 3676 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) 3677 { 3678 int type; 3679 int r = 0; 3680 3681 type = NTYPE(node); 3682 switch (type) { 3683 case NT_LIST: 3684 { 3685 Node* prev = NULL_NODE; 3686 do { 3687 r = setup_tree(NCAR(node), reg, state, env); 3688 if (IS_NOT_NULL(prev) && r == 0) { 3689 r = next_setup(prev, NCAR(node), reg); 3690 } 3691 prev = NCAR(node); 3692 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 3693 } 3694 break; 3695 3696 case NT_ALT: 3697 do { 3698 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); 3699 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 3700 break; 3701 3702 case NT_CCLASS: 3703 break; 3704 3705 case NT_STR: 3706 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { 3707 r = expand_case_fold_string(node, reg); 3708 } 3709 break; 3710 3711 case NT_CTYPE: 3712 case NT_CANY: 3713 break; 3714 3715 #ifdef USE_SUBEXP_CALL 3716 case NT_CALL: 3717 break; 3718 #endif 3719 3720 case NT_BREF: 3721 { 3722 int i; 3723 int* p; 3724 Node** nodes = SCANENV_MEM_NODES(env); 3725 BRefNode* br = NBREF(node); 3726 p = BACKREFS_P(br); 3727 for (i = 0; i < br->back_num; i++) { 3728 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 3729 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); 3730 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); 3731 #ifdef USE_BACKREF_WITH_LEVEL 3732 if (IS_BACKREF_NEST_LEVEL(br)) { 3733 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); 3734 } 3735 #endif 3736 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); 3737 } 3738 } 3739 break; 3740 3741 case NT_QTFR: 3742 { 3743 OnigDistance d; 3744 QtfrNode* qn = NQTFR(node); 3745 Node* target = qn->target; 3746 3747 if ((state & IN_REPEAT) != 0) { 3748 qn->state |= NST_IN_REPEAT; 3749 } 3750 3751 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { 3752 r = get_min_match_length(target, &d, env); 3753 if (r) break; 3754 if (d == 0) { 3755 qn->target_empty_info = NQ_TARGET_IS_EMPTY; 3756 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 3757 r = quantifiers_memory_node_info(target); 3758 if (r < 0) break; 3759 if (r > 0) { 3760 qn->target_empty_info = r; 3761 } 3762 #endif 3763 #if 0 3764 r = get_max_match_length(target, &d, env); 3765 if (r == 0 && d == 0) { 3766 /* ()* ==> ()?, ()+ ==> () */ 3767 qn->upper = 1; 3768 if (qn->lower > 1) qn->lower = 1; 3769 if (NTYPE(target) == NT_STR) { 3770 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ 3771 } 3772 } 3773 #endif 3774 } 3775 } 3776 3777 state |= IN_REPEAT; 3778 if (qn->lower != qn->upper) 3779 state |= IN_VAR_REPEAT; 3780 r = setup_tree(target, reg, state, env); 3781 if (r) break; 3782 3783 /* expand string */ 3784 #define EXPAND_STRING_MAX_LENGTH 100 3785 if (NTYPE(target) == NT_STR) { 3786 if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && 3787 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { 3788 int len = NSTRING_LEN(target); 3789 StrNode* sn = NSTR(target); 3790 3791 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { 3792 int i, n = qn->lower; 3793 onig_node_conv_to_str_node(node, NSTR(target)->flag); 3794 for (i = 0; i < n; i++) { 3795 r = onig_node_str_cat(node, sn->s, sn->end); 3796 if (r) break; 3797 } 3798 onig_node_free(target); 3799 break; /* break case NT_QTFR: */ 3800 } 3801 } 3802 } 3803 3804 #ifdef USE_OP_PUSH_OR_JUMP_EXACT 3805 if (qn->greedy && (qn->target_empty_info != 0)) { 3806 if (NTYPE(target) == NT_QTFR) { 3807 QtfrNode* tqn = NQTFR(target); 3808 if (IS_NOT_NULL(tqn->head_exact)) { 3809 qn->head_exact = tqn->head_exact; 3810 tqn->head_exact = NULL; 3811 } 3812 } 3813 else { 3814 qn->head_exact = get_head_value_node(qn->target, 1, reg); 3815 } 3816 } 3817 #endif 3818 } 3819 break; 3820 3821 case NT_ENCLOSE: 3822 { 3823 EncloseNode* en = NENCLOSE(node); 3824 3825 switch (en->type) { 3826 case ENCLOSE_OPTION: 3827 { 3828 OnigOptionType options = reg->options; 3829 reg->options = NENCLOSE(node)->option; 3830 r = setup_tree(NENCLOSE(node)->target, reg, state, env); 3831 reg->options = options; 3832 } 3833 break; 3834 3835 case ENCLOSE_MEMORY: 3836 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { 3837 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); 3838 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ 3839 } 3840 r = setup_tree(en->target, reg, state, env); 3841 break; 3842 3843 case ENCLOSE_STOP_BACKTRACK: 3844 { 3845 Node* target = en->target; 3846 r = setup_tree(target, reg, state, env); 3847 if (NTYPE(target) == NT_QTFR) { 3848 QtfrNode* tqn = NQTFR(target); 3849 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && 3850 tqn->greedy != 0) { /* (?>a*), a*+ etc... */ 3851 int qtype = NTYPE(tqn->target); 3852 if (IS_NODE_TYPE_SIMPLE(qtype)) 3853 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); 3854 } 3855 } 3856 } 3857 break; 3858 } 3859 } 3860 break; 3861 3862 case NT_ANCHOR: 3863 { 3864 AnchorNode* an = NANCHOR(node); 3865 3866 switch (an->type) { 3867 case ANCHOR_PREC_READ: 3868 r = setup_tree(an->target, reg, state, env); 3869 break; 3870 case ANCHOR_PREC_READ_NOT: 3871 r = setup_tree(an->target, reg, (state | IN_NOT), env); 3872 break; 3873 3874 /* allowed node types in look-behind */ 3875 #define ALLOWED_TYPE_IN_LB \ 3876 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ 3877 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) 3878 3879 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY ) 3880 #define ALLOWED_ENCLOSE_IN_LB_NOT 0 3881 3882 #define ALLOWED_ANCHOR_IN_LB \ 3883 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) 3884 #define ALLOWED_ANCHOR_IN_LB_NOT \ 3885 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) 3886 3887 case ANCHOR_LOOK_BEHIND: 3888 { 3889 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, 3890 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); 3891 if (r < 0) return r; 3892 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 3893 r = setup_look_behind(node, reg, env); 3894 if (r != 0) return r; 3895 r = setup_tree(an->target, reg, state, env); 3896 } 3897 break; 3898 3899 case ANCHOR_LOOK_BEHIND_NOT: 3900 { 3901 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, 3902 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); 3903 if (r < 0) return r; 3904 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 3905 r = setup_look_behind(node, reg, env); 3906 if (r != 0) return r; 3907 r = setup_tree(an->target, reg, (state | IN_NOT), env); 3908 } 3909 break; 3910 } 3911 } 3912 break; 3913 3914 default: 3915 break; 3916 } 3917 3918 return r; 3919 } 3920 3921 /* set skip map for Boyer-Moor search */ 3922 static int 3923 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, 3924 UChar skip[], int** int_skip) 3925 { 3926 int i, len; 3927 3928 len = (int)(end - s); 3929 if (len < ONIG_CHAR_TABLE_SIZE) { 3930 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar)len; 3931 3932 for (i = 0; i < len - 1; i++) 3933 skip[s[i]] = (UChar)(len - 1 - i); 3934 } 3935 else { 3936 if (IS_NULL(*int_skip)) { 3937 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); 3938 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; 3939 } 3940 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len; 3941 3942 for (i = 0; i < len - 1; i++) 3943 (*int_skip)[s[i]] = len - 1 - i; 3944 } 3945 return 0; 3946 } 3947 3948 #define OPT_EXACT_MAXLEN 24 3949 3950 typedef struct { 3951 OnigDistance min; /* min byte length */ 3952 OnigDistance max; /* max byte length */ 3953 } MinMaxLen; 3954 3955 typedef struct { 3956 MinMaxLen mmd; 3957 OnigEncoding enc; 3958 OnigOptionType options; 3959 OnigCaseFoldType case_fold_flag; 3960 ScanEnv* scan_env; 3961 } OptEnv; 3962 3963 typedef struct { 3964 int left_anchor; 3965 int right_anchor; 3966 } OptAncInfo; 3967 3968 typedef struct { 3969 MinMaxLen mmd; /* info position */ 3970 OptAncInfo anc; 3971 3972 int reach_end; 3973 int ignore_case; 3974 int len; 3975 UChar s[OPT_EXACT_MAXLEN]; 3976 } OptExactInfo; 3977 3978 typedef struct { 3979 MinMaxLen mmd; /* info position */ 3980 OptAncInfo anc; 3981 3982 int value; /* weighted value */ 3983 UChar map[ONIG_CHAR_TABLE_SIZE]; 3984 } OptMapInfo; 3985 3986 typedef struct { 3987 MinMaxLen len; 3988 3989 OptAncInfo anc; 3990 OptExactInfo exb; /* boundary */ 3991 OptExactInfo exm; /* middle */ 3992 OptExactInfo expr; /* prec read (?=...) */ 3993 3994 OptMapInfo map; /* boundary */ 3995 } NodeOptInfo; 3996 3997 3998 static int 3999 map_position_value(OnigEncoding enc, int i) 4000 { 4001 static const short int ByteValTable[] = { 4002 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, 4003 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4004 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 4005 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4006 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4007 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, 4008 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4009 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 4010 }; 4011 4012 if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { 4013 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) 4014 return 20; 4015 else 4016 return (int )ByteValTable[i]; 4017 } 4018 else 4019 return 4; /* Take it easy. */ 4020 } 4021 4022 static int 4023 distance_value(MinMaxLen* mm) 4024 { 4025 /* 1000 / (min-max-dist + 1) */ 4026 static const short int dist_vals[] = { 4027 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, 4028 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, 4029 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, 4030 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, 4031 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 4032 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 4033 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 4034 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 4035 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 4036 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 4037 }; 4038 4039 int d; 4040 4041 if (mm->max == ONIG_INFINITE_DISTANCE) return 0; 4042 4043 d = mm->max - mm->min; 4044 if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0]))) 4045 /* return dist_vals[d] * 16 / (mm->min + 12); */ 4046 return (int )dist_vals[d]; 4047 else 4048 return 1; 4049 } 4050 4051 static int 4052 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) 4053 { 4054 if (v2 <= 0) return -1; 4055 if (v1 <= 0) return 1; 4056 4057 v1 *= distance_value(d1); 4058 v2 *= distance_value(d2); 4059 4060 if (v2 > v1) return 1; 4061 if (v2 < v1) return -1; 4062 4063 if (d2->min < d1->min) return 1; 4064 if (d2->min > d1->min) return -1; 4065 return 0; 4066 } 4067 4068 static int 4069 is_equal_mml(MinMaxLen* a, MinMaxLen* b) 4070 { 4071 return (a->min == b->min && a->max == b->max) ? 1 : 0; 4072 } 4073 4074 4075 static void 4076 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) 4077 { 4078 mml->min = min; 4079 mml->max = max; 4080 } 4081 4082 static void 4083 clear_mml(MinMaxLen* mml) 4084 { 4085 mml->min = mml->max = 0; 4086 } 4087 4088 static void 4089 copy_mml(MinMaxLen* to, MinMaxLen* from) 4090 { 4091 to->min = from->min; 4092 to->max = from->max; 4093 } 4094 4095 static void 4096 add_mml(MinMaxLen* to, MinMaxLen* from) 4097 { 4098 to->min = distance_add(to->min, from->min); 4099 to->max = distance_add(to->max, from->max); 4100 } 4101 4102 #if 0 4103 static void 4104 add_len_mml(MinMaxLen* to, OnigDistance len) 4105 { 4106 to->min = distance_add(to->min, len); 4107 to->max = distance_add(to->max, len); 4108 } 4109 #endif 4110 4111 static void 4112 alt_merge_mml(MinMaxLen* to, MinMaxLen* from) 4113 { 4114 if (to->min > from->min) to->min = from->min; 4115 if (to->max < from->max) to->max = from->max; 4116 } 4117 4118 static void 4119 copy_opt_env(OptEnv* to, OptEnv* from) 4120 { 4121 CopyMem (to, from, sizeof (OptEnv)); 4122 } 4123 4124 static void 4125 clear_opt_anc_info(OptAncInfo* anc) 4126 { 4127 anc->left_anchor = 0; 4128 anc->right_anchor = 0; 4129 } 4130 4131 static void 4132 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) 4133 { 4134 CopyMem (to, from, sizeof (OptAncInfo)); 4135 } 4136 4137 static void 4138 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, 4139 OnigDistance left_len, OnigDistance right_len) 4140 { 4141 clear_opt_anc_info(to); 4142 4143 to->left_anchor = left->left_anchor; 4144 if (left_len == 0) { 4145 to->left_anchor |= right->left_anchor; 4146 } 4147 4148 to->right_anchor = right->right_anchor; 4149 if (right_len == 0) { 4150 to->right_anchor |= left->right_anchor; 4151 } 4152 } 4153 4154 static int 4155 is_left_anchor(int anc) 4156 { 4157 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || 4158 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || 4159 anc == ANCHOR_PREC_READ_NOT) 4160 return 0; 4161 4162 return 1; 4163 } 4164 4165 static int 4166 is_set_opt_anc_info(OptAncInfo* to, int anc) 4167 { 4168 if ((to->left_anchor & anc) != 0) return 1; 4169 4170 return ((to->right_anchor & anc) != 0 ? 1 : 0); 4171 } 4172 4173 static void 4174 add_opt_anc_info(OptAncInfo* to, int anc) 4175 { 4176 if (is_left_anchor(anc)) 4177 to->left_anchor |= anc; 4178 else 4179 to->right_anchor |= anc; 4180 } 4181 4182 static void 4183 remove_opt_anc_info(OptAncInfo* to, int anc) 4184 { 4185 if (is_left_anchor(anc)) 4186 to->left_anchor &= ~anc; 4187 else 4188 to->right_anchor &= ~anc; 4189 } 4190 4191 static void 4192 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) 4193 { 4194 to->left_anchor &= add->left_anchor; 4195 to->right_anchor &= add->right_anchor; 4196 } 4197 4198 static int 4199 is_full_opt_exact_info(OptExactInfo* ex) 4200 { 4201 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); 4202 } 4203 4204 static void 4205 clear_opt_exact_info(OptExactInfo* ex) 4206 { 4207 clear_mml(&ex->mmd); 4208 clear_opt_anc_info(&ex->anc); 4209 ex->reach_end = 0; 4210 ex->ignore_case = 0; 4211 ex->len = 0; 4212 ex->s[0] = '\0'; 4213 } 4214 4215 static void 4216 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) 4217 { 4218 CopyMem (to, from, sizeof (OptExactInfo)); 4219 } 4220 4221 static void 4222 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) 4223 { 4224 int i, j, len; 4225 UChar *p, *end; 4226 OptAncInfo tanc; 4227 4228 if (! to->ignore_case && add->ignore_case) { 4229 if (to->len >= add->len) return ; /* avoid */ 4230 4231 to->ignore_case = 1; 4232 } 4233 4234 p = add->s; 4235 end = p + add->len; 4236 for (i = to->len; p < end; ) { 4237 len = enclen(enc, p); 4238 if (i + len > OPT_EXACT_MAXLEN) break; 4239 for (j = 0; j < len && p < end; j++) 4240 to->s[i++] = *p++; 4241 } 4242 4243 to->len = i; 4244 to->reach_end = (p == end ? add->reach_end : 0); 4245 4246 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); 4247 if (! to->reach_end) tanc.right_anchor = 0; 4248 copy_opt_anc_info(&to->anc, &tanc); 4249 } 4250 4251 static void 4252 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, 4253 int raw ARG_UNUSED, OnigEncoding enc) 4254 { 4255 int i, j, len; 4256 UChar *p; 4257 4258 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { 4259 len = enclen(enc, p); 4260 if (i + len > OPT_EXACT_MAXLEN) break; 4261 for (j = 0; j < len && p < end; j++) 4262 to->s[i++] = *p++; 4263 } 4264 4265 to->len = i; 4266 } 4267 4268 static void 4269 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) 4270 { 4271 int i, j, len; 4272 4273 if (add->len == 0 || to->len == 0) { 4274 clear_opt_exact_info(to); 4275 return ; 4276 } 4277 4278 if (! is_equal_mml(&to->mmd, &add->mmd)) { 4279 clear_opt_exact_info(to); 4280 return ; 4281 } 4282 4283 for (i = 0; i < to->len && i < add->len; ) { 4284 if (to->s[i] != add->s[i]) break; 4285 len = enclen(env->enc, to->s + i); 4286 4287 for (j = 1; j < len; j++) { 4288 if (to->s[i+j] != add->s[i+j]) break; 4289 } 4290 if (j < len) break; 4291 i += len; 4292 } 4293 4294 if (! add->reach_end || i < add->len || i < to->len) { 4295 to->reach_end = 0; 4296 } 4297 to->len = i; 4298 to->ignore_case |= add->ignore_case; 4299 4300 alt_merge_opt_anc_info(&to->anc, &add->anc); 4301 if (! to->reach_end) to->anc.right_anchor = 0; 4302 } 4303 4304 static void 4305 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) 4306 { 4307 int v1, v2; 4308 4309 v1 = now->len; 4310 v2 = alt->len; 4311 4312 if (v2 == 0) { 4313 return ; 4314 } 4315 else if (v1 == 0) { 4316 copy_opt_exact_info(now, alt); 4317 return ; 4318 } 4319 else if (v1 <= 2 && v2 <= 2) { 4320 /* ByteValTable[x] is big value --> low price */ 4321 v2 = map_position_value(enc, now->s[0]); 4322 v1 = map_position_value(enc, alt->s[0]); 4323 4324 if (now->len > 1) v1 += 5; 4325 if (alt->len > 1) v2 += 5; 4326 } 4327 4328 if (now->ignore_case == 0) v1 *= 2; 4329 if (alt->ignore_case == 0) v2 *= 2; 4330 4331 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) 4332 copy_opt_exact_info(now, alt); 4333 } 4334 4335 static void 4336 clear_opt_map_info(OptMapInfo* map) 4337 { 4338 static const OptMapInfo clean_info = { 4339 {0, 0}, {0, 0}, 0, 4340 { 4341 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4342 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4343 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4344 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4345 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4346 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4347 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4349 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4350 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4351 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4352 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4353 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4354 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4355 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4356 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 4357 } 4358 }; 4359 4360 xmemcpy(map, &clean_info, sizeof(OptMapInfo)); 4361 } 4362 4363 static void 4364 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) 4365 { 4366 CopyMem (to, from, sizeof (OptMapInfo)); 4367 } 4368 4369 static void 4370 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) 4371 { 4372 if (map->map[c] == 0) { 4373 map->map[c] = 1; 4374 map->value += map_position_value(enc, c); 4375 } 4376 } 4377 4378 static int 4379 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, 4380 OnigEncoding enc, OnigCaseFoldType case_fold_flag) 4381 { 4382 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; 4383 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; 4384 int i, n; 4385 4386 add_char_opt_map_info(map, p[0], enc); 4387 4388 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); 4389 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); 4390 if (n < 0) return n; 4391 4392 for (i = 0; i < n; i++) { 4393 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); 4394 add_char_opt_map_info(map, buf[0], enc); 4395 } 4396 4397 return 0; 4398 } 4399 4400 static void 4401 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) 4402 { 4403 static int z = 1<<15; /* 32768: something big value */ 4404 4405 int v1, v2; 4406 4407 if (alt->value == 0) return ; 4408 if (now->value == 0) { 4409 copy_opt_map_info(now, alt); 4410 return ; 4411 } 4412 4413 v1 = z / now->value; 4414 v2 = z / alt->value; 4415 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) 4416 copy_opt_map_info(now, alt); 4417 } 4418 4419 static int 4420 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) 4421 { 4422 #define COMP_EM_BASE 20 4423 int ve, vm; 4424 4425 if (m->value <= 0) return -1; 4426 4427 ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); 4428 vm = COMP_EM_BASE * 5 * 2 / m->value; 4429 return comp_distance_value(&e->mmd, &m->mmd, ve, vm); 4430 } 4431 4432 static void 4433 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) 4434 { 4435 int i, val; 4436 4437 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */ 4438 if (to->value == 0) return ; 4439 if (add->value == 0 || to->mmd.max < add->mmd.min) { 4440 clear_opt_map_info(to); 4441 return ; 4442 } 4443 4444 alt_merge_mml(&to->mmd, &add->mmd); 4445 4446 val = 0; 4447 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { 4448 if (add->map[i]) 4449 to->map[i] = 1; 4450 4451 if (to->map[i]) 4452 val += map_position_value(enc, i); 4453 } 4454 to->value = val; 4455 4456 alt_merge_opt_anc_info(&to->anc, &add->anc); 4457 } 4458 4459 static void 4460 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) 4461 { 4462 copy_mml(&(opt->exb.mmd), mmd); 4463 copy_mml(&(opt->expr.mmd), mmd); 4464 copy_mml(&(opt->map.mmd), mmd); 4465 } 4466 4467 static void 4468 clear_node_opt_info(NodeOptInfo* opt) 4469 { 4470 clear_mml(&opt->len); 4471 clear_opt_anc_info(&opt->anc); 4472 clear_opt_exact_info(&opt->exb); 4473 clear_opt_exact_info(&opt->exm); 4474 clear_opt_exact_info(&opt->expr); 4475 clear_opt_map_info(&opt->map); 4476 } 4477 4478 static void 4479 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) 4480 { 4481 CopyMem (to, from, sizeof (NodeOptInfo)); 4482 } 4483 4484 static void 4485 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) 4486 { 4487 int exb_reach, exm_reach; 4488 OptAncInfo tanc; 4489 4490 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); 4491 copy_opt_anc_info(&to->anc, &tanc); 4492 4493 if (add->exb.len > 0 && to->len.max == 0) { 4494 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, 4495 to->len.max, add->len.max); 4496 copy_opt_anc_info(&add->exb.anc, &tanc); 4497 } 4498 4499 if (add->map.value > 0 && to->len.max == 0) { 4500 if (add->map.mmd.max == 0) 4501 add->map.anc.left_anchor |= to->anc.left_anchor; 4502 } 4503 4504 exb_reach = to->exb.reach_end; 4505 exm_reach = to->exm.reach_end; 4506 4507 if (add->len.max != 0) 4508 to->exb.reach_end = to->exm.reach_end = 0; 4509 4510 if (add->exb.len > 0) { 4511 if (exb_reach) { 4512 concat_opt_exact_info(&to->exb, &add->exb, enc); 4513 clear_opt_exact_info(&add->exb); 4514 } 4515 else if (exm_reach) { 4516 concat_opt_exact_info(&to->exm, &add->exb, enc); 4517 clear_opt_exact_info(&add->exb); 4518 } 4519 } 4520 select_opt_exact_info(enc, &to->exm, &add->exb); 4521 select_opt_exact_info(enc, &to->exm, &add->exm); 4522 4523 if (to->expr.len > 0) { 4524 if (add->len.max > 0) { 4525 if (to->expr.len > (int )add->len.max) 4526 to->expr.len = add->len.max; 4527 4528 if (to->expr.mmd.max == 0) 4529 select_opt_exact_info(enc, &to->exb, &to->expr); 4530 else 4531 select_opt_exact_info(enc, &to->exm, &to->expr); 4532 } 4533 } 4534 else if (add->expr.len > 0) { 4535 copy_opt_exact_info(&to->expr, &add->expr); 4536 } 4537 4538 select_opt_map_info(&to->map, &add->map); 4539 4540 add_mml(&to->len, &add->len); 4541 } 4542 4543 static void 4544 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) 4545 { 4546 alt_merge_opt_anc_info (&to->anc, &add->anc); 4547 alt_merge_opt_exact_info(&to->exb, &add->exb, env); 4548 alt_merge_opt_exact_info(&to->exm, &add->exm, env); 4549 alt_merge_opt_exact_info(&to->expr, &add->expr, env); 4550 alt_merge_opt_map_info(env->enc, &to->map, &add->map); 4551 4552 alt_merge_mml(&to->len, &add->len); 4553 } 4554 4555 4556 #define MAX_NODE_OPT_INFO_REF_COUNT 5 4557 4558 static int 4559 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) 4560 { 4561 int type; 4562 int r = 0; 4563 4564 clear_node_opt_info(opt); 4565 set_bound_node_opt_info(opt, &env->mmd); 4566 4567 type = NTYPE(node); 4568 switch (type) { 4569 case NT_LIST: 4570 { 4571 OptEnv nenv; 4572 NodeOptInfo nopt; 4573 Node* nd = node; 4574 4575 copy_opt_env(&nenv, env); 4576 do { 4577 r = optimize_node_left(NCAR(nd), &nopt, &nenv); 4578 if (r == 0) { 4579 add_mml(&nenv.mmd, &nopt.len); 4580 concat_left_node_opt_info(env->enc, opt, &nopt); 4581 } 4582 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); 4583 } 4584 break; 4585 4586 case NT_ALT: 4587 { 4588 NodeOptInfo nopt; 4589 Node* nd = node; 4590 4591 do { 4592 r = optimize_node_left(NCAR(nd), &nopt, env); 4593 if (r == 0) { 4594 if (nd == node) copy_node_opt_info(opt, &nopt); 4595 else alt_merge_node_opt_info(opt, &nopt, env); 4596 } 4597 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); 4598 } 4599 break; 4600 4601 case NT_STR: 4602 { 4603 StrNode* sn = NSTR(node); 4604 int slen = (int)(sn->end - sn->s); 4605 int is_raw = NSTRING_IS_RAW(node); 4606 4607 if (! NSTRING_IS_AMBIG(node)) { 4608 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, 4609 NSTRING_IS_RAW(node), env->enc); 4610 if (slen > 0) { 4611 add_char_opt_map_info(&opt->map, *(sn->s), env->enc); 4612 } 4613 set_mml(&opt->len, slen, slen); 4614 } 4615 else { 4616 int max; 4617 4618 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { 4619 int n = onigenc_strlen(env->enc, sn->s, sn->end); 4620 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; 4621 } 4622 else { 4623 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, 4624 is_raw, env->enc); 4625 opt->exb.ignore_case = 1; 4626 4627 if (slen > 0) { 4628 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, 4629 env->enc, env->case_fold_flag); 4630 if (r != 0) break; 4631 } 4632 4633 max = slen; 4634 } 4635 4636 set_mml(&opt->len, slen, max); 4637 } 4638 4639 if (opt->exb.len == slen) 4640 opt->exb.reach_end = 1; 4641 } 4642 break; 4643 4644 case NT_CCLASS: 4645 { 4646 int i, z; 4647 CClassNode* cc = NCCLASS(node); 4648 4649 /* no need to check ignore case. (setted in setup_tree()) */ 4650 4651 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { 4652 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); 4653 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 4654 4655 set_mml(&opt->len, min, max); 4656 } 4657 else { 4658 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 4659 z = BITSET_AT(cc->bs, i); 4660 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { 4661 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 4662 } 4663 } 4664 set_mml(&opt->len, 1, 1); 4665 } 4666 } 4667 break; 4668 4669 case NT_CTYPE: 4670 { 4671 int i, min, max; 4672 4673 max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 4674 4675 if (max == 1) { 4676 min = 1; 4677 4678 switch (NCTYPE(node)->ctype) { 4679 case ONIGENC_CTYPE_WORD: 4680 if (NCTYPE(node)->not != 0) { 4681 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 4682 if (! ONIGENC_IS_CODE_WORD(env->enc, i)) { 4683 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 4684 } 4685 } 4686 } 4687 else { 4688 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 4689 if (ONIGENC_IS_CODE_WORD(env->enc, i)) { 4690 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 4691 } 4692 } 4693 } 4694 break; 4695 } 4696 } 4697 else { 4698 min = ONIGENC_MBC_MINLEN(env->enc); 4699 } 4700 set_mml(&opt->len, min, max); 4701 } 4702 break; 4703 4704 case NT_CANY: 4705 { 4706 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); 4707 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 4708 set_mml(&opt->len, min, max); 4709 } 4710 break; 4711 4712 case NT_ANCHOR: 4713 switch (NANCHOR(node)->type) { 4714 case ANCHOR_BEGIN_BUF: 4715 case ANCHOR_BEGIN_POSITION: 4716 case ANCHOR_BEGIN_LINE: 4717 case ANCHOR_END_BUF: 4718 case ANCHOR_SEMI_END_BUF: 4719 case ANCHOR_END_LINE: 4720 add_opt_anc_info(&opt->anc, NANCHOR(node)->type); 4721 break; 4722 4723 case ANCHOR_PREC_READ: 4724 { 4725 NodeOptInfo nopt; 4726 4727 r = optimize_node_left(NANCHOR(node)->target, &nopt, env); 4728 if (r == 0) { 4729 if (nopt.exb.len > 0) 4730 copy_opt_exact_info(&opt->expr, &nopt.exb); 4731 else if (nopt.exm.len > 0) 4732 copy_opt_exact_info(&opt->expr, &nopt.exm); 4733 4734 opt->expr.reach_end = 0; 4735 4736 if (nopt.map.value > 0) 4737 copy_opt_map_info(&opt->map, &nopt.map); 4738 } 4739 } 4740 break; 4741 4742 case ANCHOR_PREC_READ_NOT: 4743 case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */ 4744 case ANCHOR_LOOK_BEHIND_NOT: 4745 break; 4746 } 4747 break; 4748 4749 case NT_BREF: 4750 { 4751 int i; 4752 int* backs; 4753 OnigDistance min, max, tmin, tmax; 4754 Node** nodes = SCANENV_MEM_NODES(env->scan_env); 4755 BRefNode* br = NBREF(node); 4756 4757 if (br->state & NST_RECURSION) { 4758 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); 4759 break; 4760 } 4761 backs = BACKREFS_P(br); 4762 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); 4763 if (r != 0) break; 4764 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); 4765 if (r != 0) break; 4766 for (i = 1; i < br->back_num; i++) { 4767 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); 4768 if (r != 0) break; 4769 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); 4770 if (r != 0) break; 4771 if (min > tmin) min = tmin; 4772 if (max < tmax) max = tmax; 4773 } 4774 if (r == 0) set_mml(&opt->len, min, max); 4775 } 4776 break; 4777 4778 #ifdef USE_SUBEXP_CALL 4779 case NT_CALL: 4780 if (IS_CALL_RECURSION(NCALL(node))) 4781 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); 4782 else { 4783 OnigOptionType save = env->options; 4784 env->options = NENCLOSE(NCALL(node)->target)->option; 4785 r = optimize_node_left(NCALL(node)->target, opt, env); 4786 env->options = save; 4787 } 4788 break; 4789 #endif 4790 4791 case NT_QTFR: 4792 { 4793 int i; 4794 OnigDistance min, max; 4795 NodeOptInfo nopt; 4796 QtfrNode* qn = NQTFR(node); 4797 4798 r = optimize_node_left(qn->target, &nopt, env); 4799 if (r) break; 4800 4801 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { 4802 if (env->mmd.max == 0 && 4803 NTYPE(qn->target) == NT_CANY && qn->greedy) { 4804 if (IS_MULTILINE(env->options)) 4805 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); 4806 else 4807 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); 4808 } 4809 } 4810 else { 4811 if (qn->lower > 0) { 4812 copy_node_opt_info(opt, &nopt); 4813 if (nopt.exb.len > 0) { 4814 if (nopt.exb.reach_end) { 4815 for (i = 2; i <= qn->lower && 4816 ! is_full_opt_exact_info(&opt->exb); i++) { 4817 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); 4818 } 4819 if (i < qn->lower) { 4820 opt->exb.reach_end = 0; 4821 } 4822 } 4823 } 4824 4825 if (qn->lower != qn->upper) { 4826 opt->exb.reach_end = 0; 4827 opt->exm.reach_end = 0; 4828 } 4829 if (qn->lower > 1) 4830 opt->exm.reach_end = 0; 4831 } 4832 } 4833 4834 min = distance_multiply(nopt.len.min, qn->lower); 4835 if (IS_REPEAT_INFINITE(qn->upper)) 4836 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); 4837 else 4838 max = distance_multiply(nopt.len.max, qn->upper); 4839 4840 set_mml(&opt->len, min, max); 4841 } 4842 break; 4843 4844 case NT_ENCLOSE: 4845 { 4846 EncloseNode* en = NENCLOSE(node); 4847 4848 switch (en->type) { 4849 case ENCLOSE_OPTION: 4850 { 4851 OnigOptionType save = env->options; 4852 4853 env->options = en->option; 4854 r = optimize_node_left(en->target, opt, env); 4855 env->options = save; 4856 } 4857 break; 4858 4859 case ENCLOSE_MEMORY: 4860 #ifdef USE_SUBEXP_CALL 4861 en->opt_count++; 4862 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { 4863 OnigDistance min, max; 4864 4865 min = 0; 4866 max = ONIG_INFINITE_DISTANCE; 4867 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; 4868 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; 4869 set_mml(&opt->len, min, max); 4870 } 4871 else 4872 #endif 4873 { 4874 r = optimize_node_left(en->target, opt, env); 4875 4876 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { 4877 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) 4878 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); 4879 } 4880 } 4881 break; 4882 4883 case ENCLOSE_STOP_BACKTRACK: 4884 r = optimize_node_left(en->target, opt, env); 4885 break; 4886 } 4887 } 4888 break; 4889 4890 default: 4891 #ifdef ONIG_DEBUG 4892 fprintf(stderr, "optimize_node_left: undefined node type %d\n", 4893 NTYPE(node)); 4894 #endif 4895 r = ONIGERR_TYPE_BUG; 4896 break; 4897 } 4898 4899 return r; 4900 } 4901 4902 static int 4903 set_optimize_exact_info(regex_t* reg, OptExactInfo* e) 4904 { 4905 int r; 4906 4907 if (e->len == 0) return 0; 4908 4909 if (e->ignore_case) { 4910 reg->exact = (UChar* )xmalloc(e->len); 4911 CHECK_NULL_RETURN_MEMERR(reg->exact); 4912 xmemcpy(reg->exact, e->s, e->len); 4913 reg->exact_end = reg->exact + e->len; 4914 reg->optimize = ONIG_OPTIMIZE_EXACT_IC; 4915 } 4916 else { 4917 int allow_reverse; 4918 4919 reg->exact = str_dup(e->s, e->s + e->len); 4920 CHECK_NULL_RETURN_MEMERR(reg->exact); 4921 reg->exact_end = reg->exact + e->len; 4922 4923 allow_reverse = 4924 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); 4925 4926 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { 4927 r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, 4928 reg->map, &(reg->int_map)); 4929 if (r) return r; 4930 4931 reg->optimize = (allow_reverse != 0 4932 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); 4933 } 4934 else { 4935 reg->optimize = ONIG_OPTIMIZE_EXACT; 4936 } 4937 } 4938 4939 reg->dmin = e->mmd.min; 4940 reg->dmax = e->mmd.max; 4941 4942 if (reg->dmin != ONIG_INFINITE_DISTANCE) { 4943 reg->threshold_len = reg->dmin + (int)(reg->exact_end - reg->exact); 4944 } 4945 4946 return 0; 4947 } 4948 4949 static void 4950 set_optimize_map_info(regex_t* reg, OptMapInfo* m) 4951 { 4952 int i; 4953 4954 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) 4955 reg->map[i] = m->map[i]; 4956 4957 reg->optimize = ONIG_OPTIMIZE_MAP; 4958 reg->dmin = m->mmd.min; 4959 reg->dmax = m->mmd.max; 4960 4961 if (reg->dmin != ONIG_INFINITE_DISTANCE) { 4962 reg->threshold_len = reg->dmin + 1; 4963 } 4964 } 4965 4966 static void 4967 set_sub_anchor(regex_t* reg, OptAncInfo* anc) 4968 { 4969 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; 4970 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; 4971 } 4972 4973 #ifdef ONIG_DEBUG 4974 static void print_optimize_info(FILE* f, regex_t* reg); 4975 #endif 4976 4977 static int 4978 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) 4979 { 4980 4981 int r; 4982 NodeOptInfo opt; 4983 OptEnv env; 4984 4985 env.enc = reg->enc; 4986 env.options = reg->options; 4987 env.case_fold_flag = reg->case_fold_flag; 4988 env.scan_env = scan_env; 4989 clear_mml(&env.mmd); 4990 4991 r = optimize_node_left(node, &opt, &env); 4992 if (r) return r; 4993 4994 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | 4995 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); 4996 4997 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); 4998 4999 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { 5000 reg->anchor_dmin = opt.len.min; 5001 reg->anchor_dmax = opt.len.max; 5002 } 5003 5004 if (opt.exb.len > 0 || opt.exm.len > 0) { 5005 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); 5006 if (opt.map.value > 0 && 5007 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { 5008 goto set_map; 5009 } 5010 else { 5011 r = set_optimize_exact_info(reg, &opt.exb); 5012 set_sub_anchor(reg, &opt.exb.anc); 5013 } 5014 } 5015 else if (opt.map.value > 0) { 5016 set_map: 5017 set_optimize_map_info(reg, &opt.map); 5018 set_sub_anchor(reg, &opt.map.anc); 5019 } 5020 else { 5021 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; 5022 if (opt.len.max == 0) 5023 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; 5024 } 5025 5026 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) 5027 print_optimize_info(stderr, reg); 5028 #endif 5029 return r; 5030 } 5031 5032 static void 5033 clear_optimize_info(regex_t* reg) 5034 { 5035 reg->optimize = ONIG_OPTIMIZE_NONE; 5036 reg->anchor = 0; 5037 reg->anchor_dmin = 0; 5038 reg->anchor_dmax = 0; 5039 reg->sub_anchor = 0; 5040 reg->exact_end = (UChar* )NULL; 5041 reg->threshold_len = 0; 5042 if (IS_NOT_NULL(reg->exact)) { 5043 xfree(reg->exact); 5044 reg->exact = (UChar* )NULL; 5045 } 5046 } 5047 5048 #ifdef ONIG_DEBUG 5049 5050 static void print_enc_string(FILE* fp, OnigEncoding enc, 5051 const UChar *s, const UChar *end) 5052 { 5053 fprintf(fp, "\nPATTERN: /"); 5054 5055 if (ONIGENC_MBC_MINLEN(enc) > 1) { 5056 const UChar *p; 5057 OnigCodePoint code; 5058 5059 p = s; 5060 while (p < end) { 5061 code = ONIGENC_MBC_TO_CODE(enc, p, end); 5062 if (code >= 0x80) { 5063 fprintf(fp, " 0x%04x ", (int )code); 5064 } 5065 else { 5066 fputc((int )code, fp); 5067 } 5068 5069 p += enclen(enc, p); 5070 } 5071 } 5072 else { 5073 while (s < end) { 5074 fputc((int )*s, fp); 5075 s++; 5076 } 5077 } 5078 5079 fprintf(fp, "/\n"); 5080 } 5081 5082 static void 5083 print_distance_range(FILE* f, OnigDistance a, OnigDistance b) 5084 { 5085 if (a == ONIG_INFINITE_DISTANCE) 5086 fputs("inf", f); 5087 else 5088 fprintf(f, "(%u)", a); 5089 5090 fputs("-", f); 5091 5092 if (b == ONIG_INFINITE_DISTANCE) 5093 fputs("inf", f); 5094 else 5095 fprintf(f, "(%u)", b); 5096 } 5097 5098 static void 5099 print_anchor(FILE* f, int anchor) 5100 { 5101 int q = 0; 5102 5103 fprintf(f, "["); 5104 5105 if (anchor & ANCHOR_BEGIN_BUF) { 5106 fprintf(f, "begin-buf"); 5107 q = 1; 5108 } 5109 if (anchor & ANCHOR_BEGIN_LINE) { 5110 if (q) fprintf(f, ", "); 5111 q = 1; 5112 fprintf(f, "begin-line"); 5113 } 5114 if (anchor & ANCHOR_BEGIN_POSITION) { 5115 if (q) fprintf(f, ", "); 5116 q = 1; 5117 fprintf(f, "begin-pos"); 5118 } 5119 if (anchor & ANCHOR_END_BUF) { 5120 if (q) fprintf(f, ", "); 5121 q = 1; 5122 fprintf(f, "end-buf"); 5123 } 5124 if (anchor & ANCHOR_SEMI_END_BUF) { 5125 if (q) fprintf(f, ", "); 5126 q = 1; 5127 fprintf(f, "semi-end-buf"); 5128 } 5129 if (anchor & ANCHOR_END_LINE) { 5130 if (q) fprintf(f, ", "); 5131 q = 1; 5132 fprintf(f, "end-line"); 5133 } 5134 if (anchor & ANCHOR_ANYCHAR_STAR) { 5135 if (q) fprintf(f, ", "); 5136 q = 1; 5137 fprintf(f, "anychar-star"); 5138 } 5139 if (anchor & ANCHOR_ANYCHAR_STAR_ML) { 5140 if (q) fprintf(f, ", "); 5141 fprintf(f, "anychar-star-pl"); 5142 } 5143 5144 fprintf(f, "]"); 5145 } 5146 5147 static void 5148 print_optimize_info(FILE* f, regex_t* reg) 5149 { 5150 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", 5151 "EXACT_IC", "MAP" }; 5152 5153 fprintf(f, "optimize: %s\n", on[reg->optimize]); 5154 fprintf(f, " anchor: "); print_anchor(f, reg->anchor); 5155 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) 5156 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); 5157 fprintf(f, "\n"); 5158 5159 if (reg->optimize) { 5160 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor); 5161 fprintf(f, "\n"); 5162 } 5163 fprintf(f, "\n"); 5164 5165 if (reg->exact) { 5166 UChar *p; 5167 fprintf(f, "exact: ["); 5168 for (p = reg->exact; p < reg->exact_end; p++) { 5169 fputc(*p, f); 5170 } 5171 fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact)); 5172 } 5173 else if (reg->optimize & ONIG_OPTIMIZE_MAP) { 5174 int c, i, n = 0; 5175 5176 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) 5177 if (reg->map[i]) n++; 5178 5179 fprintf(f, "map: n=%d\n", n); 5180 if (n > 0) { 5181 c = 0; 5182 fputc('[', f); 5183 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { 5184 if (reg->map[i] != 0) { 5185 if (c > 0) fputs(", ", f); 5186 c++; 5187 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 && 5188 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) 5189 fputc(i, f); 5190 else 5191 fprintf(f, "%d", i); 5192 } 5193 } 5194 fprintf(f, "]\n"); 5195 } 5196 } 5197 } 5198 #endif /* ONIG_DEBUG */ 5199 5200 5201 extern void 5202 onig_free_body(regex_t* reg) 5203 { 5204 if (IS_NOT_NULL(reg)) { 5205 if (IS_NOT_NULL(reg->p)) xfree(reg->p); 5206 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); 5207 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); 5208 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); 5209 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); 5210 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); 5211 5212 #ifdef USE_NAMED_GROUP 5213 onig_names_free(reg); 5214 #endif 5215 } 5216 } 5217 5218 extern void 5219 onig_free(regex_t* reg) 5220 { 5221 if (IS_NOT_NULL(reg)) { 5222 onig_free_body(reg); 5223 xfree(reg); 5224 } 5225 } 5226 5227 #define REGEX_TRANSFER(to,from) do {\ 5228 (to)->state = ONIG_STATE_MODIFY;\ 5229 onig_free_body(to);\ 5230 xmemcpy(to, from, sizeof(regex_t));\ 5231 xfree(from);\ 5232 } while (0) 5233 5234 extern void 5235 onig_transfer(regex_t* to, regex_t* from) 5236 { 5237 THREAD_ATOMIC_START; 5238 REGEX_TRANSFER(to, from); 5239 THREAD_ATOMIC_END; 5240 } 5241 5242 #define REGEX_CHAIN_HEAD(reg) do {\ 5243 while (IS_NOT_NULL((reg)->chain)) {\ 5244 (reg) = (reg)->chain;\ 5245 }\ 5246 } while (0) 5247 5248 extern void 5249 onig_chain_link_add(regex_t* to, regex_t* add) 5250 { 5251 THREAD_ATOMIC_START; 5252 REGEX_CHAIN_HEAD(to); 5253 to->chain = add; 5254 THREAD_ATOMIC_END; 5255 } 5256 5257 extern void 5258 onig_chain_reduce(regex_t* reg) 5259 { 5260 regex_t *head, *prev; 5261 5262 prev = reg; 5263 head = prev->chain; 5264 if (IS_NOT_NULL(head)) { 5265 reg->state = ONIG_STATE_MODIFY; 5266 while (IS_NOT_NULL(head->chain)) { 5267 prev = head; 5268 head = head->chain; 5269 } 5270 prev->chain = (regex_t* )NULL; 5271 REGEX_TRANSFER(reg, head); 5272 } 5273 } 5274 5275 #ifdef ONIG_DEBUG 5276 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); 5277 #endif 5278 #ifdef ONIG_DEBUG_PARSE_TREE 5279 static void print_tree P_((FILE* f, Node* node)); 5280 #endif 5281 5282 extern int 5283 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, 5284 OnigErrorInfo* einfo) 5285 { 5286 #define COMPILE_INIT_SIZE 20 5287 5288 int r, init_size; 5289 Node* root; 5290 ScanEnv scan_env; 5291 #ifdef USE_SUBEXP_CALL 5292 UnsetAddrList uslist; 5293 #endif 5294 5295 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; 5296 5297 reg->state = ONIG_STATE_COMPILING; 5298 5299 #ifdef ONIG_DEBUG 5300 print_enc_string(stderr, reg->enc, pattern, pattern_end); 5301 #endif 5302 5303 if (reg->alloc == 0) { 5304 init_size = ((int)(pattern_end - pattern)) * 2; 5305 if (init_size <= 0) init_size = COMPILE_INIT_SIZE; 5306 r = BBUF_INIT(reg, init_size); 5307 if (r != 0) goto end; 5308 } 5309 else 5310 reg->used = 0; 5311 5312 reg->num_mem = 0; 5313 reg->num_repeat = 0; 5314 reg->num_null_check = 0; 5315 reg->repeat_range_alloc = 0; 5316 reg->repeat_range = (OnigRepeatRange* )NULL; 5317 #ifdef USE_COMBINATION_EXPLOSION_CHECK 5318 reg->num_comb_exp_check = 0; 5319 #endif 5320 5321 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); 5322 if (r != 0 || root == NULL) goto err; 5323 5324 #ifdef USE_NAMED_GROUP 5325 /* mixed use named group and no-named group */ 5326 if (scan_env.num_named > 0 && 5327 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && 5328 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { 5329 if (scan_env.num_named != scan_env.num_mem) 5330 r = disable_noname_group_capture(&root, reg, &scan_env); 5331 else 5332 r = numbered_ref_check(root); 5333 5334 if (r != 0) goto err; 5335 } 5336 #endif 5337 5338 #ifdef USE_SUBEXP_CALL 5339 if (scan_env.num_call > 0) { 5340 r = unset_addr_list_init(&uslist, scan_env.num_call); 5341 if (r != 0) goto err; 5342 scan_env.unset_addr_list = &uslist; 5343 r = setup_subexp_call(root, &scan_env); 5344 if (r != 0) goto err_unset; 5345 r = subexp_recursive_check_trav(root, &scan_env); 5346 if (r < 0) goto err_unset; 5347 r = subexp_inf_recursive_check_trav(root, &scan_env); 5348 if (r != 0) goto err_unset; 5349 5350 reg->num_call = scan_env.num_call; 5351 } 5352 else 5353 reg->num_call = 0; 5354 #endif 5355 5356 r = setup_tree(root, reg, 0, &scan_env); 5357 if (r != 0) goto err_unset; 5358 5359 #ifdef ONIG_DEBUG_PARSE_TREE 5360 print_tree(stderr, root); 5361 #endif 5362 5363 reg->capture_history = scan_env.capture_history; 5364 reg->bt_mem_start = scan_env.bt_mem_start; 5365 reg->bt_mem_start |= reg->capture_history; 5366 if (IS_FIND_CONDITION(reg->options)) 5367 BIT_STATUS_ON_ALL(reg->bt_mem_end); 5368 else { 5369 reg->bt_mem_end = scan_env.bt_mem_end; 5370 reg->bt_mem_end |= reg->capture_history; 5371 } 5372 5373 #ifdef USE_COMBINATION_EXPLOSION_CHECK 5374 if (scan_env.backrefed_mem == 0 5375 #ifdef USE_SUBEXP_CALL 5376 || scan_env.num_call == 0 5377 #endif 5378 ) { 5379 setup_comb_exp_check(root, 0, &scan_env); 5380 #ifdef USE_SUBEXP_CALL 5381 if (scan_env.has_recursion != 0) { 5382 scan_env.num_comb_exp_check = 0; 5383 } 5384 else 5385 #endif 5386 if (scan_env.comb_exp_max_regnum > 0) { 5387 int i; 5388 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { 5389 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { 5390 scan_env.num_comb_exp_check = 0; 5391 break; 5392 } 5393 } 5394 } 5395 } 5396 5397 reg->num_comb_exp_check = scan_env.num_comb_exp_check; 5398 #endif 5399 5400 clear_optimize_info(reg); 5401 #ifndef ONIG_DONT_OPTIMIZE 5402 r = set_optimize_info_from_tree(root, reg, &scan_env); 5403 if (r != 0) goto err_unset; 5404 #endif 5405 5406 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { 5407 xfree(scan_env.mem_nodes_dynamic); 5408 scan_env.mem_nodes_dynamic = (Node** )NULL; 5409 } 5410 5411 r = compile_tree(root, reg); 5412 if (r == 0) { 5413 r = add_opcode(reg, OP_END); 5414 #ifdef USE_SUBEXP_CALL 5415 if (scan_env.num_call > 0) { 5416 r = unset_addr_list_fix(&uslist, reg); 5417 unset_addr_list_end(&uslist); 5418 if (r) goto err; 5419 } 5420 #endif 5421 5422 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) 5423 reg->stack_pop_level = STACK_POP_LEVEL_ALL; 5424 else { 5425 if (reg->bt_mem_start != 0) 5426 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; 5427 else 5428 reg->stack_pop_level = STACK_POP_LEVEL_FREE; 5429 } 5430 } 5431 #ifdef USE_SUBEXP_CALL 5432 else if (scan_env.num_call > 0) { 5433 unset_addr_list_end(&uslist); 5434 } 5435 #endif 5436 onig_node_free(root); 5437 5438 #ifdef ONIG_DEBUG_COMPILE 5439 #ifdef USE_NAMED_GROUP 5440 onig_print_names(stderr, reg); 5441 #endif 5442 print_compiled_byte_code_list(stderr, reg); 5443 #endif 5444 5445 end: 5446 reg->state = ONIG_STATE_NORMAL; 5447 return r; 5448 5449 err_unset: 5450 #ifdef USE_SUBEXP_CALL 5451 if (scan_env.num_call > 0) { 5452 unset_addr_list_end(&uslist); 5453 } 5454 #endif 5455 err: 5456 if (IS_NOT_NULL(scan_env.error)) { 5457 if (IS_NOT_NULL(einfo)) { 5458 einfo->enc = scan_env.enc; 5459 einfo->par = scan_env.error; 5460 einfo->par_end = scan_env.error_end; 5461 } 5462 } 5463 5464 onig_node_free(root); 5465 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) 5466 xfree(scan_env.mem_nodes_dynamic); 5467 return r; 5468 } 5469 5470 #ifdef USE_RECOMPILE_API 5471 extern int 5472 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, 5473 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, 5474 OnigErrorInfo* einfo) 5475 { 5476 int r; 5477 regex_t *new_reg; 5478 5479 r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo); 5480 if (r) return r; 5481 if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { 5482 onig_transfer(reg, new_reg); 5483 } 5484 else { 5485 onig_chain_link_add(reg, new_reg); 5486 } 5487 return 0; 5488 } 5489 #endif 5490 5491 static int onig_inited = 0; 5492 5493 extern int 5494 onig_reg_init(regex_t* reg, OnigOptionType option, 5495 OnigCaseFoldType case_fold_flag, 5496 OnigEncoding enc, OnigSyntaxType* syntax) 5497 { 5498 if (! onig_inited) 5499 onig_init(); 5500 5501 if (IS_NULL(reg)) 5502 return ONIGERR_INVALID_ARGUMENT; 5503 5504 if (ONIGENC_IS_UNDEF(enc)) 5505 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; 5506 5507 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) 5508 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { 5509 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; 5510 } 5511 5512 (reg)->state = ONIG_STATE_MODIFY; 5513 5514 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { 5515 option |= syntax->options; 5516 option &= ~ONIG_OPTION_SINGLELINE; 5517 } 5518 else 5519 option |= syntax->options; 5520 5521 (reg)->enc = enc; 5522 (reg)->options = option; 5523 (reg)->syntax = syntax; 5524 (reg)->optimize = 0; 5525 (reg)->exact = (UChar* )NULL; 5526 (reg)->int_map = (int* )NULL; 5527 (reg)->int_map_backward = (int* )NULL; 5528 (reg)->chain = (regex_t* )NULL; 5529 5530 (reg)->p = (UChar* )NULL; 5531 (reg)->alloc = 0; 5532 (reg)->used = 0; 5533 (reg)->name_table = (void* )NULL; 5534 5535 (reg)->case_fold_flag = case_fold_flag; 5536 return 0; 5537 } 5538 5539 extern int 5540 onig_new_without_alloc(regex_t* reg, const UChar* pattern, 5541 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, 5542 OnigSyntaxType* syntax, OnigErrorInfo* einfo) 5543 { 5544 int r; 5545 5546 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); 5547 if (r) return r; 5548 5549 r = onig_compile(reg, pattern, pattern_end, einfo); 5550 return r; 5551 } 5552 5553 extern int 5554 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, 5555 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, 5556 OnigErrorInfo* einfo) 5557 { 5558 int r; 5559 5560 *reg = (regex_t* )xmalloc(sizeof(regex_t)); 5561 if (IS_NULL(*reg)) return ONIGERR_MEMORY; 5562 5563 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); 5564 if (r) goto err; 5565 5566 r = onig_compile(*reg, pattern, pattern_end, einfo); 5567 if (r) { 5568 err: 5569 onig_free(*reg); 5570 *reg = NULL; 5571 } 5572 return r; 5573 } 5574 5575 5576 extern int 5577 onig_init(void) 5578 { 5579 if (onig_inited != 0) 5580 return 0; 5581 5582 THREAD_SYSTEM_INIT; 5583 THREAD_ATOMIC_START; 5584 5585 onig_inited = 1; 5586 5587 onigenc_init(); 5588 /* onigenc_set_default_caseconv_table((UChar* )0); */ 5589 5590 #ifdef ONIG_DEBUG_STATISTICS 5591 onig_statistics_init(); 5592 #endif 5593 5594 THREAD_ATOMIC_END; 5595 return 0; 5596 } 5597 5598 5599 static OnigEndCallListItemType* EndCallTop; 5600 5601 extern void onig_add_end_call(void (*func)(void)) 5602 { 5603 OnigEndCallListItemType* item; 5604 5605 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item)); 5606 if (item == 0) return ; 5607 5608 item->next = EndCallTop; 5609 item->func = func; 5610 5611 EndCallTop = item; 5612 } 5613 5614 static void 5615 exec_end_call_list(void) 5616 { 5617 OnigEndCallListItemType* prev; 5618 void (*func)(void); 5619 5620 while (EndCallTop != 0) { 5621 func = EndCallTop->func; 5622 (*func)(); 5623 5624 prev = EndCallTop; 5625 EndCallTop = EndCallTop->next; 5626 xfree(prev); 5627 } 5628 } 5629 5630 extern int 5631 onig_end(void) 5632 { 5633 THREAD_ATOMIC_START; 5634 5635 exec_end_call_list(); 5636 5637 #ifdef ONIG_DEBUG_STATISTICS 5638 onig_print_statistics(stderr); 5639 #endif 5640 5641 #ifdef USE_SHARED_CCLASS_TABLE 5642 onig_free_shared_cclass_table(); 5643 #endif 5644 5645 #ifdef USE_PARSE_TREE_NODE_RECYCLE 5646 onig_free_node_list(); 5647 #endif 5648 5649 onig_inited = 0; 5650 5651 THREAD_ATOMIC_END; 5652 THREAD_SYSTEM_END; 5653 return 0; 5654 } 5655 5656 extern int 5657 onig_is_in_code_range(const UChar* p, OnigCodePoint code) 5658 { 5659 OnigCodePoint n, *data; 5660 OnigCodePoint low, high, x; 5661 5662 GET_CODE_POINT(n, p); 5663 data = (OnigCodePoint* )p; 5664 data++; 5665 5666 for (low = 0, high = n; low < high; ) { 5667 x = (low + high) >> 1; 5668 if (code > data[x * 2 + 1]) 5669 low = x + 1; 5670 else 5671 high = x; 5672 } 5673 5674 return ((low < n && code >= data[low * 2]) ? 1 : 0); 5675 } 5676 5677 extern int 5678 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) 5679 { 5680 int found; 5681 5682 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { 5683 if (IS_NULL(cc->mbuf)) { 5684 found = 0; 5685 } 5686 else { 5687 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); 5688 } 5689 } 5690 else { 5691 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); 5692 } 5693 5694 if (IS_NCCLASS_NOT(cc)) 5695 return !found; 5696 else 5697 return found; 5698 } 5699 5700 extern int 5701 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) 5702 { 5703 int len; 5704 5705 if (ONIGENC_MBC_MINLEN(enc) > 1) { 5706 len = 2; 5707 } 5708 else { 5709 len = ONIGENC_CODE_TO_MBCLEN(enc, code); 5710 } 5711 return onig_is_code_in_cc_len(len, code, cc); 5712 } 5713 5714 5715 #ifdef ONIG_DEBUG 5716 5717 /* arguments type */ 5718 #define ARG_SPECIAL -1 5719 #define ARG_NON 0 5720 #define ARG_RELADDR 1 5721 #define ARG_ABSADDR 2 5722 #define ARG_LENGTH 3 5723 #define ARG_MEMNUM 4 5724 #define ARG_OPTION 5 5725 #define ARG_STATE_CHECK 6 5726 5727 OnigOpInfoType OnigOpInfo[] = { 5728 { OP_FINISH, "finish", ARG_NON }, 5729 { OP_END, "end", ARG_NON }, 5730 { OP_EXACT1, "exact1", ARG_SPECIAL }, 5731 { OP_EXACT2, "exact2", ARG_SPECIAL }, 5732 { OP_EXACT3, "exact3", ARG_SPECIAL }, 5733 { OP_EXACT4, "exact4", ARG_SPECIAL }, 5734 { OP_EXACT5, "exact5", ARG_SPECIAL }, 5735 { OP_EXACTN, "exactn", ARG_SPECIAL }, 5736 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, 5737 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, 5738 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, 5739 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, 5740 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, 5741 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, 5742 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, 5743 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, 5744 { OP_CCLASS, "cclass", ARG_SPECIAL }, 5745 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, 5746 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, 5747 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, 5748 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, 5749 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, 5750 { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, 5751 { OP_ANYCHAR, "anychar", ARG_NON }, 5752 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, 5753 { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, 5754 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, 5755 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, 5756 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, 5757 { OP_WORD, "word", ARG_NON }, 5758 { OP_NOT_WORD, "not-word", ARG_NON }, 5759 { OP_WORD_BOUND, "word-bound", ARG_NON }, 5760 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, 5761 { OP_WORD_BEGIN, "word-begin", ARG_NON }, 5762 { OP_WORD_END, "word-end", ARG_NON }, 5763 { OP_BEGIN_BUF, "begin-buf", ARG_NON }, 5764 { OP_END_BUF, "end-buf", ARG_NON }, 5765 { OP_BEGIN_LINE, "begin-line", ARG_NON }, 5766 { OP_END_LINE, "end-line", ARG_NON }, 5767 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, 5768 { OP_BEGIN_POSITION, "begin-position", ARG_NON }, 5769 { OP_BACKREF1, "backref1", ARG_NON }, 5770 { OP_BACKREF2, "backref2", ARG_NON }, 5771 { OP_BACKREFN, "backrefn", ARG_MEMNUM }, 5772 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, 5773 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, 5774 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, 5775 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, 5776 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, 5777 { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, 5778 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, 5779 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, 5780 { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, 5781 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, 5782 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, 5783 { OP_SET_OPTION, "set-option", ARG_OPTION }, 5784 { OP_FAIL, "fail", ARG_NON }, 5785 { OP_JUMP, "jump", ARG_RELADDR }, 5786 { OP_PUSH, "push", ARG_RELADDR }, 5787 { OP_POP, "pop", ARG_NON }, 5788 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, 5789 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, 5790 { OP_REPEAT, "repeat", ARG_SPECIAL }, 5791 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, 5792 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, 5793 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, 5794 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, 5795 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, 5796 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, 5797 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, 5798 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, 5799 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, 5800 { OP_PUSH_POS, "push-pos", ARG_NON }, 5801 { OP_POP_POS, "pop-pos", ARG_NON }, 5802 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, 5803 { OP_FAIL_POS, "fail-pos", ARG_NON }, 5804 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, 5805 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, 5806 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, 5807 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, 5808 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, 5809 { OP_CALL, "call", ARG_ABSADDR }, 5810 { OP_RETURN, "return", ARG_NON }, 5811 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, 5812 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, 5813 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, 5814 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, 5815 { OP_STATE_CHECK_ANYCHAR_ML_STAR, 5816 "state-check-anychar-ml*", ARG_STATE_CHECK }, 5817 { -1, "", ARG_NON } 5818 }; 5819 5820 static char* 5821 op2name(int opcode) 5822 { 5823 int i; 5824 5825 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { 5826 if (opcode == OnigOpInfo[i].opcode) 5827 return OnigOpInfo[i].name; 5828 } 5829 return ""; 5830 } 5831 5832 static int 5833 op2arg_type(int opcode) 5834 { 5835 int i; 5836 5837 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { 5838 if (opcode == OnigOpInfo[i].opcode) 5839 return OnigOpInfo[i].arg_type; 5840 } 5841 return ARG_SPECIAL; 5842 } 5843 5844 static void 5845 Indent(FILE* f, int indent) 5846 { 5847 int i; 5848 for (i = 0; i < indent; i++) putc(' ', f); 5849 } 5850 5851 static void 5852 p_string(FILE* f, int len, UChar* s) 5853 { 5854 fputs(":", f); 5855 while (len-- > 0) { fputc(*s++, f); } 5856 } 5857 5858 static void 5859 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) 5860 { 5861 int x = len * mb_len; 5862 5863 fprintf(f, ":%d:", len); 5864 while (x-- > 0) { fputc(*s++, f); } 5865 } 5866 5867 extern void 5868 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, 5869 OnigEncoding enc) 5870 { 5871 int i, n, arg_type; 5872 RelAddrType addr; 5873 LengthType len; 5874 MemNumType mem; 5875 StateCheckNumType scn; 5876 OnigCodePoint code; 5877 UChar *q; 5878 5879 fprintf(f, "[%s", op2name(*bp)); 5880 arg_type = op2arg_type(*bp); 5881 if (arg_type != ARG_SPECIAL) { 5882 bp++; 5883 switch (arg_type) { 5884 case ARG_NON: 5885 break; 5886 case ARG_RELADDR: 5887 GET_RELADDR_INC(addr, bp); 5888 fprintf(f, ":(%d)", addr); 5889 break; 5890 case ARG_ABSADDR: 5891 GET_ABSADDR_INC(addr, bp); 5892 fprintf(f, ":(%d)", addr); 5893 break; 5894 case ARG_LENGTH: 5895 GET_LENGTH_INC(len, bp); 5896 fprintf(f, ":%d", len); 5897 break; 5898 case ARG_MEMNUM: 5899 mem = *((MemNumType* )bp); 5900 bp += SIZE_MEMNUM; 5901 fprintf(f, ":%d", mem); 5902 break; 5903 case ARG_OPTION: 5904 { 5905 OnigOptionType option = *((OnigOptionType* )bp); 5906 bp += SIZE_OPTION; 5907 fprintf(f, ":%d", option); 5908 } 5909 break; 5910 5911 case ARG_STATE_CHECK: 5912 scn = *((StateCheckNumType* )bp); 5913 bp += SIZE_STATE_CHECK_NUM; 5914 fprintf(f, ":%d", scn); 5915 break; 5916 } 5917 } 5918 else { 5919 switch (*bp++) { 5920 case OP_EXACT1: 5921 case OP_ANYCHAR_STAR_PEEK_NEXT: 5922 case OP_ANYCHAR_ML_STAR_PEEK_NEXT: 5923 p_string(f, 1, bp++); break; 5924 case OP_EXACT2: 5925 p_string(f, 2, bp); bp += 2; break; 5926 case OP_EXACT3: 5927 p_string(f, 3, bp); bp += 3; break; 5928 case OP_EXACT4: 5929 p_string(f, 4, bp); bp += 4; break; 5930 case OP_EXACT5: 5931 p_string(f, 5, bp); bp += 5; break; 5932 case OP_EXACTN: 5933 GET_LENGTH_INC(len, bp); 5934 p_len_string(f, len, 1, bp); 5935 bp += len; 5936 break; 5937 5938 case OP_EXACTMB2N1: 5939 p_string(f, 2, bp); bp += 2; break; 5940 case OP_EXACTMB2N2: 5941 p_string(f, 4, bp); bp += 4; break; 5942 case OP_EXACTMB2N3: 5943 p_string(f, 6, bp); bp += 6; break; 5944 case OP_EXACTMB2N: 5945 GET_LENGTH_INC(len, bp); 5946 p_len_string(f, len, 2, bp); 5947 bp += len * 2; 5948 break; 5949 case OP_EXACTMB3N: 5950 GET_LENGTH_INC(len, bp); 5951 p_len_string(f, len, 3, bp); 5952 bp += len * 3; 5953 break; 5954 case OP_EXACTMBN: 5955 { 5956 int mb_len; 5957 5958 GET_LENGTH_INC(mb_len, bp); 5959 GET_LENGTH_INC(len, bp); 5960 fprintf(f, ":%d:%d:", mb_len, len); 5961 n = len * mb_len; 5962 while (n-- > 0) { fputc(*bp++, f); } 5963 } 5964 break; 5965 5966 case OP_EXACT1_IC: 5967 len = enclen(enc, bp); 5968 p_string(f, len, bp); 5969 bp += len; 5970 break; 5971 case OP_EXACTN_IC: 5972 GET_LENGTH_INC(len, bp); 5973 p_len_string(f, len, 1, bp); 5974 bp += len; 5975 break; 5976 5977 case OP_CCLASS: 5978 n = bitset_on_num((BitSetRef )bp); 5979 bp += SIZE_BITSET; 5980 fprintf(f, ":%d", n); 5981 break; 5982 5983 case OP_CCLASS_NOT: 5984 n = bitset_on_num((BitSetRef )bp); 5985 bp += SIZE_BITSET; 5986 fprintf(f, ":%d", n); 5987 break; 5988 5989 case OP_CCLASS_MB: 5990 case OP_CCLASS_MB_NOT: 5991 GET_LENGTH_INC(len, bp); 5992 q = bp; 5993 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 5994 ALIGNMENT_RIGHT(q); 5995 #endif 5996 GET_CODE_POINT(code, q); 5997 bp += len; 5998 fprintf(f, ":%d:%d", (int )code, len); 5999 break; 6000 6001 case OP_CCLASS_MIX: 6002 case OP_CCLASS_MIX_NOT: 6003 n = bitset_on_num((BitSetRef )bp); 6004 bp += SIZE_BITSET; 6005 GET_LENGTH_INC(len, bp); 6006 q = bp; 6007 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 6008 ALIGNMENT_RIGHT(q); 6009 #endif 6010 GET_CODE_POINT(code, q); 6011 bp += len; 6012 fprintf(f, ":%d:%d:%d", n, (int )code, len); 6013 break; 6014 6015 case OP_CCLASS_NODE: 6016 { 6017 CClassNode *cc; 6018 6019 GET_POINTER_INC(cc, bp); 6020 n = bitset_on_num(cc->bs); 6021 fprintf(f, ":%u:%d", (unsigned int )cc, n); 6022 } 6023 break; 6024 6025 case OP_BACKREFN_IC: 6026 mem = *((MemNumType* )bp); 6027 bp += SIZE_MEMNUM; 6028 fprintf(f, ":%d", mem); 6029 break; 6030 6031 case OP_BACKREF_MULTI_IC: 6032 case OP_BACKREF_MULTI: 6033 fputs(" ", f); 6034 GET_LENGTH_INC(len, bp); 6035 for (i = 0; i < len; i++) { 6036 GET_MEMNUM_INC(mem, bp); 6037 if (i > 0) fputs(", ", f); 6038 fprintf(f, "%d", mem); 6039 } 6040 break; 6041 6042 case OP_BACKREF_WITH_LEVEL: 6043 { 6044 OnigOptionType option; 6045 LengthType level; 6046 6047 GET_OPTION_INC(option, bp); 6048 fprintf(f, ":%d", option); 6049 GET_LENGTH_INC(level, bp); 6050 fprintf(f, ":%d", level); 6051 6052 fputs(" ", f); 6053 GET_LENGTH_INC(len, bp); 6054 for (i = 0; i < len; i++) { 6055 GET_MEMNUM_INC(mem, bp); 6056 if (i > 0) fputs(", ", f); 6057 fprintf(f, "%d", mem); 6058 } 6059 } 6060 break; 6061 6062 case OP_REPEAT: 6063 case OP_REPEAT_NG: 6064 { 6065 mem = *((MemNumType* )bp); 6066 bp += SIZE_MEMNUM; 6067 addr = *((RelAddrType* )bp); 6068 bp += SIZE_RELADDR; 6069 fprintf(f, ":%d:%d", mem, addr); 6070 } 6071 break; 6072 6073 case OP_PUSH_OR_JUMP_EXACT1: 6074 case OP_PUSH_IF_PEEK_NEXT: 6075 addr = *((RelAddrType* )bp); 6076 bp += SIZE_RELADDR; 6077 fprintf(f, ":(%d)", addr); 6078 p_string(f, 1, bp); 6079 bp += 1; 6080 break; 6081 6082 case OP_LOOK_BEHIND: 6083 GET_LENGTH_INC(len, bp); 6084 fprintf(f, ":%d", len); 6085 break; 6086 6087 case OP_PUSH_LOOK_BEHIND_NOT: 6088 GET_RELADDR_INC(addr, bp); 6089 GET_LENGTH_INC(len, bp); 6090 fprintf(f, ":%d:(%d)", len, addr); 6091 break; 6092 6093 case OP_STATE_CHECK_PUSH: 6094 case OP_STATE_CHECK_PUSH_OR_JUMP: 6095 scn = *((StateCheckNumType* )bp); 6096 bp += SIZE_STATE_CHECK_NUM; 6097 addr = *((RelAddrType* )bp); 6098 bp += SIZE_RELADDR; 6099 fprintf(f, ":%d:(%d)", scn, addr); 6100 break; 6101 6102 default: 6103 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", 6104 *--bp); 6105 } 6106 } 6107 fputs("]", f); 6108 if (nextp) *nextp = bp; 6109 } 6110 6111 static void 6112 print_compiled_byte_code_list(FILE* f, regex_t* reg) 6113 { 6114 int ncode; 6115 UChar* bp = reg->p; 6116 UChar* end = reg->p + reg->used; 6117 6118 fprintf(f, "code length: %d\n", reg->used); 6119 6120 ncode = 0; 6121 while (bp < end) { 6122 ncode++; 6123 if (bp > reg->p) { 6124 if (ncode % 5 == 0) 6125 fprintf(f, "\n"); 6126 else 6127 fputs(" ", f); 6128 } 6129 onig_print_compiled_byte_code(f, bp, &bp, reg->enc); 6130 } 6131 6132 fprintf(f, "\n"); 6133 } 6134 6135 static void 6136 print_indent_tree(FILE* f, Node* node, int indent) 6137 { 6138 int i, type; 6139 int add = 3; 6140 UChar* p; 6141 6142 Indent(f, indent); 6143 if (IS_NULL(node)) { 6144 fprintf(f, "ERROR: null node!!!\n"); 6145 exit (0); 6146 } 6147 6148 type = NTYPE(node); 6149 switch (type) { 6150 case NT_LIST: 6151 case NT_ALT: 6152 if (NTYPE(node) == NT_LIST) 6153 fprintf(f, "<list:%x>\n", (int )node); 6154 else 6155 fprintf(f, "<alt:%x>\n", (int )node); 6156 6157 print_indent_tree(f, NCAR(node), indent + add); 6158 while (IS_NOT_NULL(node = NCDR(node))) { 6159 if (NTYPE(node) != type) { 6160 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); 6161 exit(0); 6162 } 6163 print_indent_tree(f, NCAR(node), indent + add); 6164 } 6165 break; 6166 6167 case NT_STR: 6168 fprintf(f, "<string%s:%x>", 6169 (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node); 6170 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { 6171 if (*p >= 0x20 && *p < 0x7f) 6172 fputc(*p, f); 6173 else { 6174 fprintf(f, " 0x%02x", *p); 6175 } 6176 } 6177 break; 6178 6179 case NT_CCLASS: 6180 fprintf(f, "<cclass:%x>", (int )node); 6181 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); 6182 if (NCCLASS(node)->mbuf) { 6183 BBuf* bbuf = NCCLASS(node)->mbuf; 6184 for (i = 0; i < bbuf->used; i++) { 6185 if (i > 0) fprintf(f, ","); 6186 fprintf(f, "%0x", bbuf->p[i]); 6187 } 6188 } 6189 break; 6190 6191 case NT_CTYPE: 6192 fprintf(f, "<ctype:%x> ", (int )node); 6193 switch (NCTYPE(node)->ctype) { 6194 case ONIGENC_CTYPE_WORD: 6195 if (NCTYPE(node)->not != 0) 6196 fputs("not word", f); 6197 else 6198 fputs("word", f); 6199 break; 6200 6201 default: 6202 fprintf(f, "ERROR: undefined ctype.\n"); 6203 exit(0); 6204 } 6205 break; 6206 6207 case NT_CANY: 6208 fprintf(f, "<anychar:%x>", (int )node); 6209 break; 6210 6211 case NT_ANCHOR: 6212 fprintf(f, "<anchor:%x> ", (int )node); 6213 switch (NANCHOR(node)->type) { 6214 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; 6215 case ANCHOR_END_BUF: fputs("end buf", f); break; 6216 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; 6217 case ANCHOR_END_LINE: fputs("end line", f); break; 6218 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break; 6219 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; 6220 6221 case ANCHOR_WORD_BOUND: fputs("word bound", f); break; 6222 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break; 6223 #ifdef USE_WORD_BEGIN_END 6224 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break; 6225 case ANCHOR_WORD_END: fputs("word end", f); break; 6226 #endif 6227 case ANCHOR_PREC_READ: fputs("prec read", f); break; 6228 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break; 6229 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break; 6230 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break; 6231 6232 default: 6233 fprintf(f, "ERROR: undefined anchor type.\n"); 6234 break; 6235 } 6236 break; 6237 6238 case NT_BREF: 6239 { 6240 int* p; 6241 BRefNode* br = NBREF(node); 6242 p = BACKREFS_P(br); 6243 fprintf(f, "<backref:%x>", (int )node); 6244 for (i = 0; i < br->back_num; i++) { 6245 if (i > 0) fputs(", ", f); 6246 fprintf(f, "%d", p[i]); 6247 } 6248 } 6249 break; 6250 6251 #ifdef USE_SUBEXP_CALL 6252 case NT_CALL: 6253 { 6254 CallNode* cn = NCALL(node); 6255 fprintf(f, "<call:%x>", (int )node); 6256 p_string(f, cn->name_end - cn->name, cn->name); 6257 } 6258 break; 6259 #endif 6260 6261 case NT_QTFR: 6262 fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node, 6263 NQTFR(node)->lower, NQTFR(node)->upper, 6264 (NQTFR(node)->greedy ? "" : "?")); 6265 print_indent_tree(f, NQTFR(node)->target, indent + add); 6266 break; 6267 6268 case NT_ENCLOSE: 6269 fprintf(f, "<enclose:%x> ", (int )node); 6270 switch (NENCLOSE(node)->type) { 6271 case ENCLOSE_OPTION: 6272 fprintf(f, "option:%d", NENCLOSE(node)->option); 6273 break; 6274 case ENCLOSE_MEMORY: 6275 fprintf(f, "memory:%d", NENCLOSE(node)->regnum); 6276 break; 6277 case ENCLOSE_STOP_BACKTRACK: 6278 fprintf(f, "stop-bt"); 6279 break; 6280 6281 default: 6282 break; 6283 } 6284 fprintf(f, "\n"); 6285 print_indent_tree(f, NENCLOSE(node)->target, indent + add); 6286 break; 6287 6288 default: 6289 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); 6290 break; 6291 } 6292 6293 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && 6294 type != NT_ENCLOSE) 6295 fprintf(f, "\n"); 6296 fflush(f); 6297 } 6298 #endif /* ONIG_DEBUG */ 6299 6300 #ifdef ONIG_DEBUG_PARSE_TREE 6301 static void 6302 print_tree(FILE* f, Node* node) 6303 { 6304 print_indent_tree(f, node, 0); 6305 } 6306 #endif 6307