1 /********************************************************************** 2 regexec.c - Oniguruma (regular expression library) 3 **********************************************************************/ 4 /*- 5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 6 * All rights reserved. 7 * 8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR> 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include "regint.h" 33 34 #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 35 36 #ifdef USE_CRNL_AS_LINE_TERMINATOR 37 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \ 38 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ 39 ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) 40 #endif 41 42 #ifdef USE_CAPTURE_HISTORY 43 static void history_tree_free(OnigCaptureTreeNode* node); 44 45 static void 46 history_tree_clear(OnigCaptureTreeNode* node) 47 { 48 int i; 49 50 if (IS_NOT_NULL(node)) { 51 for (i = 0; i < node->num_childs; i++) { 52 if (IS_NOT_NULL(node->childs[i])) { 53 history_tree_free(node->childs[i]); 54 } 55 } 56 for (i = 0; i < node->allocated; i++) { 57 node->childs[i] = (OnigCaptureTreeNode* )0; 58 } 59 node->num_childs = 0; 60 node->beg = ONIG_REGION_NOTPOS; 61 node->end = ONIG_REGION_NOTPOS; 62 node->group = -1; 63 } 64 } 65 66 static void 67 history_tree_free(OnigCaptureTreeNode* node) 68 { 69 history_tree_clear(node); 70 xfree(node); 71 } 72 73 static void 74 history_root_free(OnigRegion* r) 75 { 76 if (IS_NOT_NULL(r->history_root)) { 77 history_tree_free(r->history_root); 78 r->history_root = (OnigCaptureTreeNode* )0; 79 } 80 } 81 82 static OnigCaptureTreeNode* 83 history_node_new(void) 84 { 85 OnigCaptureTreeNode* node; 86 87 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode)); 88 CHECK_NULL_RETURN(node); 89 node->childs = (OnigCaptureTreeNode** )0; 90 node->allocated = 0; 91 node->num_childs = 0; 92 node->group = -1; 93 node->beg = ONIG_REGION_NOTPOS; 94 node->end = ONIG_REGION_NOTPOS; 95 96 return node; 97 } 98 99 static int 100 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) 101 { 102 #define HISTORY_TREE_INIT_ALLOC_SIZE 8 103 104 if (parent->num_childs >= parent->allocated) { 105 int n, i; 106 107 if (IS_NULL(parent->childs)) { 108 n = HISTORY_TREE_INIT_ALLOC_SIZE; 109 parent->childs = 110 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); 111 } 112 else { 113 n = parent->allocated * 2; 114 parent->childs = 115 (OnigCaptureTreeNode** )xrealloc(parent->childs, 116 sizeof(OnigCaptureTreeNode*) * n, 117 sizeof(OnigCaptureTreeNode*) * parent->allocated); 118 } 119 CHECK_NULL_RETURN_MEMERR(parent->childs); 120 for (i = parent->allocated; i < n; i++) { 121 parent->childs[i] = (OnigCaptureTreeNode* )0; 122 } 123 parent->allocated = n; 124 } 125 126 parent->childs[parent->num_childs] = child; 127 parent->num_childs++; 128 return 0; 129 } 130 131 static OnigCaptureTreeNode* 132 history_tree_clone(OnigCaptureTreeNode* node) 133 { 134 int i; 135 OnigCaptureTreeNode *clone, *child; 136 137 clone = history_node_new(); 138 CHECK_NULL_RETURN(clone); 139 140 clone->beg = node->beg; 141 clone->end = node->end; 142 for (i = 0; i < node->num_childs; i++) { 143 child = history_tree_clone(node->childs[i]); 144 if (IS_NULL(child)) { 145 history_tree_free(clone); 146 return (OnigCaptureTreeNode* )0; 147 } 148 history_tree_add_child(clone, child); 149 } 150 151 return clone; 152 } 153 154 extern OnigCaptureTreeNode* 155 onig_get_capture_tree(OnigRegion* region) 156 { 157 return region->history_root; 158 } 159 #endif /* USE_CAPTURE_HISTORY */ 160 161 extern void 162 onig_region_clear(OnigRegion* region) 163 { 164 int i; 165 166 for (i = 0; i < region->num_regs; i++) { 167 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; 168 } 169 #ifdef USE_CAPTURE_HISTORY 170 history_root_free(region); 171 #endif 172 } 173 174 extern int 175 onig_region_resize(OnigRegion* region, int n) 176 { 177 region->num_regs = n; 178 179 if (n < ONIG_NREGION) 180 n = ONIG_NREGION; 181 182 if (region->allocated == 0) { 183 region->beg = (int* )xmalloc(n * sizeof(int)); 184 region->end = (int* )xmalloc(n * sizeof(int)); 185 186 if (region->beg == 0 || region->end == 0) 187 return ONIGERR_MEMORY; 188 189 region->allocated = n; 190 } 191 else if (region->allocated < n) { 192 region->beg = (int* )xrealloc(region->beg, n * sizeof(int), region->allocated * sizeof(int)); 193 region->end = (int* )xrealloc(region->end, n * sizeof(int), region->allocated * sizeof(int)); 194 195 if (region->beg == 0 || region->end == 0) 196 return ONIGERR_MEMORY; 197 198 region->allocated = n; 199 } 200 201 return 0; 202 } 203 204 static int 205 onig_region_resize_clear(OnigRegion* region, int n) 206 { 207 int r; 208 209 r = onig_region_resize(region, n); 210 if (r != 0) return r; 211 onig_region_clear(region); 212 return 0; 213 } 214 215 extern int 216 onig_region_set(OnigRegion* region, int at, int beg, int end) 217 { 218 if (at < 0) return ONIGERR_INVALID_ARGUMENT; 219 220 if (at >= region->allocated) { 221 int r = onig_region_resize(region, at + 1); 222 if (r < 0) return r; 223 } 224 225 region->beg[at] = beg; 226 region->end[at] = end; 227 return 0; 228 } 229 230 extern void 231 onig_region_init(OnigRegion* region) 232 { 233 region->num_regs = 0; 234 region->allocated = 0; 235 region->beg = (int* )0; 236 region->end = (int* )0; 237 region->history_root = (OnigCaptureTreeNode* )0; 238 } 239 240 extern OnigRegion* 241 onig_region_new(void) 242 { 243 OnigRegion* r; 244 245 r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); 246 if (r != NULL) { 247 onig_region_init(r); 248 } 249 return r; 250 } 251 252 extern void 253 onig_region_free(OnigRegion* r, int free_self) 254 { 255 if (r) { 256 if (r->allocated > 0) { 257 if (r->beg) xfree(r->beg); 258 if (r->end) xfree(r->end); 259 r->allocated = 0; 260 } 261 #ifdef USE_CAPTURE_HISTORY 262 history_root_free(r); 263 #endif 264 if (free_self) xfree(r); 265 } 266 } 267 268 extern void 269 onig_region_copy(OnigRegion* to, OnigRegion* from) 270 { 271 #define RREGC_SIZE (sizeof(int) * from->num_regs) 272 int i; 273 274 if (to == from) return; 275 276 if (to->allocated == 0) { 277 if (from->num_regs > 0) { 278 to->beg = (int* )xmalloc(RREGC_SIZE); 279 to->end = (int* )xmalloc(RREGC_SIZE); 280 to->allocated = from->num_regs; 281 } 282 } 283 else if (to->allocated < from->num_regs) { 284 to->beg = (int* )xrealloc(to->beg, RREGC_SIZE, sizeof(int) * to->allocated); 285 to->end = (int* )xrealloc(to->end, RREGC_SIZE, sizeof(int) * to->allocated); 286 to->allocated = from->num_regs; 287 } 288 289 if (to->beg == NULL || to->end == NULL) { 290 return; 291 } 292 293 for (i = 0; i < from->num_regs; i++) { 294 to->beg[i] = from->beg[i]; 295 to->end[i] = from->end[i]; 296 } 297 to->num_regs = from->num_regs; 298 299 #ifdef USE_CAPTURE_HISTORY 300 history_root_free(to); 301 302 if (IS_NOT_NULL(from->history_root)) { 303 to->history_root = history_tree_clone(from->history_root); 304 } 305 #endif 306 } 307 308 309 /** stack **/ 310 #define INVALID_STACK_INDEX -1 311 312 /* stack type */ 313 /* used by normal-POP */ 314 #define STK_ALT 0x0001 315 #define STK_LOOK_BEHIND_NOT 0x0002 316 #define STK_POS_NOT 0x0003 317 /* handled by normal-POP */ 318 #define STK_MEM_START 0x0100 319 #define STK_MEM_END 0x8200 320 #define STK_REPEAT_INC 0x0300 321 #define STK_STATE_CHECK_MARK 0x1000 322 /* avoided by normal-POP */ 323 #define STK_NULL_CHECK_START 0x3000 324 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */ 325 #define STK_MEM_END_MARK 0x8400 326 #define STK_POS 0x0500 /* used when POP-POS */ 327 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ 328 #define STK_REPEAT 0x0700 329 #define STK_CALL_FRAME 0x0800 330 #define STK_RETURN 0x0900 331 #define STK_VOID 0x0a00 /* for fill a blank */ 332 333 /* stack type check mask */ 334 #define STK_MASK_POP_USED 0x00ff 335 #define STK_MASK_TO_VOID_TARGET 0x10ff 336 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ 337 338 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 339 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ 340 (msa).stack_p = (void* )0;\ 341 (msa).options = (arg_option);\ 342 (msa).region = (arg_region);\ 343 (msa).start = (arg_start);\ 344 (msa).best_len = ONIG_MISMATCH;\ 345 } while(0) 346 #else 347 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ 348 (msa).stack_p = (void* )0;\ 349 (msa).options = (arg_option);\ 350 (msa).region = (arg_region);\ 351 (msa).start = (arg_start);\ 352 } while(0) 353 #endif 354 355 #ifdef USE_COMBINATION_EXPLOSION_CHECK 356 357 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 358 359 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ 360 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ 361 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ 362 offset = ((offset) * (state_num)) >> 3;\ 363 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ 364 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \ 365 (msa).state_check_buff = (void* )xmalloc(size);\ 366 else \ 367 (msa).state_check_buff = (void* )xalloca(size);\ 368 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ 369 (size_t )(size - (offset))); \ 370 (msa).state_check_buff_size = size;\ 371 }\ 372 else {\ 373 (msa).state_check_buff = (void* )0;\ 374 (msa).state_check_buff_size = 0;\ 375 }\ 376 }\ 377 else {\ 378 (msa).state_check_buff = (void* )0;\ 379 (msa).state_check_buff_size = 0;\ 380 }\ 381 } while(0) 382 383 #define MATCH_ARG_FREE(msa) do {\ 384 if ((msa).stack_p) xfree((msa).stack_p);\ 385 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ 386 if ((msa).state_check_buff) xfree((msa).state_check_buff);\ 387 }\ 388 } while(0) 389 #else 390 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) 391 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) 392 #endif 393 394 395 396 #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ 397 if (msa->stack_p) {\ 398 alloc_addr = (char* )xmalloc(sizeof(char*) * (ptr_num));\ 399 stk_alloc = (OnigStackType* )(msa->stack_p);\ 400 stk_base = stk_alloc;\ 401 stk = stk_base;\ 402 stk_end = stk_base + msa->stack_n;\ 403 }\ 404 else {\ 405 alloc_addr = (char* )xmalloc(sizeof(char*) * (ptr_num)\ 406 + sizeof(OnigStackType) * (stack_num));\ 407 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ 408 stk_base = stk_alloc;\ 409 stk = stk_base;\ 410 stk_end = stk_base + (stack_num);\ 411 }\ 412 } while(0) 413 414 #define STACK_SAVE do{\ 415 if (stk_base != stk_alloc) {\ 416 msa->stack_p = stk_base;\ 417 msa->stack_n = (int)(stk_end - stk_base);\ 418 };\ 419 } while(0) 420 421 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; 422 423 extern unsigned int 424 onig_get_match_stack_limit_size(void) 425 { 426 return MatchStackLimitSize; 427 } 428 429 extern int 430 onig_set_match_stack_limit_size(unsigned int size) 431 { 432 MatchStackLimitSize = size; 433 return 0; 434 } 435 436 static int 437 stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, 438 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) 439 { 440 unsigned int n; 441 OnigStackType *x, *stk_base, *stk_end, *stk; 442 443 stk_base = *arg_stk_base; 444 stk_end = *arg_stk_end; 445 stk = *arg_stk; 446 447 n = (unsigned int)(stk_end - stk_base); 448 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { 449 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); 450 if (IS_NULL(x)) { 451 STACK_SAVE; 452 return ONIGERR_MEMORY; 453 } 454 xmemcpy(x, stk_base, n * sizeof(OnigStackType)); 455 n *= 2; 456 } 457 else { 458 n *= 2; 459 if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { 460 if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) 461 return ONIGERR_MATCH_STACK_LIMIT_OVER; 462 else 463 n = MatchStackLimitSize; 464 } 465 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n, sizeof(OnigStackType) * (stk_end - stk_base)); 466 if (IS_NULL(x)) { 467 STACK_SAVE; 468 return ONIGERR_MEMORY; 469 } 470 } 471 *arg_stk = x + (stk - stk_base); 472 *arg_stk_base = x; 473 *arg_stk_end = x + n; 474 return 0; 475 } 476 477 #define STACK_ENSURE(n) do {\ 478 if (stk_end - stk < (n)) {\ 479 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ 480 if (r != 0) { STACK_SAVE; return r; } \ 481 }\ 482 } while(0) 483 484 #define STACK_AT(index) (stk_base + (index)) 485 #define GET_STACK_INDEX(stk) ((OnigStackIndex)((stk) - stk_base)) 486 487 #define STACK_PUSH_TYPE(stack_type) do {\ 488 STACK_ENSURE(1);\ 489 stk->type = (stack_type);\ 490 STACK_INC;\ 491 } while(0) 492 493 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) 494 495 #ifdef USE_COMBINATION_EXPLOSION_CHECK 496 #define STATE_CHECK_POS(s,snum) \ 497 (((s) - str) * num_comb_exp_check + ((snum) - 1)) 498 #define STATE_CHECK_VAL(v,snum) do {\ 499 if (state_check_buff != NULL) {\ 500 int x = STATE_CHECK_POS(s,snum);\ 501 (v) = state_check_buff[x/8] & (1<<(x%8));\ 502 }\ 503 else (v) = 0;\ 504 } while(0) 505 506 507 #define ELSE_IF_STATE_CHECK_MARK(stk) \ 508 else if ((stk)->type == STK_STATE_CHECK_MARK) { \ 509 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ 510 state_check_buff[x/8] |= (1<<(x%8)); \ 511 } 512 513 #define STACK_PUSH(stack_type,pat,s,sprev) do {\ 514 STACK_ENSURE(1);\ 515 stk->type = (stack_type);\ 516 stk->u.state.pcode = (pat);\ 517 stk->u.state.pstr = (s);\ 518 stk->u.state.pstr_prev = (sprev);\ 519 stk->u.state.state_check = 0;\ 520 STACK_INC;\ 521 } while(0) 522 523 #define STACK_PUSH_ENSURED(stack_type,pat) do {\ 524 stk->type = (stack_type);\ 525 stk->u.state.pcode = (pat);\ 526 stk->u.state.state_check = 0;\ 527 STACK_INC;\ 528 } while(0) 529 530 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ 531 STACK_ENSURE(1);\ 532 stk->type = STK_ALT;\ 533 stk->u.state.pcode = (pat);\ 534 stk->u.state.pstr = (s);\ 535 stk->u.state.pstr_prev = (sprev);\ 536 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ 537 STACK_INC;\ 538 } while(0) 539 540 #define STACK_PUSH_STATE_CHECK(s,snum) do {\ 541 if (state_check_buff != NULL) {\ 542 STACK_ENSURE(1);\ 543 stk->type = STK_STATE_CHECK_MARK;\ 544 stk->u.state.pstr = (s);\ 545 stk->u.state.state_check = (snum);\ 546 STACK_INC;\ 547 }\ 548 } while(0) 549 550 #else /* USE_COMBINATION_EXPLOSION_CHECK */ 551 552 #define ELSE_IF_STATE_CHECK_MARK(stk) 553 554 #define STACK_PUSH(stack_type,pat,s,sprev) do {\ 555 STACK_ENSURE(1);\ 556 stk->type = (stack_type);\ 557 stk->u.state.pcode = (pat);\ 558 stk->u.state.pstr = (s);\ 559 stk->u.state.pstr_prev = (sprev);\ 560 STACK_INC;\ 561 } while(0) 562 563 #define STACK_PUSH_ENSURED(stack_type,pat) do {\ 564 stk->type = (stack_type);\ 565 stk->u.state.pcode = (pat);\ 566 STACK_INC;\ 567 } while(0) 568 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 569 570 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) 571 #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) 572 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) 573 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) 574 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \ 575 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev) 576 577 #define STACK_PUSH_REPEAT(id, pat) do {\ 578 STACK_ENSURE(1);\ 579 stk->type = STK_REPEAT;\ 580 stk->u.repeat.num = (id);\ 581 stk->u.repeat.pcode = (pat);\ 582 stk->u.repeat.count = 0;\ 583 STACK_INC;\ 584 } while(0) 585 586 #define STACK_PUSH_REPEAT_INC(sindex) do {\ 587 STACK_ENSURE(1);\ 588 stk->type = STK_REPEAT_INC;\ 589 stk->u.repeat_inc.si = (sindex);\ 590 STACK_INC;\ 591 } while(0) 592 593 #define STACK_PUSH_MEM_START(mnum, s) do {\ 594 STACK_ENSURE(1);\ 595 stk->type = STK_MEM_START;\ 596 stk->u.mem.num = (int)(mnum);\ 597 stk->u.mem.pstr = (s);\ 598 stk->u.mem.start = mem_start_stk[mnum];\ 599 stk->u.mem.end = mem_end_stk[mnum];\ 600 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ 601 mem_end_stk[mnum] = INVALID_STACK_INDEX;\ 602 STACK_INC;\ 603 } while(0) 604 605 #define STACK_PUSH_MEM_END(mnum, s) do {\ 606 STACK_ENSURE(1);\ 607 stk->type = STK_MEM_END;\ 608 stk->u.mem.num = (mnum);\ 609 stk->u.mem.pstr = (s);\ 610 stk->u.mem.start = mem_start_stk[mnum];\ 611 stk->u.mem.end = mem_end_stk[mnum];\ 612 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ 613 STACK_INC;\ 614 } while(0) 615 616 #define STACK_PUSH_MEM_END_MARK(mnum) do {\ 617 STACK_ENSURE(1);\ 618 stk->type = STK_MEM_END_MARK;\ 619 stk->u.mem.num = (mnum);\ 620 STACK_INC;\ 621 } while(0) 622 623 #define STACK_GET_MEM_START(mnum, k) do {\ 624 int level = 0;\ 625 k = stk;\ 626 while (k > stk_base) {\ 627 k--;\ 628 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ 629 && k->u.mem.num == (mnum)) {\ 630 level++;\ 631 }\ 632 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ 633 if (level == 0) break;\ 634 level--;\ 635 }\ 636 }\ 637 } while(0) 638 639 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ 640 int level = 0;\ 641 while (k < stk) {\ 642 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ 643 if (level == 0) (start) = k->u.mem.pstr;\ 644 level++;\ 645 }\ 646 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ 647 level--;\ 648 if (level == 0) {\ 649 (end) = k->u.mem.pstr;\ 650 break;\ 651 }\ 652 }\ 653 k++;\ 654 }\ 655 } while(0) 656 657 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ 658 STACK_ENSURE(1);\ 659 stk->type = STK_NULL_CHECK_START;\ 660 stk->u.null_check.num = (cnum);\ 661 stk->u.null_check.pstr = (s);\ 662 STACK_INC;\ 663 } while(0) 664 665 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\ 666 STACK_ENSURE(1);\ 667 stk->type = STK_NULL_CHECK_END;\ 668 stk->u.null_check.num = (cnum);\ 669 STACK_INC;\ 670 } while(0) 671 672 #define STACK_PUSH_CALL_FRAME(pat) do {\ 673 STACK_ENSURE(1);\ 674 stk->type = STK_CALL_FRAME;\ 675 stk->u.call_frame.ret_addr = (pat);\ 676 STACK_INC;\ 677 } while(0) 678 679 #define STACK_PUSH_RETURN do {\ 680 STACK_ENSURE(1);\ 681 stk->type = STK_RETURN;\ 682 STACK_INC;\ 683 } while(0) 684 685 686 #ifdef ONIG_DEBUG 687 #define STACK_BASE_CHECK(p, at) \ 688 if ((p) < stk_base) {\ 689 fprintf(stderr, "at %s\n", at);\ 690 goto stack_error;\ 691 } 692 #else 693 #define STACK_BASE_CHECK(p, at) 694 #endif 695 696 #define STACK_POP_ONE do {\ 697 stk--;\ 698 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ 699 } while(0) 700 701 #define STACK_POP do {\ 702 switch (pop_level) {\ 703 case STACK_POP_LEVEL_FREE:\ 704 while (1) {\ 705 stk--;\ 706 STACK_BASE_CHECK(stk, "STACK_POP"); \ 707 if ((stk->type & STK_MASK_POP_USED) != 0) break;\ 708 ELSE_IF_STATE_CHECK_MARK(stk);\ 709 }\ 710 break;\ 711 case STACK_POP_LEVEL_MEM_START:\ 712 while (1) {\ 713 stk--;\ 714 STACK_BASE_CHECK(stk, "STACK_POP 2"); \ 715 if ((stk->type & STK_MASK_POP_USED) != 0) break;\ 716 else if (stk->type == STK_MEM_START) {\ 717 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 718 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 719 }\ 720 ELSE_IF_STATE_CHECK_MARK(stk);\ 721 }\ 722 break;\ 723 default:\ 724 while (1) {\ 725 stk--;\ 726 STACK_BASE_CHECK(stk, "STACK_POP 3"); \ 727 if ((stk->type & STK_MASK_POP_USED) != 0) break;\ 728 else if (stk->type == STK_MEM_START) {\ 729 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 730 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 731 }\ 732 else if (stk->type == STK_REPEAT_INC) {\ 733 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ 734 }\ 735 else if (stk->type == STK_MEM_END) {\ 736 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 737 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 738 }\ 739 ELSE_IF_STATE_CHECK_MARK(stk);\ 740 }\ 741 break;\ 742 }\ 743 } while(0) 744 745 #define STACK_POP_TIL_POS_NOT do {\ 746 while (1) {\ 747 stk--;\ 748 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ 749 if (stk->type == STK_POS_NOT) break;\ 750 else if (stk->type == STK_MEM_START) {\ 751 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 752 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 753 }\ 754 else if (stk->type == STK_REPEAT_INC) {\ 755 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ 756 }\ 757 else if (stk->type == STK_MEM_END) {\ 758 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 759 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 760 }\ 761 ELSE_IF_STATE_CHECK_MARK(stk);\ 762 }\ 763 } while(0) 764 765 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ 766 while (1) {\ 767 stk--;\ 768 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ 769 if (stk->type == STK_LOOK_BEHIND_NOT) break;\ 770 else if (stk->type == STK_MEM_START) {\ 771 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 772 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 773 }\ 774 else if (stk->type == STK_REPEAT_INC) {\ 775 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ 776 }\ 777 else if (stk->type == STK_MEM_END) {\ 778 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 779 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 780 }\ 781 ELSE_IF_STATE_CHECK_MARK(stk);\ 782 }\ 783 } while(0) 784 785 #define STACK_POS_END(k) do {\ 786 k = stk;\ 787 while (1) {\ 788 k--;\ 789 STACK_BASE_CHECK(k, "STACK_POS_END"); \ 790 if (IS_TO_VOID_TARGET(k)) {\ 791 k->type = STK_VOID;\ 792 }\ 793 else if (k->type == STK_POS) {\ 794 k->type = STK_VOID;\ 795 break;\ 796 }\ 797 }\ 798 } while(0) 799 800 #define STACK_STOP_BT_END do {\ 801 OnigStackType *k = stk;\ 802 while (1) {\ 803 k--;\ 804 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ 805 if (IS_TO_VOID_TARGET(k)) {\ 806 k->type = STK_VOID;\ 807 }\ 808 else if (k->type == STK_STOP_BT) {\ 809 k->type = STK_VOID;\ 810 break;\ 811 }\ 812 }\ 813 } while(0) 814 815 #define STACK_NULL_CHECK(isnull,id,s) do {\ 816 OnigStackType* k = stk;\ 817 while (1) {\ 818 k--;\ 819 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ 820 if (k->type == STK_NULL_CHECK_START) {\ 821 if (k->u.null_check.num == (id)) {\ 822 (isnull) = (k->u.null_check.pstr == (s));\ 823 break;\ 824 }\ 825 }\ 826 }\ 827 } while(0) 828 829 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\ 830 int level = 0;\ 831 OnigStackType* k = stk;\ 832 while (1) {\ 833 k--;\ 834 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ 835 if (k->type == STK_NULL_CHECK_START) {\ 836 if (k->u.null_check.num == (id)) {\ 837 if (level == 0) {\ 838 (isnull) = (k->u.null_check.pstr == (s));\ 839 break;\ 840 }\ 841 else level--;\ 842 }\ 843 }\ 844 else if (k->type == STK_NULL_CHECK_END) {\ 845 level++;\ 846 }\ 847 }\ 848 } while(0) 849 850 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ 851 OnigStackType* k = stk;\ 852 while (1) {\ 853 k--;\ 854 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ 855 if (k->type == STK_NULL_CHECK_START) {\ 856 if (k->u.null_check.num == (id)) {\ 857 if (k->u.null_check.pstr != (s)) {\ 858 (isnull) = 0;\ 859 break;\ 860 }\ 861 else {\ 862 UChar* endp;\ 863 (isnull) = 1;\ 864 while (k < stk) {\ 865 if (k->type == STK_MEM_START) {\ 866 if (k->u.mem.end == INVALID_STACK_INDEX) {\ 867 (isnull) = 0; break;\ 868 }\ 869 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ 870 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ 871 else\ 872 endp = (UChar* )(UINTN)k->u.mem.end;\ 873 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ 874 (isnull) = 0; break;\ 875 }\ 876 else if (endp != s) {\ 877 (isnull) = -1; /* empty, but position changed */ \ 878 }\ 879 }\ 880 k++;\ 881 }\ 882 break;\ 883 }\ 884 }\ 885 }\ 886 }\ 887 } while(0) 888 889 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ 890 int level = 0;\ 891 OnigStackType* k = stk;\ 892 while (1) {\ 893 k--;\ 894 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ 895 if (k->type == STK_NULL_CHECK_START) {\ 896 if (k->u.null_check.num == (id)) {\ 897 if (level == 0) {\ 898 if (k->u.null_check.pstr != (s)) {\ 899 (isnull) = 0;\ 900 break;\ 901 }\ 902 else {\ 903 UChar* endp;\ 904 (isnull) = 1;\ 905 while (k < stk) {\ 906 if (k->type == STK_MEM_START) {\ 907 if (k->u.mem.end == INVALID_STACK_INDEX) {\ 908 (isnull) = 0; break;\ 909 }\ 910 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ 911 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ 912 else\ 913 endp = (UChar* )(UINTN)k->u.mem.end;\ 914 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ 915 (isnull) = 0; break;\ 916 }\ 917 else if (endp != s) {\ 918 (isnull) = -1; /* empty, but position changed */ \ 919 }\ 920 }\ 921 k++;\ 922 }\ 923 break;\ 924 }\ 925 }\ 926 else {\ 927 level--;\ 928 }\ 929 }\ 930 }\ 931 else if (k->type == STK_NULL_CHECK_END) {\ 932 if (k->u.null_check.num == (id)) level++;\ 933 }\ 934 }\ 935 } while(0) 936 937 #define STACK_GET_REPEAT(id, k) do {\ 938 int level = 0;\ 939 k = stk;\ 940 while (1) {\ 941 k--;\ 942 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ 943 if (k->type == STK_REPEAT) {\ 944 if (level == 0) {\ 945 if (k->u.repeat.num == (id)) {\ 946 break;\ 947 }\ 948 }\ 949 }\ 950 else if (k->type == STK_CALL_FRAME) level--;\ 951 else if (k->type == STK_RETURN) level++;\ 952 }\ 953 } while(0) 954 955 #define STACK_RETURN(addr) do {\ 956 int level = 0;\ 957 OnigStackType* k = stk;\ 958 while (1) {\ 959 k--;\ 960 STACK_BASE_CHECK(k, "STACK_RETURN"); \ 961 if (k->type == STK_CALL_FRAME) {\ 962 if (level == 0) {\ 963 (addr) = k->u.call_frame.ret_addr;\ 964 break;\ 965 }\ 966 else level--;\ 967 }\ 968 else if (k->type == STK_RETURN)\ 969 level++;\ 970 }\ 971 } while(0) 972 973 974 #define STRING_CMP(s1,s2,len) do {\ 975 while (len-- > 0) {\ 976 if (*s1++ != *s2++) goto fail;\ 977 }\ 978 } while(0) 979 980 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\ 981 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \ 982 goto fail; \ 983 } while(0) 984 985 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, 986 UChar* s1, UChar** ps2, int mblen) 987 { 988 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 989 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 990 UChar *p1, *p2, *end1, *s2, *end2; 991 int len1, len2; 992 993 s2 = *ps2; 994 end1 = s1 + mblen; 995 end2 = s2 + mblen; 996 while (s1 < end1) { 997 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1); 998 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2); 999 if (len1 != len2) return 0; 1000 p1 = buf1; 1001 p2 = buf2; 1002 while (len1-- > 0) { 1003 if (*p1 != *p2) return 0; 1004 p1++; 1005 p2++; 1006 } 1007 } 1008 1009 *ps2 = s2; 1010 return 1; 1011 } 1012 1013 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\ 1014 is_fail = 0;\ 1015 while (len-- > 0) {\ 1016 if (*s1++ != *s2++) {\ 1017 is_fail = 1; break;\ 1018 }\ 1019 }\ 1020 } while(0) 1021 1022 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\ 1023 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \ 1024 is_fail = 1; \ 1025 else \ 1026 is_fail = 0; \ 1027 } while(0) 1028 1029 1030 #define IS_EMPTY_STR (str == end) 1031 #define ON_STR_BEGIN(s) ((s) == str) 1032 #define ON_STR_END(s) ((s) == end) 1033 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 1034 #define DATA_ENSURE_CHECK1 (s < right_range) 1035 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) 1036 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail 1037 #else 1038 #define DATA_ENSURE_CHECK1 (s < end) 1039 #define DATA_ENSURE_CHECK(n) (s + (n) <= end) 1040 #define DATA_ENSURE(n) if (s + (n) > end) goto fail 1041 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ 1042 1043 1044 #ifdef USE_CAPTURE_HISTORY 1045 static int 1046 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, 1047 OnigStackType* stk_top, UChar* str, regex_t* reg) 1048 { 1049 int n, r; 1050 OnigCaptureTreeNode* child; 1051 OnigStackType* k = *kp; 1052 1053 while (k < stk_top) { 1054 if (k->type == STK_MEM_START) { 1055 n = k->u.mem.num; 1056 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && 1057 BIT_STATUS_AT(reg->capture_history, n) != 0) { 1058 child = history_node_new(); 1059 CHECK_NULL_RETURN_MEMERR(child); 1060 child->group = n; 1061 child->beg = (int )(k->u.mem.pstr - str); 1062 r = history_tree_add_child(node, child); 1063 if (r != 0) return r; 1064 *kp = (k + 1); 1065 r = make_capture_history_tree(child, kp, stk_top, str, reg); 1066 if (r != 0) return r; 1067 1068 k = *kp; 1069 child->end = (int )(k->u.mem.pstr - str); 1070 } 1071 } 1072 else if (k->type == STK_MEM_END) { 1073 if (k->u.mem.num == node->group) { 1074 node->end = (int )(k->u.mem.pstr - str); 1075 *kp = k; 1076 return 0; 1077 } 1078 } 1079 k++; 1080 } 1081 1082 return 1; /* 1: root node ending. */ 1083 } 1084 #endif 1085 1086 #ifdef USE_BACKREF_WITH_LEVEL 1087 static int mem_is_in_memp(int mem, int num, UChar* memp) 1088 { 1089 int i; 1090 MemNumType m; 1091 1092 for (i = 0; i < num; i++) { 1093 GET_MEMNUM_INC(m, memp); 1094 if (mem == (int )m) return 1; 1095 } 1096 return 0; 1097 } 1098 1099 static int backref_match_at_nested_level(regex_t* reg 1100 , OnigStackType* top, OnigStackType* stk_base 1101 , int ignore_case, int case_fold_flag 1102 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) 1103 { 1104 UChar *ss, *p, *pstart, *pend = NULL_UCHARP; 1105 int level; 1106 OnigStackType* k; 1107 1108 level = 0; 1109 k = top; 1110 k--; 1111 while (k >= stk_base) { 1112 if (k->type == STK_CALL_FRAME) { 1113 level--; 1114 } 1115 else if (k->type == STK_RETURN) { 1116 level++; 1117 } 1118 else if (level == nest) { 1119 if (k->type == STK_MEM_START) { 1120 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { 1121 pstart = k->u.mem.pstr; 1122 if (pend != NULL_UCHARP) { 1123 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ 1124 p = pstart; 1125 ss = *s; 1126 1127 if (ignore_case != 0) { 1128 if (string_cmp_ic(reg->enc, case_fold_flag, 1129 pstart, &ss, (int )(pend - pstart)) == 0) 1130 return 0; /* or goto next_mem; */ 1131 } 1132 else { 1133 while (p < pend) { 1134 if (*p++ != *ss++) return 0; /* or goto next_mem; */ 1135 } 1136 } 1137 1138 *s = ss; 1139 return 1; 1140 } 1141 } 1142 } 1143 else if (k->type == STK_MEM_END) { 1144 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { 1145 pend = k->u.mem.pstr; 1146 } 1147 } 1148 } 1149 k--; 1150 } 1151 1152 return 0; 1153 } 1154 #endif /* USE_BACKREF_WITH_LEVEL */ 1155 1156 1157 #ifdef ONIG_DEBUG_STATISTICS 1158 1159 #define USE_TIMEOFDAY 1160 1161 #ifdef USE_TIMEOFDAY 1162 #ifdef HAVE_SYS_TIME_H 1163 #include <sys/time.h> 1164 #endif 1165 #ifdef HAVE_UNISTD_H 1166 #include <unistd.h> 1167 #endif 1168 static struct timeval ts, te; 1169 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) 1170 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ 1171 (((te).tv_sec - (ts).tv_sec)*1000000)) 1172 #else 1173 #ifdef HAVE_SYS_TIMES_H 1174 #include <sys/times.h> 1175 #endif 1176 static struct tms ts, te; 1177 #define GETTIME(t) times(&(t)) 1178 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) 1179 #endif 1180 1181 static int OpCounter[256]; 1182 static int OpPrevCounter[256]; 1183 static unsigned long OpTime[256]; 1184 static int OpCurr = OP_FINISH; 1185 static int OpPrevTarget = OP_FAIL; 1186 static int MaxStackDepth = 0; 1187 1188 #define MOP_IN(opcode) do {\ 1189 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ 1190 OpCurr = opcode;\ 1191 OpCounter[opcode]++;\ 1192 GETTIME(ts);\ 1193 } while(0) 1194 1195 #define MOP_OUT do {\ 1196 GETTIME(te);\ 1197 OpTime[OpCurr] += TIMEDIFF(te, ts);\ 1198 } while(0) 1199 1200 extern void 1201 onig_statistics_init(void) 1202 { 1203 int i; 1204 for (i = 0; i < 256; i++) { 1205 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; 1206 } 1207 MaxStackDepth = 0; 1208 } 1209 1210 extern void 1211 onig_print_statistics(FILE* f) 1212 { 1213 int i; 1214 fprintf(f, " count prev time\n"); 1215 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { 1216 fprintf(f, "%8d: %8d: %10ld: %s\n", 1217 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); 1218 } 1219 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); 1220 } 1221 1222 #define STACK_INC do {\ 1223 stk++;\ 1224 if (stk - stk_base > MaxStackDepth) \ 1225 MaxStackDepth = stk - stk_base;\ 1226 } while(0) 1227 1228 #else 1229 #define STACK_INC stk++ 1230 1231 #define MOP_IN(opcode) 1232 #define MOP_OUT 1233 #endif 1234 1235 1236 /* matching region of POSIX API */ 1237 typedef int regoff_t; 1238 1239 typedef struct { 1240 regoff_t rm_so; 1241 regoff_t rm_eo; 1242 } posix_regmatch_t; 1243 1244 /* match data(str - end) from position (sstart). */ 1245 /* if sstart == str then set sprev to NULL. */ 1246 static int 1247 match_at(regex_t* reg, const UChar* str, const UChar* end, 1248 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 1249 const UChar* right_range, 1250 #endif 1251 const UChar* sstart, UChar* sprev, OnigMatchArg* msa) 1252 { 1253 static UChar FinishCode[] = { OP_FINISH }; 1254 1255 int i, n, num_mem, best_len, pop_level; 1256 LengthType tlen, tlen2; 1257 MemNumType mem; 1258 RelAddrType addr; 1259 OnigOptionType option = reg->options; 1260 OnigEncoding encode = reg->enc; 1261 OnigCaseFoldType case_fold_flag = reg->case_fold_flag; 1262 UChar *s, *q, *sbegin; 1263 UChar *p = reg->p; 1264 char *alloca_base; 1265 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; 1266 OnigStackType *stkp; /* used as any purpose. */ 1267 OnigStackIndex si; 1268 OnigStackIndex *repeat_stk; 1269 OnigStackIndex *mem_start_stk, *mem_end_stk; 1270 #ifdef USE_COMBINATION_EXPLOSION_CHECK 1271 int scv; 1272 unsigned char* state_check_buff = msa->state_check_buff; 1273 int num_comb_exp_check = reg->num_comb_exp_check; 1274 #endif 1275 n = reg->num_repeat + reg->num_mem * 2; 1276 1277 STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); 1278 pop_level = reg->stack_pop_level; 1279 num_mem = reg->num_mem; 1280 repeat_stk = (OnigStackIndex* )alloca_base; 1281 1282 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); 1283 mem_end_stk = mem_start_stk + num_mem; 1284 mem_start_stk--; /* for index start from 1, 1285 mem_start_stk[1]..mem_start_stk[num_mem] */ 1286 mem_end_stk--; /* for index start from 1, 1287 mem_end_stk[1]..mem_end_stk[num_mem] */ 1288 for (i = 1; i <= num_mem; i++) { 1289 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; 1290 } 1291 1292 #ifdef ONIG_DEBUG_MATCH 1293 fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n", 1294 (int )str, (int )end, (int )sstart, (int )sprev); 1295 fprintf(stderr, "size: %d, start offset: %d\n", 1296 (int )(end - str), (int )(sstart - str)); 1297 #endif 1298 1299 STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ 1300 best_len = ONIG_MISMATCH; 1301 s = (UChar* )sstart; 1302 while (1) { 1303 #ifdef ONIG_DEBUG_MATCH 1304 { 1305 UChar *q, *bp, buf[50]; 1306 int len; 1307 fprintf(stderr, "%4d> \"", (int )(s - str)); 1308 bp = buf; 1309 for (i = 0, q = s; i < 7 && q < end; i++) { 1310 len = enclen(encode, q); 1311 while (len-- > 0) *bp++ = *q++; 1312 } 1313 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } 1314 else { xmemcpy(bp, "\"", 1); bp += 1; } 1315 *bp = 0; 1316 fputs((char* )buf, stderr); 1317 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); 1318 onig_print_compiled_byte_code(stderr, p, NULL, encode); 1319 fprintf(stderr, "\n"); 1320 } 1321 #endif 1322 1323 sbegin = s; 1324 switch (*p++) { 1325 case OP_END: MOP_IN(OP_END); 1326 n = (int)(s - sstart); 1327 if (n > best_len) { 1328 OnigRegion* region; 1329 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 1330 if (IS_FIND_LONGEST(option)) { 1331 if (n > msa->best_len) { 1332 msa->best_len = n; 1333 msa->best_s = (UChar* )sstart; 1334 } 1335 else 1336 goto end_best_len; 1337 } 1338 #endif 1339 best_len = n; 1340 region = msa->region; 1341 if (region) { 1342 #ifdef USE_POSIX_API_REGION_OPTION 1343 if (IS_POSIX_REGION(msa->options)) { 1344 posix_regmatch_t* rmt = (posix_regmatch_t* )region; 1345 1346 rmt[0].rm_so = (regoff_t)(sstart - str); 1347 rmt[0].rm_eo = (regoff_t)(s - str); 1348 for (i = 1; i <= num_mem; i++) { 1349 if (mem_end_stk[i] != INVALID_STACK_INDEX) { 1350 if (BIT_STATUS_AT(reg->bt_mem_start, i)) 1351 rmt[i].rm_so = (regoff_t)(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); 1352 else 1353 rmt[i].rm_so = (regoff_t)((UChar* )((void* )(UINTN)(mem_start_stk[i])) - str); 1354 1355 rmt[i].rm_eo = (regoff_t)((BIT_STATUS_AT(reg->bt_mem_end, i) 1356 ? STACK_AT(mem_end_stk[i])->u.mem.pstr 1357 : (UChar* )((void* )(UINTN)mem_end_stk[i])) - str); 1358 } 1359 else { 1360 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; 1361 } 1362 } 1363 } 1364 else { 1365 #endif /* USE_POSIX_API_REGION_OPTION */ 1366 region->beg[0] = (int)(sstart - str); 1367 region->end[0] = (int)(s - str); 1368 for (i = 1; i <= num_mem; i++) { 1369 if (mem_end_stk[i] != INVALID_STACK_INDEX) { 1370 if (BIT_STATUS_AT(reg->bt_mem_start, i)) 1371 region->beg[i] = (int)(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); 1372 else 1373 region->beg[i] = (int)((UChar* )((void* )(UINTN)mem_start_stk[i]) - str); 1374 1375 region->end[i] = (int)((BIT_STATUS_AT(reg->bt_mem_end, i) 1376 ? STACK_AT(mem_end_stk[i])->u.mem.pstr 1377 : (UChar* )((void* )(UINTN)mem_end_stk[i])) - str); 1378 } 1379 else { 1380 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; 1381 } 1382 } 1383 1384 #ifdef USE_CAPTURE_HISTORY 1385 if (reg->capture_history != 0) { 1386 int r; 1387 OnigCaptureTreeNode* node; 1388 1389 if (IS_NULL(region->history_root)) { 1390 region->history_root = node = history_node_new(); 1391 CHECK_NULL_RETURN_MEMERR(node); 1392 } 1393 else { 1394 node = region->history_root; 1395 history_tree_clear(node); 1396 } 1397 1398 node->group = 0; 1399 node->beg = (int)(sstart - str); 1400 node->end = (int)(s - str); 1401 1402 stkp = stk_base; 1403 r = make_capture_history_tree(region->history_root, &stkp, 1404 stk, (UChar* )str, reg); 1405 if (r < 0) { 1406 best_len = r; /* error code */ 1407 goto finish; 1408 } 1409 } 1410 #endif /* USE_CAPTURE_HISTORY */ 1411 #ifdef USE_POSIX_API_REGION_OPTION 1412 } /* else IS_POSIX_REGION() */ 1413 #endif 1414 } /* if (region) */ 1415 } /* n > best_len */ 1416 1417 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 1418 end_best_len: 1419 #endif 1420 MOP_OUT; 1421 1422 if (IS_FIND_CONDITION(option)) { 1423 if (IS_FIND_NOT_EMPTY(option) && s == sstart) { 1424 best_len = ONIG_MISMATCH; 1425 goto fail; /* for retry */ 1426 } 1427 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { 1428 goto fail; /* for retry */ 1429 } 1430 } 1431 1432 /* default behavior: return first-matching result. */ 1433 goto finish; 1434 break; 1435 1436 case OP_EXACT1: MOP_IN(OP_EXACT1); 1437 #if 0 1438 DATA_ENSURE(1); 1439 if (*p != *s) goto fail; 1440 p++; s++; 1441 #endif 1442 if (*p != *s++) goto fail; 1443 DATA_ENSURE(0); 1444 p++; 1445 MOP_OUT; 1446 break; 1447 1448 case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); 1449 { 1450 int len; 1451 UChar *q1, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 1452 1453 DATA_ENSURE(1); 1454 len = ONIGENC_MBC_CASE_FOLD(encode, 1455 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ 1456 case_fold_flag, 1457 &s, end, lowbuf); 1458 DATA_ENSURE(0); 1459 q1 = lowbuf; 1460 while (len-- > 0) { 1461 if (*p != *q1) { 1462 goto fail; 1463 } 1464 p++; q1++; 1465 } 1466 } 1467 MOP_OUT; 1468 break; 1469 1470 case OP_EXACT2: MOP_IN(OP_EXACT2); 1471 DATA_ENSURE(2); 1472 if (*p != *s) goto fail; 1473 p++; s++; 1474 if (*p != *s) goto fail; 1475 sprev = s; 1476 p++; s++; 1477 MOP_OUT; 1478 continue; 1479 break; 1480 1481 case OP_EXACT3: MOP_IN(OP_EXACT3); 1482 DATA_ENSURE(3); 1483 if (*p != *s) goto fail; 1484 p++; s++; 1485 if (*p != *s) goto fail; 1486 p++; s++; 1487 if (*p != *s) goto fail; 1488 sprev = s; 1489 p++; s++; 1490 MOP_OUT; 1491 continue; 1492 break; 1493 1494 case OP_EXACT4: MOP_IN(OP_EXACT4); 1495 DATA_ENSURE(4); 1496 if (*p != *s) goto fail; 1497 p++; s++; 1498 if (*p != *s) goto fail; 1499 p++; s++; 1500 if (*p != *s) goto fail; 1501 p++; s++; 1502 if (*p != *s) goto fail; 1503 sprev = s; 1504 p++; s++; 1505 MOP_OUT; 1506 continue; 1507 break; 1508 1509 case OP_EXACT5: MOP_IN(OP_EXACT5); 1510 DATA_ENSURE(5); 1511 if (*p != *s) goto fail; 1512 p++; s++; 1513 if (*p != *s) goto fail; 1514 p++; s++; 1515 if (*p != *s) goto fail; 1516 p++; s++; 1517 if (*p != *s) goto fail; 1518 p++; s++; 1519 if (*p != *s) goto fail; 1520 sprev = s; 1521 p++; s++; 1522 MOP_OUT; 1523 continue; 1524 break; 1525 1526 case OP_EXACTN: MOP_IN(OP_EXACTN); 1527 GET_LENGTH_INC(tlen, p); 1528 DATA_ENSURE(tlen); 1529 while (tlen-- > 0) { 1530 if (*p++ != *s++) goto fail; 1531 } 1532 sprev = s - 1; 1533 MOP_OUT; 1534 continue; 1535 break; 1536 1537 case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); 1538 { 1539 int len; 1540 UChar *qn, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 1541 1542 GET_LENGTH_INC(tlen, p); 1543 endp = p + tlen; 1544 1545 while (p < endp) { 1546 sprev = s; 1547 DATA_ENSURE(1); 1548 len = ONIGENC_MBC_CASE_FOLD(encode, 1549 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ 1550 case_fold_flag, 1551 &s, end, lowbuf); 1552 DATA_ENSURE(0); 1553 qn = lowbuf; 1554 while (len-- > 0) { 1555 if (*p != *qn) goto fail; 1556 p++; qn++; 1557 } 1558 } 1559 } 1560 1561 MOP_OUT; 1562 continue; 1563 break; 1564 1565 case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); 1566 DATA_ENSURE(2); 1567 if (*p != *s) goto fail; 1568 p++; s++; 1569 if (*p != *s) goto fail; 1570 p++; s++; 1571 MOP_OUT; 1572 break; 1573 1574 case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); 1575 DATA_ENSURE(4); 1576 if (*p != *s) goto fail; 1577 p++; s++; 1578 if (*p != *s) goto fail; 1579 p++; s++; 1580 sprev = s; 1581 if (*p != *s) goto fail; 1582 p++; s++; 1583 if (*p != *s) goto fail; 1584 p++; s++; 1585 MOP_OUT; 1586 continue; 1587 break; 1588 1589 case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); 1590 DATA_ENSURE(6); 1591 if (*p != *s) goto fail; 1592 p++; s++; 1593 if (*p != *s) goto fail; 1594 p++; s++; 1595 if (*p != *s) goto fail; 1596 p++; s++; 1597 if (*p != *s) goto fail; 1598 p++; s++; 1599 sprev = s; 1600 if (*p != *s) goto fail; 1601 p++; s++; 1602 if (*p != *s) goto fail; 1603 p++; s++; 1604 MOP_OUT; 1605 continue; 1606 break; 1607 1608 case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); 1609 GET_LENGTH_INC(tlen, p); 1610 DATA_ENSURE(tlen * 2); 1611 while (tlen-- > 0) { 1612 if (*p != *s) goto fail; 1613 p++; s++; 1614 if (*p != *s) goto fail; 1615 p++; s++; 1616 } 1617 sprev = s - 2; 1618 MOP_OUT; 1619 continue; 1620 break; 1621 1622 case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); 1623 GET_LENGTH_INC(tlen, p); 1624 DATA_ENSURE(tlen * 3); 1625 while (tlen-- > 0) { 1626 if (*p != *s) goto fail; 1627 p++; s++; 1628 if (*p != *s) goto fail; 1629 p++; s++; 1630 if (*p != *s) goto fail; 1631 p++; s++; 1632 } 1633 sprev = s - 3; 1634 MOP_OUT; 1635 continue; 1636 break; 1637 1638 case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); 1639 GET_LENGTH_INC(tlen, p); /* mb-len */ 1640 GET_LENGTH_INC(tlen2, p); /* string len */ 1641 tlen2 *= tlen; 1642 DATA_ENSURE(tlen2); 1643 while (tlen2-- > 0) { 1644 if (*p != *s) goto fail; 1645 p++; s++; 1646 } 1647 sprev = s - tlen; 1648 MOP_OUT; 1649 continue; 1650 break; 1651 1652 case OP_CCLASS: MOP_IN(OP_CCLASS); 1653 DATA_ENSURE(1); 1654 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; 1655 p += SIZE_BITSET; 1656 s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ 1657 MOP_OUT; 1658 break; 1659 1660 case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); 1661 if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; 1662 1663 cclass_mb: 1664 GET_LENGTH_INC(tlen, p); 1665 { 1666 OnigCodePoint code; 1667 UChar *ss; 1668 int mb_len; 1669 1670 DATA_ENSURE(1); 1671 mb_len = enclen(encode, s); 1672 DATA_ENSURE(mb_len); 1673 ss = s; 1674 s += mb_len; 1675 code = ONIGENC_MBC_TO_CODE(encode, ss, s); 1676 1677 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 1678 if (! onig_is_in_code_range(p, code)) goto fail; 1679 #else 1680 q = p; 1681 ALIGNMENT_RIGHT(q); 1682 if (! onig_is_in_code_range(q, code)) goto fail; 1683 #endif 1684 } 1685 p += tlen; 1686 MOP_OUT; 1687 break; 1688 1689 case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); 1690 DATA_ENSURE(1); 1691 if (ONIGENC_IS_MBC_HEAD(encode, s)) { 1692 p += SIZE_BITSET; 1693 goto cclass_mb; 1694 } 1695 else { 1696 if (BITSET_AT(((BitSetRef )p), *s) == 0) 1697 goto fail; 1698 1699 p += SIZE_BITSET; 1700 GET_LENGTH_INC(tlen, p); 1701 p += tlen; 1702 s++; 1703 } 1704 MOP_OUT; 1705 break; 1706 1707 case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); 1708 DATA_ENSURE(1); 1709 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; 1710 p += SIZE_BITSET; 1711 s += enclen(encode, s); 1712 MOP_OUT; 1713 break; 1714 1715 case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); 1716 DATA_ENSURE(1); 1717 if (! ONIGENC_IS_MBC_HEAD(encode, s)) { 1718 s++; 1719 GET_LENGTH_INC(tlen, p); 1720 p += tlen; 1721 goto cc_mb_not_success; 1722 } 1723 1724 cclass_mb_not: 1725 GET_LENGTH_INC(tlen, p); 1726 { 1727 OnigCodePoint code; 1728 UChar *ss; 1729 int mb_len = enclen(encode, s); 1730 1731 if (! DATA_ENSURE_CHECK(mb_len)) { 1732 DATA_ENSURE(1); 1733 s = (UChar* )end; 1734 p += tlen; 1735 goto cc_mb_not_success; 1736 } 1737 1738 ss = s; 1739 s += mb_len; 1740 code = ONIGENC_MBC_TO_CODE(encode, ss, s); 1741 1742 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 1743 if (onig_is_in_code_range(p, code)) goto fail; 1744 #else 1745 q = p; 1746 ALIGNMENT_RIGHT(q); 1747 if (onig_is_in_code_range(q, code)) goto fail; 1748 #endif 1749 } 1750 p += tlen; 1751 1752 cc_mb_not_success: 1753 MOP_OUT; 1754 break; 1755 1756 case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); 1757 DATA_ENSURE(1); 1758 if (ONIGENC_IS_MBC_HEAD(encode, s)) { 1759 p += SIZE_BITSET; 1760 goto cclass_mb_not; 1761 } 1762 else { 1763 if (BITSET_AT(((BitSetRef )p), *s) != 0) 1764 goto fail; 1765 1766 p += SIZE_BITSET; 1767 GET_LENGTH_INC(tlen, p); 1768 p += tlen; 1769 s++; 1770 } 1771 MOP_OUT; 1772 break; 1773 1774 case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); 1775 { 1776 OnigCodePoint code; 1777 void *node; 1778 int mb_len; 1779 UChar *ss; 1780 1781 DATA_ENSURE(1); 1782 GET_POINTER_INC(node, p); 1783 mb_len = enclen(encode, s); 1784 ss = s; 1785 s += mb_len; 1786 DATA_ENSURE(0); 1787 code = ONIGENC_MBC_TO_CODE(encode, ss, s); 1788 if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; 1789 } 1790 MOP_OUT; 1791 break; 1792 1793 case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); 1794 DATA_ENSURE(1); 1795 n = enclen(encode, s); 1796 DATA_ENSURE(n); 1797 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; 1798 s += n; 1799 MOP_OUT; 1800 break; 1801 1802 case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); 1803 DATA_ENSURE(1); 1804 n = enclen(encode, s); 1805 DATA_ENSURE(n); 1806 s += n; 1807 MOP_OUT; 1808 break; 1809 1810 case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); 1811 while (DATA_ENSURE_CHECK1) { 1812 STACK_PUSH_ALT(p, s, sprev); 1813 n = enclen(encode, s); 1814 DATA_ENSURE(n); 1815 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; 1816 sprev = s; 1817 s += n; 1818 } 1819 MOP_OUT; 1820 break; 1821 1822 case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); 1823 while (DATA_ENSURE_CHECK1) { 1824 STACK_PUSH_ALT(p, s, sprev); 1825 n = enclen(encode, s); 1826 if (n > 1) { 1827 DATA_ENSURE(n); 1828 sprev = s; 1829 s += n; 1830 } 1831 else { 1832 sprev = s; 1833 s++; 1834 } 1835 } 1836 MOP_OUT; 1837 break; 1838 1839 case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); 1840 while (DATA_ENSURE_CHECK1) { 1841 if (*p == *s) { 1842 STACK_PUSH_ALT(p + 1, s, sprev); 1843 } 1844 n = enclen(encode, s); 1845 DATA_ENSURE(n); 1846 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; 1847 sprev = s; 1848 s += n; 1849 } 1850 p++; 1851 MOP_OUT; 1852 break; 1853 1854 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); 1855 while (DATA_ENSURE_CHECK1) { 1856 if (*p == *s) { 1857 STACK_PUSH_ALT(p + 1, s, sprev); 1858 } 1859 n = enclen(encode, s); 1860 if (n > 1) { 1861 DATA_ENSURE(n); 1862 sprev = s; 1863 s += n; 1864 } 1865 else { 1866 sprev = s; 1867 s++; 1868 } 1869 } 1870 p++; 1871 MOP_OUT; 1872 break; 1873 1874 #ifdef USE_COMBINATION_EXPLOSION_CHECK 1875 case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); 1876 GET_STATE_CHECK_NUM_INC(mem, p); 1877 while (DATA_ENSURE_CHECK1) { 1878 STATE_CHECK_VAL(scv, mem); 1879 if (scv) goto fail; 1880 1881 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); 1882 n = enclen(encode, s); 1883 DATA_ENSURE(n); 1884 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; 1885 sprev = s; 1886 s += n; 1887 } 1888 MOP_OUT; 1889 break; 1890 1891 case OP_STATE_CHECK_ANYCHAR_ML_STAR: 1892 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); 1893 1894 GET_STATE_CHECK_NUM_INC(mem, p); 1895 while (DATA_ENSURE_CHECK1) { 1896 STATE_CHECK_VAL(scv, mem); 1897 if (scv) goto fail; 1898 1899 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); 1900 n = enclen(encode, s); 1901 if (n > 1) { 1902 DATA_ENSURE(n); 1903 sprev = s; 1904 s += n; 1905 } 1906 else { 1907 sprev = s; 1908 s++; 1909 } 1910 } 1911 MOP_OUT; 1912 break; 1913 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 1914 1915 case OP_WORD: MOP_IN(OP_WORD); 1916 DATA_ENSURE(1); 1917 if (! ONIGENC_IS_MBC_WORD(encode, s, end)) 1918 goto fail; 1919 1920 s += enclen(encode, s); 1921 MOP_OUT; 1922 break; 1923 1924 case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); 1925 DATA_ENSURE(1); 1926 if (ONIGENC_IS_MBC_WORD(encode, s, end)) 1927 goto fail; 1928 1929 s += enclen(encode, s); 1930 MOP_OUT; 1931 break; 1932 1933 case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); 1934 if (ON_STR_BEGIN(s)) { 1935 DATA_ENSURE(1); 1936 if (! ONIGENC_IS_MBC_WORD(encode, s, end)) 1937 goto fail; 1938 } 1939 else if (ON_STR_END(s)) { 1940 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) 1941 goto fail; 1942 } 1943 else { 1944 if (ONIGENC_IS_MBC_WORD(encode, s, end) 1945 == ONIGENC_IS_MBC_WORD(encode, sprev, end)) 1946 goto fail; 1947 } 1948 MOP_OUT; 1949 continue; 1950 break; 1951 1952 case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); 1953 if (ON_STR_BEGIN(s)) { 1954 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) 1955 goto fail; 1956 } 1957 else if (ON_STR_END(s)) { 1958 if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) 1959 goto fail; 1960 } 1961 else { 1962 if (ONIGENC_IS_MBC_WORD(encode, s, end) 1963 != ONIGENC_IS_MBC_WORD(encode, sprev, end)) 1964 goto fail; 1965 } 1966 MOP_OUT; 1967 continue; 1968 break; 1969 1970 #ifdef USE_WORD_BEGIN_END 1971 case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); 1972 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { 1973 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { 1974 MOP_OUT; 1975 continue; 1976 } 1977 } 1978 goto fail; 1979 break; 1980 1981 case OP_WORD_END: MOP_IN(OP_WORD_END); 1982 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { 1983 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { 1984 MOP_OUT; 1985 continue; 1986 } 1987 } 1988 goto fail; 1989 break; 1990 #endif 1991 1992 case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); 1993 if (! ON_STR_BEGIN(s)) goto fail; 1994 1995 MOP_OUT; 1996 continue; 1997 break; 1998 1999 case OP_END_BUF: MOP_IN(OP_END_BUF); 2000 if (! ON_STR_END(s)) goto fail; 2001 2002 MOP_OUT; 2003 continue; 2004 break; 2005 2006 case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); 2007 if (ON_STR_BEGIN(s)) { 2008 if (IS_NOTBOL(msa->options)) goto fail; 2009 MOP_OUT; 2010 continue; 2011 } 2012 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { 2013 MOP_OUT; 2014 continue; 2015 } 2016 goto fail; 2017 break; 2018 2019 case OP_END_LINE: MOP_IN(OP_END_LINE); 2020 if (ON_STR_END(s)) { 2021 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 2022 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { 2023 #endif 2024 if (IS_NOTEOL(msa->options)) goto fail; 2025 MOP_OUT; 2026 continue; 2027 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 2028 } 2029 #endif 2030 } 2031 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { 2032 MOP_OUT; 2033 continue; 2034 } 2035 #ifdef USE_CRNL_AS_LINE_TERMINATOR 2036 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { 2037 MOP_OUT; 2038 continue; 2039 } 2040 #endif 2041 goto fail; 2042 break; 2043 2044 case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); 2045 if (ON_STR_END(s)) { 2046 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 2047 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { 2048 #endif 2049 if (IS_NOTEOL(msa->options)) goto fail; 2050 MOP_OUT; 2051 continue; 2052 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 2053 } 2054 #endif 2055 } 2056 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && 2057 ON_STR_END(s + enclen(encode, s))) { 2058 MOP_OUT; 2059 continue; 2060 } 2061 #ifdef USE_CRNL_AS_LINE_TERMINATOR 2062 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { 2063 UChar* ss = s + enclen(encode, s); 2064 ss += enclen(encode, ss); 2065 if (ON_STR_END(ss)) { 2066 MOP_OUT; 2067 continue; 2068 } 2069 } 2070 #endif 2071 goto fail; 2072 break; 2073 2074 case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); 2075 if (s != msa->start) 2076 goto fail; 2077 2078 MOP_OUT; 2079 continue; 2080 break; 2081 2082 case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); 2083 GET_MEMNUM_INC(mem, p); 2084 STACK_PUSH_MEM_START(mem, s); 2085 MOP_OUT; 2086 continue; 2087 break; 2088 2089 case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); 2090 GET_MEMNUM_INC(mem, p); 2091 mem_start_stk[mem] = (OnigStackIndex )(UINTN)((void* )s); 2092 MOP_OUT; 2093 continue; 2094 break; 2095 2096 case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); 2097 GET_MEMNUM_INC(mem, p); 2098 STACK_PUSH_MEM_END(mem, s); 2099 MOP_OUT; 2100 continue; 2101 break; 2102 2103 case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); 2104 GET_MEMNUM_INC(mem, p); 2105 mem_end_stk[mem] = (OnigStackIndex )(UINTN)((void* )s); 2106 MOP_OUT; 2107 continue; 2108 break; 2109 2110 #ifdef USE_SUBEXP_CALL 2111 case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); 2112 GET_MEMNUM_INC(mem, p); 2113 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ 2114 STACK_PUSH_MEM_END(mem, s); 2115 mem_start_stk[mem] = GET_STACK_INDEX(stkp); 2116 MOP_OUT; 2117 continue; 2118 break; 2119 2120 case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); 2121 GET_MEMNUM_INC(mem, p); 2122 mem_end_stk[mem] = (OnigStackIndex )(UINTN)((void* )s); 2123 STACK_GET_MEM_START(mem, stkp); 2124 2125 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 2126 mem_start_stk[mem] = GET_STACK_INDEX(stkp); 2127 else 2128 mem_start_stk[mem] = (OnigStackIndex )(UINTN)((void* )stkp->u.mem.pstr); 2129 2130 STACK_PUSH_MEM_END_MARK(mem); 2131 MOP_OUT; 2132 continue; 2133 break; 2134 #endif 2135 2136 case OP_BACKREF1: MOP_IN(OP_BACKREF1); 2137 mem = 1; 2138 goto backref; 2139 break; 2140 2141 case OP_BACKREF2: MOP_IN(OP_BACKREF2); 2142 mem = 2; 2143 goto backref; 2144 break; 2145 2146 case OP_BACKREFN: MOP_IN(OP_BACKREFN); 2147 GET_MEMNUM_INC(mem, p); 2148 backref: 2149 { 2150 int len; 2151 UChar *pstart, *pend; 2152 2153 /* if you want to remove following line, 2154 you should check in parse and compile time. */ 2155 if (mem > num_mem) goto fail; 2156 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; 2157 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; 2158 2159 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 2160 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; 2161 else 2162 pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]); 2163 2164 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) 2165 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr 2166 : (UChar* )((void* )(UINTN)mem_end_stk[mem])); 2167 n = (int)(pend - pstart); 2168 DATA_ENSURE(n); 2169 sprev = s; 2170 STRING_CMP(pstart, s, n); 2171 while (sprev + (len = enclen(encode, sprev)) < s) 2172 sprev += len; 2173 2174 MOP_OUT; 2175 continue; 2176 } 2177 break; 2178 2179 case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); 2180 GET_MEMNUM_INC(mem, p); 2181 { 2182 int len; 2183 UChar *pstart, *pend; 2184 2185 /* if you want to remove following line, 2186 you should check in parse and compile time. */ 2187 if (mem > num_mem) goto fail; 2188 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; 2189 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; 2190 2191 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 2192 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; 2193 else 2194 pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]); 2195 2196 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) 2197 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr 2198 : (UChar* )((void* )(UINTN)mem_end_stk[mem])); 2199 n = (int)(pend - pstart); 2200 DATA_ENSURE(n); 2201 sprev = s; 2202 STRING_CMP_IC(case_fold_flag, pstart, &s, n); 2203 while (sprev + (len = enclen(encode, sprev)) < s) 2204 sprev += len; 2205 2206 MOP_OUT; 2207 continue; 2208 } 2209 break; 2210 2211 case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); 2212 { 2213 int len, is_fail; 2214 UChar *pstart, *pend, *swork; 2215 2216 GET_LENGTH_INC(tlen, p); 2217 for (i = 0; i < tlen; i++) { 2218 GET_MEMNUM_INC(mem, p); 2219 2220 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; 2221 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; 2222 2223 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 2224 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; 2225 else 2226 pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]); 2227 2228 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) 2229 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr 2230 : (UChar* )((void* )(UINTN)mem_end_stk[mem])); 2231 n = (int)(pend - pstart); 2232 DATA_ENSURE(n); 2233 sprev = s; 2234 swork = s; 2235 STRING_CMP_VALUE(pstart, swork, n, is_fail); 2236 if (is_fail) continue; 2237 s = swork; 2238 while (sprev + (len = enclen(encode, sprev)) < s) 2239 sprev += len; 2240 2241 p += (SIZE_MEMNUM * (tlen - i - 1)); 2242 break; /* success */ 2243 } 2244 if (i == tlen) goto fail; 2245 MOP_OUT; 2246 continue; 2247 } 2248 break; 2249 2250 case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); 2251 { 2252 int len, is_fail; 2253 UChar *pstart, *pend, *swork; 2254 2255 GET_LENGTH_INC(tlen, p); 2256 for (i = 0; i < tlen; i++) { 2257 GET_MEMNUM_INC(mem, p); 2258 2259 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; 2260 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; 2261 2262 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 2263 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; 2264 else 2265 pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]); 2266 2267 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) 2268 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr 2269 : (UChar* )((void* )(UINTN)mem_end_stk[mem])); 2270 n = (int)(pend - pstart); 2271 DATA_ENSURE(n); 2272 sprev = s; 2273 swork = s; 2274 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); 2275 if (is_fail) continue; 2276 s = swork; 2277 while (sprev + (len = enclen(encode, sprev)) < s) 2278 sprev += len; 2279 2280 p += (SIZE_MEMNUM * (tlen - i - 1)); 2281 break; /* success */ 2282 } 2283 if (i == tlen) goto fail; 2284 MOP_OUT; 2285 continue; 2286 } 2287 break; 2288 2289 #ifdef USE_BACKREF_WITH_LEVEL 2290 case OP_BACKREF_WITH_LEVEL: 2291 { 2292 int len; 2293 OnigOptionType ic; 2294 LengthType level; 2295 2296 GET_OPTION_INC(ic, p); 2297 GET_LENGTH_INC(level, p); 2298 GET_LENGTH_INC(tlen, p); 2299 2300 sprev = s; 2301 if (backref_match_at_nested_level(reg, stk, stk_base, ic 2302 , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { 2303 while (sprev + (len = enclen(encode, sprev)) < s) 2304 sprev += len; 2305 2306 p += (SIZE_MEMNUM * tlen); 2307 } 2308 else 2309 goto fail; 2310 2311 MOP_OUT; 2312 continue; 2313 } 2314 2315 break; 2316 #endif 2317 2318 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ 2319 case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH); 2320 GET_OPTION_INC(option, p); 2321 STACK_PUSH_ALT(p, s, sprev); 2322 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; 2323 MOP_OUT; 2324 continue; 2325 break; 2326 2327 case OP_SET_OPTION: MOP_IN(OP_SET_OPTION); 2328 GET_OPTION_INC(option, p); 2329 MOP_OUT; 2330 continue; 2331 break; 2332 #endif 2333 2334 case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START); 2335 GET_MEMNUM_INC(mem, p); /* mem: null check id */ 2336 STACK_PUSH_NULL_CHECK_START(mem, s); 2337 MOP_OUT; 2338 continue; 2339 break; 2340 2341 case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); 2342 { 2343 int isnull; 2344 2345 GET_MEMNUM_INC(mem, p); /* mem: null check id */ 2346 STACK_NULL_CHECK(isnull, mem, s); 2347 if (isnull) { 2348 #ifdef ONIG_DEBUG_MATCH 2349 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", 2350 (int )mem, (int )s); 2351 #endif 2352 null_check_found: 2353 /* empty loop founded, skip next instruction */ 2354 switch (*p++) { 2355 case OP_JUMP: 2356 case OP_PUSH: 2357 p += SIZE_RELADDR; 2358 break; 2359 case OP_REPEAT_INC: 2360 case OP_REPEAT_INC_NG: 2361 case OP_REPEAT_INC_SG: 2362 case OP_REPEAT_INC_NG_SG: 2363 p += SIZE_MEMNUM; 2364 break; 2365 default: 2366 goto unexpected_bytecode_error; 2367 break; 2368 } 2369 } 2370 } 2371 MOP_OUT; 2372 continue; 2373 break; 2374 2375 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 2376 case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); 2377 { 2378 int isnull; 2379 2380 GET_MEMNUM_INC(mem, p); /* mem: null check id */ 2381 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); 2382 if (isnull) { 2383 #ifdef ONIG_DEBUG_MATCH 2384 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n", 2385 (int )mem, (int )s); 2386 #endif 2387 if (isnull == -1) goto fail; 2388 goto null_check_found; 2389 } 2390 } 2391 MOP_OUT; 2392 continue; 2393 break; 2394 #endif 2395 2396 #ifdef USE_SUBEXP_CALL 2397 case OP_NULL_CHECK_END_MEMST_PUSH: 2398 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); 2399 { 2400 int isnull; 2401 2402 GET_MEMNUM_INC(mem, p); /* mem: null check id */ 2403 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 2404 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); 2405 #else 2406 STACK_NULL_CHECK_REC(isnull, mem, s); 2407 #endif 2408 if (isnull) { 2409 #ifdef ONIG_DEBUG_MATCH 2410 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n", 2411 (int )mem, (int )s); 2412 #endif 2413 if (isnull == -1) goto fail; 2414 goto null_check_found; 2415 } 2416 else { 2417 STACK_PUSH_NULL_CHECK_END(mem); 2418 } 2419 } 2420 MOP_OUT; 2421 continue; 2422 break; 2423 #endif 2424 2425 case OP_JUMP: MOP_IN(OP_JUMP); 2426 GET_RELADDR_INC(addr, p); 2427 p += addr; 2428 MOP_OUT; 2429 CHECK_INTERRUPT_IN_MATCH_AT; 2430 continue; 2431 break; 2432 2433 case OP_PUSH: MOP_IN(OP_PUSH); 2434 GET_RELADDR_INC(addr, p); 2435 STACK_PUSH_ALT(p + addr, s, sprev); 2436 MOP_OUT; 2437 continue; 2438 break; 2439 2440 #ifdef USE_COMBINATION_EXPLOSION_CHECK 2441 case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); 2442 GET_STATE_CHECK_NUM_INC(mem, p); 2443 STATE_CHECK_VAL(scv, mem); 2444 if (scv) goto fail; 2445 2446 GET_RELADDR_INC(addr, p); 2447 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); 2448 MOP_OUT; 2449 continue; 2450 break; 2451 2452 case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); 2453 GET_STATE_CHECK_NUM_INC(mem, p); 2454 GET_RELADDR_INC(addr, p); 2455 STATE_CHECK_VAL(scv, mem); 2456 if (scv) { 2457 p += addr; 2458 } 2459 else { 2460 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); 2461 } 2462 MOP_OUT; 2463 continue; 2464 break; 2465 2466 case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); 2467 GET_STATE_CHECK_NUM_INC(mem, p); 2468 STATE_CHECK_VAL(scv, mem); 2469 if (scv) goto fail; 2470 2471 STACK_PUSH_STATE_CHECK(s, mem); 2472 MOP_OUT; 2473 continue; 2474 break; 2475 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 2476 2477 case OP_POP: MOP_IN(OP_POP); 2478 STACK_POP_ONE; 2479 MOP_OUT; 2480 continue; 2481 break; 2482 2483 case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); 2484 GET_RELADDR_INC(addr, p); 2485 if (*p == *s && DATA_ENSURE_CHECK1) { 2486 p++; 2487 STACK_PUSH_ALT(p + addr, s, sprev); 2488 MOP_OUT; 2489 continue; 2490 } 2491 p += (addr + 1); 2492 MOP_OUT; 2493 continue; 2494 break; 2495 2496 case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); 2497 GET_RELADDR_INC(addr, p); 2498 if (*p == *s) { 2499 p++; 2500 STACK_PUSH_ALT(p + addr, s, sprev); 2501 MOP_OUT; 2502 continue; 2503 } 2504 p++; 2505 MOP_OUT; 2506 continue; 2507 break; 2508 2509 case OP_REPEAT: MOP_IN(OP_REPEAT); 2510 { 2511 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 2512 GET_RELADDR_INC(addr, p); 2513 2514 STACK_ENSURE(1); 2515 repeat_stk[mem] = GET_STACK_INDEX(stk); 2516 STACK_PUSH_REPEAT(mem, p); 2517 2518 if (reg->repeat_range[mem].lower == 0) { 2519 STACK_PUSH_ALT(p + addr, s, sprev); 2520 } 2521 } 2522 MOP_OUT; 2523 continue; 2524 break; 2525 2526 case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); 2527 { 2528 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 2529 GET_RELADDR_INC(addr, p); 2530 2531 STACK_ENSURE(1); 2532 repeat_stk[mem] = GET_STACK_INDEX(stk); 2533 STACK_PUSH_REPEAT(mem, p); 2534 2535 if (reg->repeat_range[mem].lower == 0) { 2536 STACK_PUSH_ALT(p, s, sprev); 2537 p += addr; 2538 } 2539 } 2540 MOP_OUT; 2541 continue; 2542 break; 2543 2544 case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); 2545 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 2546 si = repeat_stk[mem]; 2547 stkp = STACK_AT(si); 2548 2549 repeat_inc: 2550 stkp->u.repeat.count++; 2551 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { 2552 /* end of repeat. Nothing to do. */ 2553 } 2554 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { 2555 STACK_PUSH_ALT(p, s, sprev); 2556 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ 2557 } 2558 else { 2559 p = stkp->u.repeat.pcode; 2560 } 2561 STACK_PUSH_REPEAT_INC(si); 2562 MOP_OUT; 2563 CHECK_INTERRUPT_IN_MATCH_AT; 2564 continue; 2565 break; 2566 2567 case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); 2568 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 2569 STACK_GET_REPEAT(mem, stkp); 2570 si = GET_STACK_INDEX(stkp); 2571 goto repeat_inc; 2572 break; 2573 2574 case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); 2575 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 2576 si = repeat_stk[mem]; 2577 stkp = STACK_AT(si); 2578 2579 repeat_inc_ng: 2580 stkp->u.repeat.count++; 2581 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { 2582 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { 2583 UChar* pcode = stkp->u.repeat.pcode; 2584 2585 STACK_PUSH_REPEAT_INC(si); 2586 STACK_PUSH_ALT(pcode, s, sprev); 2587 } 2588 else { 2589 p = stkp->u.repeat.pcode; 2590 STACK_PUSH_REPEAT_INC(si); 2591 } 2592 } 2593 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { 2594 STACK_PUSH_REPEAT_INC(si); 2595 } 2596 MOP_OUT; 2597 CHECK_INTERRUPT_IN_MATCH_AT; 2598 continue; 2599 break; 2600 2601 case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); 2602 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 2603 STACK_GET_REPEAT(mem, stkp); 2604 si = GET_STACK_INDEX(stkp); 2605 goto repeat_inc_ng; 2606 break; 2607 2608 case OP_PUSH_POS: MOP_IN(OP_PUSH_POS); 2609 STACK_PUSH_POS(s, sprev); 2610 MOP_OUT; 2611 continue; 2612 break; 2613 2614 case OP_POP_POS: MOP_IN(OP_POP_POS); 2615 { 2616 STACK_POS_END(stkp); 2617 s = stkp->u.state.pstr; 2618 sprev = stkp->u.state.pstr_prev; 2619 } 2620 MOP_OUT; 2621 continue; 2622 break; 2623 2624 case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT); 2625 GET_RELADDR_INC(addr, p); 2626 STACK_PUSH_POS_NOT(p + addr, s, sprev); 2627 MOP_OUT; 2628 continue; 2629 break; 2630 2631 case OP_FAIL_POS: MOP_IN(OP_FAIL_POS); 2632 STACK_POP_TIL_POS_NOT; 2633 goto fail; 2634 break; 2635 2636 case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); 2637 STACK_PUSH_STOP_BT; 2638 MOP_OUT; 2639 continue; 2640 break; 2641 2642 case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); 2643 STACK_STOP_BT_END; 2644 MOP_OUT; 2645 continue; 2646 break; 2647 2648 case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); 2649 GET_LENGTH_INC(tlen, p); 2650 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); 2651 if (IS_NULL(s)) goto fail; 2652 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); 2653 MOP_OUT; 2654 continue; 2655 break; 2656 2657 case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); 2658 GET_RELADDR_INC(addr, p); 2659 GET_LENGTH_INC(tlen, p); 2660 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); 2661 if (IS_NULL(q)) { 2662 /* too short case -> success. ex. /(?<!XXX)a/.match("a") 2663 If you want to change to fail, replace following line. */ 2664 p += addr; 2665 /* goto fail; */ 2666 } 2667 else { 2668 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev); 2669 s = q; 2670 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); 2671 } 2672 MOP_OUT; 2673 continue; 2674 break; 2675 2676 case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); 2677 STACK_POP_TIL_LOOK_BEHIND_NOT; 2678 goto fail; 2679 break; 2680 2681 #ifdef USE_SUBEXP_CALL 2682 case OP_CALL: MOP_IN(OP_CALL); 2683 GET_ABSADDR_INC(addr, p); 2684 STACK_PUSH_CALL_FRAME(p); 2685 p = reg->p + addr; 2686 MOP_OUT; 2687 continue; 2688 break; 2689 2690 case OP_RETURN: MOP_IN(OP_RETURN); 2691 STACK_RETURN(p); 2692 STACK_PUSH_RETURN; 2693 MOP_OUT; 2694 continue; 2695 break; 2696 #endif 2697 2698 case OP_FINISH: 2699 goto finish; 2700 break; 2701 2702 fail: 2703 MOP_OUT; 2704 /* fall */ 2705 case OP_FAIL: MOP_IN(OP_FAIL); 2706 STACK_POP; 2707 p = stk->u.state.pcode; 2708 s = stk->u.state.pstr; 2709 sprev = stk->u.state.pstr_prev; 2710 2711 #ifdef USE_COMBINATION_EXPLOSION_CHECK 2712 if (stk->u.state.state_check != 0) { 2713 stk->type = STK_STATE_CHECK_MARK; 2714 stk++; 2715 } 2716 #endif 2717 2718 MOP_OUT; 2719 continue; 2720 break; 2721 2722 default: 2723 goto bytecode_error; 2724 2725 } /* end of switch */ 2726 sprev = sbegin; 2727 } /* end of while(1) */ 2728 2729 finish: 2730 STACK_SAVE; 2731 xfree(alloca_base); 2732 return best_len; 2733 2734 #ifdef ONIG_DEBUG 2735 stack_error: 2736 STACK_SAVE; 2737 xfree(alloca_base); 2738 return ONIGERR_STACK_BUG; 2739 #endif 2740 2741 bytecode_error: 2742 STACK_SAVE; 2743 xfree(alloca_base); 2744 return ONIGERR_UNDEFINED_BYTECODE; 2745 2746 unexpected_bytecode_error: 2747 STACK_SAVE; 2748 xfree(alloca_base); 2749 return ONIGERR_UNEXPECTED_BYTECODE; 2750 } 2751 2752 2753 static UChar* 2754 slow_search(OnigEncoding enc, UChar* target, UChar* target_end, 2755 const UChar* text, const UChar* text_end, UChar* text_range) 2756 { 2757 UChar *t, *p, *s, *end; 2758 2759 end = (UChar* )text_end; 2760 end -= target_end - target - 1; 2761 if (end > text_range) 2762 end = text_range; 2763 2764 s = (UChar* )text; 2765 2766 while (s < end) { 2767 if (*s == *target) { 2768 p = s + 1; 2769 t = target + 1; 2770 while (t < target_end) { 2771 if (*t != *p++) 2772 break; 2773 t++; 2774 } 2775 if (t == target_end) 2776 return s; 2777 } 2778 s += enclen(enc, s); 2779 } 2780 2781 return (UChar* )NULL; 2782 } 2783 2784 static int 2785 str_lower_case_match(OnigEncoding enc, int case_fold_flag, 2786 const UChar* t, const UChar* tend, 2787 const UChar* p, const UChar* end) 2788 { 2789 int lowlen; 2790 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 2791 2792 while (t < tend) { 2793 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); 2794 q = lowbuf; 2795 while (lowlen > 0) { 2796 if (*t++ != *q++) return 0; 2797 lowlen--; 2798 } 2799 } 2800 2801 return 1; 2802 } 2803 2804 static UChar* 2805 slow_search_ic(OnigEncoding enc, int case_fold_flag, 2806 UChar* target, UChar* target_end, 2807 const UChar* text, const UChar* text_end, UChar* text_range) 2808 { 2809 UChar *s, *end; 2810 2811 end = (UChar* )text_end; 2812 end -= target_end - target - 1; 2813 if (end > text_range) 2814 end = text_range; 2815 2816 s = (UChar* )text; 2817 2818 while (s < end) { 2819 if (str_lower_case_match(enc, case_fold_flag, target, target_end, 2820 s, text_end)) 2821 return s; 2822 2823 s += enclen(enc, s); 2824 } 2825 2826 return (UChar* )NULL; 2827 } 2828 2829 static UChar* 2830 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, 2831 const UChar* text, const UChar* adjust_text, 2832 const UChar* text_end, const UChar* text_start) 2833 { 2834 UChar *t, *p, *s; 2835 2836 s = (UChar* )text_end; 2837 s -= (target_end - target); 2838 if (s > text_start) 2839 s = (UChar* )text_start; 2840 else 2841 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); 2842 2843 while (s >= text) { 2844 if (*s == *target) { 2845 p = s + 1; 2846 t = target + 1; 2847 while (t < target_end) { 2848 if (*t != *p++) 2849 break; 2850 t++; 2851 } 2852 if (t == target_end) 2853 return s; 2854 } 2855 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); 2856 } 2857 2858 return (UChar* )NULL; 2859 } 2860 2861 static UChar* 2862 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, 2863 UChar* target, UChar* target_end, 2864 const UChar* text, const UChar* adjust_text, 2865 const UChar* text_end, const UChar* text_start) 2866 { 2867 UChar *s; 2868 2869 s = (UChar* )text_end; 2870 s -= (target_end - target); 2871 if (s > text_start) 2872 s = (UChar* )text_start; 2873 else 2874 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); 2875 2876 while (s >= text) { 2877 if (str_lower_case_match(enc, case_fold_flag, 2878 target, target_end, s, text_end)) 2879 return s; 2880 2881 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); 2882 } 2883 2884 return (UChar* )NULL; 2885 } 2886 2887 static UChar* 2888 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, 2889 const UChar* text, const UChar* text_end, 2890 const UChar* text_range) 2891 { 2892 const UChar *s, *se, *t, *p, *end; 2893 const UChar *tail; 2894 int skip, tlen1; 2895 2896 #ifdef ONIG_DEBUG_SEARCH 2897 fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n", 2898 (int )text, (int )text_end, (int )text_range); 2899 #endif 2900 2901 tail = target_end - 1; 2902 tlen1 = (int)(tail - target); 2903 end = text_range; 2904 if (end + tlen1 > text_end) 2905 end = text_end - tlen1; 2906 2907 s = text; 2908 2909 if (IS_NULL(reg->int_map)) { 2910 while (s < end) { 2911 p = se = s + tlen1; 2912 t = tail; 2913 while (*p == *t) { 2914 if (t == target) return (UChar* )s; 2915 p--; t--; 2916 } 2917 skip = reg->map[*se]; 2918 t = s; 2919 do { 2920 s += enclen(reg->enc, s); 2921 } while ((s - t) < skip && s < end); 2922 } 2923 } 2924 else { 2925 while (s < end) { 2926 p = se = s + tlen1; 2927 t = tail; 2928 while (*p == *t) { 2929 if (t == target) return (UChar* )s; 2930 p--; t--; 2931 } 2932 skip = reg->int_map[*se]; 2933 t = s; 2934 do { 2935 s += enclen(reg->enc, s); 2936 } while ((s - t) < skip && s < end); 2937 } 2938 } 2939 2940 return (UChar* )NULL; 2941 } 2942 2943 static UChar* 2944 bm_search(regex_t* reg, const UChar* target, const UChar* target_end, 2945 const UChar* text, const UChar* text_end, const UChar* text_range) 2946 { 2947 const UChar *s, *t, *p, *end; 2948 const UChar *tail; 2949 2950 end = text_range + (target_end - target) - 1; 2951 if (end > text_end) 2952 end = text_end; 2953 2954 tail = target_end - 1; 2955 s = text + (target_end - target) - 1; 2956 if (IS_NULL(reg->int_map)) { 2957 while (s < end) { 2958 p = s; 2959 t = tail; 2960 while (*p == *t) { 2961 if (t == target) return (UChar* )p; 2962 p--; t--; 2963 } 2964 s += reg->map[*s]; 2965 } 2966 } 2967 else { /* see int_map[] */ 2968 while (s < end) { 2969 p = s; 2970 t = tail; 2971 while (*p == *t) { 2972 if (t == target) return (UChar* )p; 2973 p--; t--; 2974 } 2975 s += reg->int_map[*s]; 2976 } 2977 } 2978 return (UChar* )NULL; 2979 } 2980 2981 static int 2982 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, 2983 int** skip) 2984 2985 { 2986 int i, len; 2987 2988 if (IS_NULL(*skip)) { 2989 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); 2990 if (IS_NULL(*skip)) return ONIGERR_MEMORY; 2991 } 2992 2993 len = (int)(end - s); 2994 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) 2995 (*skip)[i] = len; 2996 2997 for (i = len - 1; i > 0; i--) 2998 (*skip)[s[i]] = i; 2999 3000 return 0; 3001 } 3002 3003 static UChar* 3004 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, 3005 const UChar* text, const UChar* adjust_text, 3006 const UChar* text_end, const UChar* text_start) 3007 { 3008 const UChar *s, *t, *p; 3009 3010 s = text_end - (target_end - target); 3011 if (text_start < s) 3012 s = text_start; 3013 else 3014 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); 3015 3016 while (s >= text) { 3017 p = s; 3018 t = target; 3019 while (t < target_end && *p == *t) { 3020 p++; t++; 3021 } 3022 if (t == target_end) 3023 return (UChar* )s; 3024 3025 s -= reg->int_map_backward[*s]; 3026 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); 3027 } 3028 3029 return (UChar* )NULL; 3030 } 3031 3032 static UChar* 3033 map_search(OnigEncoding enc, UChar map[], 3034 const UChar* text, const UChar* text_range) 3035 { 3036 const UChar *s = text; 3037 3038 while (s < text_range) { 3039 if (map[*s]) return (UChar* )s; 3040 3041 s += enclen(enc, s); 3042 } 3043 return (UChar* )NULL; 3044 } 3045 3046 static UChar* 3047 map_search_backward(OnigEncoding enc, UChar map[], 3048 const UChar* text, const UChar* adjust_text, 3049 const UChar* text_start) 3050 { 3051 const UChar *s = text_start; 3052 3053 while (s >= text) { 3054 if (map[*s]) return (UChar* )s; 3055 3056 s = onigenc_get_prev_char_head(enc, adjust_text, s); 3057 } 3058 return (UChar* )NULL; 3059 } 3060 3061 extern int 3062 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, 3063 OnigOptionType option) 3064 { 3065 int r; 3066 UChar *prev; 3067 OnigMatchArg msa; 3068 3069 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) 3070 start: 3071 THREAD_ATOMIC_START; 3072 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { 3073 ONIG_STATE_INC(reg); 3074 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { 3075 onig_chain_reduce(reg); 3076 ONIG_STATE_INC(reg); 3077 } 3078 } 3079 else { 3080 int n; 3081 3082 THREAD_ATOMIC_END; 3083 n = 0; 3084 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { 3085 if (++n > THREAD_PASS_LIMIT_COUNT) 3086 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; 3087 THREAD_PASS; 3088 } 3089 goto start; 3090 } 3091 THREAD_ATOMIC_END; 3092 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ 3093 3094 MATCH_ARG_INIT(msa, option, region, at); 3095 #ifdef USE_COMBINATION_EXPLOSION_CHECK 3096 { 3097 int offset = at - str; 3098 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); 3099 } 3100 #endif 3101 3102 if (region 3103 #ifdef USE_POSIX_API_REGION_OPTION 3104 && !IS_POSIX_REGION(option) 3105 #endif 3106 ) { 3107 r = onig_region_resize_clear(region, reg->num_mem + 1); 3108 } 3109 else 3110 r = 0; 3111 3112 if (r == 0) { 3113 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); 3114 r = match_at(reg, str, end, 3115 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 3116 end, 3117 #endif 3118 at, prev, &msa); 3119 } 3120 3121 MATCH_ARG_FREE(msa); 3122 ONIG_STATE_DEC_THREAD(reg); 3123 return r; 3124 } 3125 3126 static int 3127 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, 3128 UChar* range, UChar** low, UChar** high, UChar** low_prev) 3129 { 3130 UChar *p, *pprev = (UChar* )NULL; 3131 3132 #ifdef ONIG_DEBUG_SEARCH 3133 fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n", 3134 (int )str, (int )end, (int )s, (int )range); 3135 #endif 3136 3137 p = s; 3138 if (reg->dmin > 0) { 3139 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { 3140 p += reg->dmin; 3141 } 3142 else { 3143 UChar *q = p + reg->dmin; 3144 while (p < q) p += enclen(reg->enc, p); 3145 } 3146 } 3147 3148 retry: 3149 switch (reg->optimize) { 3150 case ONIG_OPTIMIZE_EXACT: 3151 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); 3152 break; 3153 case ONIG_OPTIMIZE_EXACT_IC: 3154 p = slow_search_ic(reg->enc, reg->case_fold_flag, 3155 reg->exact, reg->exact_end, p, end, range); 3156 break; 3157 3158 case ONIG_OPTIMIZE_EXACT_BM: 3159 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); 3160 break; 3161 3162 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: 3163 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); 3164 break; 3165 3166 case ONIG_OPTIMIZE_MAP: 3167 p = map_search(reg->enc, reg->map, p, range); 3168 break; 3169 } 3170 3171 if (p && p < range) { 3172 if (p - reg->dmin < s) { 3173 retry_gate: 3174 pprev = p; 3175 p += enclen(reg->enc, p); 3176 goto retry; 3177 } 3178 3179 if (reg->sub_anchor) { 3180 UChar* prev; 3181 3182 switch (reg->sub_anchor) { 3183 case ANCHOR_BEGIN_LINE: 3184 if (!ON_STR_BEGIN(p)) { 3185 prev = onigenc_get_prev_char_head(reg->enc, 3186 (pprev ? pprev : str), p); 3187 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) 3188 goto retry_gate; 3189 } 3190 break; 3191 3192 case ANCHOR_END_LINE: 3193 if (ON_STR_END(p)) { 3194 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 3195 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, 3196 (pprev ? pprev : str), p); 3197 if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) 3198 goto retry_gate; 3199 #endif 3200 } 3201 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) 3202 #ifdef USE_CRNL_AS_LINE_TERMINATOR 3203 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) 3204 #endif 3205 ) 3206 goto retry_gate; 3207 break; 3208 } 3209 } 3210 3211 if (reg->dmax == 0) { 3212 *low = p; 3213 if (low_prev) { 3214 if (*low > s) 3215 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p); 3216 else 3217 *low_prev = onigenc_get_prev_char_head(reg->enc, 3218 (pprev ? pprev : str), p); 3219 } 3220 } 3221 else { 3222 if (reg->dmax != ONIG_INFINITE_DISTANCE) { 3223 *low = p - reg->dmax; 3224 if (*low > s) { 3225 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, 3226 *low, (const UChar** )low_prev); 3227 if (low_prev && IS_NULL(*low_prev)) 3228 *low_prev = onigenc_get_prev_char_head(reg->enc, 3229 (pprev ? pprev : s), *low); 3230 } 3231 else { 3232 if (low_prev) 3233 *low_prev = onigenc_get_prev_char_head(reg->enc, 3234 (pprev ? pprev : str), *low); 3235 } 3236 } 3237 } 3238 /* no needs to adjust *high, *high is used as range check only */ 3239 *high = p - reg->dmin; 3240 3241 #ifdef ONIG_DEBUG_SEARCH 3242 fprintf(stderr, 3243 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", 3244 (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); 3245 #endif 3246 return 1; /* success */ 3247 } 3248 3249 return 0; /* fail */ 3250 } 3251 3252 static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc, 3253 int** skip)); 3254 3255 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 3256 3257 static int 3258 backward_search_range(regex_t* reg, const UChar* str, const UChar* end, 3259 UChar* s, const UChar* range, UChar* adjrange, 3260 UChar** low, UChar** high) 3261 { 3262 int r; 3263 UChar *p; 3264 3265 range += reg->dmin; 3266 p = s; 3267 3268 retry: 3269 switch (reg->optimize) { 3270 case ONIG_OPTIMIZE_EXACT: 3271 exact_method: 3272 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, 3273 range, adjrange, end, p); 3274 break; 3275 3276 case ONIG_OPTIMIZE_EXACT_IC: 3277 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, 3278 reg->exact, reg->exact_end, 3279 range, adjrange, end, p); 3280 break; 3281 3282 case ONIG_OPTIMIZE_EXACT_BM: 3283 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: 3284 if (IS_NULL(reg->int_map_backward)) { 3285 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) 3286 goto exact_method; 3287 3288 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, 3289 &(reg->int_map_backward)); 3290 if (r) return r; 3291 } 3292 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, 3293 end, p); 3294 break; 3295 3296 case ONIG_OPTIMIZE_MAP: 3297 p = map_search_backward(reg->enc, reg->map, range, adjrange, p); 3298 break; 3299 } 3300 3301 if (p) { 3302 if (reg->sub_anchor) { 3303 UChar* prev; 3304 3305 switch (reg->sub_anchor) { 3306 case ANCHOR_BEGIN_LINE: 3307 if (!ON_STR_BEGIN(p)) { 3308 prev = onigenc_get_prev_char_head(reg->enc, str, p); 3309 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { 3310 p = prev; 3311 goto retry; 3312 } 3313 } 3314 break; 3315 3316 case ANCHOR_END_LINE: 3317 if (ON_STR_END(p)) { 3318 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 3319 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); 3320 if (IS_NULL(prev)) goto fail; 3321 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { 3322 p = prev; 3323 goto retry; 3324 } 3325 #endif 3326 } 3327 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) 3328 #ifdef USE_CRNL_AS_LINE_TERMINATOR 3329 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) 3330 #endif 3331 ) { 3332 p = onigenc_get_prev_char_head(reg->enc, adjrange, p); 3333 if (IS_NULL(p)) goto fail; 3334 goto retry; 3335 } 3336 break; 3337 } 3338 } 3339 3340 /* no needs to adjust *high, *high is used as range check only */ 3341 if (reg->dmax != ONIG_INFINITE_DISTANCE) { 3342 *low = p - reg->dmax; 3343 *high = p - reg->dmin; 3344 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high); 3345 } 3346 3347 #ifdef ONIG_DEBUG_SEARCH 3348 fprintf(stderr, "backward_search_range: low: %d, high: %d\n", 3349 (int )(*low - str), (int )(*high - str)); 3350 #endif 3351 return 1; /* success */ 3352 } 3353 3354 fail: 3355 #ifdef ONIG_DEBUG_SEARCH 3356 fprintf(stderr, "backward_search_range: fail.\n"); 3357 #endif 3358 return 0; /* fail */ 3359 } 3360 3361 3362 extern int 3363 onig_search(regex_t* reg, const UChar* str, const UChar* end, 3364 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) 3365 { 3366 int r; 3367 UChar *s, *prev; 3368 OnigMatchArg msa; 3369 const UChar *orig_start = start; 3370 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 3371 const UChar *orig_range = range; 3372 #endif 3373 3374 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) 3375 start: 3376 THREAD_ATOMIC_START; 3377 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { 3378 ONIG_STATE_INC(reg); 3379 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { 3380 onig_chain_reduce(reg); 3381 ONIG_STATE_INC(reg); 3382 } 3383 } 3384 else { 3385 int n; 3386 3387 THREAD_ATOMIC_END; 3388 n = 0; 3389 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { 3390 if (++n > THREAD_PASS_LIMIT_COUNT) 3391 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; 3392 THREAD_PASS; 3393 } 3394 goto start; 3395 } 3396 THREAD_ATOMIC_END; 3397 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ 3398 3399 #ifdef ONIG_DEBUG_SEARCH 3400 fprintf(stderr, 3401 "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n", 3402 (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); 3403 #endif 3404 3405 if (region 3406 #ifdef USE_POSIX_API_REGION_OPTION 3407 && !IS_POSIX_REGION(option) 3408 #endif 3409 ) { 3410 r = onig_region_resize_clear(region, reg->num_mem + 1); 3411 if (r) goto finish_no_msa; 3412 } 3413 3414 if (start > end || start < str) goto mismatch_no_msa; 3415 3416 3417 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 3418 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 3419 #define MATCH_AND_RETURN_CHECK(upper_range) \ 3420 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ 3421 if (r != ONIG_MISMATCH) {\ 3422 if (r >= 0) {\ 3423 if (! IS_FIND_LONGEST(reg->options)) {\ 3424 goto match;\ 3425 }\ 3426 }\ 3427 else goto finish; /* error */ \ 3428 } 3429 #else 3430 #define MATCH_AND_RETURN_CHECK(upper_range) \ 3431 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ 3432 if (r != ONIG_MISMATCH) {\ 3433 if (r >= 0) {\ 3434 goto match;\ 3435 }\ 3436 else goto finish; /* error */ \ 3437 } 3438 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ 3439 #else 3440 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 3441 #define MATCH_AND_RETURN_CHECK(none) \ 3442 r = match_at(reg, str, end, s, prev, &msa);\ 3443 if (r != ONIG_MISMATCH) {\ 3444 if (r >= 0) {\ 3445 if (! IS_FIND_LONGEST(reg->options)) {\ 3446 goto match;\ 3447 }\ 3448 }\ 3449 else goto finish; /* error */ \ 3450 } 3451 #else 3452 #define MATCH_AND_RETURN_CHECK(none) \ 3453 r = match_at(reg, str, end, s, prev, &msa);\ 3454 if (r != ONIG_MISMATCH) {\ 3455 if (r >= 0) {\ 3456 goto match;\ 3457 }\ 3458 else goto finish; /* error */ \ 3459 } 3460 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ 3461 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ 3462 3463 3464 /* anchor optimize: resume search range */ 3465 if (reg->anchor != 0 && str < end) { 3466 UChar *min_semi_end, *max_semi_end; 3467 3468 if (reg->anchor & ANCHOR_BEGIN_POSITION) { 3469 /* search start-position only */ 3470 begin_position: 3471 if (range > start) 3472 range = start + 1; 3473 else 3474 range = start; 3475 } 3476 else if (reg->anchor & ANCHOR_BEGIN_BUF) { 3477 /* search str-position only */ 3478 if (range > start) { 3479 if (start != str) goto mismatch_no_msa; 3480 range = str + 1; 3481 } 3482 else { 3483 if (range <= str) { 3484 start = str; 3485 range = str; 3486 } 3487 else 3488 goto mismatch_no_msa; 3489 } 3490 } 3491 else if (reg->anchor & ANCHOR_END_BUF) { 3492 min_semi_end = max_semi_end = (UChar* )end; 3493 3494 end_buf: 3495 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) 3496 goto mismatch_no_msa; 3497 3498 if (range > start) { 3499 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { 3500 start = min_semi_end - reg->anchor_dmax; 3501 if (start < end) 3502 start = onigenc_get_right_adjust_char_head(reg->enc, str, start); 3503 else { /* match with empty at end */ 3504 start = onigenc_get_prev_char_head(reg->enc, str, end); 3505 } 3506 } 3507 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { 3508 range = max_semi_end - reg->anchor_dmin + 1; 3509 } 3510 3511 if (start >= range) goto mismatch_no_msa; 3512 } 3513 else { 3514 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { 3515 range = min_semi_end - reg->anchor_dmax; 3516 } 3517 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { 3518 start = max_semi_end - reg->anchor_dmin; 3519 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start); 3520 } 3521 if (range > start) goto mismatch_no_msa; 3522 } 3523 } 3524 else if (reg->anchor & ANCHOR_SEMI_END_BUF) { 3525 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1); 3526 3527 max_semi_end = (UChar* )end; 3528 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { 3529 min_semi_end = pre_end; 3530 3531 #ifdef USE_CRNL_AS_LINE_TERMINATOR 3532 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1); 3533 if (IS_NOT_NULL(pre_end) && 3534 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { 3535 min_semi_end = pre_end; 3536 } 3537 #endif 3538 if (min_semi_end > str && start <= min_semi_end) { 3539 goto end_buf; 3540 } 3541 } 3542 else { 3543 min_semi_end = (UChar* )end; 3544 goto end_buf; 3545 } 3546 } 3547 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { 3548 goto begin_position; 3549 } 3550 } 3551 else if (str == end) { /* empty string */ 3552 static const UChar* address_for_empty_string = (UChar* )""; 3553 3554 #ifdef ONIG_DEBUG_SEARCH 3555 fprintf(stderr, "onig_search: empty string.\n"); 3556 #endif 3557 3558 if (reg->threshold_len == 0) { 3559 start = end = str = address_for_empty_string; 3560 s = (UChar* )start; 3561 prev = (UChar* )NULL; 3562 3563 MATCH_ARG_INIT(msa, option, region, start); 3564 #ifdef USE_COMBINATION_EXPLOSION_CHECK 3565 msa.state_check_buff = (void* )0; 3566 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ 3567 #endif 3568 MATCH_AND_RETURN_CHECK(end); 3569 goto mismatch; 3570 } 3571 goto mismatch_no_msa; 3572 } 3573 3574 #ifdef ONIG_DEBUG_SEARCH 3575 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", 3576 (int )(end - str), (int )(start - str), (int )(range - str)); 3577 #endif 3578 3579 MATCH_ARG_INIT(msa, option, region, orig_start); 3580 #ifdef USE_COMBINATION_EXPLOSION_CHECK 3581 { 3582 int offset = (MIN(start, range) - str); 3583 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); 3584 } 3585 #endif 3586 3587 s = (UChar* )start; 3588 if (range > start) { /* forward search */ 3589 if (s > str) 3590 prev = onigenc_get_prev_char_head(reg->enc, str, s); 3591 else 3592 prev = (UChar* )NULL; 3593 3594 if (reg->optimize != ONIG_OPTIMIZE_NONE) { 3595 UChar *sch_range, *low, *high, *low_prev; 3596 3597 sch_range = (UChar* )range; 3598 if (reg->dmax != 0) { 3599 if (reg->dmax == ONIG_INFINITE_DISTANCE) 3600 sch_range = (UChar* )end; 3601 else { 3602 sch_range += reg->dmax; 3603 if (sch_range > end) sch_range = (UChar* )end; 3604 } 3605 } 3606 3607 if ((end - start) < reg->threshold_len) 3608 goto mismatch; 3609 3610 if (reg->dmax != ONIG_INFINITE_DISTANCE) { 3611 do { 3612 if (! forward_search_range(reg, str, end, s, sch_range, 3613 &low, &high, &low_prev)) goto mismatch; 3614 if (s < low) { 3615 s = low; 3616 prev = low_prev; 3617 } 3618 while (s <= high) { 3619 MATCH_AND_RETURN_CHECK(orig_range); 3620 prev = s; 3621 s += enclen(reg->enc, s); 3622 } 3623 } while (s < range); 3624 goto mismatch; 3625 } 3626 else { /* check only. */ 3627 if (! forward_search_range(reg, str, end, s, sch_range, 3628 &low, &high, (UChar** )NULL)) goto mismatch; 3629 3630 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { 3631 do { 3632 MATCH_AND_RETURN_CHECK(orig_range); 3633 prev = s; 3634 s += enclen(reg->enc, s); 3635 3636 while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { 3637 prev = s; 3638 s += enclen(reg->enc, s); 3639 } 3640 } while (s < range); 3641 goto mismatch; 3642 } 3643 } 3644 } 3645 3646 do { 3647 MATCH_AND_RETURN_CHECK(orig_range); 3648 prev = s; 3649 s += enclen(reg->enc, s); 3650 } while (s < range); 3651 3652 if (s == range) { /* because empty match with /$/. */ 3653 MATCH_AND_RETURN_CHECK(orig_range); 3654 } 3655 } 3656 else { /* backward search */ 3657 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 3658 if (orig_start < end) 3659 orig_start += enclen(reg->enc, orig_start); /* is upper range */ 3660 #endif 3661 3662 if (reg->optimize != ONIG_OPTIMIZE_NONE) { 3663 UChar *low, *high, *adjrange, *sch_start; 3664 3665 if (range < end) 3666 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range); 3667 else 3668 adjrange = (UChar* )end; 3669 3670 if (reg->dmax != ONIG_INFINITE_DISTANCE && 3671 (end - range) >= reg->threshold_len) { 3672 do { 3673 sch_start = s + reg->dmax; 3674 if (sch_start > end) sch_start = (UChar* )end; 3675 if (backward_search_range(reg, str, end, sch_start, range, adjrange, 3676 &low, &high) <= 0) 3677 goto mismatch; 3678 3679 if (s > high) 3680 s = high; 3681 3682 while (s >= low) { 3683 prev = onigenc_get_prev_char_head(reg->enc, str, s); 3684 MATCH_AND_RETURN_CHECK(orig_start); 3685 s = prev; 3686 } 3687 } while (s >= range); 3688 goto mismatch; 3689 } 3690 else { /* check only. */ 3691 if ((end - range) < reg->threshold_len) goto mismatch; 3692 3693 sch_start = s; 3694 if (reg->dmax != 0) { 3695 if (reg->dmax == ONIG_INFINITE_DISTANCE) 3696 sch_start = (UChar* )end; 3697 else { 3698 sch_start += reg->dmax; 3699 if (sch_start > end) sch_start = (UChar* )end; 3700 else 3701 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, 3702 start, sch_start); 3703 } 3704 } 3705 if (backward_search_range(reg, str, end, sch_start, range, adjrange, 3706 &low, &high) <= 0) goto mismatch; 3707 } 3708 } 3709 3710 do { 3711 prev = onigenc_get_prev_char_head(reg->enc, str, s); 3712 MATCH_AND_RETURN_CHECK(orig_start); 3713 s = prev; 3714 } while (s >= range); 3715 } 3716 3717 mismatch: 3718 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 3719 if (IS_FIND_LONGEST(reg->options)) { 3720 if (msa.best_len >= 0) { 3721 s = msa.best_s; 3722 goto match; 3723 } 3724 } 3725 #endif 3726 r = ONIG_MISMATCH; 3727 3728 finish: 3729 MATCH_ARG_FREE(msa); 3730 ONIG_STATE_DEC_THREAD(reg); 3731 3732 /* If result is mismatch and no FIND_NOT_EMPTY option, 3733 then the region is not setted in match_at(). */ 3734 if (IS_FIND_NOT_EMPTY(reg->options) && region 3735 #ifdef USE_POSIX_API_REGION_OPTION 3736 && !IS_POSIX_REGION(option) 3737 #endif 3738 ) { 3739 onig_region_clear(region); 3740 } 3741 3742 #ifdef ONIG_DEBUG 3743 if (r != ONIG_MISMATCH) 3744 fprintf(stderr, "onig_search: error %d\n", r); 3745 #endif 3746 return r; 3747 3748 mismatch_no_msa: 3749 r = ONIG_MISMATCH; 3750 finish_no_msa: 3751 ONIG_STATE_DEC_THREAD(reg); 3752 #ifdef ONIG_DEBUG 3753 if (r != ONIG_MISMATCH) 3754 fprintf(stderr, "onig_search: error %d\n", r); 3755 #endif 3756 return r; 3757 3758 match: 3759 ONIG_STATE_DEC_THREAD(reg); 3760 MATCH_ARG_FREE(msa); 3761 return (int)(s - str); 3762 } 3763 3764 extern OnigEncoding 3765 onig_get_encoding(regex_t* reg) 3766 { 3767 return reg->enc; 3768 } 3769 3770 extern OnigOptionType 3771 onig_get_options(regex_t* reg) 3772 { 3773 return reg->options; 3774 } 3775 3776 extern OnigCaseFoldType 3777 onig_get_case_fold_flag(regex_t* reg) 3778 { 3779 return reg->case_fold_flag; 3780 } 3781 3782 extern OnigSyntaxType* 3783 onig_get_syntax(regex_t* reg) 3784 { 3785 return reg->syntax; 3786 } 3787 3788 extern int 3789 onig_number_of_captures(regex_t* reg) 3790 { 3791 return reg->num_mem; 3792 } 3793 3794 extern int 3795 onig_number_of_capture_histories(regex_t* reg) 3796 { 3797 #ifdef USE_CAPTURE_HISTORY 3798 int i, n; 3799 3800 n = 0; 3801 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { 3802 if (BIT_STATUS_AT(reg->capture_history, i) != 0) 3803 n++; 3804 } 3805 return n; 3806 #else 3807 return 0; 3808 #endif 3809 } 3810 3811 extern void 3812 onig_copy_encoding(OnigEncoding to, OnigEncoding from) 3813 { 3814 *to = *from; 3815 } 3816 3817