1 /************************************************* 2 * Perl-Compatible Regular Expressions * 3 *************************************************/ 4 5 /* PCRE is a library of functions to support regular expressions whose syntax 6 and semantics are as close as possible to those of the Perl 5 language. 7 8 Written by Philip Hazel 9 Copyright (c) 1997-2013 University of Cambridge 10 11 The machine code generator part (this module) was written by Zoltan Herczeg 12 Copyright (c) 2010-2013 13 14 ----------------------------------------------------------------------------- 15 Redistribution and use in source and binary forms, with or without 16 modification, are permitted provided that the following conditions are met: 17 18 * Redistributions of source code must retain the above copyright notice, 19 this list of conditions and the following disclaimer. 20 21 * Redistributions in binary form must reproduce the above copyright 22 notice, this list of conditions and the following disclaimer in the 23 documentation and/or other materials provided with the distribution. 24 25 * Neither the name of the University of Cambridge nor the names of its 26 contributors may be used to endorse or promote products derived from 27 this software without specific prior written permission. 28 29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 POSSIBILITY OF SUCH DAMAGE. 40 ----------------------------------------------------------------------------- 41 */ 42 43 #ifdef HAVE_CONFIG_H 44 #include "config.h" 45 #endif 46 47 #include "pcre_internal.h" 48 49 #if defined SUPPORT_JIT 50 51 /* All-in-one: Since we use the JIT compiler only from here, 52 we just include it. This way we don't need to touch the build 53 system files. */ 54 55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size) 56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr) 57 #define SLJIT_CONFIG_AUTO 1 58 #define SLJIT_CONFIG_STATIC 1 59 #define SLJIT_VERBOSE 0 60 #define SLJIT_DEBUG 0 61 62 #include "sljit/sljitLir.c" 63 64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED 65 #error Unsupported architecture 66 #endif 67 68 /* Defines for debugging purposes. */ 69 70 /* 1 - Use unoptimized capturing brackets. 71 2 - Enable capture_last_ptr (includes option 1). */ 72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */ 73 74 /* 1 - Always have a control head. */ 75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */ 76 77 /* Allocate memory for the regex stack on the real machine stack. 78 Fast, but limited size. */ 79 #define MACHINE_STACK_SIZE 32768 80 81 /* Growth rate for stack allocated by the OS. Should be the multiply 82 of page size. */ 83 #define STACK_GROWTH_RATE 8192 84 85 /* Enable to check that the allocation could destroy temporaries. 
*/ 86 #if defined SLJIT_DEBUG && SLJIT_DEBUG 87 #define DESTROY_REGISTERS 1 88 #endif 89 90 /* 91 Short summary about the backtracking mechanism empolyed by the jit code generator: 92 93 The code generator follows the recursive nature of the PERL compatible regular 94 expressions. The basic blocks of regular expressions are condition checkers 95 whose execute different commands depending on the result of the condition check. 96 The relationship between the operators can be horizontal (concatenation) and 97 vertical (sub-expression) (See struct backtrack_common for more details). 98 99 'ab' - 'a' and 'b' regexps are concatenated 100 'a+' - 'a' is the sub-expression of the '+' operator 101 102 The condition checkers are boolean (true/false) checkers. Machine code is generated 103 for the checker itself and for the actions depending on the result of the checker. 104 The 'true' case is called as the matching path (expected path), and the other is called as 105 the 'backtrack' path. Branch instructions are expesive for all CPUs, so we avoid taken 106 branches on the matching path. 107 108 Greedy star operator (*) : 109 Matching path: match happens. 110 Backtrack path: match failed. 111 Non-greedy star operator (*?) : 112 Matching path: no need to perform a match. 113 Backtrack path: match is required. 114 115 The following example shows how the code generated for a capturing bracket 116 with two alternatives. 
Let A, B, C, D are arbirary regular expressions, and 117 we have the following regular expression: 118 119 A(B|C)D 120 121 The generated code will be the following: 122 123 A matching path 124 '(' matching path (pushing arguments to the stack) 125 B matching path 126 ')' matching path (pushing arguments to the stack) 127 D matching path 128 return with successful match 129 130 D backtrack path 131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C") 132 B backtrack path 133 C expected path 134 jump to D matching path 135 C backtrack path 136 A backtrack path 137 138 Notice, that the order of backtrack code paths are the opposite of the fast 139 code paths. In this way the topmost value on the stack is always belong 140 to the current backtrack code path. The backtrack path must check 141 whether there is a next alternative. If so, it needs to jump back to 142 the matching path eventually. Otherwise it needs to clear out its own stack 143 frame and continue the execution on the backtrack code paths. 144 */ 145 146 /* 147 Saved stack frames: 148 149 Atomic blocks and asserts require reloading the values of private data 150 when the backtrack mechanism performed. Because of OP_RECURSE, the data 151 are not necessarly known in compile time, thus we need a dynamic restore 152 mechanism. 153 154 The stack frames are stored in a chain list, and have the following format: 155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ] 156 157 Thus we can restore the private data to a particular point in the stack. 158 */ 159 160 typedef struct jit_arguments { 161 /* Pointers first. */ 162 struct sljit_stack *stack; 163 const pcre_uchar *str; 164 const pcre_uchar *begin; 165 const pcre_uchar *end; 166 int *offsets; 167 pcre_uchar *uchar_ptr; 168 pcre_uchar *mark_ptr; 169 void *callout_data; 170 /* Everything else after. 
*/ 171 pcre_uint32 limit_match; 172 int real_offset_count; 173 int offset_count; 174 pcre_uint8 notbol; 175 pcre_uint8 noteol; 176 pcre_uint8 notempty; 177 pcre_uint8 notempty_atstart; 178 } jit_arguments; 179 180 typedef struct executable_functions { 181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES]; 182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES]; 183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; 184 PUBL(jit_callback) callback; 185 void *userdata; 186 pcre_uint32 top_bracket; 187 pcre_uint32 limit_match; 188 } executable_functions; 189 190 typedef struct jump_list { 191 struct sljit_jump *jump; 192 struct jump_list *next; 193 } jump_list; 194 195 typedef struct stub_list { 196 struct sljit_jump *start; 197 struct sljit_label *quit; 198 struct stub_list *next; 199 } stub_list; 200 201 typedef struct label_addr_list { 202 struct sljit_label *label; 203 sljit_uw *update_addr; 204 struct label_addr_list *next; 205 } label_addr_list; 206 207 enum frame_types { 208 no_frame = -1, 209 no_stack = -2 210 }; 211 212 enum control_types { 213 type_mark = 0, 214 type_then_trap = 1 215 }; 216 217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args); 218 219 /* The following structure is the key data type for the recursive 220 code generator. It is allocated by compile_matchingpath, and contains 221 the arguments for compile_backtrackingpath. Must be the first member 222 of its descendants. */ 223 typedef struct backtrack_common { 224 /* Concatenation stack. */ 225 struct backtrack_common *prev; 226 jump_list *nextbacktracks; 227 /* Internal stack (for component operators). */ 228 struct backtrack_common *top; 229 jump_list *topbacktracks; 230 /* Opcode pointer. */ 231 pcre_uchar *cc; 232 } backtrack_common; 233 234 typedef struct assert_backtrack { 235 backtrack_common common; 236 jump_list *condfailed; 237 /* Less than 0 if a frame is not needed. */ 238 int framesize; 239 /* Points to our private memory word on the stack. 
*/ 240 int private_data_ptr; 241 /* For iterators. */ 242 struct sljit_label *matchingpath; 243 } assert_backtrack; 244 245 typedef struct bracket_backtrack { 246 backtrack_common common; 247 /* Where to coninue if an alternative is successfully matched. */ 248 struct sljit_label *alternative_matchingpath; 249 /* For rmin and rmax iterators. */ 250 struct sljit_label *recursive_matchingpath; 251 /* For greedy ? operator. */ 252 struct sljit_label *zero_matchingpath; 253 /* Contains the branches of a failed condition. */ 254 union { 255 /* Both for OP_COND, OP_SCOND. */ 256 jump_list *condfailed; 257 assert_backtrack *assert; 258 /* For OP_ONCE. Less than 0 if not needed. */ 259 int framesize; 260 } u; 261 /* Points to our private memory word on the stack. */ 262 int private_data_ptr; 263 } bracket_backtrack; 264 265 typedef struct bracketpos_backtrack { 266 backtrack_common common; 267 /* Points to our private memory word on the stack. */ 268 int private_data_ptr; 269 /* Reverting stack is needed. */ 270 int framesize; 271 /* Allocated stack size. */ 272 int stacksize; 273 } bracketpos_backtrack; 274 275 typedef struct braminzero_backtrack { 276 backtrack_common common; 277 struct sljit_label *matchingpath; 278 } braminzero_backtrack; 279 280 typedef struct iterator_backtrack { 281 backtrack_common common; 282 /* Next iteration. */ 283 struct sljit_label *matchingpath; 284 } iterator_backtrack; 285 286 typedef struct recurse_entry { 287 struct recurse_entry *next; 288 /* Contains the function entry. */ 289 struct sljit_label *entry; 290 /* Collects the calls until the function is not created. */ 291 jump_list *calls; 292 /* Points to the starting opcode. 
*/ 293 sljit_sw start; 294 } recurse_entry; 295 296 typedef struct recurse_backtrack { 297 backtrack_common common; 298 BOOL inlined_pattern; 299 } recurse_backtrack; 300 301 #define OP_THEN_TRAP OP_TABLE_LENGTH 302 303 typedef struct then_trap_backtrack { 304 backtrack_common common; 305 /* If then_trap is not NULL, this structure contains the real 306 then_trap for the backtracking path. */ 307 struct then_trap_backtrack *then_trap; 308 /* Points to the starting opcode. */ 309 sljit_sw start; 310 /* Exit point for the then opcodes of this alternative. */ 311 jump_list *quit; 312 /* Frame size of the current alternative. */ 313 int framesize; 314 } then_trap_backtrack; 315 316 #define MAX_RANGE_SIZE 4 317 318 typedef struct compiler_common { 319 /* The sljit ceneric compiler. */ 320 struct sljit_compiler *compiler; 321 /* First byte code. */ 322 pcre_uchar *start; 323 /* Maps private data offset to each opcode. */ 324 sljit_si *private_data_ptrs; 325 /* Chain list of read-only data ptrs. */ 326 void *read_only_data_head; 327 /* Tells whether the capturing bracket is optimized. */ 328 pcre_uint8 *optimized_cbracket; 329 /* Tells whether the starting offset is a target of then. */ 330 pcre_uint8 *then_offsets; 331 /* Current position where a THEN must jump. */ 332 then_trap_backtrack *then_trap; 333 /* Starting offset of private data for capturing brackets. */ 334 int cbra_ptr; 335 /* Output vector starting point. Must be divisible by 2. */ 336 int ovector_start; 337 /* Last known position of the requested byte. */ 338 int req_char_ptr; 339 /* Head of the last recursion. */ 340 int recursive_head_ptr; 341 /* First inspected character for partial matching. */ 342 int start_used_ptr; 343 /* Starting pointer for partial soft matches. */ 344 int hit_start; 345 /* End pointer of the first line. */ 346 int first_line_end; 347 /* Points to the marked string. */ 348 int mark_ptr; 349 /* Recursive control verb management chain. 
*/ 350 int control_head_ptr; 351 /* Points to the last matched capture block index. */ 352 int capture_last_ptr; 353 /* Points to the starting position of the current match. */ 354 int start_ptr; 355 356 /* Flipped and lower case tables. */ 357 const pcre_uint8 *fcc; 358 sljit_sw lcc; 359 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */ 360 int mode; 361 /* TRUE, when minlength is greater than 0. */ 362 BOOL might_be_empty; 363 /* \K is found in the pattern. */ 364 BOOL has_set_som; 365 /* (*SKIP:arg) is found in the pattern. */ 366 BOOL has_skip_arg; 367 /* (*THEN) is found in the pattern. */ 368 BOOL has_then; 369 /* Needs to know the start position anytime. */ 370 BOOL needs_start_ptr; 371 /* Currently in recurse or negative assert. */ 372 BOOL local_exit; 373 /* Currently in a positive assert. */ 374 BOOL positive_assert; 375 /* Newline control. */ 376 int nltype; 377 pcre_uint32 nlmax; 378 pcre_uint32 nlmin; 379 int newline; 380 int bsr_nltype; 381 pcre_uint32 bsr_nlmax; 382 pcre_uint32 bsr_nlmin; 383 /* Dollar endonly. */ 384 int endonly; 385 /* Tables. */ 386 sljit_sw ctypes; 387 /* Named capturing brackets. */ 388 pcre_uchar *name_table; 389 sljit_sw name_count; 390 sljit_sw name_entry_size; 391 392 /* Labels and jump lists. 
*/ 393 struct sljit_label *partialmatchlabel; 394 struct sljit_label *quit_label; 395 struct sljit_label *forced_quit_label; 396 struct sljit_label *accept_label; 397 struct sljit_label *ff_newline_shortcut; 398 stub_list *stubs; 399 label_addr_list *label_addrs; 400 recurse_entry *entries; 401 recurse_entry *currententry; 402 jump_list *partialmatch; 403 jump_list *quit; 404 jump_list *positive_assert_quit; 405 jump_list *forced_quit; 406 jump_list *accept; 407 jump_list *calllimit; 408 jump_list *stackalloc; 409 jump_list *revertframes; 410 jump_list *wordboundary; 411 jump_list *anynewline; 412 jump_list *hspace; 413 jump_list *vspace; 414 jump_list *casefulcmp; 415 jump_list *caselesscmp; 416 jump_list *reset_match; 417 BOOL jscript_compat; 418 #ifdef SUPPORT_UTF 419 BOOL utf; 420 #ifdef SUPPORT_UCP 421 BOOL use_ucp; 422 #endif 423 #ifdef COMPILE_PCRE8 424 jump_list *utfreadchar; 425 jump_list *utfreadchar16; 426 jump_list *utfreadtype8; 427 #endif 428 #endif /* SUPPORT_UTF */ 429 #ifdef SUPPORT_UCP 430 jump_list *getucd; 431 #endif 432 } compiler_common; 433 434 /* For byte_sequence_compare. */ 435 436 typedef struct compare_context { 437 int length; 438 int sourcereg; 439 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED 440 int ucharptr; 441 union { 442 sljit_si asint; 443 sljit_uh asushort; 444 #if defined COMPILE_PCRE8 445 sljit_ub asbyte; 446 sljit_ub asuchars[4]; 447 #elif defined COMPILE_PCRE16 448 sljit_uh asuchars[2]; 449 #elif defined COMPILE_PCRE32 450 sljit_ui asuchars[1]; 451 #endif 452 } c; 453 union { 454 sljit_si asint; 455 sljit_uh asushort; 456 #if defined COMPILE_PCRE8 457 sljit_ub asbyte; 458 sljit_ub asuchars[4]; 459 #elif defined COMPILE_PCRE16 460 sljit_uh asuchars[2]; 461 #elif defined COMPILE_PCRE32 462 sljit_ui asuchars[1]; 463 #endif 464 } oc; 465 #endif 466 } compare_context; 467 468 /* Undefine sljit macros. */ 469 #undef CMP 470 471 /* Used for accessing the elements of the stack. 
*/ 472 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw)) 473 474 #define TMP1 SLJIT_R0 475 #define TMP2 SLJIT_R2 476 #define TMP3 SLJIT_R3 477 #define STR_PTR SLJIT_S0 478 #define STR_END SLJIT_S1 479 #define STACK_TOP SLJIT_R1 480 #define STACK_LIMIT SLJIT_S2 481 #define COUNT_MATCH SLJIT_S3 482 #define ARGUMENTS SLJIT_S4 483 #define RETURN_ADDR SLJIT_R4 484 485 /* Local space layout. */ 486 /* These two locals can be used by the current opcode. */ 487 #define LOCALS0 (0 * sizeof(sljit_sw)) 488 #define LOCALS1 (1 * sizeof(sljit_sw)) 489 /* Two local variables for possessive quantifiers (char1 cannot use them). */ 490 #define POSSESSIVE0 (2 * sizeof(sljit_sw)) 491 #define POSSESSIVE1 (3 * sizeof(sljit_sw)) 492 /* Max limit of recursions. */ 493 #define LIMIT_MATCH (4 * sizeof(sljit_sw)) 494 /* The output vector is stored on the stack, and contains pointers 495 to characters. The vector data is divided into two groups: the first 496 group contains the start / end character pointers, and the second is 497 the start pointers when the end of the capturing group has not yet reached. */ 498 #define OVECTOR_START (common->ovector_start) 499 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw)) 500 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw)) 501 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start]) 502 503 #if defined COMPILE_PCRE8 504 #define MOV_UCHAR SLJIT_MOV_UB 505 #define MOVU_UCHAR SLJIT_MOVU_UB 506 #elif defined COMPILE_PCRE16 507 #define MOV_UCHAR SLJIT_MOV_UH 508 #define MOVU_UCHAR SLJIT_MOVU_UH 509 #elif defined COMPILE_PCRE32 510 #define MOV_UCHAR SLJIT_MOV_UI 511 #define MOVU_UCHAR SLJIT_MOVU_UI 512 #else 513 #error Unsupported compiling mode 514 #endif 515 516 /* Shortcuts. 
*/ 517 #define DEFINE_COMPILER \ 518 struct sljit_compiler *compiler = common->compiler 519 #define OP1(op, dst, dstw, src, srcw) \ 520 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw)) 521 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \ 522 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w)) 523 #define LABEL() \ 524 sljit_emit_label(compiler) 525 #define JUMP(type) \ 526 sljit_emit_jump(compiler, (type)) 527 #define JUMPTO(type, label) \ 528 sljit_set_label(sljit_emit_jump(compiler, (type)), (label)) 529 #define JUMPHERE(jump) \ 530 sljit_set_label((jump), sljit_emit_label(compiler)) 531 #define SET_LABEL(jump, label) \ 532 sljit_set_label((jump), (label)) 533 #define CMP(type, src1, src1w, src2, src2w) \ 534 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)) 535 #define CMPTO(type, src1, src1w, src2, src2w, label) \ 536 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label)) 537 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \ 538 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type)) 539 #define GET_LOCAL_BASE(dst, dstw, offset) \ 540 sljit_get_local_base(compiler, (dst), (dstw), (offset)) 541 542 #define READ_CHAR_MAX 0x7fffffff 543 544 static pcre_uchar *bracketend(pcre_uchar *cc) 545 { 546 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); 547 do cc += GET(cc, 1); while (*cc == OP_ALT); 548 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); 549 cc += 1 + LINK_SIZE; 550 return cc; 551 } 552 553 static int no_alternatives(pcre_uchar *cc) 554 { 555 int count = 0; 556 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); 557 do 558 { 559 cc += GET(cc, 1); 560 count++; 561 } 562 while (*cc == OP_ALT); 563 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); 564 return count; 565 } 566 567 static int ones_in_half_byte[16] = { 568 /* 0 */ 0, 1, 1, 2, /* 4 
*/ 1, 2, 2, 3, 569 /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4 570 }; 571 572 /* Functions whose might need modification for all new supported opcodes: 573 next_opcode 574 check_opcode_types 575 set_private_data_ptrs 576 get_framesize 577 init_frame 578 get_private_data_copy_length 579 copy_private_data 580 compile_matchingpath 581 compile_backtrackingpath 582 */ 583 584 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc) 585 { 586 SLJIT_UNUSED_ARG(common); 587 switch(*cc) 588 { 589 case OP_SOD: 590 case OP_SOM: 591 case OP_SET_SOM: 592 case OP_NOT_WORD_BOUNDARY: 593 case OP_WORD_BOUNDARY: 594 case OP_NOT_DIGIT: 595 case OP_DIGIT: 596 case OP_NOT_WHITESPACE: 597 case OP_WHITESPACE: 598 case OP_NOT_WORDCHAR: 599 case OP_WORDCHAR: 600 case OP_ANY: 601 case OP_ALLANY: 602 case OP_NOTPROP: 603 case OP_PROP: 604 case OP_ANYNL: 605 case OP_NOT_HSPACE: 606 case OP_HSPACE: 607 case OP_NOT_VSPACE: 608 case OP_VSPACE: 609 case OP_EXTUNI: 610 case OP_EODN: 611 case OP_EOD: 612 case OP_CIRC: 613 case OP_CIRCM: 614 case OP_DOLL: 615 case OP_DOLLM: 616 case OP_CRSTAR: 617 case OP_CRMINSTAR: 618 case OP_CRPLUS: 619 case OP_CRMINPLUS: 620 case OP_CRQUERY: 621 case OP_CRMINQUERY: 622 case OP_CRRANGE: 623 case OP_CRMINRANGE: 624 case OP_CRPOSSTAR: 625 case OP_CRPOSPLUS: 626 case OP_CRPOSQUERY: 627 case OP_CRPOSRANGE: 628 case OP_CLASS: 629 case OP_NCLASS: 630 case OP_REF: 631 case OP_REFI: 632 case OP_DNREF: 633 case OP_DNREFI: 634 case OP_RECURSE: 635 case OP_CALLOUT: 636 case OP_ALT: 637 case OP_KET: 638 case OP_KETRMAX: 639 case OP_KETRMIN: 640 case OP_KETRPOS: 641 case OP_REVERSE: 642 case OP_ASSERT: 643 case OP_ASSERT_NOT: 644 case OP_ASSERTBACK: 645 case OP_ASSERTBACK_NOT: 646 case OP_ONCE: 647 case OP_ONCE_NC: 648 case OP_BRA: 649 case OP_BRAPOS: 650 case OP_CBRA: 651 case OP_CBRAPOS: 652 case OP_COND: 653 case OP_SBRA: 654 case OP_SBRAPOS: 655 case OP_SCBRA: 656 case OP_SCBRAPOS: 657 case OP_SCOND: 658 case OP_CREF: 659 case OP_DNCREF: 660 case OP_RREF: 661 
case OP_DNRREF: 662 case OP_DEF: 663 case OP_BRAZERO: 664 case OP_BRAMINZERO: 665 case OP_BRAPOSZERO: 666 case OP_PRUNE: 667 case OP_SKIP: 668 case OP_THEN: 669 case OP_COMMIT: 670 case OP_FAIL: 671 case OP_ACCEPT: 672 case OP_ASSERT_ACCEPT: 673 case OP_CLOSE: 674 case OP_SKIPZERO: 675 return cc + PRIV(OP_lengths)[*cc]; 676 677 case OP_CHAR: 678 case OP_CHARI: 679 case OP_NOT: 680 case OP_NOTI: 681 case OP_STAR: 682 case OP_MINSTAR: 683 case OP_PLUS: 684 case OP_MINPLUS: 685 case OP_QUERY: 686 case OP_MINQUERY: 687 case OP_UPTO: 688 case OP_MINUPTO: 689 case OP_EXACT: 690 case OP_POSSTAR: 691 case OP_POSPLUS: 692 case OP_POSQUERY: 693 case OP_POSUPTO: 694 case OP_STARI: 695 case OP_MINSTARI: 696 case OP_PLUSI: 697 case OP_MINPLUSI: 698 case OP_QUERYI: 699 case OP_MINQUERYI: 700 case OP_UPTOI: 701 case OP_MINUPTOI: 702 case OP_EXACTI: 703 case OP_POSSTARI: 704 case OP_POSPLUSI: 705 case OP_POSQUERYI: 706 case OP_POSUPTOI: 707 case OP_NOTSTAR: 708 case OP_NOTMINSTAR: 709 case OP_NOTPLUS: 710 case OP_NOTMINPLUS: 711 case OP_NOTQUERY: 712 case OP_NOTMINQUERY: 713 case OP_NOTUPTO: 714 case OP_NOTMINUPTO: 715 case OP_NOTEXACT: 716 case OP_NOTPOSSTAR: 717 case OP_NOTPOSPLUS: 718 case OP_NOTPOSQUERY: 719 case OP_NOTPOSUPTO: 720 case OP_NOTSTARI: 721 case OP_NOTMINSTARI: 722 case OP_NOTPLUSI: 723 case OP_NOTMINPLUSI: 724 case OP_NOTQUERYI: 725 case OP_NOTMINQUERYI: 726 case OP_NOTUPTOI: 727 case OP_NOTMINUPTOI: 728 case OP_NOTEXACTI: 729 case OP_NOTPOSSTARI: 730 case OP_NOTPOSPLUSI: 731 case OP_NOTPOSQUERYI: 732 case OP_NOTPOSUPTOI: 733 cc += PRIV(OP_lengths)[*cc]; 734 #ifdef SUPPORT_UTF 735 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 736 #endif 737 return cc; 738 739 /* Special cases. 
*/ 740 case OP_TYPESTAR: 741 case OP_TYPEMINSTAR: 742 case OP_TYPEPLUS: 743 case OP_TYPEMINPLUS: 744 case OP_TYPEQUERY: 745 case OP_TYPEMINQUERY: 746 case OP_TYPEUPTO: 747 case OP_TYPEMINUPTO: 748 case OP_TYPEEXACT: 749 case OP_TYPEPOSSTAR: 750 case OP_TYPEPOSPLUS: 751 case OP_TYPEPOSQUERY: 752 case OP_TYPEPOSUPTO: 753 return cc + PRIV(OP_lengths)[*cc] - 1; 754 755 case OP_ANYBYTE: 756 #ifdef SUPPORT_UTF 757 if (common->utf) return NULL; 758 #endif 759 return cc + 1; 760 761 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 762 case OP_XCLASS: 763 return cc + GET(cc, 1); 764 #endif 765 766 case OP_MARK: 767 case OP_PRUNE_ARG: 768 case OP_SKIP_ARG: 769 case OP_THEN_ARG: 770 return cc + 1 + 2 + cc[1]; 771 772 default: 773 /* All opcodes are supported now! */ 774 SLJIT_ASSERT_STOP(); 775 return NULL; 776 } 777 } 778 779 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) 780 { 781 int count; 782 pcre_uchar *slot; 783 784 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */ 785 while (cc < ccend) 786 { 787 switch(*cc) 788 { 789 case OP_SET_SOM: 790 common->has_set_som = TRUE; 791 common->might_be_empty = TRUE; 792 cc += 1; 793 break; 794 795 case OP_REF: 796 case OP_REFI: 797 common->optimized_cbracket[GET2(cc, 1)] = 0; 798 cc += 1 + IMM2_SIZE; 799 break; 800 801 case OP_CBRAPOS: 802 case OP_SCBRAPOS: 803 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0; 804 cc += 1 + LINK_SIZE + IMM2_SIZE; 805 break; 806 807 case OP_COND: 808 case OP_SCOND: 809 /* Only AUTO_CALLOUT can insert this opcode. We do 810 not intend to support this case. 
*/ 811 if (cc[1 + LINK_SIZE] == OP_CALLOUT) 812 return FALSE; 813 cc += 1 + LINK_SIZE; 814 break; 815 816 case OP_CREF: 817 common->optimized_cbracket[GET2(cc, 1)] = 0; 818 cc += 1 + IMM2_SIZE; 819 break; 820 821 case OP_DNREF: 822 case OP_DNREFI: 823 case OP_DNCREF: 824 count = GET2(cc, 1 + IMM2_SIZE); 825 slot = common->name_table + GET2(cc, 1) * common->name_entry_size; 826 while (count-- > 0) 827 { 828 common->optimized_cbracket[GET2(slot, 0)] = 0; 829 slot += common->name_entry_size; 830 } 831 cc += 1 + 2 * IMM2_SIZE; 832 break; 833 834 case OP_RECURSE: 835 /* Set its value only once. */ 836 if (common->recursive_head_ptr == 0) 837 { 838 common->recursive_head_ptr = common->ovector_start; 839 common->ovector_start += sizeof(sljit_sw); 840 } 841 cc += 1 + LINK_SIZE; 842 break; 843 844 case OP_CALLOUT: 845 if (common->capture_last_ptr == 0) 846 { 847 common->capture_last_ptr = common->ovector_start; 848 common->ovector_start += sizeof(sljit_sw); 849 } 850 cc += 2 + 2 * LINK_SIZE; 851 break; 852 853 case OP_THEN_ARG: 854 common->has_then = TRUE; 855 common->control_head_ptr = 1; 856 /* Fall through. */ 857 858 case OP_PRUNE_ARG: 859 common->needs_start_ptr = TRUE; 860 /* Fall through. */ 861 862 case OP_MARK: 863 if (common->mark_ptr == 0) 864 { 865 common->mark_ptr = common->ovector_start; 866 common->ovector_start += sizeof(sljit_sw); 867 } 868 cc += 1 + 2 + cc[1]; 869 break; 870 871 case OP_THEN: 872 common->has_then = TRUE; 873 common->control_head_ptr = 1; 874 /* Fall through. 
*/ 875 876 case OP_PRUNE: 877 case OP_SKIP: 878 common->needs_start_ptr = TRUE; 879 cc += 1; 880 break; 881 882 case OP_SKIP_ARG: 883 common->control_head_ptr = 1; 884 common->has_skip_arg = TRUE; 885 cc += 1 + 2 + cc[1]; 886 break; 887 888 default: 889 cc = next_opcode(common, cc); 890 if (cc == NULL) 891 return FALSE; 892 break; 893 } 894 } 895 return TRUE; 896 } 897 898 static int get_class_iterator_size(pcre_uchar *cc) 899 { 900 switch(*cc) 901 { 902 case OP_CRSTAR: 903 case OP_CRPLUS: 904 return 2; 905 906 case OP_CRMINSTAR: 907 case OP_CRMINPLUS: 908 case OP_CRQUERY: 909 case OP_CRMINQUERY: 910 return 1; 911 912 case OP_CRRANGE: 913 case OP_CRMINRANGE: 914 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE)) 915 return 0; 916 return 2; 917 918 default: 919 return 0; 920 } 921 } 922 923 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin) 924 { 925 pcre_uchar *end = bracketend(begin); 926 pcre_uchar *next; 927 pcre_uchar *next_end; 928 pcre_uchar *max_end; 929 pcre_uchar type; 930 sljit_sw length = end - begin; 931 int min, max, i; 932 933 /* Detect fixed iterations first. */ 934 if (end[-(1 + LINK_SIZE)] != OP_KET) 935 return FALSE; 936 937 /* Already detected repeat. 
*/ 938 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0) 939 return TRUE; 940 941 next = end; 942 min = 1; 943 while (1) 944 { 945 if (*next != *begin) 946 break; 947 next_end = bracketend(next); 948 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0) 949 break; 950 next = next_end; 951 min++; 952 } 953 954 if (min == 2) 955 return FALSE; 956 957 max = 0; 958 max_end = next; 959 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO) 960 { 961 type = *next; 962 while (1) 963 { 964 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin) 965 break; 966 next_end = bracketend(next + 2 + LINK_SIZE); 967 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0) 968 break; 969 next = next_end; 970 max++; 971 } 972 973 if (next[0] == type && next[1] == *begin && max >= 1) 974 { 975 next_end = bracketend(next + 1); 976 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0) 977 { 978 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE) 979 if (*next_end != OP_KET) 980 break; 981 982 if (i == max) 983 { 984 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end; 985 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO; 986 /* +2 the original and the last. 
*/ 987 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2; 988 if (min == 1) 989 return TRUE; 990 min--; 991 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE); 992 } 993 } 994 } 995 } 996 997 if (min >= 3) 998 { 999 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end; 1000 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT; 1001 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min; 1002 return TRUE; 1003 } 1004 1005 return FALSE; 1006 } 1007 1008 #define CASE_ITERATOR_PRIVATE_DATA_1 \ 1009 case OP_MINSTAR: \ 1010 case OP_MINPLUS: \ 1011 case OP_QUERY: \ 1012 case OP_MINQUERY: \ 1013 case OP_MINSTARI: \ 1014 case OP_MINPLUSI: \ 1015 case OP_QUERYI: \ 1016 case OP_MINQUERYI: \ 1017 case OP_NOTMINSTAR: \ 1018 case OP_NOTMINPLUS: \ 1019 case OP_NOTQUERY: \ 1020 case OP_NOTMINQUERY: \ 1021 case OP_NOTMINSTARI: \ 1022 case OP_NOTMINPLUSI: \ 1023 case OP_NOTQUERYI: \ 1024 case OP_NOTMINQUERYI: 1025 1026 #define CASE_ITERATOR_PRIVATE_DATA_2A \ 1027 case OP_STAR: \ 1028 case OP_PLUS: \ 1029 case OP_STARI: \ 1030 case OP_PLUSI: \ 1031 case OP_NOTSTAR: \ 1032 case OP_NOTPLUS: \ 1033 case OP_NOTSTARI: \ 1034 case OP_NOTPLUSI: 1035 1036 #define CASE_ITERATOR_PRIVATE_DATA_2B \ 1037 case OP_UPTO: \ 1038 case OP_MINUPTO: \ 1039 case OP_UPTOI: \ 1040 case OP_MINUPTOI: \ 1041 case OP_NOTUPTO: \ 1042 case OP_NOTMINUPTO: \ 1043 case OP_NOTUPTOI: \ 1044 case OP_NOTMINUPTOI: 1045 1046 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \ 1047 case OP_TYPEMINSTAR: \ 1048 case OP_TYPEMINPLUS: \ 1049 case OP_TYPEQUERY: \ 1050 case OP_TYPEMINQUERY: 1051 1052 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \ 1053 case OP_TYPESTAR: \ 1054 case OP_TYPEPLUS: 1055 1056 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \ 1057 case OP_TYPEUPTO: \ 1058 case OP_TYPEMINUPTO: 1059 1060 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend) 1061 { 1062 pcre_uchar *cc = common->start; 
1063 pcre_uchar *alternative; 1064 pcre_uchar *end = NULL; 1065 int private_data_ptr = *private_data_start; 1066 int space, size, bracketlen; 1067 BOOL repeat_check = TRUE; 1068 1069 while (cc < ccend) 1070 { 1071 space = 0; 1072 size = 0; 1073 bracketlen = 0; 1074 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE) 1075 break; 1076 1077 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)) 1078 { 1079 if (detect_repeat(common, cc)) 1080 { 1081 /* These brackets are converted to repeats, so no global 1082 based single character repeat is allowed. */ 1083 if (cc >= end) 1084 end = bracketend(cc); 1085 } 1086 } 1087 repeat_check = TRUE; 1088 1089 switch(*cc) 1090 { 1091 case OP_KET: 1092 if (common->private_data_ptrs[cc + 1 - common->start] != 0) 1093 { 1094 common->private_data_ptrs[cc - common->start] = private_data_ptr; 1095 private_data_ptr += sizeof(sljit_sw); 1096 cc += common->private_data_ptrs[cc + 1 - common->start]; 1097 } 1098 cc += 1 + LINK_SIZE; 1099 break; 1100 1101 case OP_ASSERT: 1102 case OP_ASSERT_NOT: 1103 case OP_ASSERTBACK: 1104 case OP_ASSERTBACK_NOT: 1105 case OP_ONCE: 1106 case OP_ONCE_NC: 1107 case OP_BRAPOS: 1108 case OP_SBRA: 1109 case OP_SBRAPOS: 1110 case OP_SCOND: 1111 common->private_data_ptrs[cc - common->start] = private_data_ptr; 1112 private_data_ptr += sizeof(sljit_sw); 1113 bracketlen = 1 + LINK_SIZE; 1114 break; 1115 1116 case OP_CBRAPOS: 1117 case OP_SCBRAPOS: 1118 common->private_data_ptrs[cc - common->start] = private_data_ptr; 1119 private_data_ptr += sizeof(sljit_sw); 1120 bracketlen = 1 + LINK_SIZE + IMM2_SIZE; 1121 break; 1122 1123 case OP_COND: 1124 /* Might be a hidden SCOND. 
*/ 1125 alternative = cc + GET(cc, 1); 1126 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) 1127 { 1128 common->private_data_ptrs[cc - common->start] = private_data_ptr; 1129 private_data_ptr += sizeof(sljit_sw); 1130 } 1131 bracketlen = 1 + LINK_SIZE; 1132 break; 1133 1134 case OP_BRA: 1135 bracketlen = 1 + LINK_SIZE; 1136 break; 1137 1138 case OP_CBRA: 1139 case OP_SCBRA: 1140 bracketlen = 1 + LINK_SIZE + IMM2_SIZE; 1141 break; 1142 1143 case OP_BRAZERO: 1144 case OP_BRAMINZERO: 1145 case OP_BRAPOSZERO: 1146 repeat_check = FALSE; 1147 size = 1; 1148 break; 1149 1150 CASE_ITERATOR_PRIVATE_DATA_1 1151 space = 1; 1152 size = -2; 1153 break; 1154 1155 CASE_ITERATOR_PRIVATE_DATA_2A 1156 space = 2; 1157 size = -2; 1158 break; 1159 1160 CASE_ITERATOR_PRIVATE_DATA_2B 1161 space = 2; 1162 size = -(2 + IMM2_SIZE); 1163 break; 1164 1165 CASE_ITERATOR_TYPE_PRIVATE_DATA_1 1166 space = 1; 1167 size = 1; 1168 break; 1169 1170 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A 1171 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI) 1172 space = 2; 1173 size = 1; 1174 break; 1175 1176 case OP_TYPEUPTO: 1177 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI) 1178 space = 2; 1179 size = 1 + IMM2_SIZE; 1180 break; 1181 1182 case OP_TYPEMINUPTO: 1183 space = 2; 1184 size = 1 + IMM2_SIZE; 1185 break; 1186 1187 case OP_CLASS: 1188 case OP_NCLASS: 1189 size += 1 + 32 / sizeof(pcre_uchar); 1190 space = get_class_iterator_size(cc + size); 1191 break; 1192 1193 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 1194 case OP_XCLASS: 1195 size = GET(cc, 1); 1196 space = get_class_iterator_size(cc + size); 1197 break; 1198 #endif 1199 1200 default: 1201 cc = next_opcode(common, cc); 1202 SLJIT_ASSERT(cc != NULL); 1203 break; 1204 } 1205 1206 /* Character iterators, which are not inside a repeated bracket, 1207 gets a private slot instead of allocating it on the stack. 
*/ 1208 if (space > 0 && cc >= end) 1209 { 1210 common->private_data_ptrs[cc - common->start] = private_data_ptr; 1211 private_data_ptr += sizeof(sljit_sw) * space; 1212 } 1213 1214 if (size != 0) 1215 { 1216 if (size < 0) 1217 { 1218 cc += -size; 1219 #ifdef SUPPORT_UTF 1220 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1221 #endif 1222 } 1223 else 1224 cc += size; 1225 } 1226 1227 if (bracketlen > 0) 1228 { 1229 if (cc >= end) 1230 { 1231 end = bracketend(cc); 1232 if (end[-1 - LINK_SIZE] == OP_KET) 1233 end = NULL; 1234 } 1235 cc += bracketlen; 1236 } 1237 } 1238 *private_data_start = private_data_ptr; 1239 } 1240 1241 /* Returns with a frame_types (always < 0) if no need for frame. */ 1242 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head) 1243 { 1244 int length = 0; 1245 int possessive = 0; 1246 BOOL stack_restore = FALSE; 1247 BOOL setsom_found = recursive; 1248 BOOL setmark_found = recursive; 1249 /* The last capture is a local variable even for recursions. */ 1250 BOOL capture_last_found = FALSE; 1251 1252 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD 1253 SLJIT_ASSERT(common->control_head_ptr != 0); 1254 *needs_control_head = TRUE; 1255 #else 1256 *needs_control_head = FALSE; 1257 #endif 1258 1259 if (ccend == NULL) 1260 { 1261 ccend = bracketend(cc) - (1 + LINK_SIZE); 1262 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)) 1263 { 1264 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3; 1265 /* This is correct regardless of common->capture_last_ptr. 
*/ 1266 capture_last_found = TRUE; 1267 } 1268 cc = next_opcode(common, cc); 1269 } 1270 1271 SLJIT_ASSERT(cc != NULL); 1272 while (cc < ccend) 1273 switch(*cc) 1274 { 1275 case OP_SET_SOM: 1276 SLJIT_ASSERT(common->has_set_som); 1277 stack_restore = TRUE; 1278 if (!setsom_found) 1279 { 1280 length += 2; 1281 setsom_found = TRUE; 1282 } 1283 cc += 1; 1284 break; 1285 1286 case OP_MARK: 1287 case OP_PRUNE_ARG: 1288 case OP_THEN_ARG: 1289 SLJIT_ASSERT(common->mark_ptr != 0); 1290 stack_restore = TRUE; 1291 if (!setmark_found) 1292 { 1293 length += 2; 1294 setmark_found = TRUE; 1295 } 1296 if (common->control_head_ptr != 0) 1297 *needs_control_head = TRUE; 1298 cc += 1 + 2 + cc[1]; 1299 break; 1300 1301 case OP_RECURSE: 1302 stack_restore = TRUE; 1303 if (common->has_set_som && !setsom_found) 1304 { 1305 length += 2; 1306 setsom_found = TRUE; 1307 } 1308 if (common->mark_ptr != 0 && !setmark_found) 1309 { 1310 length += 2; 1311 setmark_found = TRUE; 1312 } 1313 if (common->capture_last_ptr != 0 && !capture_last_found) 1314 { 1315 length += 2; 1316 capture_last_found = TRUE; 1317 } 1318 cc += 1 + LINK_SIZE; 1319 break; 1320 1321 case OP_CBRA: 1322 case OP_CBRAPOS: 1323 case OP_SCBRA: 1324 case OP_SCBRAPOS: 1325 stack_restore = TRUE; 1326 if (common->capture_last_ptr != 0 && !capture_last_found) 1327 { 1328 length += 2; 1329 capture_last_found = TRUE; 1330 } 1331 length += 3; 1332 cc += 1 + LINK_SIZE + IMM2_SIZE; 1333 break; 1334 1335 case OP_THEN: 1336 stack_restore = TRUE; 1337 if (common->control_head_ptr != 0) 1338 *needs_control_head = TRUE; 1339 cc ++; 1340 break; 1341 1342 default: 1343 stack_restore = TRUE; 1344 /* Fall through. 
*/ 1345 1346 case OP_NOT_WORD_BOUNDARY: 1347 case OP_WORD_BOUNDARY: 1348 case OP_NOT_DIGIT: 1349 case OP_DIGIT: 1350 case OP_NOT_WHITESPACE: 1351 case OP_WHITESPACE: 1352 case OP_NOT_WORDCHAR: 1353 case OP_WORDCHAR: 1354 case OP_ANY: 1355 case OP_ALLANY: 1356 case OP_ANYBYTE: 1357 case OP_NOTPROP: 1358 case OP_PROP: 1359 case OP_ANYNL: 1360 case OP_NOT_HSPACE: 1361 case OP_HSPACE: 1362 case OP_NOT_VSPACE: 1363 case OP_VSPACE: 1364 case OP_EXTUNI: 1365 case OP_EODN: 1366 case OP_EOD: 1367 case OP_CIRC: 1368 case OP_CIRCM: 1369 case OP_DOLL: 1370 case OP_DOLLM: 1371 case OP_CHAR: 1372 case OP_CHARI: 1373 case OP_NOT: 1374 case OP_NOTI: 1375 1376 case OP_EXACT: 1377 case OP_POSSTAR: 1378 case OP_POSPLUS: 1379 case OP_POSQUERY: 1380 case OP_POSUPTO: 1381 1382 case OP_EXACTI: 1383 case OP_POSSTARI: 1384 case OP_POSPLUSI: 1385 case OP_POSQUERYI: 1386 case OP_POSUPTOI: 1387 1388 case OP_NOTEXACT: 1389 case OP_NOTPOSSTAR: 1390 case OP_NOTPOSPLUS: 1391 case OP_NOTPOSQUERY: 1392 case OP_NOTPOSUPTO: 1393 1394 case OP_NOTEXACTI: 1395 case OP_NOTPOSSTARI: 1396 case OP_NOTPOSPLUSI: 1397 case OP_NOTPOSQUERYI: 1398 case OP_NOTPOSUPTOI: 1399 1400 case OP_TYPEEXACT: 1401 case OP_TYPEPOSSTAR: 1402 case OP_TYPEPOSPLUS: 1403 case OP_TYPEPOSQUERY: 1404 case OP_TYPEPOSUPTO: 1405 1406 case OP_CLASS: 1407 case OP_NCLASS: 1408 case OP_XCLASS: 1409 1410 cc = next_opcode(common, cc); 1411 SLJIT_ASSERT(cc != NULL); 1412 break; 1413 } 1414 1415 /* Possessive quantifiers can use a special case. */ 1416 if (SLJIT_UNLIKELY(possessive == length)) 1417 return stack_restore ? no_frame : no_stack; 1418 1419 if (length > 0) 1420 return length + 1; 1421 return stack_restore ? no_frame : no_stack; 1422 } 1423 1424 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive) 1425 { 1426 DEFINE_COMPILER; 1427 BOOL setsom_found = recursive; 1428 BOOL setmark_found = recursive; 1429 /* The last capture is a local variable even for recursions. 
*/ 1430 BOOL capture_last_found = FALSE; 1431 int offset; 1432 1433 /* >= 1 + shortest item size (2) */ 1434 SLJIT_UNUSED_ARG(stacktop); 1435 SLJIT_ASSERT(stackpos >= stacktop + 2); 1436 1437 stackpos = STACK(stackpos); 1438 if (ccend == NULL) 1439 { 1440 ccend = bracketend(cc) - (1 + LINK_SIZE); 1441 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)) 1442 cc = next_opcode(common, cc); 1443 } 1444 1445 SLJIT_ASSERT(cc != NULL); 1446 while (cc < ccend) 1447 switch(*cc) 1448 { 1449 case OP_SET_SOM: 1450 SLJIT_ASSERT(common->has_set_som); 1451 if (!setsom_found) 1452 { 1453 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); 1454 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); 1455 stackpos += (int)sizeof(sljit_sw); 1456 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); 1457 stackpos += (int)sizeof(sljit_sw); 1458 setsom_found = TRUE; 1459 } 1460 cc += 1; 1461 break; 1462 1463 case OP_MARK: 1464 case OP_PRUNE_ARG: 1465 case OP_THEN_ARG: 1466 SLJIT_ASSERT(common->mark_ptr != 0); 1467 if (!setmark_found) 1468 { 1469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); 1470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); 1471 stackpos += (int)sizeof(sljit_sw); 1472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); 1473 stackpos += (int)sizeof(sljit_sw); 1474 setmark_found = TRUE; 1475 } 1476 cc += 1 + 2 + cc[1]; 1477 break; 1478 1479 case OP_RECURSE: 1480 if (common->has_set_som && !setsom_found) 1481 { 1482 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); 1483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); 1484 stackpos += (int)sizeof(sljit_sw); 1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); 1486 stackpos += (int)sizeof(sljit_sw); 1487 setsom_found = TRUE; 1488 } 1489 if (common->mark_ptr != 0 && !setmark_found) 1490 { 1491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); 1492 OP1(SLJIT_MOV, 
SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); 1493 stackpos += (int)sizeof(sljit_sw); 1494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); 1495 stackpos += (int)sizeof(sljit_sw); 1496 setmark_found = TRUE; 1497 } 1498 if (common->capture_last_ptr != 0 && !capture_last_found) 1499 { 1500 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); 1501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); 1502 stackpos += (int)sizeof(sljit_sw); 1503 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); 1504 stackpos += (int)sizeof(sljit_sw); 1505 capture_last_found = TRUE; 1506 } 1507 cc += 1 + LINK_SIZE; 1508 break; 1509 1510 case OP_CBRA: 1511 case OP_CBRAPOS: 1512 case OP_SCBRA: 1513 case OP_SCBRAPOS: 1514 if (common->capture_last_ptr != 0 && !capture_last_found) 1515 { 1516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); 1517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); 1518 stackpos += (int)sizeof(sljit_sw); 1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); 1520 stackpos += (int)sizeof(sljit_sw); 1521 capture_last_found = TRUE; 1522 } 1523 offset = (GET2(cc, 1 + LINK_SIZE)) << 1; 1524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset)); 1525 stackpos += (int)sizeof(sljit_sw); 1526 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); 1527 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); 1528 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); 1529 stackpos += (int)sizeof(sljit_sw); 1530 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); 1531 stackpos += (int)sizeof(sljit_sw); 1532 1533 cc += 1 + LINK_SIZE + IMM2_SIZE; 1534 break; 1535 1536 default: 1537 cc = next_opcode(common, cc); 1538 SLJIT_ASSERT(cc != NULL); 1539 break; 1540 } 1541 1542 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0); 1543 SLJIT_ASSERT(stackpos == 
STACK(stacktop)); 1544 } 1545 1546 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head) 1547 { 1548 int private_data_length = needs_control_head ? 3 : 2; 1549 int size; 1550 pcre_uchar *alternative; 1551 /* Calculate the sum of the private machine words. */ 1552 while (cc < ccend) 1553 { 1554 size = 0; 1555 switch(*cc) 1556 { 1557 case OP_KET: 1558 if (PRIVATE_DATA(cc) != 0) 1559 { 1560 private_data_length++; 1561 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); 1562 cc += PRIVATE_DATA(cc + 1); 1563 } 1564 cc += 1 + LINK_SIZE; 1565 break; 1566 1567 case OP_ASSERT: 1568 case OP_ASSERT_NOT: 1569 case OP_ASSERTBACK: 1570 case OP_ASSERTBACK_NOT: 1571 case OP_ONCE: 1572 case OP_ONCE_NC: 1573 case OP_BRAPOS: 1574 case OP_SBRA: 1575 case OP_SBRAPOS: 1576 case OP_SCOND: 1577 private_data_length++; 1578 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0); 1579 cc += 1 + LINK_SIZE; 1580 break; 1581 1582 case OP_CBRA: 1583 case OP_SCBRA: 1584 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) 1585 private_data_length++; 1586 cc += 1 + LINK_SIZE + IMM2_SIZE; 1587 break; 1588 1589 case OP_CBRAPOS: 1590 case OP_SCBRAPOS: 1591 private_data_length += 2; 1592 cc += 1 + LINK_SIZE + IMM2_SIZE; 1593 break; 1594 1595 case OP_COND: 1596 /* Might be a hidden SCOND. 
*/ 1597 alternative = cc + GET(cc, 1); 1598 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) 1599 private_data_length++; 1600 cc += 1 + LINK_SIZE; 1601 break; 1602 1603 CASE_ITERATOR_PRIVATE_DATA_1 1604 if (PRIVATE_DATA(cc)) 1605 private_data_length++; 1606 cc += 2; 1607 #ifdef SUPPORT_UTF 1608 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1609 #endif 1610 break; 1611 1612 CASE_ITERATOR_PRIVATE_DATA_2A 1613 if (PRIVATE_DATA(cc)) 1614 private_data_length += 2; 1615 cc += 2; 1616 #ifdef SUPPORT_UTF 1617 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1618 #endif 1619 break; 1620 1621 CASE_ITERATOR_PRIVATE_DATA_2B 1622 if (PRIVATE_DATA(cc)) 1623 private_data_length += 2; 1624 cc += 2 + IMM2_SIZE; 1625 #ifdef SUPPORT_UTF 1626 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1627 #endif 1628 break; 1629 1630 CASE_ITERATOR_TYPE_PRIVATE_DATA_1 1631 if (PRIVATE_DATA(cc)) 1632 private_data_length++; 1633 cc += 1; 1634 break; 1635 1636 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A 1637 if (PRIVATE_DATA(cc)) 1638 private_data_length += 2; 1639 cc += 1; 1640 break; 1641 1642 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B 1643 if (PRIVATE_DATA(cc)) 1644 private_data_length += 2; 1645 cc += 1 + IMM2_SIZE; 1646 break; 1647 1648 case OP_CLASS: 1649 case OP_NCLASS: 1650 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 1651 case OP_XCLASS: 1652 size = (*cc == OP_XCLASS) ? 
GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar); 1653 #else 1654 size = 1 + 32 / (int)sizeof(pcre_uchar); 1655 #endif 1656 if (PRIVATE_DATA(cc)) 1657 private_data_length += get_class_iterator_size(cc + size); 1658 cc += size; 1659 break; 1660 1661 default: 1662 cc = next_opcode(common, cc); 1663 SLJIT_ASSERT(cc != NULL); 1664 break; 1665 } 1666 } 1667 SLJIT_ASSERT(cc == ccend); 1668 return private_data_length; 1669 } 1670 1671 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, 1672 BOOL save, int stackptr, int stacktop, BOOL needs_control_head) 1673 { 1674 DEFINE_COMPILER; 1675 int srcw[2]; 1676 int count, size; 1677 BOOL tmp1next = TRUE; 1678 BOOL tmp1empty = TRUE; 1679 BOOL tmp2empty = TRUE; 1680 pcre_uchar *alternative; 1681 enum { 1682 start, 1683 loop, 1684 end 1685 } status; 1686 1687 status = save ? start : loop; 1688 stackptr = STACK(stackptr - 2); 1689 stacktop = STACK(stacktop - 1); 1690 1691 if (!save) 1692 { 1693 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw); 1694 if (stackptr < stacktop) 1695 { 1696 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr); 1697 stackptr += sizeof(sljit_sw); 1698 tmp1empty = FALSE; 1699 } 1700 if (stackptr < stacktop) 1701 { 1702 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr); 1703 stackptr += sizeof(sljit_sw); 1704 tmp2empty = FALSE; 1705 } 1706 /* The tmp1next must be TRUE in either way. 
*/ 1707 } 1708 1709 do 1710 { 1711 count = 0; 1712 switch(status) 1713 { 1714 case start: 1715 SLJIT_ASSERT(save && common->recursive_head_ptr != 0); 1716 count = 1; 1717 srcw[0] = common->recursive_head_ptr; 1718 if (needs_control_head) 1719 { 1720 SLJIT_ASSERT(common->control_head_ptr != 0); 1721 count = 2; 1722 srcw[1] = common->control_head_ptr; 1723 } 1724 status = loop; 1725 break; 1726 1727 case loop: 1728 if (cc >= ccend) 1729 { 1730 status = end; 1731 break; 1732 } 1733 1734 switch(*cc) 1735 { 1736 case OP_KET: 1737 if (PRIVATE_DATA(cc) != 0) 1738 { 1739 count = 1; 1740 srcw[0] = PRIVATE_DATA(cc); 1741 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); 1742 cc += PRIVATE_DATA(cc + 1); 1743 } 1744 cc += 1 + LINK_SIZE; 1745 break; 1746 1747 case OP_ASSERT: 1748 case OP_ASSERT_NOT: 1749 case OP_ASSERTBACK: 1750 case OP_ASSERTBACK_NOT: 1751 case OP_ONCE: 1752 case OP_ONCE_NC: 1753 case OP_BRAPOS: 1754 case OP_SBRA: 1755 case OP_SBRAPOS: 1756 case OP_SCOND: 1757 count = 1; 1758 srcw[0] = PRIVATE_DATA(cc); 1759 SLJIT_ASSERT(srcw[0] != 0); 1760 cc += 1 + LINK_SIZE; 1761 break; 1762 1763 case OP_CBRA: 1764 case OP_SCBRA: 1765 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) 1766 { 1767 count = 1; 1768 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); 1769 } 1770 cc += 1 + LINK_SIZE + IMM2_SIZE; 1771 break; 1772 1773 case OP_CBRAPOS: 1774 case OP_SCBRAPOS: 1775 count = 2; 1776 srcw[0] = PRIVATE_DATA(cc); 1777 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); 1778 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0); 1779 cc += 1 + LINK_SIZE + IMM2_SIZE; 1780 break; 1781 1782 case OP_COND: 1783 /* Might be a hidden SCOND. 
*/ 1784 alternative = cc + GET(cc, 1); 1785 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) 1786 { 1787 count = 1; 1788 srcw[0] = PRIVATE_DATA(cc); 1789 SLJIT_ASSERT(srcw[0] != 0); 1790 } 1791 cc += 1 + LINK_SIZE; 1792 break; 1793 1794 CASE_ITERATOR_PRIVATE_DATA_1 1795 if (PRIVATE_DATA(cc)) 1796 { 1797 count = 1; 1798 srcw[0] = PRIVATE_DATA(cc); 1799 } 1800 cc += 2; 1801 #ifdef SUPPORT_UTF 1802 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1803 #endif 1804 break; 1805 1806 CASE_ITERATOR_PRIVATE_DATA_2A 1807 if (PRIVATE_DATA(cc)) 1808 { 1809 count = 2; 1810 srcw[0] = PRIVATE_DATA(cc); 1811 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); 1812 } 1813 cc += 2; 1814 #ifdef SUPPORT_UTF 1815 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1816 #endif 1817 break; 1818 1819 CASE_ITERATOR_PRIVATE_DATA_2B 1820 if (PRIVATE_DATA(cc)) 1821 { 1822 count = 2; 1823 srcw[0] = PRIVATE_DATA(cc); 1824 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); 1825 } 1826 cc += 2 + IMM2_SIZE; 1827 #ifdef SUPPORT_UTF 1828 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1829 #endif 1830 break; 1831 1832 CASE_ITERATOR_TYPE_PRIVATE_DATA_1 1833 if (PRIVATE_DATA(cc)) 1834 { 1835 count = 1; 1836 srcw[0] = PRIVATE_DATA(cc); 1837 } 1838 cc += 1; 1839 break; 1840 1841 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A 1842 if (PRIVATE_DATA(cc)) 1843 { 1844 count = 2; 1845 srcw[0] = PRIVATE_DATA(cc); 1846 srcw[1] = srcw[0] + sizeof(sljit_sw); 1847 } 1848 cc += 1; 1849 break; 1850 1851 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B 1852 if (PRIVATE_DATA(cc)) 1853 { 1854 count = 2; 1855 srcw[0] = PRIVATE_DATA(cc); 1856 srcw[1] = srcw[0] + sizeof(sljit_sw); 1857 } 1858 cc += 1 + IMM2_SIZE; 1859 break; 1860 1861 case OP_CLASS: 1862 case OP_NCLASS: 1863 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 1864 case OP_XCLASS: 1865 size = (*cc == OP_XCLASS) ? 
GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar); 1866 #else 1867 size = 1 + 32 / (int)sizeof(pcre_uchar); 1868 #endif 1869 if (PRIVATE_DATA(cc)) 1870 switch(get_class_iterator_size(cc + size)) 1871 { 1872 case 1: 1873 count = 1; 1874 srcw[0] = PRIVATE_DATA(cc); 1875 break; 1876 1877 case 2: 1878 count = 2; 1879 srcw[0] = PRIVATE_DATA(cc); 1880 srcw[1] = srcw[0] + sizeof(sljit_sw); 1881 break; 1882 1883 default: 1884 SLJIT_ASSERT_STOP(); 1885 break; 1886 } 1887 cc += size; 1888 break; 1889 1890 default: 1891 cc = next_opcode(common, cc); 1892 SLJIT_ASSERT(cc != NULL); 1893 break; 1894 } 1895 break; 1896 1897 case end: 1898 SLJIT_ASSERT_STOP(); 1899 break; 1900 } 1901 1902 while (count > 0) 1903 { 1904 count--; 1905 if (save) 1906 { 1907 if (tmp1next) 1908 { 1909 if (!tmp1empty) 1910 { 1911 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); 1912 stackptr += sizeof(sljit_sw); 1913 } 1914 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]); 1915 tmp1empty = FALSE; 1916 tmp1next = FALSE; 1917 } 1918 else 1919 { 1920 if (!tmp2empty) 1921 { 1922 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); 1923 stackptr += sizeof(sljit_sw); 1924 } 1925 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]); 1926 tmp2empty = FALSE; 1927 tmp1next = TRUE; 1928 } 1929 } 1930 else 1931 { 1932 if (tmp1next) 1933 { 1934 SLJIT_ASSERT(!tmp1empty); 1935 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0); 1936 tmp1empty = stackptr >= stacktop; 1937 if (!tmp1empty) 1938 { 1939 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr); 1940 stackptr += sizeof(sljit_sw); 1941 } 1942 tmp1next = FALSE; 1943 } 1944 else 1945 { 1946 SLJIT_ASSERT(!tmp2empty); 1947 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0); 1948 tmp2empty = stackptr >= stacktop; 1949 if (!tmp2empty) 1950 { 1951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr); 1952 stackptr += sizeof(sljit_sw); 1953 } 1954 tmp1next = TRUE; 1955 } 1956 } 1957 } 1958 } 1959 while (status != end); 
1960 1961 if (save) 1962 { 1963 if (tmp1next) 1964 { 1965 if (!tmp1empty) 1966 { 1967 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); 1968 stackptr += sizeof(sljit_sw); 1969 } 1970 if (!tmp2empty) 1971 { 1972 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); 1973 stackptr += sizeof(sljit_sw); 1974 } 1975 } 1976 else 1977 { 1978 if (!tmp2empty) 1979 { 1980 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); 1981 stackptr += sizeof(sljit_sw); 1982 } 1983 if (!tmp1empty) 1984 { 1985 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); 1986 stackptr += sizeof(sljit_sw); 1987 } 1988 } 1989 } 1990 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty))); 1991 } 1992 1993 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset) 1994 { 1995 pcre_uchar *end = bracketend(cc); 1996 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT; 1997 1998 /* Assert captures then. */ 1999 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) 2000 current_offset = NULL; 2001 /* Conditional block does not. 
*/ 2002 if (*cc == OP_COND || *cc == OP_SCOND) 2003 has_alternatives = FALSE; 2004 2005 cc = next_opcode(common, cc); 2006 if (has_alternatives) 2007 current_offset = common->then_offsets + (cc - common->start); 2008 2009 while (cc < end) 2010 { 2011 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)) 2012 cc = set_then_offsets(common, cc, current_offset); 2013 else 2014 { 2015 if (*cc == OP_ALT && has_alternatives) 2016 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start); 2017 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL) 2018 *current_offset = 1; 2019 cc = next_opcode(common, cc); 2020 } 2021 } 2022 2023 return end; 2024 } 2025 2026 #undef CASE_ITERATOR_PRIVATE_DATA_1 2027 #undef CASE_ITERATOR_PRIVATE_DATA_2A 2028 #undef CASE_ITERATOR_PRIVATE_DATA_2B 2029 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1 2030 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A 2031 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B 2032 2033 static SLJIT_INLINE BOOL is_powerof2(unsigned int value) 2034 { 2035 return (value & (value - 1)) == 0; 2036 } 2037 2038 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label) 2039 { 2040 while (list) 2041 { 2042 /* sljit_set_label is clever enough to do nothing 2043 if either the jump or the label is NULL. 
*/ 2044 SET_LABEL(list->jump, label); 2045 list = list->next; 2046 } 2047 } 2048 2049 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump) 2050 { 2051 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list)); 2052 if (list_item) 2053 { 2054 list_item->next = *list; 2055 list_item->jump = jump; 2056 *list = list_item; 2057 } 2058 } 2059 2060 static void add_stub(compiler_common *common, struct sljit_jump *start) 2061 { 2062 DEFINE_COMPILER; 2063 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list)); 2064 2065 if (list_item) 2066 { 2067 list_item->start = start; 2068 list_item->quit = LABEL(); 2069 list_item->next = common->stubs; 2070 common->stubs = list_item; 2071 } 2072 } 2073 2074 static void flush_stubs(compiler_common *common) 2075 { 2076 DEFINE_COMPILER; 2077 stub_list *list_item = common->stubs; 2078 2079 while (list_item) 2080 { 2081 JUMPHERE(list_item->start); 2082 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL)); 2083 JUMPTO(SLJIT_JUMP, list_item->quit); 2084 list_item = list_item->next; 2085 } 2086 common->stubs = NULL; 2087 } 2088 2089 static void add_label_addr(compiler_common *common, sljit_uw *update_addr) 2090 { 2091 DEFINE_COMPILER; 2092 label_addr_list *label_addr; 2093 2094 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list)); 2095 if (label_addr == NULL) 2096 return; 2097 label_addr->label = LABEL(); 2098 label_addr->update_addr = update_addr; 2099 label_addr->next = common->label_addrs; 2100 common->label_addrs = label_addr; 2101 } 2102 2103 static SLJIT_INLINE void count_match(compiler_common *common) 2104 { 2105 DEFINE_COMPILER; 2106 2107 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1); 2108 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO)); 2109 } 2110 2111 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size) 2112 { 2113 /* May destroy all locals and registers except TMP2. 
*/ 2114 DEFINE_COMPILER; 2115 2116 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); 2117 #ifdef DESTROY_REGISTERS 2118 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345); 2119 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); 2120 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); 2121 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0); 2122 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0); 2123 #endif 2124 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0)); 2125 } 2126 2127 static SLJIT_INLINE void free_stack(compiler_common *common, int size) 2128 { 2129 DEFINE_COMPILER; 2130 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); 2131 } 2132 2133 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size) 2134 { 2135 DEFINE_COMPILER; 2136 sljit_uw *result; 2137 2138 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 2139 return NULL; 2140 2141 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data); 2142 if (SLJIT_UNLIKELY(result == NULL)) 2143 { 2144 sljit_set_compiler_memory_error(compiler); 2145 return NULL; 2146 } 2147 2148 *(void**)result = common->read_only_data_head; 2149 common->read_only_data_head = (void *)result; 2150 return result + 1; 2151 } 2152 2153 static void free_read_only_data(void *current, void *allocator_data) 2154 { 2155 void *next; 2156 2157 SLJIT_UNUSED_ARG(allocator_data); 2158 2159 while (current != NULL) 2160 { 2161 next = *(void**)current; 2162 SLJIT_FREE(current, allocator_data); 2163 current = next; 2164 } 2165 } 2166 2167 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length) 2168 { 2169 DEFINE_COMPILER; 2170 struct sljit_label *loop; 2171 int i; 2172 2173 /* At this point we can freely use all temporary registers. */ 2174 SLJIT_ASSERT(length > 1); 2175 /* TMP1 returns with begin - 1. 
*/ 2176 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1)); 2177 if (length < 8) 2178 { 2179 for (i = 1; i < length; i++) 2180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0); 2181 } 2182 else 2183 { 2184 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START); 2185 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1); 2186 loop = LABEL(); 2187 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0); 2188 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1); 2189 JUMPTO(SLJIT_NOT_ZERO, loop); 2190 } 2191 } 2192 2193 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length) 2194 { 2195 DEFINE_COMPILER; 2196 struct sljit_label *loop; 2197 int i; 2198 2199 SLJIT_ASSERT(length > 1); 2200 /* OVECTOR(1) contains the "string begin - 1" constant. */ 2201 if (length > 2) 2202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); 2203 if (length < 8) 2204 { 2205 for (i = 2; i < length; i++) 2206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0); 2207 } 2208 else 2209 { 2210 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw)); 2211 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2); 2212 loop = LABEL(); 2213 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0); 2214 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1); 2215 JUMPTO(SLJIT_NOT_ZERO, loop); 2216 } 2217 2218 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0); 2219 if (common->mark_ptr != 0) 2220 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0); 2221 if (common->control_head_ptr != 0) 2222 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); 2223 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack)); 2224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); 2225 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base)); 2226 } 2227 2228 static 
sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg) 2229 { 2230 while (current != NULL) 2231 { 2232 switch (current[-2]) 2233 { 2234 case type_then_trap: 2235 break; 2236 2237 case type_mark: 2238 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0) 2239 return current[-4]; 2240 break; 2241 2242 default: 2243 SLJIT_ASSERT_STOP(); 2244 break; 2245 } 2246 SLJIT_ASSERT(current > (sljit_sw*)current[-1]); 2247 current = (sljit_sw*)current[-1]; 2248 } 2249 return -1; 2250 } 2251 2252 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket) 2253 { 2254 DEFINE_COMPILER; 2255 struct sljit_label *loop; 2256 struct sljit_jump *early_quit; 2257 2258 /* At this point we can freely use all registers. */ 2259 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); 2260 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0); 2261 2262 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); 2263 if (common->mark_ptr != 0) 2264 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); 2265 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count)); 2266 if (common->mark_ptr != 0) 2267 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0); 2268 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int)); 2269 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin)); 2270 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START); 2271 /* Unlikely, but possible */ 2272 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0); 2273 loop = LABEL(); 2274 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); 2275 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw)); 2276 /* Copy the integer value to the output buffer */ 2277 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 2278 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT); 
2279 #endif 2280 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0); 2281 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); 2282 JUMPTO(SLJIT_NOT_ZERO, loop); 2283 JUMPHERE(early_quit); 2284 2285 /* Calculate the return value, which is the maximum ovector value. */ 2286 if (topbracket > 1) 2287 { 2288 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw)); 2289 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1); 2290 2291 /* OVECTOR(0) is never equal to SLJIT_S2. */ 2292 loop = LABEL(); 2293 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))); 2294 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); 2295 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop); 2296 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0); 2297 } 2298 else 2299 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); 2300 } 2301 2302 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit) 2303 { 2304 DEFINE_COMPILER; 2305 struct sljit_jump *jump; 2306 2307 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2); 2308 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0 2309 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0)); 2310 2311 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0); 2312 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL); 2313 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count)); 2314 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit); 2315 2316 /* Store match begin and end. 
*/ 2317 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin)); 2318 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets)); 2319 2320 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3); 2321 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0); 2322 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 2323 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT); 2324 #endif 2325 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0); 2326 JUMPHERE(jump); 2327 2328 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start); 2329 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0); 2330 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 2331 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT); 2332 #endif 2333 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0); 2334 2335 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0); 2336 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 2337 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT); 2338 #endif 2339 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0); 2340 2341 JUMPTO(SLJIT_JUMP, quit); 2342 } 2343 2344 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common) 2345 { 2346 /* May destroy TMP1. */ 2347 DEFINE_COMPILER; 2348 struct sljit_jump *jump; 2349 2350 if (common->mode == JIT_PARTIAL_SOFT_COMPILE) 2351 { 2352 /* The value of -1 must be kept for start_used_ptr! */ 2353 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1); 2354 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting 2355 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. 
*/ 2356 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0); 2357 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); 2358 JUMPHERE(jump); 2359 } 2360 else if (common->mode == JIT_PARTIAL_HARD_COMPILE) 2361 { 2362 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); 2363 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); 2364 JUMPHERE(jump); 2365 } 2366 } 2367 2368 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc) 2369 { 2370 /* Detects if the character has an othercase. */ 2371 unsigned int c; 2372 2373 #ifdef SUPPORT_UTF 2374 if (common->utf) 2375 { 2376 GETCHAR(c, cc); 2377 if (c > 127) 2378 { 2379 #ifdef SUPPORT_UCP 2380 return c != UCD_OTHERCASE(c); 2381 #else 2382 return FALSE; 2383 #endif 2384 } 2385 #ifndef COMPILE_PCRE8 2386 return common->fcc[c] != c; 2387 #endif 2388 } 2389 else 2390 #endif 2391 c = *cc; 2392 return MAX_255(c) ? common->fcc[c] != c : FALSE; 2393 } 2394 2395 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c) 2396 { 2397 /* Returns with the othercase. */ 2398 #ifdef SUPPORT_UTF 2399 if (common->utf && c > 127) 2400 { 2401 #ifdef SUPPORT_UCP 2402 return UCD_OTHERCASE(c); 2403 #else 2404 return c; 2405 #endif 2406 } 2407 #endif 2408 return TABLE_GET(c, common->fcc, c); 2409 } 2410 2411 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc) 2412 { 2413 /* Detects if the character and its othercase has only 1 bit difference. 
*/ 2414 unsigned int c, oc, bit; 2415 #if defined SUPPORT_UTF && defined COMPILE_PCRE8 2416 int n; 2417 #endif 2418 2419 #ifdef SUPPORT_UTF 2420 if (common->utf) 2421 { 2422 GETCHAR(c, cc); 2423 if (c <= 127) 2424 oc = common->fcc[c]; 2425 else 2426 { 2427 #ifdef SUPPORT_UCP 2428 oc = UCD_OTHERCASE(c); 2429 #else 2430 oc = c; 2431 #endif 2432 } 2433 } 2434 else 2435 { 2436 c = *cc; 2437 oc = TABLE_GET(c, common->fcc, c); 2438 } 2439 #else 2440 c = *cc; 2441 oc = TABLE_GET(c, common->fcc, c); 2442 #endif 2443 2444 SLJIT_ASSERT(c != oc); 2445 2446 bit = c ^ oc; 2447 /* Optimized for English alphabet. */ 2448 if (c <= 127 && bit == 0x20) 2449 return (0 << 8) | 0x20; 2450 2451 /* Since c != oc, they must have at least 1 bit difference. */ 2452 if (!is_powerof2(bit)) 2453 return 0; 2454 2455 #if defined COMPILE_PCRE8 2456 2457 #ifdef SUPPORT_UTF 2458 if (common->utf && c > 127) 2459 { 2460 n = GET_EXTRALEN(*cc); 2461 while ((bit & 0x3f) == 0) 2462 { 2463 n--; 2464 bit >>= 6; 2465 } 2466 return (n << 8) | bit; 2467 } 2468 #endif /* SUPPORT_UTF */ 2469 return (0 << 8) | bit; 2470 2471 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 2472 2473 #ifdef SUPPORT_UTF 2474 if (common->utf && c > 65535) 2475 { 2476 if (bit >= (1 << 10)) 2477 bit >>= 10; 2478 else 2479 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8)); 2480 } 2481 #endif /* SUPPORT_UTF */ 2482 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8)); 2483 2484 #endif /* COMPILE_PCRE[8|16|32] */ 2485 } 2486 2487 static void check_partial(compiler_common *common, BOOL force) 2488 { 2489 /* Checks whether a partial matching is occurred. Does not modify registers. 
*/ 2490 DEFINE_COMPILER; 2491 struct sljit_jump *jump = NULL; 2492 2493 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE); 2494 2495 if (common->mode == JIT_COMPILE) 2496 return; 2497 2498 if (!force) 2499 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); 2500 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE) 2501 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); 2502 2503 if (common->mode == JIT_PARTIAL_SOFT_COMPILE) 2504 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); 2505 else 2506 { 2507 if (common->partialmatchlabel != NULL) 2508 JUMPTO(SLJIT_JUMP, common->partialmatchlabel); 2509 else 2510 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); 2511 } 2512 2513 if (jump != NULL) 2514 JUMPHERE(jump); 2515 } 2516 2517 static void check_str_end(compiler_common *common, jump_list **end_reached) 2518 { 2519 /* Does not affect registers. Usually used in a tight spot. */ 2520 DEFINE_COMPILER; 2521 struct sljit_jump *jump; 2522 2523 if (common->mode == JIT_COMPILE) 2524 { 2525 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); 2526 return; 2527 } 2528 2529 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); 2530 if (common->mode == JIT_PARTIAL_SOFT_COMPILE) 2531 { 2532 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); 2533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); 2534 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP)); 2535 } 2536 else 2537 { 2538 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); 2539 if (common->partialmatchlabel != NULL) 2540 JUMPTO(SLJIT_JUMP, common->partialmatchlabel); 2541 else 2542 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); 2543 } 2544 JUMPHERE(jump); 2545 } 2546 2547 static void detect_partial_match(compiler_common *common, 
jump_list **backtracks) 2548 { 2549 DEFINE_COMPILER; 2550 struct sljit_jump *jump; 2551 2552 if (common->mode == JIT_COMPILE) 2553 { 2554 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); 2555 return; 2556 } 2557 2558 /* Partial matching mode. */ 2559 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); 2560 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); 2561 if (common->mode == JIT_PARTIAL_SOFT_COMPILE) 2562 { 2563 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); 2564 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); 2565 } 2566 else 2567 { 2568 if (common->partialmatchlabel != NULL) 2569 JUMPTO(SLJIT_JUMP, common->partialmatchlabel); 2570 else 2571 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); 2572 } 2573 JUMPHERE(jump); 2574 } 2575 2576 static void peek_char(compiler_common *common, pcre_uint32 max) 2577 { 2578 /* Reads the character into TMP1, keeps STR_PTR. 2579 Does not check STR_END. TMP2 Destroyed. 
*/ 2580 DEFINE_COMPILER; 2581 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 2582 struct sljit_jump *jump; 2583 #endif 2584 2585 SLJIT_UNUSED_ARG(max); 2586 2587 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); 2588 #if defined SUPPORT_UTF && defined COMPILE_PCRE8 2589 if (common->utf) 2590 { 2591 if (max < 128) return; 2592 2593 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); 2594 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2595 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); 2596 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 2597 JUMPHERE(jump); 2598 } 2599 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ 2600 2601 #if defined SUPPORT_UTF && defined COMPILE_PCRE16 2602 if (common->utf) 2603 { 2604 if (max < 0xd800) return; 2605 2606 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); 2607 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); 2608 /* TMP2 contains the high surrogate. */ 2609 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2610 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40); 2611 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); 2612 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); 2613 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2614 JUMPHERE(jump); 2615 } 2616 #endif 2617 } 2618 2619 #if defined SUPPORT_UTF && defined COMPILE_PCRE8 2620 2621 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass) 2622 { 2623 /* Tells whether the character codes below 128 are enough 2624 to determine a match. */ 2625 const pcre_uint8 value = nclass ? 0xff : 0; 2626 const pcre_uint8 *end = bitset + 32; 2627 2628 bitset += 16; 2629 do 2630 { 2631 if (*bitset++ != value) 2632 return FALSE; 2633 } 2634 while (bitset < end); 2635 return TRUE; 2636 } 2637 2638 static void read_char7_type(compiler_common *common, BOOL full_read) 2639 { 2640 /* Reads the precise character type of a character into TMP1, if the character 2641 is less than 128. Otherwise it returns with zero. 
Does not check STR_END. The 2642 full_read argument tells whether characters above max are accepted or not. */ 2643 DEFINE_COMPILER; 2644 struct sljit_jump *jump; 2645 2646 SLJIT_ASSERT(common->utf); 2647 2648 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); 2649 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2650 2651 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); 2652 2653 if (full_read) 2654 { 2655 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0); 2656 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); 2657 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 2658 JUMPHERE(jump); 2659 } 2660 } 2661 2662 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */ 2663 2664 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr) 2665 { 2666 /* Reads the precise value of a character into TMP1, if the character is 2667 between min and max (c >= min && c <= max). Otherwise it returns with a value 2668 outside the range. Does not check STR_END. 
*/ 2669 DEFINE_COMPILER; 2670 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 2671 struct sljit_jump *jump; 2672 #endif 2673 #if defined SUPPORT_UTF && defined COMPILE_PCRE8 2674 struct sljit_jump *jump2; 2675 #endif 2676 2677 SLJIT_UNUSED_ARG(update_str_ptr); 2678 SLJIT_UNUSED_ARG(min); 2679 SLJIT_UNUSED_ARG(max); 2680 SLJIT_ASSERT(min <= max); 2681 2682 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2683 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2684 2685 #if defined SUPPORT_UTF && defined COMPILE_PCRE8 2686 if (common->utf) 2687 { 2688 if (max < 128 && !update_str_ptr) return; 2689 2690 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); 2691 if (min >= 0x10000) 2692 { 2693 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0); 2694 if (update_str_ptr) 2695 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); 2696 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2697 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7); 2698 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); 2699 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); 2700 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2701 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 2702 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2703 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2704 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2705 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); 2706 if (!update_str_ptr) 2707 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); 2708 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2709 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2710 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2711 JUMPHERE(jump2); 2712 if (update_str_ptr) 2713 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); 2714 } 2715 else if (min >= 0x800 && max <= 0xffff) 2716 { 2717 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0); 2718 if (update_str_ptr) 2719 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, 
SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); 2720 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2721 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf); 2722 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); 2723 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); 2724 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2725 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 2726 if (!update_str_ptr) 2727 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); 2728 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2729 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2730 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2731 JUMPHERE(jump2); 2732 if (update_str_ptr) 2733 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); 2734 } 2735 else if (max >= 0x800) 2736 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); 2737 else if (max < 128) 2738 { 2739 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); 2740 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 2741 } 2742 else 2743 { 2744 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2745 if (!update_str_ptr) 2746 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2747 else 2748 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); 2749 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); 2750 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2751 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2752 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2753 if (update_str_ptr) 2754 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); 2755 } 2756 JUMPHERE(jump); 2757 } 2758 #endif 2759 2760 #if defined SUPPORT_UTF && defined COMPILE_PCRE16 2761 if (common->utf) 2762 { 2763 if (max >= 0x10000) 2764 { 2765 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); 2766 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); 2767 /* TMP2 contains the high 
surrogate. */ 2768 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2769 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40); 2770 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); 2771 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2772 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); 2773 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2774 JUMPHERE(jump); 2775 return; 2776 } 2777 2778 if (max < 0xd800 && !update_str_ptr) return; 2779 2780 /* Skip low surrogate if necessary. */ 2781 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); 2782 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); 2783 if (update_str_ptr) 2784 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2785 if (max >= 0xd800) 2786 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000); 2787 JUMPHERE(jump); 2788 } 2789 #endif 2790 } 2791 2792 static SLJIT_INLINE void read_char(compiler_common *common) 2793 { 2794 read_char_range(common, 0, READ_CHAR_MAX, TRUE); 2795 } 2796 2797 static void read_char8_type(compiler_common *common, BOOL update_str_ptr) 2798 { 2799 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */ 2800 DEFINE_COMPILER; 2801 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 2802 struct sljit_jump *jump; 2803 #endif 2804 #if defined SUPPORT_UTF && defined COMPILE_PCRE8 2805 struct sljit_jump *jump2; 2806 #endif 2807 2808 SLJIT_UNUSED_ARG(update_str_ptr); 2809 2810 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); 2811 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2812 2813 #if defined SUPPORT_UTF && defined COMPILE_PCRE8 2814 if (common->utf) 2815 { 2816 /* This can be an extra read in some situations, but hopefully 2817 it is needed in most cases. 
*/ 2818 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); 2819 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0); 2820 if (!update_str_ptr) 2821 { 2822 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2823 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2824 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2825 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); 2826 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); 2827 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); 2828 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); 2829 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255); 2830 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); 2831 JUMPHERE(jump2); 2832 } 2833 else 2834 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL)); 2835 JUMPHERE(jump); 2836 return; 2837 } 2838 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */ 2839 2840 #if !defined COMPILE_PCRE8 2841 /* The ctypes array contains only 256 values. */ 2842 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); 2843 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255); 2844 #endif 2845 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); 2846 #if !defined COMPILE_PCRE8 2847 JUMPHERE(jump); 2848 #endif 2849 2850 #if defined SUPPORT_UTF && defined COMPILE_PCRE16 2851 if (common->utf && update_str_ptr) 2852 { 2853 /* Skip low surrogate if necessary. */ 2854 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800); 2855 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); 2856 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2857 JUMPHERE(jump); 2858 } 2859 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */ 2860 } 2861 2862 static void skip_char_back(compiler_common *common) 2863 { 2864 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. 
*/ 2865 DEFINE_COMPILER; 2866 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 2867 #if defined COMPILE_PCRE8 2868 struct sljit_label *label; 2869 2870 if (common->utf) 2871 { 2872 label = LABEL(); 2873 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); 2874 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2875 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); 2876 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label); 2877 return; 2878 } 2879 #elif defined COMPILE_PCRE16 2880 if (common->utf) 2881 { 2882 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); 2883 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2884 /* Skip low surrogate if necessary. */ 2885 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); 2886 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00); 2887 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); 2888 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); 2889 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); 2890 return; 2891 } 2892 #endif /* COMPILE_PCRE[8|16] */ 2893 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ 2894 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2895 } 2896 2897 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch) 2898 { 2899 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */ 2900 DEFINE_COMPILER; 2901 struct sljit_jump *jump; 2902 2903 if (nltype == NLTYPE_ANY) 2904 { 2905 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); 2906 add_jump(compiler, backtracks, JUMP(jumpifmatch ? 
SLJIT_NOT_ZERO : SLJIT_ZERO)); 2907 } 2908 else if (nltype == NLTYPE_ANYCRLF) 2909 { 2910 if (jumpifmatch) 2911 { 2912 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR)); 2913 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); 2914 } 2915 else 2916 { 2917 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); 2918 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); 2919 JUMPHERE(jump); 2920 } 2921 } 2922 else 2923 { 2924 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256); 2925 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); 2926 } 2927 } 2928 2929 #ifdef SUPPORT_UTF 2930 2931 #if defined COMPILE_PCRE8 2932 static void do_utfreadchar(compiler_common *common) 2933 { 2934 /* Fast decoding a UTF-8 character. TMP1 contains the first byte 2935 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */ 2936 DEFINE_COMPILER; 2937 struct sljit_jump *jump; 2938 2939 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 2940 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2941 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); 2942 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2943 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2944 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2945 2946 /* Searching for the first zero. */ 2947 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); 2948 jump = JUMP(SLJIT_NOT_ZERO); 2949 /* Two byte sequence. 
*/ 2950 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2)); 2952 sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 2953 2954 JUMPHERE(jump); 2955 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 2956 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800); 2957 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2958 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2959 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2960 2961 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000); 2962 jump = JUMP(SLJIT_NOT_ZERO); 2963 /* Three byte sequence. */ 2964 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); 2965 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3)); 2966 sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 2967 2968 /* Four byte sequence. */ 2969 JUMPHERE(jump); 2970 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); 2971 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); 2972 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2973 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); 2974 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2975 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2976 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4)); 2977 sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 2978 } 2979 2980 static void do_utfreadchar16(compiler_common *common) 2981 { 2982 /* Fast decoding a UTF-8 character. TMP1 contains the first byte 2983 of the character (>= 0xc0). Return value in TMP1. */ 2984 DEFINE_COMPILER; 2985 struct sljit_jump *jump; 2986 2987 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 2988 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2989 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); 2990 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2991 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2992 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2993 2994 /* Searching for the first zero. 
*/ 2995 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); 2996 jump = JUMP(SLJIT_NOT_ZERO); 2997 /* Two byte sequence. */ 2998 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2999 sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 3000 3001 JUMPHERE(jump); 3002 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400); 3003 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO); 3004 /* This code runs only in 8 bit mode. No need to shift the value. */ 3005 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 3006 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 3007 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800); 3008 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 3009 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 3010 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 3011 /* Three byte sequence. */ 3012 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); 3013 sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 3014 } 3015 3016 static void do_utfreadtype8(compiler_common *common) 3017 { 3018 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte 3019 of the character (>= 0xc0). Return value in TMP1. */ 3020 DEFINE_COMPILER; 3021 struct sljit_jump *jump; 3022 struct sljit_jump *compare; 3023 3024 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 3025 3026 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20); 3027 jump = JUMP(SLJIT_NOT_ZERO); 3028 /* Two byte sequence. */ 3029 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 3030 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 3031 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f); 3032 /* The upper 5 bits are known at this point. 
*/ 3033 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3); 3034 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); 3035 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); 3036 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); 3037 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); 3038 sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 3039 3040 JUMPHERE(compare); 3041 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); 3042 sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 3043 3044 /* We only have types for characters less than 256. */ 3045 JUMPHERE(jump); 3046 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); 3047 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); 3048 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 3049 sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 3050 } 3051 3052 #endif /* COMPILE_PCRE8 */ 3053 3054 #endif /* SUPPORT_UTF */ 3055 3056 #ifdef SUPPORT_UCP 3057 3058 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */ 3059 #define UCD_BLOCK_MASK 127 3060 #define UCD_BLOCK_SHIFT 7 3061 3062 static void do_getucd(compiler_common *common) 3063 { 3064 /* Search the UCD record for the character comes in TMP1. 3065 Returns chartype in TMP1 and UCD offset in TMP2. 
*/ 3066 DEFINE_COMPILER; 3067 3068 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8); 3069 3070 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 3071 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); 3072 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); 3073 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); 3074 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); 3075 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); 3076 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); 3077 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); 3078 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); 3079 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); 3080 sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 3081 } 3082 #endif 3083 3084 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline) 3085 { 3086 DEFINE_COMPILER; 3087 struct sljit_label *mainloop; 3088 struct sljit_label *newlinelabel = NULL; 3089 struct sljit_jump *start; 3090 struct sljit_jump *end = NULL; 3091 struct sljit_jump *nl = NULL; 3092 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 3093 struct sljit_jump *singlechar; 3094 #endif 3095 jump_list *newline = NULL; 3096 BOOL newlinecheck = FALSE; 3097 BOOL readuchar = FALSE; 3098 3099 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY || 3100 common->nltype == NLTYPE_ANYCRLF || common->newline > 255)) 3101 newlinecheck = TRUE; 3102 3103 if (firstline) 3104 { 3105 /* Search for the end of the first line. 
*/ 3106 SLJIT_ASSERT(common->first_line_end != 0); 3107 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); 3108 3109 if (common->nltype == NLTYPE_FIXED && common->newline > 255) 3110 { 3111 mainloop = LABEL(); 3112 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 3113 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); 3114 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); 3115 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 3116 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop); 3117 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop); 3118 JUMPHERE(end); 3119 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 3120 } 3121 else 3122 { 3123 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); 3124 mainloop = LABEL(); 3125 /* Continual stores does not cause data dependency. */ 3126 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0); 3127 read_char_range(common, common->nlmin, common->nlmax, TRUE); 3128 check_newlinechar(common, common->nltype, &newline, TRUE); 3129 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop); 3130 JUMPHERE(end); 3131 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0); 3132 set_jumps(newline, LABEL()); 3133 } 3134 3135 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); 3136 } 3137 3138 start = JUMP(SLJIT_JUMP); 3139 3140 if (newlinecheck) 3141 { 3142 newlinelabel = LABEL(); 3143 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 3144 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); 3145 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); 3146 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff); 3147 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); 3148 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 3149 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); 3150 #endif 3151 OP2(SLJIT_ADD, STR_PTR, 0, 
STR_PTR, 0, TMP1, 0); 3152 nl = JUMP(SLJIT_JUMP); 3153 } 3154 3155 mainloop = LABEL(); 3156 3157 /* Increasing the STR_PTR here requires one less jump in the most common case. */ 3158 #ifdef SUPPORT_UTF 3159 if (common->utf) readuchar = TRUE; 3160 #endif 3161 if (newlinecheck) readuchar = TRUE; 3162 3163 if (readuchar) 3164 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); 3165 3166 if (newlinecheck) 3167 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel); 3168 3169 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 3170 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 3171 #if defined COMPILE_PCRE8 3172 if (common->utf) 3173 { 3174 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); 3175 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); 3176 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); 3177 JUMPHERE(singlechar); 3178 } 3179 #elif defined COMPILE_PCRE16 3180 if (common->utf) 3181 { 3182 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); 3183 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); 3184 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); 3185 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); 3186 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); 3187 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); 3188 JUMPHERE(singlechar); 3189 } 3190 #endif /* COMPILE_PCRE[8|16] */ 3191 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ 3192 JUMPHERE(start); 3193 3194 if (newlinecheck) 3195 { 3196 JUMPHERE(end); 3197 JUMPHERE(nl); 3198 } 3199 3200 return mainloop; 3201 } 3202 3203 #define MAX_N_CHARS 16 3204 #define MAX_N_BYTES 8 3205 3206 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes) 3207 { 3208 pcre_uint8 len = bytes[0]; 3209 int i; 3210 3211 if (len == 255) 3212 return; 3213 3214 if (len == 0) 3215 { 3216 bytes[0] = 1; 3217 bytes[1] = byte; 3218 return; 3219 } 3220 3221 for (i = len; i > 0; i--) 3222 if (bytes[i] == 
static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
{
/* Recursive function, which scans prefix literals.

Walks the compiled pattern starting at cc and records, for at most max_chars
leading character positions, which code units may occur there.

  chars     - two entries per position: chars[0] is a value and chars[1] a
              mask of the bits that may differ between the alternatives seen
              so far. A position whose chars[0] is NOTACHAR is treated as
              not written yet.
  bytes     - MAX_N_BYTES entries per position, maintained through
              add_prefix_byte(); bytes[0] is set to 255 for positions that
              accept an unrestricted character.
  max_chars - number of positions that may still be filled in.
  rec_count - work budget shared by all recursive invocations; it is
              decremented once per loop iteration.

Returns the number of positions filled in by this invocation, or 0 when
*rec_count is exhausted. */
BOOL last, any, caseless;
int len, repeat, len_save, consumed = 0;
pcre_uint32 chr, mask;
pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the other case form of the current character. */
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
pcre_uchar othercase[8];
#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
pcre_uchar othercase[2];
#else
pcre_uchar othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  /* Give up entirely when the shared budget has been used up. */
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* Defaults for one opcode: stop after it (last), it is a literal (not
  any), and it is case sensitive. The cases below adjust these. */
  last = TRUE;
  any = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through. */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    /* The whole assertion is stepped over. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through. */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* One occurrence is recorded; last stays TRUE so the scan stops after it. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through. */
    case OP_EXACT:
    /* The literal is recorded repeat times. */
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through. */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    len = 1;
    cc++;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    /* First scan what follows the optional character into the same
    positions; the result becomes the new position limit. The character
    itself is merged in by the literal code after the switch. */
    max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* Skip to the location given by the link value stored in this OP_ALT. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every further alternative is scanned recursively into the same
    positions; each result lowers the position limit. The first alternative
    is then scanned by this loop. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
#endif
    any = TRUE;
    cc += 1 + 32 / sizeof(pcre_uchar);
    break;

    case OP_NCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 32 / sizeof(pcre_uchar);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UCP
    case OP_NOTPROP:
    case OP_PROP:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only the count is taken here; the opcode that follows is handled by
    the next iteration with this repeat value. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    /* Any other opcode ends the prefix. */
    return consumed;
    }

  if (any)
    {
    /* Unrestricted position: every bit of the code unit may differ. */
#if defined COMPILE_PCRE8
    mask = 0xff;
#elif defined COMPILE_PCRE16
    mask = 0xffff;
#elif defined COMPILE_PCRE32
    mask = 0xffffffff;
#else
    SLJIT_ASSERT_STOP();
#endif

    do
      {
      chars[0] = mask;
      chars[1] = mask;
      bytes[0] = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += 2;
      bytes += MAX_N_BYTES;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  /* Literal character: len is its length in code units. */
  len = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UTF
    if (common->utf)
      {
      GETCHAR(chr, cc);
      /* The scan ends here when the other case form has a different
      encoded length. */
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
      othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    caseless = FALSE;

  /* The literal is recorded repeat times; cc and len are rewound for every
  repetition. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      chr = *cc;
#ifdef COMPILE_PCRE32
      if (SLJIT_UNLIKELY(chr == NOTACHAR))
        return consumed;
#endif
      add_prefix_byte((pcre_uint8)chr, bytes);

      mask = 0;
      if (caseless)
        {
        add_prefix_byte((pcre_uint8)*oc, bytes);
        mask = *cc ^ *oc;
        chr |= mask;
        }

      /* First write to this position stores the value directly; later
      writes (coming from other alternatives) widen the mask instead. */
#ifdef COMPILE_PCRE32
      if (chars[0] == NOTACHAR && chars[1] == 0)
#else
      if (chars[0] == NOTACHAR)
#endif
        {
        chars[0] = chr;
        chars[1] = mask;
        }
      else
        {
        mask |= chars[0] ^ chr;
        chr |= mask;
        chars[0] = chr;
        chars[1] |= mask;
        }

      len--;
      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += 2;
      bytes += MAX_N_BYTES;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
{
/* Emits a fast-forward loop built from the leading characters of the
pattern, as collected by scan_prefix().

Returns FALSE when nothing was emitted (fewer than two positions were
collected, or no usable position exists). Returns TRUE otherwise; note that
TRUE is also returned, without emitting code, when the skip table cannot be
allocated (presumably the failure is recorded by allocate_read_only_data -
confirm against its definition).

While the emitted loop runs, STR_END is lowered by (max - 1) characters and
restored afterwards. With firstline set, the real STR_END is kept in TMP3. */
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
pcre_uint32 chars[MAX_N_CHARS * 2];
pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
pcre_uint8 ones[MAX_N_CHARS];
int offsets[3];
pcre_uint32 mask;
pcre_uint8 *byte_set, *byte_set_end;
int i, max, from;
int range_right = -1, range_len = 3 - 1;
sljit_ub *update_table = NULL;
BOOL in_range;
pcre_uint32 rec_count;

/* Mark every position as "not written yet" and every byte set as empty. */
for (i = 0; i < MAX_N_CHARS; i++)
  {
  chars[i << 1] = NOTACHAR;
  chars[(i << 1) + 1] = 0;
  bytes[i * MAX_N_BYTES] = 0;
  }

/* rec_count bounds the total work done by scan_prefix. */
rec_count = 10000;
max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);

if (max <= 1)
  return FALSE;

/* ones[i] is the sum of ones_in_half_byte[] over all nibbles of the mask of
position i (presumably the number of set bits in the mask). */
for (i = 0; i < max; i++)
  {
  mask = chars[(i << 1) + 1];
  ones[i] = ones_in_half_byte[mask & 0xf];
  mask >>= 4;
  while (mask != 0)
    {
    ones[i] += ones_in_half_byte[mask & 0xf];
    mask >>= 4;
    }
  }

/* Look for the longest run of consecutive positions whose byte set is known
(not 255). A run is accepted only when it is longer than the current
range_len and its last position has at most 4 bytes in its set. range_right
is the index of the last position of the accepted run. */
in_range = FALSE;
from = 0; /* Prevent compiler "uninitialized" warning */
for (i = 0; i <= max; i++)
  {
  if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
    {
    range_len = i - from;
    range_right = i - 1;
    }

  if (i < max && bytes[i * MAX_N_BYTES] < 255)
    {
    if (!in_range)
      {
      in_range = TRUE;
      from = i;
      }
    }
  else if (in_range)
    in_range = FALSE;
  }

if (range_right >= 0)
  {
  /* Build a 256 entry skip table indexed by a byte value. The default skip
  is the whole run length; a byte that is in the set of the position i steps
  left of range_right gets a skip of at most i characters. */
  update_table = (sljit_ub *)allocate_read_only_data(common, 256);
  if (update_table == NULL)
    return TRUE;
  memset(update_table, IN_UCHARS(range_len), 256);

  for (i = 0; i < range_len; i++)
    {
    byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
    SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
    byte_set_end = byte_set + byte_set[0];
    byte_set++;
    while (byte_set <= byte_set_end)
      {
      if (update_table[*byte_set] > IN_UCHARS(i))
        update_table[*byte_set] = IN_UCHARS(i);
      byte_set++;
      }
    }
  }

/* Choose up to three positions whose mask has at most two counted bits:
offsets[0] is the leftmost, offsets[1] the rightmost, offsets[2] one in
between. */
offsets[0] = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
  if (ones[i] <= 2) {
    offsets[0] = i;
    break;
  }

if (offsets[0] < 0 && range_right < 0)
  return FALSE;

if (offsets[0] >= 0)
  {
  /* Scan backward. */
  offsets[1] = -1;
  for (i = max - 1; i > offsets[0]; i--)
    if (ones[i] <= 2 && i != range_right)
      {
      offsets[1] = i;
      break;
      }

  /* This case is handled better by fast_forward_first_char. */
  if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
    return FALSE;

  offsets[2] = -1;
  /* We only search for a middle character if there is no range check. */
  if (offsets[1] >= 0 && range_right == -1)
    {
    /* Scan from middle. */
    for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
      if (ones[i] <= 2)
        {
        offsets[2] = i;
        break;
        }

    if (offsets[2] == -1)
      {
      for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
        if (ones[i] <= 2)
          {
          offsets[2] = i;
          break;
          }
      }
    }

  SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
  SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));

  /* Move the chosen value/mask pairs to fixed slots: chars[0..1] for
  offsets[0], chars[2..3] for offsets[2], chars[4..5] for offsets[1]. */
  chars[0] = chars[offsets[0] << 1];
  chars[1] = chars[(offsets[0] << 1) + 1];
  if (offsets[2] >= 0)
    {
    chars[2] = chars[offsets[2] << 1];
    chars[3] = chars[(offsets[2] << 1) + 1];
    }
  if (offsets[1] >= 0)
    {
    chars[4] = chars[offsets[1] << 1];
    chars[5] = chars[(offsets[1] << 1) + 1];
    }
  }

/* From here on max is the distance (in characters) between the first and
the last collected position. STR_END is lowered by that amount so that the
loads at positive offsets below stay before the original end. */
max -= 1;
if (firstline)
  {
  SLJIT_ASSERT(common->first_line_end != 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  /* Use the smaller of the lowered STR_END and first_line_end. */
  quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
  OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
  JUMPHERE(quit);
  }
else
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));

/* Except on x86-32, the table address is kept in RETURN_ADDR. */
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
if (range_right >= 0)
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
#endif

start = LABEL();
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);

if (range_right >= 0)
  {
  /* Load one byte of the character at position range_right (the offset is
  adjusted when the build is neither 8 bit nor little endian), look up its
  skip, advance STR_PTR by it and retry while the skip is not zero. */
#if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
#else
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
  }

if (offsets[0] >= 0)
  {
  /* Compare the chosen positions. STR_PTR is advanced by one character
  before the comparisons, so every failed comparison restarts the loop one
  character further; the advance is undone after a full match. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
  if (offsets[1] >= 0)
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[1] != 0)
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
  /* STR_PTR has already been advanced, hence the "- 1". */
  if (offsets[2] >= 0)
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));

  if (offsets[1] >= 0)
    {
    if (chars[5] != 0)
      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
    }

  if (offsets[2] >= 0)
    {
    if (chars[3] != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
    }
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

JUMPHERE(quit);

/* Restore STR_END. */
if (firstline)
  {
  if (range_right >= 0)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  if (range_right >= 0)
    {
    /* Clamp STR_PTR to first_line_end when the table skip moved past it. */
    quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
    JUMPHERE(quit);
    }
  }
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}

#undef MAX_N_CHARS
#undef MAX_N_BYTES
BOOL firstline) 3818 { 3819 DEFINE_COMPILER; 3820 struct sljit_label *start; 3821 struct sljit_jump *quit; 3822 struct sljit_jump *found; 3823 pcre_uchar oc, bit; 3824 3825 if (firstline) 3826 { 3827 SLJIT_ASSERT(common->first_line_end != 0); 3828 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); 3829 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end); 3830 } 3831 3832 start = LABEL(); 3833 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); 3834 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); 3835 3836 oc = first_char; 3837 if (caseless) 3838 { 3839 oc = TABLE_GET(first_char, common->fcc, first_char); 3840 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) 3841 if (first_char > 127 && common->utf) 3842 oc = UCD_OTHERCASE(first_char); 3843 #endif 3844 } 3845 if (first_char == oc) 3846 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char); 3847 else 3848 { 3849 bit = first_char ^ oc; 3850 if (is_powerof2(bit)) 3851 { 3852 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit); 3853 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit); 3854 } 3855 else 3856 { 3857 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char); 3858 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); 3859 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc); 3860 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 3861 found = JUMP(SLJIT_NOT_ZERO); 3862 } 3863 } 3864 3865 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 3866 JUMPTO(SLJIT_JUMP, start); 3867 JUMPHERE(found); 3868 JUMPHERE(quit); 3869 3870 if (firstline) 3871 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); 3872 } 3873 3874 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline) 3875 { 3876 DEFINE_COMPILER; 3877 struct sljit_label *loop; 3878 struct sljit_jump *lastchar; 3879 struct sljit_jump *firstchar; 3880 struct sljit_jump *quit; 3881 struct sljit_jump *foundcr = NULL; 3882 struct sljit_jump *notfoundnl; 
static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
{
/* Emits code that moves STR_PTR forward until a newline has just been
passed, or STR_END is reached. No scanning is done when STR_PTR is at (or
before) the "str" field of the jit_arguments structure. With firstline set,
STR_END is temporarily replaced by the saved first_line_end value; the real
STR_END is kept in TMP3 and restored before leaving. TMP1 and TMP2 are used
as scratch registers. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar;
struct sljit_jump *firstchar;
struct sljit_jump *quit;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

if (firstline)
  {
  SLJIT_ASSERT(common->first_line_end != 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  }

/* Fixed newline made of two characters: the first one is in bits 8-15 of
common->newline, the second one in bits 0-7. */
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

  /* Step STR_PTR back by one character when it is at least two characters
  after "begin". TMP2 becomes 0 or 1 and is scaled to the character size. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

  /* Advance until the two characters just before STR_PTR form the newline. */
  loop = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);

  JUMPHERE(quit);
  JUMPHERE(firstchar);
  JUMPHERE(lastchar);

  if (firstline)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

/* All other newline types. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
skip_char_back(common);

loop = LABEL();
/* The loop label is published in common for use by other code. */
common->ff_newline_shortcut = loop;

read_char_range(common, common->nlmin, common->nlmax, TRUE);
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected in "newline" are bound to the loop label, so they
continue the scan. */
check_newlinechar(common, common->nltype, &newline, FALSE);
set_jumps(newline, loop);

if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  {
  quit = JUMP(SLJIT_JUMP);
  JUMPHERE(foundcr);
  /* A CR was read: if the next character is NL, step over it as well.
  TMP1 becomes 0 or 1 and is scaled to the character size. */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(notfoundnl);
  JUMPHERE(quit);
  }
JUMPHERE(lastchar);
JUMPHERE(firstchar);

if (firstline)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
/* Forward declaration: the definition comes later in this file. */
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);

static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
{
/* Emits a loop that advances STR_PTR until the character at STR_PTR has its
bit set in the 256 bit start_bits bitmap, or STR_PTR reaches STR_END. With
firstline set, STR_END is temporarily replaced by the saved first_line_end
value; here the real STR_END is kept in RETURN_ADDR (TMP3 is needed for the
character in UTF mode). TMP1 and TMP2 are used as scratch registers. */
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *found = NULL;
jump_list *matches = NULL;
#ifndef COMPILE_PCRE8
struct sljit_jump *jump;
#endif

if (firstline)
  {
  SLJIT_ASSERT(common->first_line_end != 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  }

start = LABEL();
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#ifdef SUPPORT_UTF
/* The test below overwrites TMP1, but the code unit is needed again to
compute the character length, so a copy is kept in TMP3. */
if (common->utf)
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
#endif

/* First try to express the bitmap as a few range comparisons; the collected
jumps in "matches" are bound after the loop. The highest bit of the bitmap
is passed as the nclass argument. */
if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
  {
  /* Generic bitmap test. */
#ifndef COMPILE_PCRE8
  /* Values of 255 and above are all tested as 255. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
  JUMPHERE(jump);
#endif
  /* TMP2 = 1 << (chr & 7), TMP1 = start_bits[chr >> 3]. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  found = JUMP(SLJIT_NOT_ZERO);
  }

#ifdef SUPPORT_UTF
if (common->utf)
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
#endif
/* No match: step over the current character. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#ifdef SUPPORT_UTF
#if defined COMPILE_PCRE8
if (common->utf)
  {
  /* For code units of 0xc0 and above an additional amount, read from
  utf8_table4, is added to STR_PTR. */
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  /* One more 16 bit unit is skipped when (unit & 0xfc00) == 0xd800. */
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF */
JUMPTO(SLJIT_JUMP, start);
/* Exit points of the loop. */
if (found != NULL)
  JUMPHERE(found);
if (matches != NULL)
  set_jumps(matches, LABEL());
JUMPHERE(quit);

if (firstline)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
*search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar) 4042 { 4043 DEFINE_COMPILER; 4044 struct sljit_label *loop; 4045 struct sljit_jump *toolong; 4046 struct sljit_jump *alreadyfound; 4047 struct sljit_jump *found; 4048 struct sljit_jump *foundoc = NULL; 4049 struct sljit_jump *notfound; 4050 pcre_uint32 oc, bit; 4051 4052 SLJIT_ASSERT(common->req_char_ptr != 0); 4053 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr); 4054 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX); 4055 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0); 4056 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); 4057 4058 if (has_firstchar) 4059 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 4060 else 4061 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0); 4062 4063 loop = LABEL(); 4064 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0); 4065 4066 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0); 4067 oc = req_char; 4068 if (caseless) 4069 { 4070 oc = TABLE_GET(req_char, common->fcc, req_char); 4071 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) 4072 if (req_char > 127 && common->utf) 4073 oc = UCD_OTHERCASE(req_char); 4074 #endif 4075 } 4076 if (req_char == oc) 4077 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); 4078 else 4079 { 4080 bit = req_char ^ oc; 4081 if (is_powerof2(bit)) 4082 { 4083 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit); 4084 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit); 4085 } 4086 else 4087 { 4088 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); 4089 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc); 4090 } 4091 } 4092 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); 4093 JUMPTO(SLJIT_JUMP, loop); 4094 4095 JUMPHERE(found); 4096 if (foundoc) 4097 JUMPHERE(foundoc); 4098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0); 4099 JUMPHERE(alreadyfound); 4100 JUMPHERE(toolong); 4101 return notfound; 4102 } 4103 4104 static void 
static void do_revertframes(compiler_common *common)
{
/* Emits a helper routine (entered with sljit_emit_fast_enter, return
address kept in RETURN_ADDR) that walks a list of records starting at
STACK_TOP and copies saved words back to locations relative to the local
base. Each record starts with a signed word:
  > 0 : offset from the local base; the next two words are written to that
        location and the one after it; the record is three words long
  = 0 : end of the list, the routine returns
  < 0 : the negated value is an offset from the local base; the next word
        is written there; the record is two words long
TMP1 is the read cursor, TMP2 the current value / target address and TMP3
the local base. STACK_TOP itself is not modified here. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(TMP3, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Signed comparison of the record header with zero. */
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive header: restore two consecutive words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* No new comparison is emitted here: this jump tests the flags produced by
the SUB above (assumes the previous conditional jump leaves them intact). */
jump = JUMP(SLJIT_SIG_LESS);
/* End of dropping frames. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Negative header: restore a single word. */
JUMPHERE(jump);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);
}
static void check_wordboundary(compiler_common *common)
{
/* Emits a helper routine (entered with sljit_emit_fast_enter, return
address kept in LOCALS0) that compares the "word character" status of the
character before STR_PTR with that of the character at STR_PTR. Each status
is computed as 0 or 1: the previous one is stored in LOCALS1, the next one
is left in TMP2. The routine ends with an XOR of the two that sets the
equality flag for the caller, so a non-zero result means the two differ.
A missing character (start or end of the subject) counts as status 0. */
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *jump;
#endif

/* The shift by 4 below depends on this value. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* There is no previous character at the beginning of the subject. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Testing char type. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  /* Underscore yields 1 directly. Otherwise the value produced by the
  getucd helper is tested against two ranges: ucp_Ll..ucp_Lu and
  ucp_Nd..ucp_No. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Table based test; characters above 255 skip the lookup, which leaves
  the zero already stored in LOCALS1. */
#ifndef COMPILE_PCRE8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Here LOCALS1 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
JUMPHERE(skipread);

/* Now the character at STR_PTR. TMP2 starts as 0; the jumps collected by
check_str_end are bound after the type test, so that test is skipped when
they are taken. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
set_jumps(skipread_list, LABEL());

/* Compare the two statuses; only the flags are produced. */
OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries to replace a 256 bit class bitmap test with a few comparisons on
the character held in TMP1.

  bits       - the bitmap, bit i belongs to character i
  nclass     - decides (together with the value of the last bit) whether a
               closing boundary of 256 is added, see below
  invert     - flips the sense of the emitted jumps
  backtracks - the emitted conditional jumps are appended to this list

The bitmap is first converted to a list of boundaries: the character values
at which the bit value changes. When there are more than four boundaries the
function returns FALSE and has emitted nothing, so the caller has to
generate a generic test. Otherwise the comparisons are emitted and TRUE is
returned. Without invert the jumps are taken for characters that are not in
the class. The emitted code may modify TMP1. */
DEFINE_COMPILER;
int ranges[MAX_RANGE_SIZE];
pcre_uint8 bit, cbit, all;
int i, byte, length = 0;

bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = -bit;

/* Collect the boundaries. A byte whose eight bits all equal the current
bit value is skipped in one step. */
for (i = 0; i < 256; )
  {
  byte = i >> 3;
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      if (length >= MAX_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = -cbit;
      }
    i++;
    }
  }

/* Here bit is the value of the last bit of the bitmap. A final boundary at
256 is added when that value differs from nclass. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

if (length < 0 || length > 4)
  return FALSE;

/* From now on bit is the (possibly inverted) value of the first bit; it
selects the direction of the comparisons below. */
bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  /* A single boundary: one comparison. */
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  /* One interval [ranges[0], ranges[1]); a single equality test is used
  when the interval contains one character. */
  if (ranges[0] + 1 != ranges[1])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  if (bit != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Two intervals of the same length whose start values differ in exactly
  one bit: that bit is forced on in TMP1, which maps the first interval
  onto the second, and a single interval test is emitted. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
      && is_powerof2(ranges[2] - ranges[0]))
    {
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* i remembers the amount already subtracted from TMP1 by the first
    test, so that the second test can compensate for it. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  /* First test the outer interval [ranges[0], ranges[3]), then the gap
  [ranges[1], ranges[2]) inside it. TMP1 is already reduced by ranges[0]
  when the second test is emitted. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  SLJIT_ASSERT_STOP();
  return FALSE;
  }
}
*/ 4402 DEFINE_COMPILER; 4403 4404 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 4405 4406 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); 4407 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); 4408 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); 4409 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); 4410 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 4411 #ifdef COMPILE_PCRE8 4412 if (common->utf) 4413 { 4414 #endif 4415 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 4416 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); 4417 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); 4418 #ifdef COMPILE_PCRE8 4419 } 4420 #endif 4421 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */ 4422 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 4423 sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 4424 } 4425 4426 static void check_hspace(compiler_common *common) 4427 { 4428 /* Check whether TMP1 contains a newline character. TMP2 destroyed. 
*/ 4429 DEFINE_COMPILER; 4430 4431 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 4432 4433 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09); 4434 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); 4435 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); 4436 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 4437 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0); 4438 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 4439 #ifdef COMPILE_PCRE8 4440 if (common->utf) 4441 { 4442 #endif 4443 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 4444 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680); 4445 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 4446 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e); 4447 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 4448 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000); 4449 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000); 4450 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); 4451 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000); 4452 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 4453 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000); 4454 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 4455 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000); 4456 #ifdef COMPILE_PCRE8 4457 } 4458 #endif 4459 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */ 4460 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 4461 4462 sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 4463 } 4464 4465 static void check_vspace(compiler_common *common) 4466 { 4467 /* Check whether TMP1 contains a newline character. TMP2 destroyed. 
*/ 4468 DEFINE_COMPILER; 4469 4470 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 4471 4472 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); 4473 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); 4474 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); 4475 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); 4476 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 4477 #ifdef COMPILE_PCRE8 4478 if (common->utf) 4479 { 4480 #endif 4481 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 4482 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); 4483 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); 4484 #ifdef COMPILE_PCRE8 4485 } 4486 #endif 4487 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */ 4488 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 4489 4490 sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 4491 } 4492 4493 #define CHAR1 STR_END 4494 #define CHAR2 STACK_TOP 4495 4496 static void do_casefulcmp(compiler_common *common) 4497 { 4498 DEFINE_COMPILER; 4499 struct sljit_jump *jump; 4500 struct sljit_label *label; 4501 4502 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 4503 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 4504 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0); 4505 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0); 4506 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); 4507 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 4508 4509 label = LABEL(); 4510 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1)); 4511 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 4512 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0); 4513 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); 4514 JUMPTO(SLJIT_NOT_ZERO, label); 4515 4516 JUMPHERE(jump); 4517 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 4518 OP1(SLJIT_MOV, 
CHAR1, 0, TMP3, 0); 4519 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); 4520 sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 4521 } 4522 4523 #define LCC_TABLE STACK_LIMIT 4524 4525 static void do_caselesscmp(compiler_common *common) 4526 { 4527 DEFINE_COMPILER; 4528 struct sljit_jump *jump; 4529 struct sljit_label *label; 4530 4531 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 4532 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 4533 4534 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0); 4535 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0); 4536 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0); 4537 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc); 4538 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); 4539 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 4540 4541 label = LABEL(); 4542 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1)); 4543 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 4544 #ifndef COMPILE_PCRE8 4545 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255); 4546 #endif 4547 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0); 4548 #ifndef COMPILE_PCRE8 4549 JUMPHERE(jump); 4550 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255); 4551 #endif 4552 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0); 4553 #ifndef COMPILE_PCRE8 4554 JUMPHERE(jump); 4555 #endif 4556 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0); 4557 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); 4558 JUMPTO(SLJIT_NOT_ZERO, label); 4559 4560 JUMPHERE(jump); 4561 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 4562 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0); 4563 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); 4564 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); 4565 sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 4566 } 4567 4568 #undef LCC_TABLE 4569 #undef CHAR1 4570 #undef CHAR2 4571 4572 #if defined SUPPORT_UTF && defined SUPPORT_UCP 
4573 4574 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1) 4575 { 4576 /* This function would be ineffective to do in JIT level. */ 4577 pcre_uint32 c1, c2; 4578 const pcre_uchar *src2 = args->uchar_ptr; 4579 const pcre_uchar *end2 = args->end; 4580 const ucd_record *ur; 4581 const pcre_uint32 *pp; 4582 4583 while (src1 < end1) 4584 { 4585 if (src2 >= end2) 4586 return (pcre_uchar*)1; 4587 GETCHARINC(c1, src1); 4588 GETCHARINC(c2, src2); 4589 ur = GET_UCD(c2); 4590 if (c1 != c2 && c1 != c2 + ur->other_case) 4591 { 4592 pp = PRIV(ucd_caseless_sets) + ur->caseset; 4593 for (;;) 4594 { 4595 if (c1 < *pp) return NULL; 4596 if (c1 == *pp++) break; 4597 } 4598 } 4599 } 4600 return src2; 4601 } 4602 4603 #endif /* SUPPORT_UTF && SUPPORT_UCP */ 4604 4605 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc, 4606 compare_context *context, jump_list **backtracks) 4607 { 4608 DEFINE_COMPILER; 4609 unsigned int othercasebit = 0; 4610 pcre_uchar *othercasechar = NULL; 4611 #ifdef SUPPORT_UTF 4612 int utflength; 4613 #endif 4614 4615 if (caseless && char_has_othercase(common, cc)) 4616 { 4617 othercasebit = char_get_othercase_bit(common, cc); 4618 SLJIT_ASSERT(othercasebit); 4619 /* Extracting bit difference info. */ 4620 #if defined COMPILE_PCRE8 4621 othercasechar = cc + (othercasebit >> 8); 4622 othercasebit &= 0xff; 4623 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 4624 /* Note that this code only handles characters in the BMP. If there 4625 ever are characters outside the BMP whose othercase differs in only one 4626 bit from itself (there currently are none), this code will need to be 4627 revised for COMPILE_PCRE32. 
*/ 4628 othercasechar = cc + (othercasebit >> 9); 4629 if ((othercasebit & 0x100) != 0) 4630 othercasebit = (othercasebit & 0xff) << 8; 4631 else 4632 othercasebit &= 0xff; 4633 #endif /* COMPILE_PCRE[8|16|32] */ 4634 } 4635 4636 if (context->sourcereg == -1) 4637 { 4638 #if defined COMPILE_PCRE8 4639 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED 4640 if (context->length >= 4) 4641 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); 4642 else if (context->length >= 2) 4643 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); 4644 else 4645 #endif 4646 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); 4647 #elif defined COMPILE_PCRE16 4648 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED 4649 if (context->length >= 4) 4650 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); 4651 else 4652 #endif 4653 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); 4654 #elif defined COMPILE_PCRE32 4655 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); 4656 #endif /* COMPILE_PCRE[8|16|32] */ 4657 context->sourcereg = TMP2; 4658 } 4659 4660 #ifdef SUPPORT_UTF 4661 utflength = 1; 4662 if (common->utf && HAS_EXTRALEN(*cc)) 4663 utflength += GET_EXTRALEN(*cc); 4664 4665 do 4666 { 4667 #endif 4668 4669 context->length -= IN_UCHARS(1); 4670 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16) 4671 4672 /* Unaligned read is supported. 
*/ 4673 if (othercasebit != 0 && othercasechar == cc) 4674 { 4675 context->c.asuchars[context->ucharptr] = *cc | othercasebit; 4676 context->oc.asuchars[context->ucharptr] = othercasebit; 4677 } 4678 else 4679 { 4680 context->c.asuchars[context->ucharptr] = *cc; 4681 context->oc.asuchars[context->ucharptr] = 0; 4682 } 4683 context->ucharptr++; 4684 4685 #if defined COMPILE_PCRE8 4686 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1)) 4687 #else 4688 if (context->ucharptr >= 2 || context->length == 0) 4689 #endif 4690 { 4691 if (context->length >= 4) 4692 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); 4693 else if (context->length >= 2) 4694 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); 4695 #if defined COMPILE_PCRE8 4696 else if (context->length >= 1) 4697 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); 4698 #endif /* COMPILE_PCRE8 */ 4699 context->sourcereg = context->sourcereg == TMP1 ? 
TMP2 : TMP1; 4700 4701 switch(context->ucharptr) 4702 { 4703 case 4 / sizeof(pcre_uchar): 4704 if (context->oc.asint != 0) 4705 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint); 4706 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); 4707 break; 4708 4709 case 2 / sizeof(pcre_uchar): 4710 if (context->oc.asushort != 0) 4711 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); 4712 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); 4713 break; 4714 4715 #ifdef COMPILE_PCRE8 4716 case 1: 4717 if (context->oc.asbyte != 0) 4718 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte); 4719 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); 4720 break; 4721 #endif 4722 4723 default: 4724 SLJIT_ASSERT_STOP(); 4725 break; 4726 } 4727 context->ucharptr = 0; 4728 } 4729 4730 #else 4731 4732 /* Unaligned read is unsupported or in 32 bit mode. */ 4733 if (context->length >= 1) 4734 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); 4735 4736 context->sourcereg = context->sourcereg == TMP1 ? 
TMP2 : TMP1; 4737 4738 if (othercasebit != 0 && othercasechar == cc) 4739 { 4740 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit); 4741 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); 4742 } 4743 else 4744 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); 4745 4746 #endif 4747 4748 cc++; 4749 #ifdef SUPPORT_UTF 4750 utflength--; 4751 } 4752 while (utflength > 0); 4753 #endif 4754 4755 return cc; 4756 } 4757 4758 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 4759 4760 #define SET_TYPE_OFFSET(value) \ 4761 if ((value) != typeoffset) \ 4762 { \ 4763 if ((value) < typeoffset) \ 4764 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \ 4765 else \ 4766 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \ 4767 } \ 4768 typeoffset = (value); 4769 4770 #define SET_CHAR_OFFSET(value) \ 4771 if ((value) != charoffset) \ 4772 { \ 4773 if ((value) < charoffset) \ 4774 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \ 4775 else \ 4776 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \ 4777 } \ 4778 charoffset = (value); 4779 4780 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) 4781 { 4782 DEFINE_COMPILER; 4783 jump_list *found = NULL; 4784 jump_list **list = (cc[0] & XCL_NOT) == 0 ? 
&found : backtracks; 4785 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX; 4786 struct sljit_jump *jump = NULL; 4787 pcre_uchar *ccbegin; 4788 int compares, invertcmp, numberofcmps; 4789 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16) 4790 BOOL utf = common->utf; 4791 #endif 4792 4793 #ifdef SUPPORT_UCP 4794 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; 4795 BOOL charsaved = FALSE; 4796 int typereg = TMP1, scriptreg = TMP1; 4797 const pcre_uint32 *other_cases; 4798 sljit_uw typeoffset; 4799 #endif 4800 4801 /* Scanning the necessary info. */ 4802 cc++; 4803 ccbegin = cc; 4804 compares = 0; 4805 if (cc[-1] & XCL_MAP) 4806 { 4807 min = 0; 4808 cc += 32 / sizeof(pcre_uchar); 4809 } 4810 4811 while (*cc != XCL_END) 4812 { 4813 compares++; 4814 if (*cc == XCL_SINGLE) 4815 { 4816 cc ++; 4817 GETCHARINCTEST(c, cc); 4818 if (c > max) max = c; 4819 if (c < min) min = c; 4820 #ifdef SUPPORT_UCP 4821 needschar = TRUE; 4822 #endif 4823 } 4824 else if (*cc == XCL_RANGE) 4825 { 4826 cc ++; 4827 GETCHARINCTEST(c, cc); 4828 if (c < min) min = c; 4829 GETCHARINCTEST(c, cc); 4830 if (c > max) max = c; 4831 #ifdef SUPPORT_UCP 4832 needschar = TRUE; 4833 #endif 4834 } 4835 #ifdef SUPPORT_UCP 4836 else 4837 { 4838 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); 4839 cc++; 4840 if (*cc == PT_CLIST) 4841 { 4842 other_cases = PRIV(ucd_caseless_sets) + cc[1]; 4843 while (*other_cases != NOTACHAR) 4844 { 4845 if (*other_cases > max) max = *other_cases; 4846 if (*other_cases < min) min = *other_cases; 4847 other_cases++; 4848 } 4849 } 4850 else 4851 { 4852 max = READ_CHAR_MAX; 4853 min = 0; 4854 } 4855 4856 switch(*cc) 4857 { 4858 case PT_ANY: 4859 break; 4860 4861 case PT_LAMP: 4862 case PT_GC: 4863 case PT_PC: 4864 case PT_ALNUM: 4865 needstype = TRUE; 4866 break; 4867 4868 case PT_SC: 4869 needsscript = TRUE; 4870 break; 4871 4872 case PT_SPACE: 4873 case PT_PXSPACE: 4874 case PT_WORD: 4875 case PT_PXGRAPH: 4876 case PT_PXPRINT: 
4877 case PT_PXPUNCT: 4878 needstype = TRUE; 4879 needschar = TRUE; 4880 break; 4881 4882 case PT_CLIST: 4883 case PT_UCNC: 4884 needschar = TRUE; 4885 break; 4886 4887 default: 4888 SLJIT_ASSERT_STOP(); 4889 break; 4890 } 4891 cc += 2; 4892 } 4893 #endif 4894 } 4895 4896 /* We are not necessary in utf mode even in 8 bit mode. */ 4897 cc = ccbegin; 4898 detect_partial_match(common, backtracks); 4899 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0); 4900 4901 if ((cc[-1] & XCL_HASPROP) == 0) 4902 { 4903 if ((cc[-1] & XCL_MAP) != 0) 4904 { 4905 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); 4906 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found)) 4907 { 4908 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); 4909 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); 4910 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); 4911 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); 4912 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); 4913 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO)); 4914 } 4915 4916 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); 4917 JUMPHERE(jump); 4918 4919 cc += 32 / sizeof(pcre_uchar); 4920 } 4921 else 4922 { 4923 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min); 4924 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? 
backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min)); 4925 } 4926 } 4927 else if ((cc[-1] & XCL_MAP) != 0) 4928 { 4929 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); 4930 #ifdef SUPPORT_UCP 4931 charsaved = TRUE; 4932 #endif 4933 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list)) 4934 { 4935 #ifdef COMPILE_PCRE8 4936 jump = NULL; 4937 if (common->utf) 4938 #endif 4939 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); 4940 4941 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); 4942 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); 4943 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); 4944 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); 4945 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); 4946 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO)); 4947 4948 #ifdef COMPILE_PCRE8 4949 if (common->utf) 4950 #endif 4951 JUMPHERE(jump); 4952 } 4953 4954 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); 4955 cc += 32 / sizeof(pcre_uchar); 4956 } 4957 4958 #ifdef SUPPORT_UCP 4959 /* Simple register allocation. TMP1 is preferred if possible. */ 4960 if (needstype || needsscript) 4961 { 4962 if (needschar && !charsaved) 4963 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); 4964 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); 4965 if (needschar) 4966 { 4967 if (needstype) 4968 { 4969 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); 4970 typereg = RETURN_ADDR; 4971 } 4972 4973 if (needsscript) 4974 scriptreg = TMP3; 4975 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); 4976 } 4977 else if (needstype && needsscript) 4978 scriptreg = TMP3; 4979 /* In all other cases only one of them was specified, and that can goes to TMP1. 
*/ 4980 4981 if (needsscript) 4982 { 4983 if (scriptreg == TMP1) 4984 { 4985 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); 4986 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3); 4987 } 4988 else 4989 { 4990 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); 4991 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); 4992 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0); 4993 } 4994 } 4995 } 4996 #endif 4997 4998 /* Generating code. */ 4999 charoffset = 0; 5000 numberofcmps = 0; 5001 #ifdef SUPPORT_UCP 5002 typeoffset = 0; 5003 #endif 5004 5005 while (*cc != XCL_END) 5006 { 5007 compares--; 5008 invertcmp = (compares == 0 && list != backtracks); 5009 jump = NULL; 5010 5011 if (*cc == XCL_SINGLE) 5012 { 5013 cc ++; 5014 GETCHARINCTEST(c, cc); 5015 5016 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) 5017 { 5018 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); 5019 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL); 5020 numberofcmps++; 5021 } 5022 else if (numberofcmps > 0) 5023 { 5024 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); 5025 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 5026 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); 5027 numberofcmps = 0; 5028 } 5029 else 5030 { 5031 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); 5032 numberofcmps = 0; 5033 } 5034 } 5035 else if (*cc == XCL_RANGE) 5036 { 5037 cc ++; 5038 GETCHARINCTEST(c, cc); 5039 SET_CHAR_OFFSET(c); 5040 GETCHARINCTEST(c, cc); 5041 5042 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) 5043 { 5044 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); 5045 OP_FLAGS(numberofcmps == 0 ? 
SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL); 5046 numberofcmps++; 5047 } 5048 else if (numberofcmps > 0) 5049 { 5050 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); 5051 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); 5052 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); 5053 numberofcmps = 0; 5054 } 5055 else 5056 { 5057 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); 5058 numberofcmps = 0; 5059 } 5060 } 5061 #ifdef SUPPORT_UCP 5062 else 5063 { 5064 if (*cc == XCL_NOTPROP) 5065 invertcmp ^= 0x1; 5066 cc++; 5067 switch(*cc) 5068 { 5069 case PT_ANY: 5070 if (list != backtracks) 5071 { 5072 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0)) 5073 continue; 5074 } 5075 else if (cc[-1] == XCL_NOTPROP) 5076 continue; 5077 jump = JUMP(SLJIT_JUMP); 5078 break; 5079 5080 case PT_LAMP: 5081 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset); 5082 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); 5083 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset); 5084 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 5085 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset); 5086 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 5087 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); 5088 break; 5089 5090 case PT_GC: 5091 c = PRIV(ucp_typerange)[(int)cc[1] * 2]; 5092 SET_TYPE_OFFSET(c); 5093 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c); 5094 break; 5095 5096 case PT_PC: 5097 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset); 5098 break; 5099 5100 case PT_SC: 5101 jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]); 5102 break; 5103 5104 case PT_SPACE: 5105 
case PT_PXSPACE: 5106 SET_CHAR_OFFSET(9); 5107 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9); 5108 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); 5109 5110 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); 5111 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 5112 5113 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); 5114 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 5115 5116 SET_TYPE_OFFSET(ucp_Zl); 5117 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl); 5118 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); 5119 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); 5120 break; 5121 5122 case PT_WORD: 5123 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset)); 5124 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); 5125 /* Fall through. */ 5126 5127 case PT_ALNUM: 5128 SET_TYPE_OFFSET(ucp_Ll); 5129 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); 5130 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL); 5131 SET_TYPE_OFFSET(ucp_Nd); 5132 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd); 5133 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); 5134 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); 5135 break; 5136 5137 case PT_CLIST: 5138 other_cases = PRIV(ucd_caseless_sets) + cc[1]; 5139 5140 /* At least three characters are required. 5141 Otherwise this case would be handled by the normal code path. 
*/ 5142 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR); 5143 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]); 5144 5145 /* Optimizing character pairs, if their difference is power of 2. */ 5146 if (is_powerof2(other_cases[1] ^ other_cases[0])) 5147 { 5148 if (charoffset == 0) 5149 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); 5150 else 5151 { 5152 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); 5153 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); 5154 } 5155 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]); 5156 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); 5157 other_cases += 2; 5158 } 5159 else if (is_powerof2(other_cases[2] ^ other_cases[1])) 5160 { 5161 if (charoffset == 0) 5162 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]); 5163 else 5164 { 5165 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); 5166 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); 5167 } 5168 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]); 5169 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); 5170 5171 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset)); 5172 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL); 5173 5174 other_cases += 3; 5175 } 5176 else 5177 { 5178 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); 5179 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); 5180 } 5181 5182 while (*other_cases != NOTACHAR) 5183 { 5184 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); 5185 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? 
SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL); 5186 } 5187 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); 5188 break; 5189 5190 case PT_UCNC: 5191 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); 5192 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); 5193 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); 5194 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 5195 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); 5196 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 5197 5198 SET_CHAR_OFFSET(0xa0); 5199 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); 5200 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); 5201 SET_CHAR_OFFSET(0); 5202 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0); 5203 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL); 5204 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); 5205 break; 5206 5207 case PT_PXGRAPH: 5208 /* C and Z groups are the farthest two groups. */ 5209 SET_TYPE_OFFSET(ucp_Ll); 5210 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); 5211 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER); 5212 5213 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); 5214 5215 /* In case of ucp_Cf, we overwrite the result. 
*/ 5216 SET_CHAR_OFFSET(0x2066); 5217 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); 5218 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); 5219 5220 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); 5221 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 5222 5223 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); 5224 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 5225 5226 JUMPHERE(jump); 5227 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); 5228 break; 5229 5230 case PT_PXPRINT: 5231 /* C and Z groups are the farthest two groups. */ 5232 SET_TYPE_OFFSET(ucp_Ll); 5233 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); 5234 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER); 5235 5236 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); 5237 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL); 5238 5239 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); 5240 5241 /* In case of ucp_Cf, we overwrite the result. 
*/ 5242 SET_CHAR_OFFSET(0x2066); 5243 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); 5244 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); 5245 5246 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); 5247 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); 5248 5249 JUMPHERE(jump); 5250 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); 5251 break; 5252 5253 case PT_PXPUNCT: 5254 SET_TYPE_OFFSET(ucp_Sc); 5255 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc); 5256 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); 5257 5258 SET_CHAR_OFFSET(0); 5259 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f); 5260 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); 5261 5262 SET_TYPE_OFFSET(ucp_Pc); 5263 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc); 5264 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); 5265 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); 5266 break; 5267 } 5268 cc += 2; 5269 } 5270 #endif 5271 5272 if (jump != NULL) 5273 add_jump(compiler, compares > 0 ? 
list : backtracks, jump); 5274 } 5275 5276 if (found != NULL) 5277 set_jumps(found, LABEL()); 5278 } 5279 5280 #undef SET_TYPE_OFFSET 5281 #undef SET_CHAR_OFFSET 5282 5283 #endif 5284 5285 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks) 5286 { 5287 DEFINE_COMPILER; 5288 int length; 5289 unsigned int c, oc, bit; 5290 compare_context context; 5291 struct sljit_jump *jump[4]; 5292 jump_list *end_list; 5293 #ifdef SUPPORT_UTF 5294 struct sljit_label *label; 5295 #ifdef SUPPORT_UCP 5296 pcre_uchar propdata[5]; 5297 #endif 5298 #endif /* SUPPORT_UTF */ 5299 5300 switch(type) 5301 { 5302 case OP_SOD: 5303 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 5304 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); 5305 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); 5306 return cc; 5307 5308 case OP_SOM: 5309 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 5310 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); 5311 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); 5312 return cc; 5313 5314 case OP_NOT_WORD_BOUNDARY: 5315 case OP_WORD_BOUNDARY: 5316 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL)); 5317 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO)); 5318 return cc; 5319 5320 case OP_NOT_DIGIT: 5321 case OP_DIGIT: 5322 /* Digits are usually 0-9, so it is worth to optimize them. */ 5323 detect_partial_match(common, backtracks); 5324 #if defined SUPPORT_UTF && defined COMPILE_PCRE8 5325 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE)) 5326 read_char7_type(common, type == OP_NOT_DIGIT); 5327 else 5328 #endif 5329 read_char8_type(common, type == OP_NOT_DIGIT); 5330 /* Flip the starting bit in the negative case. 
*/ 5331 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); 5332 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO)); 5333 return cc; 5334 5335 case OP_NOT_WHITESPACE: 5336 case OP_WHITESPACE: 5337 detect_partial_match(common, backtracks); 5338 #if defined SUPPORT_UTF && defined COMPILE_PCRE8 5339 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE)) 5340 read_char7_type(common, type == OP_NOT_WHITESPACE); 5341 else 5342 #endif 5343 read_char8_type(common, type == OP_NOT_WHITESPACE); 5344 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space); 5345 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO)); 5346 return cc; 5347 5348 case OP_NOT_WORDCHAR: 5349 case OP_WORDCHAR: 5350 detect_partial_match(common, backtracks); 5351 #if defined SUPPORT_UTF && defined COMPILE_PCRE8 5352 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE)) 5353 read_char7_type(common, type == OP_NOT_WORDCHAR); 5354 else 5355 #endif 5356 read_char8_type(common, type == OP_NOT_WORDCHAR); 5357 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word); 5358 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? 
SLJIT_ZERO : SLJIT_NOT_ZERO)); 5359 return cc; 5360 5361 case OP_ANY: 5362 detect_partial_match(common, backtracks); 5363 read_char_range(common, common->nlmin, common->nlmax, TRUE); 5364 if (common->nltype == NLTYPE_FIXED && common->newline > 255) 5365 { 5366 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); 5367 end_list = NULL; 5368 if (common->mode != JIT_PARTIAL_HARD_COMPILE) 5369 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); 5370 else 5371 check_str_end(common, &end_list); 5372 5373 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); 5374 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); 5375 set_jumps(end_list, LABEL()); 5376 JUMPHERE(jump[0]); 5377 } 5378 else 5379 check_newlinechar(common, common->nltype, backtracks, TRUE); 5380 return cc; 5381 5382 case OP_ALLANY: 5383 detect_partial_match(common, backtracks); 5384 #ifdef SUPPORT_UTF 5385 if (common->utf) 5386 { 5387 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); 5388 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 5389 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 5390 #if defined COMPILE_PCRE8 5391 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); 5392 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); 5393 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); 5394 #elif defined COMPILE_PCRE16 5395 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); 5396 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); 5397 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); 5398 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); 5399 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); 5400 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); 5401 #endif 5402 JUMPHERE(jump[0]); 5403 #endif /* COMPILE_PCRE[8|16] */ 5404 return cc; 5405 } 5406 #endif 5407 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 5408 return 
cc; 5409 5410 case OP_ANYBYTE: 5411 detect_partial_match(common, backtracks); 5412 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 5413 return cc; 5414 5415 #ifdef SUPPORT_UTF 5416 #ifdef SUPPORT_UCP 5417 case OP_NOTPROP: 5418 case OP_PROP: 5419 propdata[0] = XCL_HASPROP; 5420 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP; 5421 propdata[2] = cc[0]; 5422 propdata[3] = cc[1]; 5423 propdata[4] = XCL_END; 5424 compile_xclass_matchingpath(common, propdata, backtracks); 5425 return cc + 2; 5426 #endif 5427 #endif 5428 5429 case OP_ANYNL: 5430 detect_partial_match(common, backtracks); 5431 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE); 5432 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); 5433 /* We don't need to handle soft partial matching case. */ 5434 end_list = NULL; 5435 if (common->mode != JIT_PARTIAL_HARD_COMPILE) 5436 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); 5437 else 5438 check_str_end(common, &end_list); 5439 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); 5440 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); 5441 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 5442 jump[2] = JUMP(SLJIT_JUMP); 5443 JUMPHERE(jump[0]); 5444 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE); 5445 set_jumps(end_list, LABEL()); 5446 JUMPHERE(jump[1]); 5447 JUMPHERE(jump[2]); 5448 return cc; 5449 5450 case OP_NOT_HSPACE: 5451 case OP_HSPACE: 5452 detect_partial_match(common, backtracks); 5453 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE); 5454 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL)); 5455 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? 
SLJIT_NOT_ZERO : SLJIT_ZERO)); 5456 return cc; 5457 5458 case OP_NOT_VSPACE: 5459 case OP_VSPACE: 5460 detect_partial_match(common, backtracks); 5461 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE); 5462 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL)); 5463 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO)); 5464 return cc; 5465 5466 #ifdef SUPPORT_UCP 5467 case OP_EXTUNI: 5468 detect_partial_match(common, backtracks); 5469 read_char(common); 5470 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); 5471 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop)); 5472 /* Optimize register allocation: use a real register. */ 5473 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); 5474 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3); 5475 5476 label = LABEL(); 5477 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); 5478 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); 5479 read_char(common); 5480 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); 5481 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop)); 5482 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3); 5483 5484 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2); 5485 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable)); 5486 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0); 5487 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); 5488 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); 5489 JUMPTO(SLJIT_NOT_ZERO, label); 5490 5491 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); 5492 JUMPHERE(jump[0]); 5493 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); 5494 5495 if (common->mode == JIT_PARTIAL_HARD_COMPILE) 5496 { 5497 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); 5498 /* Since we successfully read a char above, partial matching must occure. 
*/ 5499 check_partial(common, TRUE); 5500 JUMPHERE(jump[0]); 5501 } 5502 return cc; 5503 #endif 5504 5505 case OP_EODN: 5506 /* Requires rather complex checks. */ 5507 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); 5508 if (common->nltype == NLTYPE_FIXED && common->newline > 255) 5509 { 5510 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); 5511 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 5512 if (common->mode == JIT_COMPILE) 5513 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); 5514 else 5515 { 5516 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0); 5517 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); 5518 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS); 5519 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); 5520 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL); 5521 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL)); 5522 check_partial(common, TRUE); 5523 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); 5524 JUMPHERE(jump[1]); 5525 } 5526 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 5527 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); 5528 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); 5529 } 5530 else if (common->nltype == NLTYPE_FIXED) 5531 { 5532 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 5533 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 5534 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); 5535 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); 5536 } 5537 else 5538 { 5539 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 5540 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); 5541 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); 5542 
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); 5543 jump[2] = JUMP(SLJIT_GREATER); 5544 add_jump(compiler, backtracks, JUMP(SLJIT_LESS)); 5545 /* Equal. */ 5546 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 5547 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); 5548 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); 5549 5550 JUMPHERE(jump[1]); 5551 if (common->nltype == NLTYPE_ANYCRLF) 5552 { 5553 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 5554 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0)); 5555 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); 5556 } 5557 else 5558 { 5559 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0); 5560 read_char_range(common, common->nlmin, common->nlmax, TRUE); 5561 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); 5562 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); 5563 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); 5564 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); 5565 } 5566 JUMPHERE(jump[2]); 5567 JUMPHERE(jump[3]); 5568 } 5569 JUMPHERE(jump[0]); 5570 check_partial(common, FALSE); 5571 return cc; 5572 5573 case OP_EOD: 5574 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); 5575 check_partial(common, FALSE); 5576 return cc; 5577 5578 case OP_CIRC: 5579 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); 5580 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); 5581 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); 5582 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); 5583 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); 5584 return cc; 5585 5586 case OP_CIRCM: 5587 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); 5588 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); 5589 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 
0, TMP1, 0); 5590 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); 5591 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); 5592 jump[0] = JUMP(SLJIT_JUMP); 5593 JUMPHERE(jump[1]); 5594 5595 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); 5596 if (common->nltype == NLTYPE_FIXED && common->newline > 255) 5597 { 5598 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); 5599 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0)); 5600 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); 5601 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); 5602 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); 5603 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); 5604 } 5605 else 5606 { 5607 skip_char_back(common); 5608 read_char_range(common, common->nlmin, common->nlmax, TRUE); 5609 check_newlinechar(common, common->nltype, backtracks, FALSE); 5610 } 5611 JUMPHERE(jump[0]); 5612 return cc; 5613 5614 case OP_DOLL: 5615 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); 5616 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); 5617 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); 5618 5619 if (!common->endonly) 5620 compile_char1_matchingpath(common, OP_EODN, cc, backtracks); 5621 else 5622 { 5623 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); 5624 check_partial(common, FALSE); 5625 } 5626 return cc; 5627 5628 case OP_DOLLM: 5629 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); 5630 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); 5631 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); 5632 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); 5633 check_partial(common, FALSE); 5634 jump[0] = JUMP(SLJIT_JUMP); 5635 
JUMPHERE(jump[1]); 5636 5637 if (common->nltype == NLTYPE_FIXED && common->newline > 255) 5638 { 5639 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); 5640 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 5641 if (common->mode == JIT_COMPILE) 5642 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0)); 5643 else 5644 { 5645 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0); 5646 /* STR_PTR = STR_END - IN_UCHARS(1) */ 5647 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); 5648 check_partial(common, TRUE); 5649 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); 5650 JUMPHERE(jump[1]); 5651 } 5652 5653 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 5654 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); 5655 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); 5656 } 5657 else 5658 { 5659 peek_char(common, common->nlmax); 5660 check_newlinechar(common, common->nltype, backtracks, FALSE); 5661 } 5662 JUMPHERE(jump[0]); 5663 return cc; 5664 5665 case OP_CHAR: 5666 case OP_CHARI: 5667 length = 1; 5668 #ifdef SUPPORT_UTF 5669 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc); 5670 #endif 5671 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)) 5672 { 5673 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); 5674 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); 5675 5676 context.length = IN_UCHARS(length); 5677 context.sourcereg = -1; 5678 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED 5679 context.ucharptr = 0; 5680 #endif 5681 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks); 5682 } 5683 5684 detect_partial_match(common, backtracks); 5685 #ifdef SUPPORT_UTF 5686 if (common->utf) 5687 { 5688 
GETCHAR(c, cc); 5689 } 5690 else 5691 #endif 5692 c = *cc; 5693 5694 if (type == OP_CHAR || !char_has_othercase(common, cc)) 5695 { 5696 read_char_range(common, c, c, FALSE); 5697 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); 5698 return cc + length; 5699 } 5700 oc = char_othercase(common, c); 5701 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE); 5702 bit = c ^ oc; 5703 if (is_powerof2(bit)) 5704 { 5705 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); 5706 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); 5707 return cc + length; 5708 } 5709 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c); 5710 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); 5711 JUMPHERE(jump[0]); 5712 return cc + length; 5713 5714 case OP_NOT: 5715 case OP_NOTI: 5716 detect_partial_match(common, backtracks); 5717 length = 1; 5718 #ifdef SUPPORT_UTF 5719 if (common->utf) 5720 { 5721 #ifdef COMPILE_PCRE8 5722 c = *cc; 5723 if (c < 128) 5724 { 5725 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); 5726 if (type == OP_NOT || !char_has_othercase(common, cc)) 5727 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); 5728 else 5729 { 5730 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */ 5731 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20); 5732 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); 5733 } 5734 /* Skip the variable-length character. 
*/ 5735 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 5736 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); 5737 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); 5738 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); 5739 JUMPHERE(jump[0]); 5740 return cc + 1; 5741 } 5742 else 5743 #endif /* COMPILE_PCRE8 */ 5744 { 5745 GETCHARLEN(c, cc, length); 5746 } 5747 } 5748 else 5749 #endif /* SUPPORT_UTF */ 5750 c = *cc; 5751 5752 if (type == OP_NOT || !char_has_othercase(common, cc)) 5753 { 5754 read_char_range(common, c, c, TRUE); 5755 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); 5756 } 5757 else 5758 { 5759 oc = char_othercase(common, c); 5760 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE); 5761 bit = c ^ oc; 5762 if (is_powerof2(bit)) 5763 { 5764 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); 5765 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); 5766 } 5767 else 5768 { 5769 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); 5770 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); 5771 } 5772 } 5773 return cc + length; 5774 5775 case OP_CLASS: 5776 case OP_NCLASS: 5777 detect_partial_match(common, backtracks); 5778 5779 #if defined SUPPORT_UTF && defined COMPILE_PCRE8 5780 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 
127 : 255; 5781 read_char_range(common, 0, bit, type == OP_NCLASS); 5782 #else 5783 read_char_range(common, 0, 255, type == OP_NCLASS); 5784 #endif 5785 5786 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks)) 5787 return cc + 32 / sizeof(pcre_uchar); 5788 5789 #if defined SUPPORT_UTF && defined COMPILE_PCRE8 5790 jump[0] = NULL; 5791 if (common->utf) 5792 { 5793 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit); 5794 if (type == OP_CLASS) 5795 { 5796 add_jump(compiler, backtracks, jump[0]); 5797 jump[0] = NULL; 5798 } 5799 } 5800 #elif !defined COMPILE_PCRE8 5801 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); 5802 if (type == OP_CLASS) 5803 { 5804 add_jump(compiler, backtracks, jump[0]); 5805 jump[0] = NULL; 5806 } 5807 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */ 5808 5809 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); 5810 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); 5811 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); 5812 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); 5813 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); 5814 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); 5815 5816 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 5817 if (jump[0] != NULL) 5818 JUMPHERE(jump[0]); 5819 #endif 5820 5821 return cc + 32 / sizeof(pcre_uchar); 5822 5823 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 5824 case OP_XCLASS: 5825 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks); 5826 return cc + GET(cc, 0) - 1; 5827 #endif 5828 5829 case OP_REVERSE: 5830 length = GET(cc, 0); 5831 if (length == 0) 5832 return cc + LINK_SIZE; 5833 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 5834 #ifdef SUPPORT_UTF 5835 if (common->utf) 5836 { 5837 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); 5838 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length); 5839 label = LABEL(); 5840 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 
0, TMP3, 0)); 5841 skip_char_back(common); 5842 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); 5843 JUMPTO(SLJIT_NOT_ZERO, label); 5844 } 5845 else 5846 #endif 5847 { 5848 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); 5849 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); 5850 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0)); 5851 } 5852 check_start_used_ptr(common); 5853 return cc + LINK_SIZE; 5854 } 5855 SLJIT_ASSERT_STOP(); 5856 return cc; 5857 } 5858 5859 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks) 5860 { 5861 /* This function consumes at least one input character. */ 5862 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */ 5863 DEFINE_COMPILER; 5864 pcre_uchar *ccbegin = cc; 5865 compare_context context; 5866 int size; 5867 5868 context.length = 0; 5869 do 5870 { 5871 if (cc >= ccend) 5872 break; 5873 5874 if (*cc == OP_CHAR) 5875 { 5876 size = 1; 5877 #ifdef SUPPORT_UTF 5878 if (common->utf && HAS_EXTRALEN(cc[1])) 5879 size += GET_EXTRALEN(cc[1]); 5880 #endif 5881 } 5882 else if (*cc == OP_CHARI) 5883 { 5884 size = 1; 5885 #ifdef SUPPORT_UTF 5886 if (common->utf) 5887 { 5888 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) 5889 size = 0; 5890 else if (HAS_EXTRALEN(cc[1])) 5891 size += GET_EXTRALEN(cc[1]); 5892 } 5893 else 5894 #endif 5895 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) 5896 size = 0; 5897 } 5898 else 5899 size = 0; 5900 5901 cc += 1 + size; 5902 context.length += IN_UCHARS(size); 5903 } 5904 while (size > 0 && context.length <= 128); 5905 5906 cc = ccbegin; 5907 if (context.length > 0) 5908 { 5909 /* We have a fixed-length byte sequence. 
*/ 5910 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length); 5911 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); 5912 5913 context.sourcereg = -1; 5914 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED 5915 context.ucharptr = 0; 5916 #endif 5917 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0); 5918 return cc; 5919 } 5920 5921 /* A non-fixed length character will be checked if length == 0. */ 5922 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks); 5923 } 5924 5925 /* Forward definitions. */ 5926 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *); 5927 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *); 5928 5929 #define PUSH_BACKTRACK(size, ccstart, error) \ 5930 do \ 5931 { \ 5932 backtrack = sljit_alloc_memory(compiler, (size)); \ 5933 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ 5934 return error; \ 5935 memset(backtrack, 0, size); \ 5936 backtrack->prev = parent->top; \ 5937 backtrack->cc = (ccstart); \ 5938 parent->top = backtrack; \ 5939 } \ 5940 while (0) 5941 5942 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \ 5943 do \ 5944 { \ 5945 backtrack = sljit_alloc_memory(compiler, (size)); \ 5946 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ 5947 return; \ 5948 memset(backtrack, 0, size); \ 5949 backtrack->prev = parent->top; \ 5950 backtrack->cc = (ccstart); \ 5951 parent->top = backtrack; \ 5952 } \ 5953 while (0) 5954 5955 #define BACKTRACK_AS(type) ((type *)backtrack) 5956 5957 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) 5958 { 5959 /* The OVECTOR offset goes to TMP2. 
*/ 5960 DEFINE_COMPILER; 5961 int count = GET2(cc, 1 + IMM2_SIZE); 5962 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size; 5963 unsigned int offset; 5964 jump_list *found = NULL; 5965 5966 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI); 5967 5968 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); 5969 5970 count--; 5971 while (count-- > 0) 5972 { 5973 offset = GET2(slot, 0) << 1; 5974 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); 5975 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0)); 5976 slot += common->name_entry_size; 5977 } 5978 5979 offset = GET2(slot, 0) << 1; 5980 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); 5981 if (backtracks != NULL && !common->jscript_compat) 5982 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0)); 5983 5984 set_jumps(found, LABEL()); 5985 } 5986 5987 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail) 5988 { 5989 DEFINE_COMPILER; 5990 BOOL ref = (*cc == OP_REF || *cc == OP_REFI); 5991 int offset = 0; 5992 struct sljit_jump *jump = NULL; 5993 struct sljit_jump *partial; 5994 struct sljit_jump *nopartial; 5995 5996 if (ref) 5997 { 5998 offset = GET2(cc, 1) << 1; 5999 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); 6000 /* OVECTOR(1) contains the "string begin - 1" constant. 
*/ 6001 if (withchecks && !common->jscript_compat) 6002 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); 6003 } 6004 else 6005 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); 6006 6007 #if defined SUPPORT_UTF && defined SUPPORT_UCP 6008 if (common->utf && *cc == OP_REFI) 6009 { 6010 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2); 6011 if (ref) 6012 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); 6013 else 6014 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); 6015 6016 if (withchecks) 6017 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0); 6018 6019 /* Needed to save important temporary registers. */ 6020 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); 6021 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0); 6022 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0); 6023 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp)); 6024 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); 6025 if (common->mode == JIT_COMPILE) 6026 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1)); 6027 else 6028 { 6029 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); 6030 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); 6031 check_partial(common, FALSE); 6032 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); 6033 JUMPHERE(nopartial); 6034 } 6035 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); 6036 } 6037 else 6038 #endif /* SUPPORT_UTF && SUPPORT_UCP */ 6039 { 6040 if (ref) 6041 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0); 6042 else 6043 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0); 6044 6045 if (withchecks) 6046 jump = JUMP(SLJIT_ZERO); 6047 6048 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 6049 partial = CMP(SLJIT_GREATER, 
STR_PTR, 0, STR_END, 0); 6050 if (common->mode == JIT_COMPILE) 6051 add_jump(compiler, backtracks, partial); 6052 6053 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); 6054 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); 6055 6056 if (common->mode != JIT_COMPILE) 6057 { 6058 nopartial = JUMP(SLJIT_JUMP); 6059 JUMPHERE(partial); 6060 /* TMP2 -= STR_END - STR_PTR */ 6061 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0); 6062 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0); 6063 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0); 6064 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); 6065 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); 6066 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); 6067 JUMPHERE(partial); 6068 check_partial(common, FALSE); 6069 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); 6070 JUMPHERE(nopartial); 6071 } 6072 } 6073 6074 if (jump != NULL) 6075 { 6076 if (emptyfail) 6077 add_jump(compiler, backtracks, jump); 6078 else 6079 JUMPHERE(jump); 6080 } 6081 } 6082 6083 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) 6084 { 6085 DEFINE_COMPILER; 6086 BOOL ref = (*cc == OP_REF || *cc == OP_REFI); 6087 backtrack_common *backtrack; 6088 pcre_uchar type; 6089 int offset = 0; 6090 struct sljit_label *label; 6091 struct sljit_jump *zerolength; 6092 struct sljit_jump *jump = NULL; 6093 pcre_uchar *ccbegin = cc; 6094 int min = 0, max = 0; 6095 BOOL minimize; 6096 6097 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); 6098 6099 if (ref) 6100 offset = GET2(cc, 1) << 1; 6101 else 6102 cc += IMM2_SIZE; 6103 type = cc[1 + IMM2_SIZE]; 6104 6105 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even); 6106 minimize = (type & 0x1) != 0; 6107 switch(type) 6108 { 6109 case OP_CRSTAR: 6110 case OP_CRMINSTAR: 6111 min 
= 0; 6112 max = 0; 6113 cc += 1 + IMM2_SIZE + 1; 6114 break; 6115 case OP_CRPLUS: 6116 case OP_CRMINPLUS: 6117 min = 1; 6118 max = 0; 6119 cc += 1 + IMM2_SIZE + 1; 6120 break; 6121 case OP_CRQUERY: 6122 case OP_CRMINQUERY: 6123 min = 0; 6124 max = 1; 6125 cc += 1 + IMM2_SIZE + 1; 6126 break; 6127 case OP_CRRANGE: 6128 case OP_CRMINRANGE: 6129 min = GET2(cc, 1 + IMM2_SIZE + 1); 6130 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE); 6131 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE; 6132 break; 6133 default: 6134 SLJIT_ASSERT_STOP(); 6135 break; 6136 } 6137 6138 if (!minimize) 6139 { 6140 if (min == 0) 6141 { 6142 allocate_stack(common, 2); 6143 if (ref) 6144 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); 6145 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 6146 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); 6147 /* Temporary release of STR_PTR. */ 6148 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); 6149 /* Handles both invalid and empty cases. Since the minimum repeat, 6150 is zero the invalid case is basically the same as an empty case. */ 6151 if (ref) 6152 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); 6153 else 6154 { 6155 compile_dnref_search(common, ccbegin, NULL); 6156 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); 6157 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); 6158 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); 6159 } 6160 /* Restore if not zero length. 
*/ 6161 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); 6162 } 6163 else 6164 { 6165 allocate_stack(common, 1); 6166 if (ref) 6167 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); 6168 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6169 if (ref) 6170 { 6171 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); 6172 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); 6173 } 6174 else 6175 { 6176 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); 6177 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); 6178 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); 6179 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); 6180 } 6181 } 6182 6183 if (min > 1 || max > 1) 6184 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0); 6185 6186 label = LABEL(); 6187 if (!ref) 6188 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); 6189 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE); 6190 6191 if (min > 1 || max > 1) 6192 { 6193 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); 6194 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); 6195 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); 6196 if (min > 1) 6197 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label); 6198 if (max > 1) 6199 { 6200 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max); 6201 allocate_stack(common, 1); 6202 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 6203 JUMPTO(SLJIT_JUMP, label); 6204 JUMPHERE(jump); 6205 } 6206 } 6207 6208 if (max == 0) 6209 { 6210 /* Includes min > 1 case as well. 
*/ 6211 allocate_stack(common, 1); 6212 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 6213 JUMPTO(SLJIT_JUMP, label); 6214 } 6215 6216 JUMPHERE(zerolength); 6217 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); 6218 6219 count_match(common); 6220 return cc; 6221 } 6222 6223 allocate_stack(common, ref ? 2 : 3); 6224 if (ref) 6225 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); 6226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6227 if (type != OP_CRMINSTAR) 6228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); 6229 6230 if (min == 0) 6231 { 6232 /* Handles both invalid and empty cases. Since the minimum repeat, 6233 is zero the invalid case is basically the same as an empty case. */ 6234 if (ref) 6235 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); 6236 else 6237 { 6238 compile_dnref_search(common, ccbegin, NULL); 6239 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); 6240 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); 6241 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); 6242 } 6243 /* Length is non-zero, we can match real repeats. 
*/ 6244 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 6245 jump = JUMP(SLJIT_JUMP); 6246 } 6247 else 6248 { 6249 if (ref) 6250 { 6251 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); 6252 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); 6253 } 6254 else 6255 { 6256 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); 6257 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); 6258 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); 6259 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); 6260 } 6261 } 6262 6263 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); 6264 if (max > 0) 6265 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); 6266 6267 if (!ref) 6268 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); 6269 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE); 6270 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 6271 6272 if (min > 1) 6273 { 6274 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 6275 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); 6276 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); 6277 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath); 6278 } 6279 else if (max > 0) 6280 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); 6281 6282 if (jump != NULL) 6283 JUMPHERE(jump); 6284 JUMPHERE(zerolength); 6285 6286 count_match(common); 6287 return cc; 6288 } 6289 6290 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) 6291 { 6292 DEFINE_COMPILER; 6293 backtrack_common *backtrack; 6294 recurse_entry *entry = common->entries; 6295 recurse_entry *prev = NULL; 6296 sljit_sw start = GET(cc, 1); 6297 pcre_uchar 
*start_cc; 6298 BOOL needs_control_head; 6299 6300 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL); 6301 6302 /* Inlining simple patterns. */ 6303 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack) 6304 { 6305 start_cc = common->start + start; 6306 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack); 6307 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE; 6308 return cc + 1 + LINK_SIZE; 6309 } 6310 6311 while (entry != NULL) 6312 { 6313 if (entry->start == start) 6314 break; 6315 prev = entry; 6316 entry = entry->next; 6317 } 6318 6319 if (entry == NULL) 6320 { 6321 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry)); 6322 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 6323 return NULL; 6324 entry->next = NULL; 6325 entry->entry = NULL; 6326 entry->calls = NULL; 6327 entry->start = start; 6328 6329 if (prev != NULL) 6330 prev->next = entry; 6331 else 6332 common->entries = entry; 6333 } 6334 6335 if (common->has_set_som && common->mark_ptr != 0) 6336 { 6337 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); 6338 allocate_stack(common, 2); 6339 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); 6340 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); 6341 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); 6342 } 6343 else if (common->has_set_som || common->mark_ptr != 0) 6344 { 6345 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr); 6346 allocate_stack(common, 1); 6347 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); 6348 } 6349 6350 if (entry->entry == NULL) 6351 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL)); 6352 else 6353 JUMPTO(SLJIT_FAST_CALL, entry->entry); 6354 /* Leave if the match is failed. 
*/ 6355 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0)); 6356 return cc + 1 + LINK_SIZE; 6357 } 6358 6359 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector) 6360 { 6361 const pcre_uchar *begin = arguments->begin; 6362 int *offset_vector = arguments->offsets; 6363 int offset_count = arguments->offset_count; 6364 int i; 6365 6366 if (PUBL(callout) == NULL) 6367 return 0; 6368 6369 callout_block->version = 2; 6370 callout_block->callout_data = arguments->callout_data; 6371 6372 /* Offsets in subject. */ 6373 callout_block->subject_length = arguments->end - arguments->begin; 6374 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin; 6375 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin; 6376 #if defined COMPILE_PCRE8 6377 callout_block->subject = (PCRE_SPTR)begin; 6378 #elif defined COMPILE_PCRE16 6379 callout_block->subject = (PCRE_SPTR16)begin; 6380 #elif defined COMPILE_PCRE32 6381 callout_block->subject = (PCRE_SPTR32)begin; 6382 #endif 6383 6384 /* Convert and copy the JIT offset vector to the offset_vector array. */ 6385 callout_block->capture_top = 0; 6386 callout_block->offset_vector = offset_vector; 6387 for (i = 2; i < offset_count; i += 2) 6388 { 6389 offset_vector[i] = jit_ovector[i] - begin; 6390 offset_vector[i + 1] = jit_ovector[i + 1] - begin; 6391 if (jit_ovector[i] >= begin) 6392 callout_block->capture_top = i; 6393 } 6394 6395 callout_block->capture_top = (callout_block->capture_top >> 1) + 1; 6396 if (offset_count > 0) 6397 offset_vector[0] = -1; 6398 if (offset_count > 1) 6399 offset_vector[1] = -1; 6400 return (*PUBL(callout))(callout_block); 6401 } 6402 6403 /* Aligning to 8 byte. 
*/ 6404 #define CALLOUT_ARG_SIZE \ 6405 (((int)sizeof(PUBL(callout_block)) + 7) & ~7) 6406 6407 #define CALLOUT_ARG_OFFSET(arg) \ 6408 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg)) 6409 6410 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) 6411 { 6412 DEFINE_COMPILER; 6413 backtrack_common *backtrack; 6414 6415 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); 6416 6417 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw)); 6418 6419 SLJIT_ASSERT(common->capture_last_ptr != 0); 6420 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); 6421 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 6422 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]); 6423 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0); 6424 6425 /* These pointer sized fields temporarly stores internal variables. */ 6426 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); 6427 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0); 6428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0); 6429 6430 if (common->mark_ptr != 0) 6431 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr)); 6432 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2)); 6433 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE)); 6434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0); 6435 6436 /* Needed to save important temporary registers. 
*/ 6437 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); 6438 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE); 6439 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START); 6440 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout)); 6441 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0); 6442 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); 6443 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw)); 6444 6445 /* Check return value. */ 6446 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); 6447 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER)); 6448 if (common->forced_quit_label == NULL) 6449 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS)); 6450 else 6451 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label); 6452 return cc + 2 + 2 * LINK_SIZE; 6453 } 6454 6455 #undef CALLOUT_ARG_SIZE 6456 #undef CALLOUT_ARG_OFFSET 6457 6458 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional) 6459 { 6460 DEFINE_COMPILER; 6461 int framesize; 6462 int extrasize; 6463 BOOL needs_control_head; 6464 int private_data_ptr; 6465 backtrack_common altbacktrack; 6466 pcre_uchar *ccbegin; 6467 pcre_uchar opcode; 6468 pcre_uchar bra = OP_BRA; 6469 jump_list *tmp = NULL; 6470 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks; 6471 jump_list **found; 6472 /* Saving previous accept variables. 
*/ 6473 BOOL save_local_exit = common->local_exit; 6474 BOOL save_positive_assert = common->positive_assert; 6475 then_trap_backtrack *save_then_trap = common->then_trap; 6476 struct sljit_label *save_quit_label = common->quit_label; 6477 struct sljit_label *save_accept_label = common->accept_label; 6478 jump_list *save_quit = common->quit; 6479 jump_list *save_positive_assert_quit = common->positive_assert_quit; 6480 jump_list *save_accept = common->accept; 6481 struct sljit_jump *jump; 6482 struct sljit_jump *brajump = NULL; 6483 6484 /* Assert captures then. */ 6485 common->then_trap = NULL; 6486 6487 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) 6488 { 6489 SLJIT_ASSERT(!conditional); 6490 bra = *cc; 6491 cc++; 6492 } 6493 private_data_ptr = PRIVATE_DATA(cc); 6494 SLJIT_ASSERT(private_data_ptr != 0); 6495 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head); 6496 backtrack->framesize = framesize; 6497 backtrack->private_data_ptr = private_data_ptr; 6498 opcode = *cc; 6499 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT); 6500 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target; 6501 ccbegin = cc; 6502 cc += GET(cc, 1); 6503 6504 if (bra == OP_BRAMINZERO) 6505 { 6506 /* This is a braminzero backtrack path. */ 6507 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 6508 free_stack(common, 1); 6509 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); 6510 } 6511 6512 if (framesize < 0) 6513 { 6514 extrasize = needs_control_head ? 
2 : 1; 6515 if (framesize == no_frame) 6516 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); 6517 allocate_stack(common, extrasize); 6518 if (needs_control_head) 6519 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 6520 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 6521 if (needs_control_head) 6522 { 6523 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); 6524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); 6525 } 6526 } 6527 else 6528 { 6529 extrasize = needs_control_head ? 3 : 2; 6530 allocate_stack(common, framesize + extrasize); 6531 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 6532 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw)); 6533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); 6534 if (needs_control_head) 6535 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 6536 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 6537 if (needs_control_head) 6538 { 6539 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); 6540 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); 6541 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); 6542 } 6543 else 6544 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); 6545 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE); 6546 } 6547 6548 memset(&altbacktrack, 0, sizeof(backtrack_common)); 6549 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) 6550 { 6551 /* Negative assert is stronger than positive assert. 
*/ 6552 common->local_exit = TRUE; 6553 common->quit_label = NULL; 6554 common->quit = NULL; 6555 common->positive_assert = FALSE; 6556 } 6557 else 6558 common->positive_assert = TRUE; 6559 common->positive_assert_quit = NULL; 6560 6561 while (1) 6562 { 6563 common->accept_label = NULL; 6564 common->accept = NULL; 6565 altbacktrack.top = NULL; 6566 altbacktrack.topbacktracks = NULL; 6567 6568 if (*ccbegin == OP_ALT) 6569 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 6570 6571 altbacktrack.cc = ccbegin; 6572 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack); 6573 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 6574 { 6575 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) 6576 { 6577 common->local_exit = save_local_exit; 6578 common->quit_label = save_quit_label; 6579 common->quit = save_quit; 6580 } 6581 common->positive_assert = save_positive_assert; 6582 common->then_trap = save_then_trap; 6583 common->accept_label = save_accept_label; 6584 common->positive_assert_quit = save_positive_assert_quit; 6585 common->accept = save_accept; 6586 return NULL; 6587 } 6588 common->accept_label = LABEL(); 6589 if (common->accept != NULL) 6590 set_jumps(common->accept, common->accept_label); 6591 6592 /* Reset stack. */ 6593 if (framesize < 0) 6594 { 6595 if (framesize == no_frame) 6596 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 6597 else 6598 free_stack(common, extrasize); 6599 if (needs_control_head) 6600 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0); 6601 } 6602 else 6603 { 6604 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional) 6605 { 6606 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. 
*/ 6607 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); 6608 if (needs_control_head) 6609 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0); 6610 } 6611 else 6612 { 6613 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 6614 if (needs_control_head) 6615 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw)); 6616 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 6617 } 6618 } 6619 6620 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) 6621 { 6622 /* We know that STR_PTR was stored on the top of the stack. */ 6623 if (conditional) 6624 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0); 6625 else if (bra == OP_BRAZERO) 6626 { 6627 if (framesize < 0) 6628 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw)); 6629 else 6630 { 6631 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); 6632 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw)); 6633 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); 6634 } 6635 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); 6636 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6637 } 6638 else if (framesize >= 0) 6639 { 6640 /* For OP_BRA and OP_BRAMINZERO. 
*/ 6641 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); 6642 } 6643 } 6644 add_jump(compiler, found, JUMP(SLJIT_JUMP)); 6645 6646 compile_backtrackingpath(common, altbacktrack.top); 6647 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 6648 { 6649 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) 6650 { 6651 common->local_exit = save_local_exit; 6652 common->quit_label = save_quit_label; 6653 common->quit = save_quit; 6654 } 6655 common->positive_assert = save_positive_assert; 6656 common->then_trap = save_then_trap; 6657 common->accept_label = save_accept_label; 6658 common->positive_assert_quit = save_positive_assert_quit; 6659 common->accept = save_accept; 6660 return NULL; 6661 } 6662 set_jumps(altbacktrack.topbacktracks, LABEL()); 6663 6664 if (*cc != OP_ALT) 6665 break; 6666 6667 ccbegin = cc; 6668 cc += GET(cc, 1); 6669 } 6670 6671 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) 6672 { 6673 SLJIT_ASSERT(common->positive_assert_quit == NULL); 6674 /* Makes the check less complicated below. */ 6675 common->positive_assert_quit = common->quit; 6676 } 6677 6678 /* None of them matched. 
*/ 6679 if (common->positive_assert_quit != NULL) 6680 { 6681 jump = JUMP(SLJIT_JUMP); 6682 set_jumps(common->positive_assert_quit, LABEL()); 6683 SLJIT_ASSERT(framesize != no_stack); 6684 if (framesize < 0) 6685 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw)); 6686 else 6687 { 6688 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 6689 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 6690 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw)); 6691 } 6692 JUMPHERE(jump); 6693 } 6694 6695 if (needs_control_head) 6696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1)); 6697 6698 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) 6699 { 6700 /* Assert is failed. */ 6701 if (conditional || bra == OP_BRAZERO) 6702 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 6703 6704 if (framesize < 0) 6705 { 6706 /* The topmost item should be 0. */ 6707 if (bra == OP_BRAZERO) 6708 { 6709 if (extrasize == 2) 6710 free_stack(common, 1); 6711 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6712 } 6713 else 6714 free_stack(common, extrasize); 6715 } 6716 else 6717 { 6718 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1)); 6719 /* The topmost item should be 0. */ 6720 if (bra == OP_BRAZERO) 6721 { 6722 free_stack(common, framesize + extrasize - 1); 6723 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6724 } 6725 else 6726 free_stack(common, framesize + extrasize); 6727 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); 6728 } 6729 jump = JUMP(SLJIT_JUMP); 6730 if (bra != OP_BRAZERO) 6731 add_jump(compiler, target, jump); 6732 6733 /* Assert is successful. */ 6734 set_jumps(tmp, LABEL()); 6735 if (framesize < 0) 6736 { 6737 /* We know that STR_PTR was stored on the top of the stack. 
*/ 6738 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw)); 6739 /* Keep the STR_PTR on the top of the stack. */ 6740 if (bra == OP_BRAZERO) 6741 { 6742 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); 6743 if (extrasize == 2) 6744 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 6745 } 6746 else if (bra == OP_BRAMINZERO) 6747 { 6748 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); 6749 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6750 } 6751 } 6752 else 6753 { 6754 if (bra == OP_BRA) 6755 { 6756 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ 6757 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); 6758 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw)); 6759 } 6760 else 6761 { 6762 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ 6763 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw)); 6764 if (extrasize == 2) 6765 { 6766 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 6767 if (bra == OP_BRAMINZERO) 6768 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6769 } 6770 else 6771 { 6772 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0); 6773 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? 
STR_PTR : SLJIT_IMM, 0); 6774 } 6775 } 6776 } 6777 6778 if (bra == OP_BRAZERO) 6779 { 6780 backtrack->matchingpath = LABEL(); 6781 SET_LABEL(jump, backtrack->matchingpath); 6782 } 6783 else if (bra == OP_BRAMINZERO) 6784 { 6785 JUMPTO(SLJIT_JUMP, backtrack->matchingpath); 6786 JUMPHERE(brajump); 6787 if (framesize >= 0) 6788 { 6789 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 6790 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 6791 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); 6792 } 6793 set_jumps(backtrack->common.topbacktracks, LABEL()); 6794 } 6795 } 6796 else 6797 { 6798 /* AssertNot is successful. */ 6799 if (framesize < 0) 6800 { 6801 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 6802 if (bra != OP_BRA) 6803 { 6804 if (extrasize == 2) 6805 free_stack(common, 1); 6806 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6807 } 6808 else 6809 free_stack(common, extrasize); 6810 } 6811 else 6812 { 6813 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 6814 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1)); 6815 /* The topmost item should be 0. 
*/ 6816 if (bra != OP_BRA) 6817 { 6818 free_stack(common, framesize + extrasize - 1); 6819 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6820 } 6821 else 6822 free_stack(common, framesize + extrasize); 6823 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); 6824 } 6825 6826 if (bra == OP_BRAZERO) 6827 backtrack->matchingpath = LABEL(); 6828 else if (bra == OP_BRAMINZERO) 6829 { 6830 JUMPTO(SLJIT_JUMP, backtrack->matchingpath); 6831 JUMPHERE(brajump); 6832 } 6833 6834 if (bra != OP_BRA) 6835 { 6836 SLJIT_ASSERT(found == &backtrack->common.topbacktracks); 6837 set_jumps(backtrack->common.topbacktracks, LABEL()); 6838 backtrack->common.topbacktracks = NULL; 6839 } 6840 } 6841 6842 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) 6843 { 6844 common->local_exit = save_local_exit; 6845 common->quit_label = save_quit_label; 6846 common->quit = save_quit; 6847 } 6848 common->positive_assert = save_positive_assert; 6849 common->then_trap = save_then_trap; 6850 common->accept_label = save_accept_label; 6851 common->positive_assert_quit = save_positive_assert_quit; 6852 common->accept = save_accept; 6853 return cc + 1 + LINK_SIZE; 6854 } 6855 6856 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head) 6857 { 6858 DEFINE_COMPILER; 6859 int stacksize; 6860 6861 if (framesize < 0) 6862 { 6863 if (framesize == no_frame) 6864 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 6865 else 6866 { 6867 stacksize = needs_control_head ? 1 : 0; 6868 if (ket != OP_KET || has_alternatives) 6869 stacksize++; 6870 free_stack(common, stacksize); 6871 } 6872 6873 if (needs_control_head) 6874 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0); 6875 6876 /* TMP2 which is set here used by OP_KETRMAX below. 
*/ 6877 if (ket == OP_KETRMAX) 6878 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0); 6879 else if (ket == OP_KETRMIN) 6880 { 6881 /* Move the STR_PTR to the private_data_ptr. */ 6882 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0); 6883 } 6884 } 6885 else 6886 { 6887 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1; 6888 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw)); 6889 if (needs_control_head) 6890 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0); 6891 6892 if (ket == OP_KETRMAX) 6893 { 6894 /* TMP2 which is set here used by OP_KETRMAX below. */ 6895 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 6896 } 6897 } 6898 if (needs_control_head) 6899 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0); 6900 } 6901 6902 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr) 6903 { 6904 DEFINE_COMPILER; 6905 6906 if (common->capture_last_ptr != 0) 6907 { 6908 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); 6909 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); 6910 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); 6911 stacksize++; 6912 } 6913 if (common->optimized_cbracket[offset >> 1] == 0) 6914 { 6915 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); 6916 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); 6917 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); 6918 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 6919 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0); 6920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); 6921 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); 6922 stacksize += 2; 6923 } 6924 return stacksize; 6925 } 6926 6927 /* 6928 Handling 
bracketed expressions is probably the most complex part.

  Stack layout naming characters:
    S - Push the current STR_PTR
    0 - Push a 0 (NULL)
    A - Push the current STR_PTR. Needed for restoring the STR_PTR
        before the next alternative. Not pushed if there are no alternatives.
    M - Any values pushed by the current alternative. Can be empty, or anything.
    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
    L - Push the previous local (pointed by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times

  The following list shows the regular expression templates, their PCRE byte codes
  and stack layout supported by pcre-sljit.

  (?:)                     OP_BRA     | OP_KET                A M
  ()                       OP_CBRA    | OP_KET                C M
  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
  ()*?
OP_BRAMINZERO | OP_CBRA  | OP_KETRMIN    S 0 ( C M S )*
OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN    S 0 ( C M S )*


Stack layout naming characters:
  A - Push the alternative index (starting from 0) on the stack.
      Not pushed if there are no alternatives.
  M - Any values pushed by the current alternative. Can be empty, or anything.

The next list shows the possible content of a bracket:
  (|)     OP_*BRA    | OP_ALT ...         M A
  (?()|)  OP_*COND   | OP_ALT             M A
  (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
  (?>|)   OP_ONCE_NC | OP_ALT ...         [stack trace] M A
                                          Or nothing, if trace is unnecessary
*/

/* Emits the matching path of a bracket group.

cc      points to the group's first opcode; an OP_BRAZERO / OP_BRAMINZERO
        prefix is consumed here.
parent  is the backtrack node on which the new bracket_backtrack is pushed.

Returns the address following the closing ket of the group (plus
repeat_length when the ket carries repeat private data), or NULL when the
sljit compiler reports an error. */

static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr = 0;
int offset = 0;
int i, stacksize;
int repeat_ptr = 0, repeat_length = 0;
int repeat_type = 0, repeat_count = 0;
pcre_uchar *ccbegin;
pcre_uchar *matchingpath;
pcre_uchar *slot;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
struct sljit_label *rmax_label = NULL;
struct sljit_jump *braminzero = NULL;

PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
ccbegin = cc;
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
ket = *matchingpath;
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
  {
  /* The ket carries repeat information in four consecutive private data slots. */
  repeat_ptr = PRIVATE_DATA(matchingpath);
  repeat_length = PRIVATE_DATA(matchingpath + 1);
  repeat_type = PRIVATE_DATA(matchingpath + 2);
  repeat_count = PRIVATE_DATA(matchingpath + 3);
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
  {
  /* Drop this bracket_backtrack. */
  parent->top = backtrack->prev;
  return matchingpath + 1 + LINK_SIZE + repeat_length;
  }

matchingpath = ccbegin + 1 + LINK_SIZE;
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
cc += GET(cc, 1);

has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
  has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;

if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Capturing brackets has a pre-allocated space. */
  offset = GET2(ccbegin, 1 + LINK_SIZE);
  if (common->optimized_cbracket[offset] == 0)
    {
    private_data_ptr = OVECTOR_PRIV(offset);
    offset <<= 1;
    }
  else
    {
    offset <<= 1;
    private_data_ptr = OVECTOR(offset);
    }
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  matchingpath += IMM2_SIZE;
  }
else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Other brackets simply allocate the next entry. */
  private_data_ptr = PRIVATE_DATA(ccbegin);
  SLJIT_ASSERT(private_data_ptr != 0);
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  if (opcode == OP_ONCE)
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
  }

/* Instructions before the first alternative. */
stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  stacksize++;
if (bra == OP_BRAZERO)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (bra == OP_BRAZERO)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (ket != OP_KETRMIN)
    {
    free_stack(common, 1);
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    }
  else
    {
    if (opcode == OP_ONCE || opcode >= OP_SBRA)
      {
      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      /* Nothing stored during the first run. */
      skip = JUMP(SLJIT_JUMP);
      JUMPHERE(jump);
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
        {
        /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        }
      else
        {
        /* Except when the whole stack frame must be saved. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
        }
      JUMPHERE(skip);
      }
    else
      {
      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      JUMPHERE(jump);
      }
    }
  }

if (repeat_type != 0)
  {
  /* Initialize the repeat counter. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
  if (repeat_type == OP_EXACT)
    rmax_label = LABEL();
  }

if (ket == OP_KETRMIN)
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();

if (ket == OP_KETRMAX)
  {
  rmax_label = LABEL();
  if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
  }

/* Handling capturing brackets and alternatives. */
if (opcode == OP_ONCE)
  {
  stacksize = 0;
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    stacksize++;
    }

  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
    {
    /* Neither capturing brackets nor recursions are found in the block. */
    if (ket == OP_KETRMIN)
      {
      stacksize += 2;
      if (!needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
      if (ket == OP_KETRMAX || has_alternatives)
        stacksize++;
      }

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    stacksize = 0;
    if (needs_control_head)
      {
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
      }

    if (ket == OP_KETRMIN)
      {
      if (needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
      }
    else if (ket == OP_KETRMAX || has_alternatives)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
    }
  else
    {
    if (ket != OP_KET || has_alternatives)
      stacksize++;

    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
    allocate_stack(common, stacksize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

    stacksize = needs_control_head ? 1 : 0;
    if (ket != OP_KET || has_alternatives)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
    }
  }
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Saving the previous values. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Saving the previous value. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }
else if (has_alternatives)
  {
  /* Pushing the starting string pointer. */
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  }

/* Generating code for the first alternative. */
if (opcode == OP_COND || opcode == OP_SCOND)
  {
  if (*matchingpath == OP_CREF)
    {
    SLJIT_ASSERT(has_alternatives);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    matchingpath += 1 + IMM2_SIZE;
    }
  else if (*matchingpath == OP_DNCREF)
    {
    SLJIT_ASSERT(has_alternatives);

    i = GET2(matchingpath, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
    /* STR_PTR is used as a scratch register in the loop; TMP3 keeps its value. */
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
    slot += common->name_entry_size;
    i--;
    while (i-- > 0)
      {
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
      OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
      slot += common->name_entry_size;
      }
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
    matchingpath += 1 + 2 * IMM2_SIZE;
    }
  else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
    {
    /* Never has other case. */
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
    SLJIT_ASSERT(!has_alternatives);

    /* The condition is resolved at compile time; stacksize is reused as
    the "condition is true" flag. The three opcodes must be mutually
    exclusive: OP_FAIL has no operands, so it must never reach the
    OP_DNRREF arm, which reads a count and a name table index. */
    if (*matchingpath == OP_FAIL)
      stacksize = 0;
    else if (*matchingpath == OP_RREF)
      {
      stacksize = GET2(matchingpath, 1);
      if (common->currententry == NULL)
        stacksize = 0;
      else if (stacksize == RREF_ANY)
        stacksize = 1;
      else if (common->currententry->start == 0)
        stacksize = stacksize == 0;
      else
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);

      if (stacksize != 0)
        matchingpath += 1 + IMM2_SIZE;
      }
    else
      {
      if (common->currententry == NULL || common->currententry->start == 0)
        stacksize = 0;
      else
        {
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
        while (stacksize > 0)
          {
          if ((int)GET2(slot, 0) == i)
            break;
          slot += common->name_entry_size;
          stacksize--;
          }
        }

      if (stacksize != 0)
        matchingpath += 1 + 2 * IMM2_SIZE;
      }

    /* The stacksize == 0 is a common "else" case. */
    if (stacksize == 0)
      {
      if (*cc == OP_ALT)
        {
        matchingpath = cc + 1 + LINK_SIZE;
        cc += GET(cc, 1);
        }
      else
        matchingpath = cc;
      }
    }
  else
    {
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
    /* Similar code as PUSH_BACKTRACK macro. */
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
      return NULL;
    memset(assert, 0, sizeof(assert_backtrack));
    assert->common.cc = matchingpath;
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
    }
  }

compile_matchingpath(common, matchingpath, cc, backtrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

if (opcode == OP_ONCE)
  match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

/* First pass: count the stack slots needed after the first alternative. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* We need to preserve the counter. TMP2 will be used below. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  stacksize++;
  }
if (ket != OP_KET || bra != OP_BRA)
  stacksize++;
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    stacksize++;
  if (common->optimized_cbracket[offset >> 1] == 0)
    stacksize += 2;
  }
if (has_alternatives && opcode != OP_ONCE)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

/* Second pass: fill the slots in the same order as they were counted. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* TMP2 was set above. */
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  stacksize++;
  }

if (ket != OP_KET || bra != OP_BRA)
  {
  if (ket != OP_KET)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (offset != 0)
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

if (has_alternatives)
  {
  if (opcode != OP_ONCE)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  if (ket != OP_KETRMAX)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  }

/* Must be after the matchingpath label. */
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
  {
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (repeat_type != 0)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
    /* Drop STR_PTR for greedy plus quantifier. */
    if (opcode != OP_ONCE)
      free_stack(common, 1);
    }
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE)
      {
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
      /* Drop STR_PTR for greedy plus quantifier. */
      if (bra != OP_BRAZERO)
        free_stack(common, 1);
      }
    else
      /* TMP2 must contain the starting STR_PTR. */
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
    }
  else
    JUMPTO(SLJIT_JUMP, rmax_label);
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  }

if (repeat_type == OP_EXACT)
  {
  count_match(common);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  }
else if (repeat_type == OP_UPTO)
  {
  /* We need to preserve the counter. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }

if (bra == OP_BRAZERO)
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
  if (braminzero != NULL)
    {
    JUMPHERE(braminzero);
    /* We need to release the end pointer to perform the
    backtrack for the zero-length iteration. When
    framesize is < 0, OP_ONCE will do the release itself. */
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
      free_stack(common, 1);
    }
  /* Continue to the normal backtrack. */
  }

if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
  count_match(common);

/* Skip the other alternatives. */
while (*cc == OP_ALT)
  cc += GET(cc, 1);
cc += 1 + LINK_SIZE;

/* Temporarily encoding the needs_control_head in framesize. */
if (opcode == OP_ONCE)
  BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
return cc + repeat_length;
}
*/ 7517 if (opcode == OP_ONCE) 7518 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0); 7519 return cc + repeat_length; 7520 } 7521 7522 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) 7523 { 7524 DEFINE_COMPILER; 7525 backtrack_common *backtrack; 7526 pcre_uchar opcode; 7527 int private_data_ptr; 7528 int cbraprivptr = 0; 7529 BOOL needs_control_head; 7530 int framesize; 7531 int stacksize; 7532 int offset = 0; 7533 BOOL zero = FALSE; 7534 pcre_uchar *ccbegin = NULL; 7535 int stack; /* Also contains the offset of control head. */ 7536 struct sljit_label *loop = NULL; 7537 struct jump_list *emptymatch = NULL; 7538 7539 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL); 7540 if (*cc == OP_BRAPOSZERO) 7541 { 7542 zero = TRUE; 7543 cc++; 7544 } 7545 7546 opcode = *cc; 7547 private_data_ptr = PRIVATE_DATA(cc); 7548 SLJIT_ASSERT(private_data_ptr != 0); 7549 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr; 7550 switch(opcode) 7551 { 7552 case OP_BRAPOS: 7553 case OP_SBRAPOS: 7554 ccbegin = cc + 1 + LINK_SIZE; 7555 break; 7556 7557 case OP_CBRAPOS: 7558 case OP_SCBRAPOS: 7559 offset = GET2(cc, 1 + LINK_SIZE); 7560 /* This case cannot be optimized in the same was as 7561 normal capturing brackets. 
*/ 7562 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0); 7563 cbraprivptr = OVECTOR_PRIV(offset); 7564 offset <<= 1; 7565 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE; 7566 break; 7567 7568 default: 7569 SLJIT_ASSERT_STOP(); 7570 break; 7571 } 7572 7573 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head); 7574 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize; 7575 if (framesize < 0) 7576 { 7577 if (offset != 0) 7578 { 7579 stacksize = 2; 7580 if (common->capture_last_ptr != 0) 7581 stacksize++; 7582 } 7583 else 7584 stacksize = 1; 7585 7586 if (needs_control_head) 7587 stacksize++; 7588 if (!zero) 7589 stacksize++; 7590 7591 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; 7592 allocate_stack(common, stacksize); 7593 if (framesize == no_frame) 7594 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); 7595 7596 stack = 0; 7597 if (offset != 0) 7598 { 7599 stack = 2; 7600 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); 7601 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); 7602 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); 7603 if (common->capture_last_ptr != 0) 7604 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); 7605 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); 7606 if (needs_control_head) 7607 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 7608 if (common->capture_last_ptr != 0) 7609 { 7610 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); 7611 stack = 3; 7612 } 7613 } 7614 else 7615 { 7616 if (needs_control_head) 7617 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 7618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 7619 stack = 1; 7620 } 7621 7622 if (needs_control_head) 7623 stack++; 7624 if (!zero) 7625 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1); 7626 if (needs_control_head) 7627 { 7628 stack--; 7629 OP1(SLJIT_MOV, 
SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0); 7630 } 7631 } 7632 else 7633 { 7634 stacksize = framesize + 1; 7635 if (!zero) 7636 stacksize++; 7637 if (needs_control_head) 7638 stacksize++; 7639 if (offset == 0) 7640 stacksize++; 7641 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; 7642 7643 allocate_stack(common, stacksize); 7644 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7645 if (needs_control_head) 7646 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 7647 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1)); 7648 7649 stack = 0; 7650 if (!zero) 7651 { 7652 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1); 7653 stack = 1; 7654 } 7655 if (needs_control_head) 7656 { 7657 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0); 7658 stack++; 7659 } 7660 if (offset == 0) 7661 { 7662 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0); 7663 stack++; 7664 } 7665 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0); 7666 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE); 7667 stack -= 1 + (offset == 0); 7668 } 7669 7670 if (offset != 0) 7671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); 7672 7673 loop = LABEL(); 7674 while (*cc != OP_KETRPOS) 7675 { 7676 backtrack->top = NULL; 7677 backtrack->topbacktracks = NULL; 7678 cc += GET(cc, 1); 7679 7680 compile_matchingpath(common, ccbegin, cc, backtrack); 7681 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 7682 return NULL; 7683 7684 if (framesize < 0) 7685 { 7686 if (framesize == no_frame) 7687 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7688 7689 if (offset != 0) 7690 { 7691 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); 7692 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); 7693 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); 7694 if 
(common->capture_last_ptr != 0) 7695 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); 7696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); 7697 } 7698 else 7699 { 7700 if (opcode == OP_SBRAPOS) 7701 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 7702 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 7703 } 7704 7705 /* Even if the match is empty, we need to reset the control head. */ 7706 if (needs_control_head) 7707 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); 7708 7709 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) 7710 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); 7711 7712 if (!zero) 7713 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); 7714 } 7715 else 7716 { 7717 if (offset != 0) 7718 { 7719 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw)); 7720 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); 7721 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); 7722 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); 7723 if (common->capture_last_ptr != 0) 7724 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); 7725 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); 7726 } 7727 else 7728 { 7729 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7730 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); 7731 if (opcode == OP_SBRAPOS) 7732 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw)); 7733 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0); 7734 } 7735 7736 /* Even if the match is empty, we need to reset the control head. 
*/ 7737 if (needs_control_head) 7738 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); 7739 7740 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) 7741 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); 7742 7743 if (!zero) 7744 { 7745 if (framesize < 0) 7746 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); 7747 else 7748 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 7749 } 7750 } 7751 7752 JUMPTO(SLJIT_JUMP, loop); 7753 flush_stubs(common); 7754 7755 compile_backtrackingpath(common, backtrack->top); 7756 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 7757 return NULL; 7758 set_jumps(backtrack->topbacktracks, LABEL()); 7759 7760 if (framesize < 0) 7761 { 7762 if (offset != 0) 7763 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); 7764 else 7765 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 7766 } 7767 else 7768 { 7769 if (offset != 0) 7770 { 7771 /* Last alternative. */ 7772 if (*cc == OP_KETRPOS) 7773 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7774 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); 7775 } 7776 else 7777 { 7778 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7779 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw)); 7780 } 7781 } 7782 7783 if (*cc == OP_KETRPOS) 7784 break; 7785 ccbegin = cc + 1 + LINK_SIZE; 7786 } 7787 7788 /* We don't have to restore the control head in case of a failed match. */ 7789 7790 backtrack->topbacktracks = NULL; 7791 if (!zero) 7792 { 7793 if (framesize < 0) 7794 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); 7795 else /* TMP2 is set to [private_data_ptr] above. 
*/ 7796 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0)); 7797 } 7798 7799 /* None of them matched. */ 7800 set_jumps(emptymatch, LABEL()); 7801 count_match(common); 7802 return cc + 1 + LINK_SIZE; 7803 } 7804 7805 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end) 7806 { 7807 int class_len; 7808 7809 *opcode = *cc; 7810 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO) 7811 { 7812 cc++; 7813 *type = OP_CHAR; 7814 } 7815 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI) 7816 { 7817 cc++; 7818 *type = OP_CHARI; 7819 *opcode -= OP_STARI - OP_STAR; 7820 } 7821 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO) 7822 { 7823 cc++; 7824 *type = OP_NOT; 7825 *opcode -= OP_NOTSTAR - OP_STAR; 7826 } 7827 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI) 7828 { 7829 cc++; 7830 *type = OP_NOTI; 7831 *opcode -= OP_NOTSTARI - OP_STAR; 7832 } 7833 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO) 7834 { 7835 cc++; 7836 *opcode -= OP_TYPESTAR - OP_STAR; 7837 *type = 0; 7838 } 7839 else 7840 { 7841 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS); 7842 *type = *opcode; 7843 cc++; 7844 class_len = (*type < OP_XCLASS) ? 
(int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0); 7845 *opcode = cc[class_len - 1]; 7846 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY) 7847 { 7848 *opcode -= OP_CRSTAR - OP_STAR; 7849 if (end != NULL) 7850 *end = cc + class_len; 7851 } 7852 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY) 7853 { 7854 *opcode -= OP_CRPOSSTAR - OP_POSSTAR; 7855 if (end != NULL) 7856 *end = cc + class_len; 7857 } 7858 else 7859 { 7860 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE); 7861 *max = GET2(cc, (class_len + IMM2_SIZE)); 7862 *min = GET2(cc, class_len); 7863 7864 if (*min == 0) 7865 { 7866 SLJIT_ASSERT(*max != 0); 7867 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO); 7868 } 7869 if (*max == *min) 7870 *opcode = OP_EXACT; 7871 7872 if (end != NULL) 7873 *end = cc + class_len + 2 * IMM2_SIZE; 7874 } 7875 return cc; 7876 } 7877 7878 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO) 7879 { 7880 *max = GET2(cc, 0); 7881 cc += IMM2_SIZE; 7882 } 7883 7884 if (*type == 0) 7885 { 7886 *type = *cc; 7887 if (end != NULL) 7888 *end = next_opcode(common, cc); 7889 cc++; 7890 return cc; 7891 } 7892 7893 if (end != NULL) 7894 { 7895 *end = cc + 1; 7896 #ifdef SUPPORT_UTF 7897 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc); 7898 #endif 7899 } 7900 return cc; 7901 } 7902 7903 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) 7904 { 7905 DEFINE_COMPILER; 7906 backtrack_common *backtrack; 7907 pcre_uchar opcode; 7908 pcre_uchar type; 7909 int max = -1, min = -1; 7910 pcre_uchar *end; 7911 jump_list *nomatch = NULL; 7912 struct sljit_jump *jump = NULL; 7913 struct sljit_label *label; 7914 int private_data_ptr = PRIVATE_DATA(cc); 7915 int base = (private_data_ptr == 0) ? 
/* Emits the matching path of a repeated single item.

cc      points to the repeat opcode; it is decoded by get_iterator_parameters.
parent  is the backtrack node on which the new iterator_backtrack is pushed.

When the opcode has private data (private_data_ptr != 0) the iterator state
is kept at that offset from SLJIT_SP; otherwise it is kept in slots of the
backtracking stack. Returns the address following the repeated item, or NULL
if PUSH_BACKTRACK fails. */

static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
pcre_uchar type;
int max = -1, min = -1;
pcre_uchar *end;
jump_list *nomatch = NULL;
struct sljit_jump *jump = NULL;
struct sljit_label *label;
int private_data_ptr = PRIVATE_DATA(cc);
/* base / offset0 / offset1 address the two iterator state words. */
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
int tmp_base, tmp_offset;

PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);

/* Select the temporary used by the OP_EXACT, OP_POS* and OP_CRPOSRANGE
cases below: TMP3 for the first group of types, the POSSESSIVE0 slot for
the second group. */
switch(type)
  {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_ANYBYTE:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_CLASS:
  case OP_NCLASS:
  tmp_base = TMP3;
  tmp_offset = 0;
  break;

  default:
  SLJIT_ASSERT_STOP();
  /* Fall through. */

  case OP_EXTUNI:
  case OP_XCLASS:
  case OP_NOTPROP:
  case OP_PROP:
  tmp_base = SLJIT_MEM1(SLJIT_SP);
  tmp_offset = POSSESSIVE0;
  break;
  }

switch(opcode)
  {
  case OP_STAR:
  case OP_PLUS:
  case OP_UPTO:
  case OP_CRRANGE:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* One stack entry is pushed for every successful match of the item. */
    SLJIT_ASSERT(private_data_ptr == 0);
    if (opcode == OP_STAR || opcode == OP_UPTO)
      {
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
      }
    else
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }

    /* POSSESSIVE0 serves as the iteration counter here. */
    if (opcode == OP_UPTO || opcode == OP_CRRANGE)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
    if (opcode == OP_UPTO || opcode == OP_CRRANGE)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
      if (opcode == OP_CRRANGE && min > 0)
        CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
      if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
        jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
      }

    /* We cannot use TMP3 because of this allocate_stack. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    if (jump != NULL)
      JUMPHERE(jump);
    }
  else
    {
    if (opcode == OP_PLUS)
      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
    if (private_data_ptr == 0)
      allocate_stack(common, 2);
    /* offset0 tracks the current end position; offset1 holds the start
    position for OP_STAR / OP_PLUS and a counter starting at 1 otherwise. */
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    if (opcode <= OP_PLUS)
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
    else
      OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &nomatch);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    if (opcode <= OP_PLUS)
      JUMPTO(SLJIT_JUMP, label);
    else if (opcode == OP_CRRANGE && max == 0)
      {
      OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
      JUMPTO(SLJIT_JUMP, label);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, base, offset1);
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
      OP1(SLJIT_MOV, base, offset1, TMP1, 0);
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
      }
    set_jumps(nomatch, LABEL());
    /* Fail if fewer than min items were matched. */
    if (opcode == OP_CRRANGE)
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS, base, offset1, SLJIT_IMM, min + 1));
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    }
  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
  break;

  case OP_MINSTAR:
  case OP_MINPLUS:
  if (opcode == OP_MINPLUS)
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
  if (private_data_ptr == 0)
    allocate_stack(common, 1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
  break;

  case OP_MINUPTO:
  case OP_CRMINRANGE:
  if (private_data_ptr == 0)
    allocate_stack(common, 2);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
  /* OP_CRMINRANGE jumps straight to the backtracking path. */
  if (opcode == OP_CRMINRANGE)
    add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
  break;

  case OP_QUERY:
  case OP_MINQUERY:
  if (private_data_ptr == 0)
    allocate_stack(common, 1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  if (opcode == OP_QUERY)
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
  break;

  case OP_EXACT:
  /* Count down from max in the temporary selected above. */
  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
  label = LABEL();
  compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
  OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, label);
  break;

  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSUPTO:
  if (opcode == OP_POSPLUS)
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
  if (opcode == OP_POSUPTO)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max);
  /* The temporary remembers the position after the last successful match. */
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  label = LABEL();
  compile_char1_matchingpath(common, type, cc, &nomatch);
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  if (opcode != OP_POSUPTO)
    JUMPTO(SLJIT_JUMP, label);
  else
    {
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  set_jumps(nomatch, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
  break;

  case OP_POSQUERY:
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  compile_char1_matchingpath(common, type, cc, &nomatch);
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  set_jumps(nomatch, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
  break;

  case OP_CRPOSRANGE:
  /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
  label = LABEL();
  compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
  OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, label);

  /* max == 0 selects the uncounted loop below. */
  if (max != 0)
    {
    SLJIT_ASSERT(max - min > 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max - min);
    }
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  label = LABEL();
  compile_char1_matchingpath(common, type, cc, &nomatch);
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  if (max == 0)
    JUMPTO(SLJIT_JUMP, label);
  else
    {
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  set_jumps(nomatch, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
  break;

  default:
  SLJIT_ASSERT_STOP();
  break;
  }

count_match(common);
return end;
}
JUMP(SLJIT_JUMP)); 8159 return cc + 1; 8160 } 8161 8162 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty) 8163 { 8164 /* No need to check notempty conditions. */ 8165 if (common->accept_label == NULL) 8166 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP)); 8167 else 8168 JUMPTO(SLJIT_JUMP, common->accept_label); 8169 return cc + 1; 8170 } 8171 8172 if (common->accept_label == NULL) 8173 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0))); 8174 else 8175 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label); 8176 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 8177 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); 8178 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); 8179 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); 8180 if (common->accept_label == NULL) 8181 add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); 8182 else 8183 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label); 8184 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); 8185 if (common->accept_label == NULL) 8186 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0)); 8187 else 8188 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label); 8189 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); 8190 return cc + 1; 8191 } 8192 8193 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc) 8194 { 8195 DEFINE_COMPILER; 8196 int offset = GET2(cc, 1); 8197 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0; 8198 8199 /* Data will be discarded anyway... 
*/ 8200 if (common->currententry != NULL) 8201 return cc + 1 + IMM2_SIZE; 8202 8203 if (!optimized_cbracket) 8204 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset)); 8205 offset <<= 1; 8206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); 8207 if (!optimized_cbracket) 8208 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); 8209 return cc + 1 + IMM2_SIZE; 8210 } 8211 8212 static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) 8213 { 8214 DEFINE_COMPILER; 8215 backtrack_common *backtrack; 8216 pcre_uchar opcode = *cc; 8217 pcre_uchar *ccend = cc + 1; 8218 8219 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG) 8220 ccend += 2 + cc[1]; 8221 8222 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); 8223 8224 if (opcode == OP_SKIP) 8225 { 8226 allocate_stack(common, 1); 8227 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 8228 return ccend; 8229 } 8230 8231 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG) 8232 { 8233 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 8234 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); 8235 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); 8236 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); 8237 } 8238 8239 return ccend; 8240 } 8241 8242 static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP }; 8243 8244 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent) 8245 { 8246 DEFINE_COMPILER; 8247 backtrack_common *backtrack; 8248 BOOL needs_control_head; 8249 int size; 8250 8251 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc); 8252 common->then_trap = BACKTRACK_AS(then_trap_backtrack); 8253 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode; 8254 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - 
common->start); 8255 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head); 8256 8257 size = BACKTRACK_AS(then_trap_backtrack)->framesize; 8258 size = 3 + (size < 0 ? 0 : size); 8259 8260 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 8261 allocate_stack(common, size); 8262 if (size > 3) 8263 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw)); 8264 else 8265 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0); 8266 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start); 8267 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap); 8268 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0); 8269 8270 size = BACKTRACK_AS(then_trap_backtrack)->framesize; 8271 if (size >= 0) 8272 init_frame(common, cc, ccend, size - 1, 0, FALSE); 8273 } 8274 8275 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent) 8276 { 8277 DEFINE_COMPILER; 8278 backtrack_common *backtrack; 8279 BOOL has_then_trap = FALSE; 8280 then_trap_backtrack *save_then_trap = NULL; 8281 8282 SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS)); 8283 8284 if (common->has_then && common->then_offsets[cc - common->start] != 0) 8285 { 8286 SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0); 8287 has_then_trap = TRUE; 8288 save_then_trap = common->then_trap; 8289 /* Tail item on backtrack. 
*/ 8290 compile_then_trap_matchingpath(common, cc, ccend, parent); 8291 } 8292 8293 while (cc < ccend) 8294 { 8295 switch(*cc) 8296 { 8297 case OP_SOD: 8298 case OP_SOM: 8299 case OP_NOT_WORD_BOUNDARY: 8300 case OP_WORD_BOUNDARY: 8301 case OP_NOT_DIGIT: 8302 case OP_DIGIT: 8303 case OP_NOT_WHITESPACE: 8304 case OP_WHITESPACE: 8305 case OP_NOT_WORDCHAR: 8306 case OP_WORDCHAR: 8307 case OP_ANY: 8308 case OP_ALLANY: 8309 case OP_ANYBYTE: 8310 case OP_NOTPROP: 8311 case OP_PROP: 8312 case OP_ANYNL: 8313 case OP_NOT_HSPACE: 8314 case OP_HSPACE: 8315 case OP_NOT_VSPACE: 8316 case OP_VSPACE: 8317 case OP_EXTUNI: 8318 case OP_EODN: 8319 case OP_EOD: 8320 case OP_CIRC: 8321 case OP_CIRCM: 8322 case OP_DOLL: 8323 case OP_DOLLM: 8324 case OP_NOT: 8325 case OP_NOTI: 8326 case OP_REVERSE: 8327 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); 8328 break; 8329 8330 case OP_SET_SOM: 8331 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); 8332 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); 8333 allocate_stack(common, 1); 8334 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); 8335 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); 8336 cc++; 8337 break; 8338 8339 case OP_CHAR: 8340 case OP_CHARI: 8341 if (common->mode == JIT_COMPILE) 8342 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); 8343 else 8344 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? 
&parent->top->nextbacktracks : &parent->topbacktracks); 8345 break; 8346 8347 case OP_STAR: 8348 case OP_MINSTAR: 8349 case OP_PLUS: 8350 case OP_MINPLUS: 8351 case OP_QUERY: 8352 case OP_MINQUERY: 8353 case OP_UPTO: 8354 case OP_MINUPTO: 8355 case OP_EXACT: 8356 case OP_POSSTAR: 8357 case OP_POSPLUS: 8358 case OP_POSQUERY: 8359 case OP_POSUPTO: 8360 case OP_STARI: 8361 case OP_MINSTARI: 8362 case OP_PLUSI: 8363 case OP_MINPLUSI: 8364 case OP_QUERYI: 8365 case OP_MINQUERYI: 8366 case OP_UPTOI: 8367 case OP_MINUPTOI: 8368 case OP_EXACTI: 8369 case OP_POSSTARI: 8370 case OP_POSPLUSI: 8371 case OP_POSQUERYI: 8372 case OP_POSUPTOI: 8373 case OP_NOTSTAR: 8374 case OP_NOTMINSTAR: 8375 case OP_NOTPLUS: 8376 case OP_NOTMINPLUS: 8377 case OP_NOTQUERY: 8378 case OP_NOTMINQUERY: 8379 case OP_NOTUPTO: 8380 case OP_NOTMINUPTO: 8381 case OP_NOTEXACT: 8382 case OP_NOTPOSSTAR: 8383 case OP_NOTPOSPLUS: 8384 case OP_NOTPOSQUERY: 8385 case OP_NOTPOSUPTO: 8386 case OP_NOTSTARI: 8387 case OP_NOTMINSTARI: 8388 case OP_NOTPLUSI: 8389 case OP_NOTMINPLUSI: 8390 case OP_NOTQUERYI: 8391 case OP_NOTMINQUERYI: 8392 case OP_NOTUPTOI: 8393 case OP_NOTMINUPTOI: 8394 case OP_NOTEXACTI: 8395 case OP_NOTPOSSTARI: 8396 case OP_NOTPOSPLUSI: 8397 case OP_NOTPOSQUERYI: 8398 case OP_NOTPOSUPTOI: 8399 case OP_TYPESTAR: 8400 case OP_TYPEMINSTAR: 8401 case OP_TYPEPLUS: 8402 case OP_TYPEMINPLUS: 8403 case OP_TYPEQUERY: 8404 case OP_TYPEMINQUERY: 8405 case OP_TYPEUPTO: 8406 case OP_TYPEMINUPTO: 8407 case OP_TYPEEXACT: 8408 case OP_TYPEPOSSTAR: 8409 case OP_TYPEPOSPLUS: 8410 case OP_TYPEPOSQUERY: 8411 case OP_TYPEPOSUPTO: 8412 cc = compile_iterator_matchingpath(common, cc, parent); 8413 break; 8414 8415 case OP_CLASS: 8416 case OP_NCLASS: 8417 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE) 8418 cc = compile_iterator_matchingpath(common, cc, parent); 8419 else 8420 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? 
&parent->top->nextbacktracks : &parent->topbacktracks); 8421 break; 8422 8423 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 8424 case OP_XCLASS: 8425 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE) 8426 cc = compile_iterator_matchingpath(common, cc, parent); 8427 else 8428 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); 8429 break; 8430 #endif 8431 8432 case OP_REF: 8433 case OP_REFI: 8434 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE) 8435 cc = compile_ref_iterator_matchingpath(common, cc, parent); 8436 else 8437 { 8438 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); 8439 cc += 1 + IMM2_SIZE; 8440 } 8441 break; 8442 8443 case OP_DNREF: 8444 case OP_DNREFI: 8445 if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE) 8446 cc = compile_ref_iterator_matchingpath(common, cc, parent); 8447 else 8448 { 8449 compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); 8450 compile_ref_matchingpath(common, cc, parent->top != NULL ? 
&parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); 8451 cc += 1 + 2 * IMM2_SIZE; 8452 } 8453 break; 8454 8455 case OP_RECURSE: 8456 cc = compile_recurse_matchingpath(common, cc, parent); 8457 break; 8458 8459 case OP_CALLOUT: 8460 cc = compile_callout_matchingpath(common, cc, parent); 8461 break; 8462 8463 case OP_ASSERT: 8464 case OP_ASSERT_NOT: 8465 case OP_ASSERTBACK: 8466 case OP_ASSERTBACK_NOT: 8467 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc); 8468 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE); 8469 break; 8470 8471 case OP_BRAMINZERO: 8472 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc); 8473 cc = bracketend(cc + 1); 8474 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN) 8475 { 8476 allocate_stack(common, 1); 8477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 8478 } 8479 else 8480 { 8481 allocate_stack(common, 2); 8482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 8483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0); 8484 } 8485 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL(); 8486 count_match(common); 8487 break; 8488 8489 case OP_ONCE: 8490 case OP_ONCE_NC: 8491 case OP_BRA: 8492 case OP_CBRA: 8493 case OP_COND: 8494 case OP_SBRA: 8495 case OP_SCBRA: 8496 case OP_SCOND: 8497 cc = compile_bracket_matchingpath(common, cc, parent); 8498 break; 8499 8500 case OP_BRAZERO: 8501 if (cc[1] > OP_ASSERTBACK_NOT) 8502 cc = compile_bracket_matchingpath(common, cc, parent); 8503 else 8504 { 8505 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc); 8506 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE); 8507 } 8508 break; 8509 8510 case OP_BRAPOS: 8511 case OP_CBRAPOS: 8512 case OP_SBRAPOS: 8513 case OP_SCBRAPOS: 8514 case OP_BRAPOSZERO: 8515 cc = compile_bracketpos_matchingpath(common, cc, parent); 8516 break; 8517 8518 case OP_MARK: 8519 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); 8520 
SLJIT_ASSERT(common->mark_ptr != 0); 8521 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); 8522 allocate_stack(common, common->has_skip_arg ? 5 : 1); 8523 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 8524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0); 8525 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); 8526 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); 8527 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); 8528 if (common->has_skip_arg) 8529 { 8530 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 8531 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0); 8532 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark); 8533 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2)); 8534 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0); 8535 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); 8536 } 8537 cc += 1 + 2 + cc[1]; 8538 break; 8539 8540 case OP_PRUNE: 8541 case OP_PRUNE_ARG: 8542 case OP_SKIP: 8543 case OP_SKIP_ARG: 8544 case OP_THEN: 8545 case OP_THEN_ARG: 8546 case OP_COMMIT: 8547 cc = compile_control_verb_matchingpath(common, cc, parent); 8548 break; 8549 8550 case OP_FAIL: 8551 case OP_ACCEPT: 8552 case OP_ASSERT_ACCEPT: 8553 cc = compile_fail_accept_matchingpath(common, cc, parent); 8554 break; 8555 8556 case OP_CLOSE: 8557 cc = compile_close_matchingpath(common, cc); 8558 break; 8559 8560 case OP_SKIPZERO: 8561 cc = bracketend(cc + 1); 8562 break; 8563 8564 default: 8565 SLJIT_ASSERT_STOP(); 8566 return; 8567 } 8568 if (cc == NULL) 8569 return; 8570 } 8571 8572 if (has_then_trap) 8573 { 8574 /* Head item on backtrack. 
*/ 8575 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc); 8576 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode; 8577 BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap; 8578 common->then_trap = save_then_trap; 8579 } 8580 SLJIT_ASSERT(cc == ccend); 8581 } 8582 8583 #undef PUSH_BACKTRACK 8584 #undef PUSH_BACKTRACK_NOVALUE 8585 #undef BACKTRACK_AS 8586 8587 #define COMPILE_BACKTRACKINGPATH(current) \ 8588 do \ 8589 { \ 8590 compile_backtrackingpath(common, (current)); \ 8591 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ 8592 return; \ 8593 } \ 8594 while (0) 8595 8596 #define CURRENT_AS(type) ((type *)current) 8597 8598 static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) 8599 { 8600 DEFINE_COMPILER; 8601 pcre_uchar *cc = current->cc; 8602 pcre_uchar opcode; 8603 pcre_uchar type; 8604 int max = -1, min = -1; 8605 struct sljit_label *label = NULL; 8606 struct sljit_jump *jump = NULL; 8607 jump_list *jumplist = NULL; 8608 int private_data_ptr = PRIVATE_DATA(cc); 8609 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); 8610 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; 8611 int offset1 = (private_data_ptr == 0) ? 
STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); 8612 8613 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL); 8614 8615 switch(opcode) 8616 { 8617 case OP_STAR: 8618 case OP_PLUS: 8619 case OP_UPTO: 8620 case OP_CRRANGE: 8621 if (type == OP_ANYNL || type == OP_EXTUNI) 8622 { 8623 SLJIT_ASSERT(private_data_ptr == 0); 8624 set_jumps(current->topbacktracks, LABEL()); 8625 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8626 free_stack(common, 1); 8627 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); 8628 } 8629 else 8630 { 8631 if (opcode == OP_UPTO) 8632 min = 0; 8633 if (opcode <= OP_PLUS) 8634 { 8635 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 8636 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1); 8637 } 8638 else 8639 { 8640 OP1(SLJIT_MOV, TMP1, 0, base, offset1); 8641 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 8642 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1); 8643 OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1); 8644 } 8645 skip_char_back(common); 8646 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); 8647 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); 8648 if (opcode == OP_CRRANGE) 8649 set_jumps(current->topbacktracks, LABEL()); 8650 JUMPHERE(jump); 8651 if (private_data_ptr == 0) 8652 free_stack(common, 2); 8653 if (opcode == OP_PLUS) 8654 set_jumps(current->topbacktracks, LABEL()); 8655 } 8656 break; 8657 8658 case OP_MINSTAR: 8659 case OP_MINPLUS: 8660 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 8661 compile_char1_matchingpath(common, type, cc, &jumplist); 8662 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); 8663 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); 8664 set_jumps(jumplist, LABEL()); 8665 if (private_data_ptr == 0) 8666 free_stack(common, 1); 8667 if (opcode == OP_MINPLUS) 8668 set_jumps(current->topbacktracks, LABEL()); 8669 break; 8670 8671 case OP_MINUPTO: 8672 case OP_CRMINRANGE: 8673 if (opcode == 
OP_CRMINRANGE) 8674 { 8675 label = LABEL(); 8676 set_jumps(current->topbacktracks, label); 8677 } 8678 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 8679 compile_char1_matchingpath(common, type, cc, &jumplist); 8680 8681 OP1(SLJIT_MOV, TMP1, 0, base, offset1); 8682 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); 8683 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); 8684 OP1(SLJIT_MOV, base, offset1, TMP1, 0); 8685 8686 if (opcode == OP_CRMINRANGE) 8687 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min + 1, label); 8688 8689 if (opcode == OP_CRMINRANGE && max == 0) 8690 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); 8691 else 8692 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath); 8693 8694 set_jumps(jumplist, LABEL()); 8695 if (private_data_ptr == 0) 8696 free_stack(common, 2); 8697 break; 8698 8699 case OP_QUERY: 8700 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 8701 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); 8702 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); 8703 jump = JUMP(SLJIT_JUMP); 8704 set_jumps(current->topbacktracks, LABEL()); 8705 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 8706 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); 8707 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); 8708 JUMPHERE(jump); 8709 if (private_data_ptr == 0) 8710 free_stack(common, 1); 8711 break; 8712 8713 case OP_MINQUERY: 8714 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 8715 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); 8716 jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); 8717 compile_char1_matchingpath(common, type, cc, &jumplist); 8718 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); 8719 set_jumps(jumplist, LABEL()); 8720 JUMPHERE(jump); 8721 if (private_data_ptr == 0) 8722 free_stack(common, 1); 8723 break; 8724 8725 case OP_EXACT: 8726 case OP_POSPLUS: 8727 case OP_CRPOSRANGE: 8728 set_jumps(current->topbacktracks, LABEL()); 8729 break; 8730 8731 
case OP_POSSTAR: 8732 case OP_POSQUERY: 8733 case OP_POSUPTO: 8734 break; 8735 8736 default: 8737 SLJIT_ASSERT_STOP(); 8738 break; 8739 } 8740 } 8741 8742 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) 8743 { 8744 DEFINE_COMPILER; 8745 pcre_uchar *cc = current->cc; 8746 BOOL ref = (*cc == OP_REF || *cc == OP_REFI); 8747 pcre_uchar type; 8748 8749 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE]; 8750 8751 if ((type & 0x1) == 0) 8752 { 8753 /* Maximize case. */ 8754 set_jumps(current->topbacktracks, LABEL()); 8755 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8756 free_stack(common, 1); 8757 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); 8758 return; 8759 } 8760 8761 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8762 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); 8763 set_jumps(current->topbacktracks, LABEL()); 8764 free_stack(common, ref ? 
2 : 3); 8765 } 8766 8767 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current) 8768 { 8769 DEFINE_COMPILER; 8770 8771 if (CURRENT_AS(recurse_backtrack)->inlined_pattern) 8772 compile_backtrackingpath(common, current->top); 8773 set_jumps(current->topbacktracks, LABEL()); 8774 if (CURRENT_AS(recurse_backtrack)->inlined_pattern) 8775 return; 8776 8777 if (common->has_set_som && common->mark_ptr != 0) 8778 { 8779 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8780 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 8781 free_stack(common, 2); 8782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0); 8783 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0); 8784 } 8785 else if (common->has_set_som || common->mark_ptr != 0) 8786 { 8787 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8788 free_stack(common, 1); 8789 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0); 8790 } 8791 } 8792 8793 static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current) 8794 { 8795 DEFINE_COMPILER; 8796 pcre_uchar *cc = current->cc; 8797 pcre_uchar bra = OP_BRA; 8798 struct sljit_jump *brajump = NULL; 8799 8800 SLJIT_ASSERT(*cc != OP_BRAMINZERO); 8801 if (*cc == OP_BRAZERO) 8802 { 8803 bra = *cc; 8804 cc++; 8805 } 8806 8807 if (bra == OP_BRAZERO) 8808 { 8809 SLJIT_ASSERT(current->topbacktracks == NULL); 8810 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8811 } 8812 8813 if (CURRENT_AS(assert_backtrack)->framesize < 0) 8814 { 8815 set_jumps(current->topbacktracks, LABEL()); 8816 8817 if (bra == OP_BRAZERO) 8818 { 8819 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 8820 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); 8821 free_stack(common, 1); 8822 } 8823 return; 8824 } 8825 8826 if (bra == OP_BRAZERO) 8827 { 8828 
if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT) 8829 { 8830 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 8831 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); 8832 free_stack(common, 1); 8833 return; 8834 } 8835 free_stack(common, 1); 8836 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); 8837 } 8838 8839 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK) 8840 { 8841 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr); 8842 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 8843 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw)); 8844 8845 set_jumps(current->topbacktracks, LABEL()); 8846 } 8847 else 8848 set_jumps(current->topbacktracks, LABEL()); 8849 8850 if (bra == OP_BRAZERO) 8851 { 8852 /* We know there is enough place on the stack. */ 8853 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); 8854 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 8855 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath); 8856 JUMPHERE(brajump); 8857 } 8858 } 8859 8860 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current) 8861 { 8862 DEFINE_COMPILER; 8863 int opcode, stacksize, alt_count, alt_max; 8864 int offset = 0; 8865 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr; 8866 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0; 8867 pcre_uchar *cc = current->cc; 8868 pcre_uchar *ccbegin; 8869 pcre_uchar *ccprev; 8870 pcre_uchar bra = OP_BRA; 8871 pcre_uchar ket; 8872 assert_backtrack *assert; 8873 sljit_uw *next_update_addr = NULL; 8874 BOOL has_alternatives; 8875 BOOL needs_control_head = FALSE; 8876 struct sljit_jump *brazero = NULL; 8877 struct sljit_jump *alt1 = NULL; 8878 struct sljit_jump *alt2 = NULL; 8879 struct 
sljit_jump *once = NULL; 8880 struct sljit_jump *cond = NULL; 8881 struct sljit_label *rmin_label = NULL; 8882 struct sljit_label *exact_label = NULL; 8883 8884 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) 8885 { 8886 bra = *cc; 8887 cc++; 8888 } 8889 8890 opcode = *cc; 8891 ccbegin = bracketend(cc) - 1 - LINK_SIZE; 8892 ket = *ccbegin; 8893 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0) 8894 { 8895 repeat_ptr = PRIVATE_DATA(ccbegin); 8896 repeat_type = PRIVATE_DATA(ccbegin + 2); 8897 repeat_count = PRIVATE_DATA(ccbegin + 3); 8898 SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0); 8899 if (repeat_type == OP_UPTO) 8900 ket = OP_KETRMAX; 8901 if (repeat_type == OP_MINUPTO) 8902 ket = OP_KETRMIN; 8903 } 8904 ccbegin = cc; 8905 cc += GET(cc, 1); 8906 has_alternatives = *cc == OP_ALT; 8907 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) 8908 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL; 8909 if (opcode == OP_CBRA || opcode == OP_SCBRA) 8910 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1; 8911 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) 8912 opcode = OP_SCOND; 8913 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC)) 8914 opcode = OP_ONCE; 8915 8916 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0; 8917 8918 /* Decoding the needs_control_head in framesize. */ 8919 if (opcode == OP_ONCE) 8920 { 8921 needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0; 8922 CURRENT_AS(bracket_backtrack)->u.framesize >>= 1; 8923 } 8924 8925 if (ket != OP_KET && repeat_type != 0) 8926 { 8927 /* TMP1 is used in OP_KETRMIN below. 
*/ 8928 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8929 free_stack(common, 1); 8930 if (repeat_type == OP_UPTO) 8931 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1); 8932 else 8933 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0); 8934 } 8935 8936 if (ket == OP_KETRMAX) 8937 { 8938 if (bra == OP_BRAZERO) 8939 { 8940 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8941 free_stack(common, 1); 8942 brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0); 8943 } 8944 } 8945 else if (ket == OP_KETRMIN) 8946 { 8947 if (bra != OP_BRAMINZERO) 8948 { 8949 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8950 if (repeat_type != 0) 8951 { 8952 /* TMP1 was set a few lines above. */ 8953 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); 8954 /* Drop STR_PTR for non-greedy plus quantifier. */ 8955 if (opcode != OP_ONCE) 8956 free_stack(common, 1); 8957 } 8958 else if (opcode >= OP_SBRA || opcode == OP_ONCE) 8959 { 8960 /* Checking zero-length iteration. */ 8961 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0) 8962 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); 8963 else 8964 { 8965 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 8966 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath); 8967 } 8968 /* Drop STR_PTR for non-greedy plus quantifier. 
*/ 8969 if (opcode != OP_ONCE) 8970 free_stack(common, 1); 8971 } 8972 else 8973 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); 8974 } 8975 rmin_label = LABEL(); 8976 if (repeat_type != 0) 8977 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); 8978 } 8979 else if (bra == OP_BRAZERO) 8980 { 8981 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8982 free_stack(common, 1); 8983 brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); 8984 } 8985 else if (repeat_type == OP_EXACT) 8986 { 8987 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); 8988 exact_label = LABEL(); 8989 } 8990 8991 if (offset != 0) 8992 { 8993 if (common->capture_last_ptr != 0) 8994 { 8995 SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0); 8996 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8997 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 8998 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0); 8999 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); 9000 free_stack(common, 3); 9001 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0); 9002 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0); 9003 } 9004 else if (common->optimized_cbracket[offset >> 1] == 0) 9005 { 9006 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 9007 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 9008 free_stack(common, 2); 9009 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); 9010 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); 9011 } 9012 } 9013 9014 if (SLJIT_UNLIKELY(opcode == OP_ONCE)) 9015 { 9016 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) 9017 { 9018 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 9019 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 9020 } 9021 once = JUMP(SLJIT_JUMP); 9022 } 9023 else if (SLJIT_UNLIKELY(opcode == 
OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) 9024 { 9025 if (has_alternatives) 9026 { 9027 /* Always exactly one alternative. */ 9028 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 9029 free_stack(common, 1); 9030 9031 alt_max = 2; 9032 alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw)); 9033 } 9034 } 9035 else if (has_alternatives) 9036 { 9037 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 9038 free_stack(common, 1); 9039 9040 if (alt_max > 4) 9041 { 9042 /* Table jump if alt_max is greater than 4. */ 9043 next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw)); 9044 if (SLJIT_UNLIKELY(next_update_addr == NULL)) 9045 return; 9046 sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr); 9047 add_label_addr(common, next_update_addr++); 9048 } 9049 else 9050 { 9051 if (alt_max == 4) 9052 alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); 9053 alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw)); 9054 } 9055 } 9056 9057 COMPILE_BACKTRACKINGPATH(current->top); 9058 if (current->topbacktracks) 9059 set_jumps(current->topbacktracks, LABEL()); 9060 9061 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) 9062 { 9063 /* Conditional block always has at most one alternative. 
*/ 9064 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) 9065 { 9066 SLJIT_ASSERT(has_alternatives); 9067 assert = CURRENT_AS(bracket_backtrack)->u.assert; 9068 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK)) 9069 { 9070 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); 9071 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 9072 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw)); 9073 } 9074 cond = JUMP(SLJIT_JUMP); 9075 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL()); 9076 } 9077 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL) 9078 { 9079 SLJIT_ASSERT(has_alternatives); 9080 cond = JUMP(SLJIT_JUMP); 9081 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL()); 9082 } 9083 else 9084 SLJIT_ASSERT(!has_alternatives); 9085 } 9086 9087 if (has_alternatives) 9088 { 9089 alt_count = sizeof(sljit_uw); 9090 do 9091 { 9092 current->top = NULL; 9093 current->topbacktracks = NULL; 9094 current->nextbacktracks = NULL; 9095 /* Conditional blocks always have an additional alternative, even if it is empty. */ 9096 if (*cc == OP_ALT) 9097 { 9098 ccprev = cc + 1 + LINK_SIZE; 9099 cc += GET(cc, 1); 9100 if (opcode != OP_COND && opcode != OP_SCOND) 9101 { 9102 if (opcode != OP_ONCE) 9103 { 9104 if (private_data_ptr != 0) 9105 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 9106 else 9107 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 9108 } 9109 else 9110 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0)); 9111 } 9112 compile_matchingpath(common, ccprev, cc, current); 9113 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 9114 return; 9115 } 9116 9117 /* Instructions after the current alternative is successfully matched. 
*/ 9118 /* There is a similar code in compile_bracket_matchingpath. */ 9119 if (opcode == OP_ONCE) 9120 match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); 9121 9122 stacksize = 0; 9123 if (repeat_type == OP_MINUPTO) 9124 { 9125 /* We need to preserve the counter. TMP2 will be used below. */ 9126 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); 9127 stacksize++; 9128 } 9129 if (ket != OP_KET || bra != OP_BRA) 9130 stacksize++; 9131 if (offset != 0) 9132 { 9133 if (common->capture_last_ptr != 0) 9134 stacksize++; 9135 if (common->optimized_cbracket[offset >> 1] == 0) 9136 stacksize += 2; 9137 } 9138 if (opcode != OP_ONCE) 9139 stacksize++; 9140 9141 if (stacksize > 0) 9142 allocate_stack(common, stacksize); 9143 9144 stacksize = 0; 9145 if (repeat_type == OP_MINUPTO) 9146 { 9147 /* TMP2 was set above. */ 9148 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1); 9149 stacksize++; 9150 } 9151 9152 if (ket != OP_KET || bra != OP_BRA) 9153 { 9154 if (ket != OP_KET) 9155 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); 9156 else 9157 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); 9158 stacksize++; 9159 } 9160 9161 if (offset != 0) 9162 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); 9163 9164 if (opcode != OP_ONCE) 9165 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count); 9166 9167 if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0) 9168 { 9169 /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. 
*/ 9170 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); 9171 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); 9172 } 9173 9174 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath); 9175 9176 if (opcode != OP_ONCE) 9177 { 9178 if (alt_max > 4) 9179 add_label_addr(common, next_update_addr++); 9180 else 9181 { 9182 if (alt_count != 2 * sizeof(sljit_uw)) 9183 { 9184 JUMPHERE(alt1); 9185 if (alt_max == 3 && alt_count == sizeof(sljit_uw)) 9186 alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); 9187 } 9188 else 9189 { 9190 JUMPHERE(alt2); 9191 if (alt_max == 4) 9192 alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw)); 9193 } 9194 } 9195 alt_count += sizeof(sljit_uw); 9196 } 9197 9198 COMPILE_BACKTRACKINGPATH(current->top); 9199 if (current->topbacktracks) 9200 set_jumps(current->topbacktracks, LABEL()); 9201 SLJIT_ASSERT(!current->nextbacktracks); 9202 } 9203 while (*cc == OP_ALT); 9204 9205 if (cond != NULL) 9206 { 9207 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND); 9208 assert = CURRENT_AS(bracket_backtrack)->u.assert; 9209 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0) 9210 { 9211 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); 9212 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 9213 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw)); 9214 } 9215 JUMPHERE(cond); 9216 } 9217 9218 /* Free the STR_PTR. */ 9219 if (private_data_ptr == 0) 9220 free_stack(common, 1); 9221 } 9222 9223 if (offset != 0) 9224 { 9225 /* Using both tmp register is better for instruction scheduling. 
*/ 9226 if (common->optimized_cbracket[offset >> 1] != 0) 9227 { 9228 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 9229 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 9230 free_stack(common, 2); 9231 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); 9232 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); 9233 } 9234 else 9235 { 9236 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 9237 free_stack(common, 1); 9238 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); 9239 } 9240 } 9241 else if (opcode == OP_SBRA || opcode == OP_SCOND) 9242 { 9243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0)); 9244 free_stack(common, 1); 9245 } 9246 else if (opcode == OP_ONCE) 9247 { 9248 cc = ccbegin + GET(ccbegin, 1); 9249 stacksize = needs_control_head ? 1 : 0; 9250 9251 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) 9252 { 9253 /* Reset head and drop saved frame. */ 9254 stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1); 9255 } 9256 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN)) 9257 { 9258 /* The STR_PTR must be released. */ 9259 stacksize++; 9260 } 9261 free_stack(common, stacksize); 9262 9263 JUMPHERE(once); 9264 /* Restore previous private_data_ptr */ 9265 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) 9266 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw)); 9267 else if (ket == OP_KETRMIN) 9268 { 9269 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 9270 /* See the comment below. 
*/ 9271 free_stack(common, 2); 9272 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); 9273 } 9274 } 9275 9276 if (repeat_type == OP_EXACT) 9277 { 9278 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); 9279 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0); 9280 CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label); 9281 } 9282 else if (ket == OP_KETRMAX) 9283 { 9284 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 9285 if (bra != OP_BRAZERO) 9286 free_stack(common, 1); 9287 9288 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); 9289 if (bra == OP_BRAZERO) 9290 { 9291 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 9292 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath); 9293 JUMPHERE(brazero); 9294 free_stack(common, 1); 9295 } 9296 } 9297 else if (ket == OP_KETRMIN) 9298 { 9299 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 9300 9301 /* OP_ONCE removes everything in case of a backtrack, so we don't 9302 need to explicitly release the STR_PTR. The extra release would 9303 affect badly the free_stack(2) above. */ 9304 if (opcode != OP_ONCE) 9305 free_stack(common, 1); 9306 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label); 9307 if (opcode == OP_ONCE) 9308 free_stack(common, bra == OP_BRAMINZERO ? 
2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}

/* Emits the backtracking path of a possessive bracket. It is dispatched from
compile_backtrackingpath() for OP_BRAPOS, OP_CBRAPOS, OP_SBRAPOS, OP_SCBRAPOS
and OP_BRAPOSZERO.

Arguments:
  common      the shared compiler state
  current     the backtrack record, accessed as a bracketpos_backtrack

Two shapes are generated, selected by the record's framesize:
  framesize < 0   no frame has to be reverted; the stack slots are read back
                  directly and then released
  framesize >= 0  the frame is reverted through the common revertframes
                  routine before the private data slot is reloaded */

static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int offset;
struct sljit_jump *jump;

if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
  {
  if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
    {
    /* Capturing variant: STACK(0) and STACK(1) are written back to the
    ovector pair of this bracket (presumably the values saved by the matching
    path - confirm there). The loads and stores are interleaved, and TMP1 is
    reused for STACK(2) only after its first value has been stored. */
    offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    }
  /* Pending top-level backtracks land here, after the reloads above, and
  share the single stack release. */
  set_jumps(current->topbacktracks, LABEL());
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
  return;
  }

/* Framed variant: reload STACK_TOP from the private data slot and call the
shared revertframes routine. */
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));

if (current->topbacktracks)
  {
  /* The fall-through path jumps over the release; only the code reached
  through topbacktracks executes the free_stack() below. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(current->topbacktracks, LABEL());
  /* Drop the stack frame. */
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
  JUMPHERE(jump);
  }
/* Both paths reload the private data slot from the word that is framesize
machine words above STACK_TOP. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
}

/* Emits the backtracking path of OP_BRAMINZERO. The code of the bracket (or
assertion) that follows the opcode is generated here, by calling the matching
path generators by hand.

Arguments:
  common      the shared compiler state
  current     the backtrack record, accessed as a braminzero_backtrack

The record's top, topbacktracks and nextbacktracks lists are cleared first,
and the closing assert checks that both jump lists are still empty. */

static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
assert_backtrack backtrack;

current->top = NULL;
current->topbacktracks = NULL;
current->nextbacktracks = NULL;
/* The opcode after OP_BRAMINZERO decides the kind of group: anything above
OP_ASSERTBACK_NOT is handled as a bracket, the rest as an assertion. */
if (current->cc[1] > OP_ASSERTBACK_NOT)
  {
  /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
  compile_bracket_matchingpath(common, current->cc, current);
  compile_bracket_backtrackingpath(common, current->top);
  }
else
  {
  /* A zeroed, function-local assert_backtrack is used; only its cc and
  matchingpath fields are filled in before the call. */
  memset(&backtrack, 0, sizeof(backtrack));
  backtrack.common.cc = current->cc;
  backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
  /* Manual call of compile_assert_matchingpath. */
  compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
  }
SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
}

/* Emits the backtracking path of a backtracking control verb. It is
dispatched from compile_backtrackingpath() for OP_THEN, OP_THEN_ARG, OP_PRUNE,
OP_PRUNE_ARG, OP_SKIP and OP_SKIP_ARG.

Arguments:
  common      the shared compiler state
  current     the backtrack record of the verb

The cases are tried in this order, and each one returns when it applies:
  1. (*THEN) with an active then trap: search for the trap, jump to its quit list
  2. (*THEN) inside a positive assertion: jump to positive_assert_quit
  3. local_exit is set: jump to the quit label / quit list
  4. OP_SKIP_ARG: call do_search_mark() and restart unless it returned -1
  5. everything else: set STR_PTR and jump to the reset_match list */

static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar opcode = *current->cc;
struct sljit_label *loop;
struct sljit_jump *jump;

if (opcode == OP_THEN || opcode == OP_THEN_ARG)
  {
  if (common->then_trap != NULL)
    {
    SLJIT_ASSERT(common->control_head_ptr != 0);

    /* Start from the control head and follow the links until an entry is
    found whose type is type_then_trap and whose start equals the start of
    the current trap. The first test skips the link load by jumping into
    the middle of the loop. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
    jump = JUMP(SLJIT_JUMP);

    loop = LABEL();
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
    JUMPHERE(jump);
    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
    add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
    return;
    }
  else if (common->positive_assert)
    {
    add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
    return;
    }
  }

if (common->local_exit)
  {
  /* Jump straight to the quit label when it already exists, otherwise
  queue the jump on the quit list. */
  if (common->quit_label == NULL)
    add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->quit_label);
  return;
  }

if (opcode == OP_SKIP_ARG)
  {
  SLJIT_ASSERT(common->control_head_ptr != 0);
  /* TMP1 receives the control head and STACK_TOP receives the address
  current->cc + 2 before do_search_mark() is called; STACK_TOP is saved in
  LOCALS0 around the call because it is overwritten here.
  NOTE(review): both registers look like the two call arguments - confirm
  against the register defines. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
  sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  /* The value left in TMP1 becomes STR_PTR; -1 falls through without
  jumping to reset_match. */
  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
  add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
  return;
  }

/* OP_SKIP reloads STR_PTR from STACK(0); every other verb reaching this
point passes 0 to the reset_match code. */
if (opcode == OP_SKIP)
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
else
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
}

/* Emits the backtracking path of the virtual OP_THEN_TRAP opcode.

Arguments:
  common      the shared compiler state
  current     the backtrack record, accessed as a then_trap_backtrack

When the record's then_trap field is set, no code is generated: the field is
only copied into common->then_trap. Otherwise two entry paths are generated,
and both finish by storing TMP1 into the control head slot. */

static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
int size;

if (CURRENT_AS(then_trap_backtrack)->then_trap)
  {
  common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
  return;
  }

/* Three words are always released, plus the frame when framesize >= 0. */
size = CURRENT_AS(then_trap_backtrack)->framesize;
size = 3 + (size < 0 ? 0 : size);

/* Normal backtrack: read the slot at STACK(size - 3), release everything
and skip the (*THEN) entry below. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
free_stack(common, size);
jump = JUMP(SLJIT_JUMP);

/* Entry used by the jumps collected on the quit list of this trap. */
set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
/* STACK_TOP is set by THEN. */
if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 3);

JUMPHERE(jump);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}

/* Emits the backtracking code for a chain of backtrack records.

Arguments:
  common      the shared compiler state
  current     the first record to process; the chain is followed through
              the prev field until it is NULL

For every record the pending nextbacktracks jumps are bound to the current
position, then the generator is selected by the opcode at current->cc.
common->then_trap is saved on entry and restored on exit, because the
OP_THEN_TRAP handler may overwrite it while the chain is processed. */

static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *save_then_trap = common->then_trap;

while (current)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());
  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop one stack word and store it into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character, negated character, character type and character
    class opcodes all share the iterator generator. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO tells whether a bracket or an assertion
    follows. */
    if (current->cc[1] > OP_ASSERTBACK_NOT)
      compile_bracket_backtrackingpath(common, current);
    else
      compile_assert_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    /* With has_skip_arg five words are released instead of one: the mark
    pointer is read from STACK(4) and STACK(0) is written back to the
    control head slot. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
    if (common->has_skip_arg)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, common->has_skip_arg ? 5 : 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
    if (common->has_skip_arg)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
    break;

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    /* The return register is set to PCRE_ERROR_NOMATCH only when
    local_exit is not set; the jump to the quit code is emitted either way. */
    if (!common->local_exit)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    if (common->quit_label == NULL)
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    else
      JUMPTO(SLJIT_JUMP, common->quit_label);
    break;

    case OP_CALLOUT:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    /* No code is emitted for these opcodes; their pending jumps are only
    bound to the current position. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_ASSERT_STOP();
    break;
    }
  current = current->prev;
  }
common->then_trap = save_then_trap;
}

/* Emits the out-of-line routine for the recursion entry stored in
common->currententry.

Arguments:
  common      the shared compiler state; currententry selects the group

The routine is entered with a fast call. It saves the private data of the
group on the stack, tries every alternative in turn, restores the private
data, and leaves its result in TMP1: 1 when an alternative reached the accept
list, 0 otherwise. The function returns early, leaving the routine unfinished,
as soon as the sljit compiler reports an error. */

static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
pcre_uchar *cc = common->start + common->currententry->start;
pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
BOOL needs_control_head;
int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
int alternativesize;
BOOL needs_frame;
backtrack_common altbacktrack;
struct sljit_jump *jump;

/* Recurse captures then. */
common->then_trap = NULL;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
/* A negative framesize means that no frame is required; it is clamped to
zero so the size computations below stay valid. */
needs_frame = framesize >= 0;
if (!needs_frame)
  framesize = 0;
/* One extra word is reserved when the group has more than one alternative;
it holds STR_PTR so that later alternatives can restart from it. */
alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;

/* The entry label is created here and every call recorded so far is bound
to it. */
SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
common->currententry->entry = LABEL();
set_jumps(common->currententry->calls, common->currententry->entry);

/* TMP2 is filled in by the fast enter (presumably with the return address -
confirm in sljit) and is stored in the topmost word of the new stack area. */
sljit_emit_fast_enter(compiler, TMP2, 0);
allocate_stack(common, private_data_size + framesize + alternativesize);
count_match(common);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
if (needs_frame)
  init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);

if (alternativesize > 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

/* The quit and accept lists are reset, so the jumps collected while the
alternatives are compiled belong to this routine only. */
memset(&altbacktrack, 0, sizeof(backtrack_common));
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
altbacktrack.cc = ccbegin;
cc += GET(cc, 1);
while (1)
  {
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Every alternative except the first reloads STR_PTR from STACK(0). */
  if (altbacktrack.cc != ccbegin)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  /* Reaching the end of an alternative means success. */
  add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  altbacktrack.cc = cc + 1 + LINK_SIZE;
  cc += GET(cc, 1);
  }

/* None of them matched. */
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_JUMP);

if (common->quit != NULL)
  {
  /* Quit path: STACK_TOP is reloaded from the recursive head and the frame
  is reverted when there is one. TMP3 is set to 0, and the quit list is
  restarted with a single jump that is bound just before the common exit
  code below. */
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  if (needs_frame)
    {
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
    }
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  common->quit = NULL;
  add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  }

/* Accept path: the same stack reset as the quit path, but TMP3 is set to 1. */
set_jumps(common->accept, LABEL());
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
if (needs_frame)
  {
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
  }
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);

/* Common exit: the private data is copied back and the stack area released.
The recursive head (and the control head, when it is used) is reloaded from
the words at fixed offsets from STACK_TOP, and TMP3 is moved into TMP1. */
JUMPHERE(jump);
if (common->quit != NULL)
  set_jumps(common->quit, LABEL());
copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
free_stack(common, private_data_size + framesize + alternativesize);
if (needs_control_head)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
  }
else
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
  }
/* The fast return takes its target from the word at STACK_TOP. */
sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
}

#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS

void
PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
{
struct sljit_compiler *compiler;
backtrack_common rootbacktrack;
compiler_common common_data;
compiler_common *common = &common_data;
const pcre_uint8 *tables = re->tables;
pcre_study_data *study;
int private_data_size;
pcre_uchar *ccend;
executable_functions *functions;
void *executable_func;
sljit_uw executable_size;
sljit_uw total_length;
label_addr_list *label_addr;
struct sljit_label *mainloop_label = NULL;
struct sljit_label *continue_match_label;
struct sljit_label *empty_match_found_label = NULL;
struct sljit_label *empty_match_backtrack_label = NULL;
struct sljit_label *reset_match_label;
struct sljit_label *quit_label;
struct sljit_jump *jump;
struct sljit_jump *minlength_check_failed = NULL;
struct sljit_jump *reqbyte_notfound = NULL;
struct sljit_jump *empty_match = NULL;

SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
study = extra->study_data;

if (!tables)
  tables = PRIV(default_tables);

memset(&rootbacktrack, 0, sizeof(backtrack_common));
memset(common, 0, sizeof(compiler_common));
rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;

common->start = rootbacktrack.cc; 9829 common->read_only_data_head = NULL; 9830 common->fcc = tables + fcc_offset; 9831 common->lcc = (sljit_sw)(tables + lcc_offset); 9832 common->mode = mode; 9833 common->might_be_empty = study->minlength == 0; 9834 common->nltype = NLTYPE_FIXED; 9835 switch(re->options & PCRE_NEWLINE_BITS) 9836 { 9837 case 0: 9838 /* Compile-time default */ 9839 switch(NEWLINE) 9840 { 9841 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break; 9842 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break; 9843 default: common->newline = NEWLINE; break; 9844 } 9845 break; 9846 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break; 9847 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break; 9848 case PCRE_NEWLINE_CR+ 9849 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break; 9850 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break; 9851 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break; 9852 default: return; 9853 } 9854 common->nlmax = READ_CHAR_MAX; 9855 common->nlmin = 0; 9856 if ((re->options & PCRE_BSR_ANYCRLF) != 0) 9857 common->bsr_nltype = NLTYPE_ANYCRLF; 9858 else if ((re->options & PCRE_BSR_UNICODE) != 0) 9859 common->bsr_nltype = NLTYPE_ANY; 9860 else 9861 { 9862 #ifdef BSR_ANYCRLF 9863 common->bsr_nltype = NLTYPE_ANYCRLF; 9864 #else 9865 common->bsr_nltype = NLTYPE_ANY; 9866 #endif 9867 } 9868 common->bsr_nlmax = READ_CHAR_MAX; 9869 common->bsr_nlmin = 0; 9870 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; 9871 common->ctypes = (sljit_sw)(tables + ctypes_offset); 9872 common->name_table = ((pcre_uchar *)re) + re->name_table_offset; 9873 common->name_count = re->name_count; 9874 common->name_entry_size = re->name_entry_size; 9875 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; 9876 #ifdef SUPPORT_UTF 9877 /* 
PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ 9878 common->utf = (re->options & PCRE_UTF8) != 0; 9879 #ifdef SUPPORT_UCP 9880 common->use_ucp = (re->options & PCRE_UCP) != 0; 9881 #endif 9882 if (common->utf) 9883 { 9884 if (common->nltype == NLTYPE_ANY) 9885 common->nlmax = 0x2029; 9886 else if (common->nltype == NLTYPE_ANYCRLF) 9887 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL; 9888 else 9889 { 9890 /* We only care about the first newline character. */ 9891 common->nlmax = common->newline & 0xff; 9892 } 9893 9894 if (common->nltype == NLTYPE_FIXED) 9895 common->nlmin = common->newline & 0xff; 9896 else 9897 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL; 9898 9899 if (common->bsr_nltype == NLTYPE_ANY) 9900 common->bsr_nlmax = 0x2029; 9901 else 9902 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL; 9903 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL; 9904 } 9905 #endif /* SUPPORT_UTF */ 9906 ccend = bracketend(common->start); 9907 9908 /* Calculate the local space size on the stack. 
*/ 9909 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw); 9910 common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data); 9911 if (!common->optimized_cbracket) 9912 return; 9913 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1 9914 memset(common->optimized_cbracket, 0, re->top_bracket + 1); 9915 #else 9916 memset(common->optimized_cbracket, 1, re->top_bracket + 1); 9917 #endif 9918 9919 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET); 9920 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2 9921 common->capture_last_ptr = common->ovector_start; 9922 common->ovector_start += sizeof(sljit_sw); 9923 #endif 9924 if (!check_opcode_types(common, common->start, ccend)) 9925 { 9926 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); 9927 return; 9928 } 9929 9930 /* Checking flags and updating ovector_start. */ 9931 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) 9932 { 9933 common->req_char_ptr = common->ovector_start; 9934 common->ovector_start += sizeof(sljit_sw); 9935 } 9936 if (mode != JIT_COMPILE) 9937 { 9938 common->start_used_ptr = common->ovector_start; 9939 common->ovector_start += sizeof(sljit_sw); 9940 if (mode == JIT_PARTIAL_SOFT_COMPILE) 9941 { 9942 common->hit_start = common->ovector_start; 9943 common->ovector_start += 2 * sizeof(sljit_sw); 9944 } 9945 else 9946 { 9947 SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE); 9948 common->needs_start_ptr = TRUE; 9949 } 9950 } 9951 if ((re->options & PCRE_FIRSTLINE) != 0) 9952 { 9953 common->first_line_end = common->ovector_start; 9954 common->ovector_start += sizeof(sljit_sw); 9955 } 9956 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD 9957 common->control_head_ptr = 1; 9958 #endif 9959 if (common->control_head_ptr != 0) 9960 { 9961 common->control_head_ptr = common->ovector_start; 9962 
common->ovector_start += sizeof(sljit_sw); 9963 } 9964 if (common->needs_start_ptr && common->has_set_som) 9965 { 9966 /* Saving the real start pointer is necessary. */ 9967 common->start_ptr = common->ovector_start; 9968 common->ovector_start += sizeof(sljit_sw); 9969 } 9970 else 9971 common->needs_start_ptr = FALSE; 9972 9973 /* Aligning ovector to even number of sljit words. */ 9974 if ((common->ovector_start & sizeof(sljit_sw)) != 0) 9975 common->ovector_start += sizeof(sljit_sw); 9976 9977 if (common->start_ptr == 0) 9978 common->start_ptr = OVECTOR(0); 9979 9980 /* Capturing brackets cannot be optimized if callouts are allowed. */ 9981 if (common->capture_last_ptr != 0) 9982 memset(common->optimized_cbracket, 0, re->top_bracket + 1); 9983 9984 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); 9985 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); 9986 9987 total_length = ccend - common->start; 9988 common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 
1 : 0)), compiler->allocator_data); 9989 if (!common->private_data_ptrs) 9990 { 9991 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); 9992 return; 9993 } 9994 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si)); 9995 9996 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); 9997 set_private_data_ptrs(common, &private_data_size, ccend); 9998 if (private_data_size > SLJIT_MAX_LOCAL_SIZE) 9999 { 10000 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); 10001 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); 10002 return; 10003 } 10004 10005 if (common->has_then) 10006 { 10007 common->then_offsets = (pcre_uint8 *)(common->private_data_ptrs + total_length); 10008 memset(common->then_offsets, 0, total_length); 10009 set_then_offsets(common, common->start, NULL); 10010 } 10011 10012 compiler = sljit_create_compiler(NULL); 10013 if (!compiler) 10014 { 10015 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); 10016 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); 10017 return; 10018 } 10019 common->compiler = compiler; 10020 10021 /* Main pcre_jit_exec entry. */ 10022 sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size); 10023 10024 /* Register init. 
*/ 10025 reset_ovector(common, (re->top_bracket + 1) * 2); 10026 if (common->req_char_ptr != 0) 10027 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0); 10028 10029 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0); 10030 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0); 10031 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); 10032 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end)); 10033 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); 10034 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match)); 10035 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base)); 10036 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit)); 10037 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); 10038 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0); 10039 10040 if (mode == JIT_PARTIAL_SOFT_COMPILE) 10041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); 10042 if (common->mark_ptr != 0) 10043 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0); 10044 if (common->control_head_ptr != 0) 10045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); 10046 10047 /* Main part of the matching */ 10048 if ((re->options & PCRE_ANCHORED) == 0) 10049 { 10050 mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0); 10051 continue_match_label = LABEL(); 10052 /* Forward search if possible. 
*/ 10053 if ((re->options & PCRE_NO_START_OPTIMIZE) == 0) 10054 { 10055 if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0)) 10056 ; 10057 else if ((re->flags & PCRE_FIRSTSET) != 0) 10058 fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0); 10059 else if ((re->flags & PCRE_STARTLINE) != 0) 10060 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0); 10061 else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) 10062 fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); 10063 } 10064 } 10065 else 10066 continue_match_label = LABEL(); 10067 10068 if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) 10069 { 10070 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); 10071 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength)); 10072 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0); 10073 } 10074 if (common->req_char_ptr != 0) 10075 reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0); 10076 10077 /* Store the current STR_PTR in OVECTOR(0). */ 10078 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); 10079 /* Copy the limit of allowed recursions. */ 10080 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH); 10081 if (common->capture_last_ptr != 0) 10082 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1); 10083 10084 if (common->needs_start_ptr) 10085 { 10086 SLJIT_ASSERT(common->start_ptr != OVECTOR(0)); 10087 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0); 10088 } 10089 else 10090 SLJIT_ASSERT(common->start_ptr == OVECTOR(0)); 10091 10092 /* Copy the beginning of the string. 
*/ 10093 if (mode == JIT_PARTIAL_SOFT_COMPILE) 10094 { 10095 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); 10096 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); 10097 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0); 10098 JUMPHERE(jump); 10099 } 10100 else if (mode == JIT_PARTIAL_HARD_COMPILE) 10101 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); 10102 10103 compile_matchingpath(common, common->start, ccend, &rootbacktrack); 10104 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 10105 { 10106 sljit_free_compiler(compiler); 10107 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); 10108 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); 10109 free_read_only_data(common->read_only_data_head, compiler->allocator_data); 10110 return; 10111 } 10112 10113 if (common->might_be_empty) 10114 { 10115 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); 10116 empty_match_found_label = LABEL(); 10117 } 10118 10119 common->accept_label = LABEL(); 10120 if (common->accept != NULL) 10121 set_jumps(common->accept, common->accept_label); 10122 10123 /* This means we have a match. Update the ovector. 
*/ 10124 copy_ovector(common, re->top_bracket + 1); 10125 common->quit_label = common->forced_quit_label = LABEL(); 10126 if (common->quit != NULL) 10127 set_jumps(common->quit, common->quit_label); 10128 if (common->forced_quit != NULL) 10129 set_jumps(common->forced_quit, common->forced_quit_label); 10130 if (minlength_check_failed != NULL) 10131 SET_LABEL(minlength_check_failed, common->forced_quit_label); 10132 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); 10133 10134 if (mode != JIT_COMPILE) 10135 { 10136 common->partialmatchlabel = LABEL(); 10137 set_jumps(common->partialmatch, common->partialmatchlabel); 10138 return_with_partial_match(common, common->quit_label); 10139 } 10140 10141 if (common->might_be_empty) 10142 empty_match_backtrack_label = LABEL(); 10143 compile_backtrackingpath(common, rootbacktrack.top); 10144 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 10145 { 10146 sljit_free_compiler(compiler); 10147 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); 10148 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); 10149 free_read_only_data(common->read_only_data_head, compiler->allocator_data); 10150 return; 10151 } 10152 10153 SLJIT_ASSERT(rootbacktrack.prev == NULL); 10154 reset_match_label = LABEL(); 10155 10156 if (mode == JIT_PARTIAL_SOFT_COMPILE) 10157 { 10158 /* Update hit_start only in the first time. */ 10159 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); 10160 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr); 10161 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); 10162 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0); 10163 JUMPHERE(jump); 10164 } 10165 10166 /* Check we have remaining characters. 
*/ 10167 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0) 10168 { 10169 SLJIT_ASSERT(common->first_line_end != 0); 10170 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end); 10171 } 10172 10173 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); 10174 10175 if ((re->options & PCRE_ANCHORED) == 0) 10176 { 10177 if (common->ff_newline_shortcut != NULL) 10178 { 10179 if ((re->options & PCRE_FIRSTLINE) == 0) 10180 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut); 10181 /* There cannot be more newlines here. */ 10182 } 10183 else 10184 { 10185 if ((re->options & PCRE_FIRSTLINE) == 0) 10186 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop_label); 10187 else 10188 CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, mainloop_label); 10189 } 10190 } 10191 10192 /* No more remaining characters. */ 10193 if (reqbyte_notfound != NULL) 10194 JUMPHERE(reqbyte_notfound); 10195 10196 if (mode == JIT_PARTIAL_SOFT_COMPILE) 10197 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel); 10198 10199 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); 10200 JUMPTO(SLJIT_JUMP, common->quit_label); 10201 10202 flush_stubs(common); 10203 10204 if (common->might_be_empty) 10205 { 10206 JUMPHERE(empty_match); 10207 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 10208 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); 10209 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label); 10210 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); 10211 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label); 10212 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); 10213 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label); 10214 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label); 10215 } 10216 10217 common->currententry = 
common->entries; 10218 common->local_exit = TRUE; 10219 quit_label = common->quit_label; 10220 while (common->currententry != NULL) 10221 { 10222 /* Might add new entries. */ 10223 compile_recurse(common); 10224 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 10225 { 10226 sljit_free_compiler(compiler); 10227 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); 10228 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); 10229 free_read_only_data(common->read_only_data_head, compiler->allocator_data); 10230 return; 10231 } 10232 flush_stubs(common); 10233 common->currententry = common->currententry->next; 10234 } 10235 common->local_exit = FALSE; 10236 common->quit_label = quit_label; 10237 10238 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */ 10239 /* This is a (really) rare case. */ 10240 set_jumps(common->stackalloc, LABEL()); 10241 /* RETURN_ADDR is not a saved register. */ 10242 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); 10243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0); 10244 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 10245 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); 10246 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0); 10247 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE); 10248 10249 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); 10250 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); 10251 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 10252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); 10253 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top)); 10254 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit)); 10255 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); 10256 
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); 10257 10258 /* Allocation failed. */ 10259 JUMPHERE(jump); 10260 /* We break the return address cache here, but this is a really rare case. */ 10261 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT); 10262 JUMPTO(SLJIT_JUMP, common->quit_label); 10263 10264 /* Call limit reached. */ 10265 set_jumps(common->calllimit, LABEL()); 10266 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT); 10267 JUMPTO(SLJIT_JUMP, common->quit_label); 10268 10269 if (common->revertframes != NULL) 10270 { 10271 set_jumps(common->revertframes, LABEL()); 10272 do_revertframes(common); 10273 } 10274 if (common->wordboundary != NULL) 10275 { 10276 set_jumps(common->wordboundary, LABEL()); 10277 check_wordboundary(common); 10278 } 10279 if (common->anynewline != NULL) 10280 { 10281 set_jumps(common->anynewline, LABEL()); 10282 check_anynewline(common); 10283 } 10284 if (common->hspace != NULL) 10285 { 10286 set_jumps(common->hspace, LABEL()); 10287 check_hspace(common); 10288 } 10289 if (common->vspace != NULL) 10290 { 10291 set_jumps(common->vspace, LABEL()); 10292 check_vspace(common); 10293 } 10294 if (common->casefulcmp != NULL) 10295 { 10296 set_jumps(common->casefulcmp, LABEL()); 10297 do_casefulcmp(common); 10298 } 10299 if (common->caselesscmp != NULL) 10300 { 10301 set_jumps(common->caselesscmp, LABEL()); 10302 do_caselesscmp(common); 10303 } 10304 if (common->reset_match != NULL) 10305 { 10306 set_jumps(common->reset_match, LABEL()); 10307 do_reset_match(common, (re->top_bracket + 1) * 2); 10308 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label); 10309 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); 10310 JUMPTO(SLJIT_JUMP, reset_match_label); 10311 } 10312 #ifdef SUPPORT_UTF 10313 #ifdef COMPILE_PCRE8 10314 if (common->utfreadchar != NULL) 10315 { 10316 set_jumps(common->utfreadchar, LABEL()); 10317 do_utfreadchar(common); 10318 } 10319 if (common->utfreadchar16 != NULL) 
10320 { 10321 set_jumps(common->utfreadchar16, LABEL()); 10322 do_utfreadchar16(common); 10323 } 10324 if (common->utfreadtype8 != NULL) 10325 { 10326 set_jumps(common->utfreadtype8, LABEL()); 10327 do_utfreadtype8(common); 10328 } 10329 #endif /* COMPILE_PCRE8 */ 10330 #endif /* SUPPORT_UTF */ 10331 #ifdef SUPPORT_UCP 10332 if (common->getucd != NULL) 10333 { 10334 set_jumps(common->getucd, LABEL()); 10335 do_getucd(common); 10336 } 10337 #endif 10338 10339 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); 10340 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); 10341 10342 executable_func = sljit_generate_code(compiler); 10343 executable_size = sljit_get_generated_code_size(compiler); 10344 label_addr = common->label_addrs; 10345 while (label_addr != NULL) 10346 { 10347 *label_addr->update_addr = sljit_get_label_addr(label_addr->label); 10348 label_addr = label_addr->next; 10349 } 10350 sljit_free_compiler(compiler); 10351 if (executable_func == NULL) 10352 { 10353 free_read_only_data(common->read_only_data_head, compiler->allocator_data); 10354 return; 10355 } 10356 10357 /* Reuse the function descriptor if possible. */ 10358 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL) 10359 functions = (executable_functions *)extra->executable_jit; 10360 else 10361 { 10362 /* Note: If your memory-checker has flagged the allocation below as a 10363 * memory leak, it is probably because you either forgot to call 10364 * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or 10365 * pcre16_extra) object, or you called said function after having 10366 * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field 10367 * of the object. (The function will only free the JIT data if the 10368 * bit remains set, as the bit indicates that the pointer to the data 10369 * is valid.) 
10370 */ 10371 functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data); 10372 if (functions == NULL) 10373 { 10374 /* This case is highly unlikely since we just recently 10375 freed a lot of memory. Not impossible though. */ 10376 sljit_free_code(executable_func); 10377 free_read_only_data(common->read_only_data_head, compiler->allocator_data); 10378 return; 10379 } 10380 memset(functions, 0, sizeof(executable_functions)); 10381 functions->top_bracket = (re->top_bracket + 1) * 2; 10382 functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0; 10383 extra->executable_jit = functions; 10384 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT; 10385 } 10386 10387 functions->executable_funcs[mode] = executable_func; 10388 functions->read_only_data_heads[mode] = common->read_only_data_head; 10389 functions->executable_sizes[mode] = executable_size; 10390 } 10391 10392 static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func) 10393 { 10394 union { 10395 void *executable_func; 10396 jit_function call_executable_func; 10397 } convert_executable_func; 10398 pcre_uint8 local_space[MACHINE_STACK_SIZE]; 10399 struct sljit_stack local_stack; 10400 10401 local_stack.top = (sljit_sw)&local_space; 10402 local_stack.base = local_stack.top; 10403 local_stack.limit = local_stack.base + MACHINE_STACK_SIZE; 10404 local_stack.max_limit = local_stack.limit; 10405 arguments->stack = &local_stack; 10406 convert_executable_func.executable_func = executable_func; 10407 return convert_executable_func.call_executable_func(arguments); 10408 } 10409 10410 int 10411 PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject, 10412 int length, int start_offset, int options, int *offsets, int offset_count) 10413 { 10414 executable_functions *functions = (executable_functions *)extra_data->executable_jit; 10415 union { 10416 void *executable_func; 10417 jit_function call_executable_func; 10418 } 
convert_executable_func; 10419 jit_arguments arguments; 10420 int max_offset_count; 10421 int retval; 10422 int mode = JIT_COMPILE; 10423 10424 if ((options & PCRE_PARTIAL_HARD) != 0) 10425 mode = JIT_PARTIAL_HARD_COMPILE; 10426 else if ((options & PCRE_PARTIAL_SOFT) != 0) 10427 mode = JIT_PARTIAL_SOFT_COMPILE; 10428 10429 if (functions->executable_funcs[mode] == NULL) 10430 return PCRE_ERROR_JIT_BADOPTION; 10431 10432 /* Sanity checks should be handled by pcre_exec. */ 10433 arguments.str = subject + start_offset; 10434 arguments.begin = subject; 10435 arguments.end = subject + length; 10436 arguments.mark_ptr = NULL; 10437 /* JIT decreases this value less frequently than the interpreter. */ 10438 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit); 10439 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match) 10440 arguments.limit_match = functions->limit_match; 10441 arguments.notbol = (options & PCRE_NOTBOL) != 0; 10442 arguments.noteol = (options & PCRE_NOTEOL) != 0; 10443 arguments.notempty = (options & PCRE_NOTEMPTY) != 0; 10444 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; 10445 arguments.offsets = offsets; 10446 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL; 10447 arguments.real_offset_count = offset_count; 10448 10449 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of 10450 the output vector for storing captured strings, with the remainder used as 10451 workspace. We don't need the workspace here. For compatibility, we limit the 10452 number of captured strings in the same way as pcre_exec(), so that the user 10453 gets the same result with and without JIT. 
*/ 10454 10455 if (offset_count != 2) 10456 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3; 10457 max_offset_count = functions->top_bracket; 10458 if (offset_count > max_offset_count) 10459 offset_count = max_offset_count; 10460 arguments.offset_count = offset_count; 10461 10462 if (functions->callback) 10463 arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata); 10464 else 10465 arguments.stack = (struct sljit_stack *)functions->userdata; 10466 10467 if (arguments.stack == NULL) 10468 retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]); 10469 else 10470 { 10471 convert_executable_func.executable_func = functions->executable_funcs[mode]; 10472 retval = convert_executable_func.call_executable_func(&arguments); 10473 } 10474 10475 if (retval * 2 > offset_count) 10476 retval = 0; 10477 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0) 10478 *(extra_data->mark) = arguments.mark_ptr; 10479 10480 return retval; 10481 } 10482 10483 #if defined COMPILE_PCRE8 10484 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 10485 pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data, 10486 PCRE_SPTR subject, int length, int start_offset, int options, 10487 int *offsets, int offset_count, pcre_jit_stack *stack) 10488 #elif defined COMPILE_PCRE16 10489 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 10490 pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, 10491 PCRE_SPTR16 subject, int length, int start_offset, int options, 10492 int *offsets, int offset_count, pcre16_jit_stack *stack) 10493 #elif defined COMPILE_PCRE32 10494 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 10495 pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data, 10496 PCRE_SPTR32 subject, int length, int start_offset, int options, 10497 int *offsets, int offset_count, pcre32_jit_stack *stack) 10498 #endif 10499 { 10500 pcre_uchar *subject_ptr = (pcre_uchar *)subject; 10501 executable_functions *functions = 
(executable_functions *)extra_data->executable_jit; 10502 union { 10503 void *executable_func; 10504 jit_function call_executable_func; 10505 } convert_executable_func; 10506 jit_arguments arguments; 10507 int max_offset_count; 10508 int retval; 10509 int mode = JIT_COMPILE; 10510 10511 SLJIT_UNUSED_ARG(argument_re); 10512 10513 /* Plausibility checks */ 10514 if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION; 10515 10516 if ((options & PCRE_PARTIAL_HARD) != 0) 10517 mode = JIT_PARTIAL_HARD_COMPILE; 10518 else if ((options & PCRE_PARTIAL_SOFT) != 0) 10519 mode = JIT_PARTIAL_SOFT_COMPILE; 10520 10521 if (functions->executable_funcs[mode] == NULL) 10522 return PCRE_ERROR_JIT_BADOPTION; 10523 10524 /* Sanity checks should be handled by pcre_exec. */ 10525 arguments.stack = (struct sljit_stack *)stack; 10526 arguments.str = subject_ptr + start_offset; 10527 arguments.begin = subject_ptr; 10528 arguments.end = subject_ptr + length; 10529 arguments.mark_ptr = NULL; 10530 /* JIT decreases this value less frequently than the interpreter. */ 10531 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit); 10532 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match) 10533 arguments.limit_match = functions->limit_match; 10534 arguments.notbol = (options & PCRE_NOTBOL) != 0; 10535 arguments.noteol = (options & PCRE_NOTEOL) != 0; 10536 arguments.notempty = (options & PCRE_NOTEMPTY) != 0; 10537 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; 10538 arguments.offsets = offsets; 10539 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL; 10540 arguments.real_offset_count = offset_count; 10541 10542 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of 10543 the output vector for storing captured strings, with the remainder used as 10544 workspace. 
We don't need the workspace here. For compatibility, we limit the 10545 number of captured strings in the same way as pcre_exec(), so that the user 10546 gets the same result with and without JIT. */ 10547 10548 if (offset_count != 2) 10549 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3; 10550 max_offset_count = functions->top_bracket; 10551 if (offset_count > max_offset_count) 10552 offset_count = max_offset_count; 10553 arguments.offset_count = offset_count; 10554 10555 convert_executable_func.executable_func = functions->executable_funcs[mode]; 10556 retval = convert_executable_func.call_executable_func(&arguments); 10557 10558 if (retval * 2 > offset_count) 10559 retval = 0; 10560 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0) 10561 *(extra_data->mark) = arguments.mark_ptr; 10562 10563 return retval; 10564 } 10565 10566 void 10567 PRIV(jit_free)(void *executable_funcs) 10568 { 10569 int i; 10570 executable_functions *functions = (executable_functions *)executable_funcs; 10571 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) 10572 { 10573 if (functions->executable_funcs[i] != NULL) 10574 sljit_free_code(functions->executable_funcs[i]); 10575 free_read_only_data(functions->read_only_data_heads[i], NULL); 10576 } 10577 SLJIT_FREE(functions, compiler->allocator_data); 10578 } 10579 10580 int 10581 PRIV(jit_get_size)(void *executable_funcs) 10582 { 10583 int i; 10584 sljit_uw size = 0; 10585 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes; 10586 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) 10587 size += executable_sizes[i]; 10588 return (int)size; 10589 } 10590 10591 const char* 10592 PRIV(jit_get_target)(void) 10593 { 10594 return sljit_get_platform_name(); 10595 } 10596 10597 #if defined COMPILE_PCRE8 10598 PCRE_EXP_DECL pcre_jit_stack * 10599 pcre_jit_stack_alloc(int startsize, int maxsize) 10600 #elif defined COMPILE_PCRE16 10601 PCRE_EXP_DECL pcre16_jit_stack * 10602 pcre16_jit_stack_alloc(int 
startsize, int maxsize) 10603 #elif defined COMPILE_PCRE32 10604 PCRE_EXP_DECL pcre32_jit_stack * 10605 pcre32_jit_stack_alloc(int startsize, int maxsize) 10606 #endif 10607 { 10608 if (startsize < 1 || maxsize < 1) 10609 return NULL; 10610 if (startsize > maxsize) 10611 startsize = maxsize; 10612 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); 10613 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); 10614 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL); 10615 } 10616 10617 #if defined COMPILE_PCRE8 10618 PCRE_EXP_DECL void 10619 pcre_jit_stack_free(pcre_jit_stack *stack) 10620 #elif defined COMPILE_PCRE16 10621 PCRE_EXP_DECL void 10622 pcre16_jit_stack_free(pcre16_jit_stack *stack) 10623 #elif defined COMPILE_PCRE32 10624 PCRE_EXP_DECL void 10625 pcre32_jit_stack_free(pcre32_jit_stack *stack) 10626 #endif 10627 { 10628 sljit_free_stack((struct sljit_stack *)stack, NULL); 10629 } 10630 10631 #if defined COMPILE_PCRE8 10632 PCRE_EXP_DECL void 10633 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) 10634 #elif defined COMPILE_PCRE16 10635 PCRE_EXP_DECL void 10636 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) 10637 #elif defined COMPILE_PCRE32 10638 PCRE_EXP_DECL void 10639 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata) 10640 #endif 10641 { 10642 executable_functions *functions; 10643 if (extra != NULL && 10644 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && 10645 extra->executable_jit != NULL) 10646 { 10647 functions = (executable_functions *)extra->executable_jit; 10648 functions->callback = callback; 10649 functions->userdata = userdata; 10650 } 10651 } 10652 10653 #if defined COMPILE_PCRE8 10654 PCRE_EXP_DECL void 10655 pcre_jit_free_unused_memory(void) 10656 #elif defined COMPILE_PCRE16 10657 PCRE_EXP_DECL void 10658 pcre16_jit_free_unused_memory(void) 10659 #elif 
defined COMPILE_PCRE32 10660 PCRE_EXP_DECL void 10661 pcre32_jit_free_unused_memory(void) 10662 #endif 10663 { 10664 sljit_free_unused_memory_exec(); 10665 } 10666 10667 #else /* SUPPORT_JIT */ 10668 10669 /* These are dummy functions to avoid linking errors when JIT support is not 10670 being compiled. */ 10671 10672 #if defined COMPILE_PCRE8 10673 PCRE_EXP_DECL pcre_jit_stack * 10674 pcre_jit_stack_alloc(int startsize, int maxsize) 10675 #elif defined COMPILE_PCRE16 10676 PCRE_EXP_DECL pcre16_jit_stack * 10677 pcre16_jit_stack_alloc(int startsize, int maxsize) 10678 #elif defined COMPILE_PCRE32 10679 PCRE_EXP_DECL pcre32_jit_stack * 10680 pcre32_jit_stack_alloc(int startsize, int maxsize) 10681 #endif 10682 { 10683 (void)startsize; 10684 (void)maxsize; 10685 return NULL; 10686 } 10687 10688 #if defined COMPILE_PCRE8 10689 PCRE_EXP_DECL void 10690 pcre_jit_stack_free(pcre_jit_stack *stack) 10691 #elif defined COMPILE_PCRE16 10692 PCRE_EXP_DECL void 10693 pcre16_jit_stack_free(pcre16_jit_stack *stack) 10694 #elif defined COMPILE_PCRE32 10695 PCRE_EXP_DECL void 10696 pcre32_jit_stack_free(pcre32_jit_stack *stack) 10697 #endif 10698 { 10699 (void)stack; 10700 } 10701 10702 #if defined COMPILE_PCRE8 10703 PCRE_EXP_DECL void 10704 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) 10705 #elif defined COMPILE_PCRE16 10706 PCRE_EXP_DECL void 10707 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) 10708 #elif defined COMPILE_PCRE32 10709 PCRE_EXP_DECL void 10710 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata) 10711 #endif 10712 { 10713 (void)extra; 10714 (void)callback; 10715 (void)userdata; 10716 } 10717 10718 #if defined COMPILE_PCRE8 10719 PCRE_EXP_DECL void 10720 pcre_jit_free_unused_memory(void) 10721 #elif defined COMPILE_PCRE16 10722 PCRE_EXP_DECL void 10723 pcre16_jit_free_unused_memory(void) 10724 #elif defined COMPILE_PCRE32 10725 PCRE_EXP_DECL 
void 10726 pcre32_jit_free_unused_memory(void) 10727 #endif 10728 { 10729 } 10730 10731 #endif 10732 10733 /* End of pcre_jit_compile.c */ 10734