1 // Copyright 2012 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #if V8_TARGET_ARCH_X87 6 7 #include "src/regexp/x87/regexp-macro-assembler-x87.h" 8 9 #include "src/log.h" 10 #include "src/macro-assembler.h" 11 #include "src/regexp/regexp-macro-assembler.h" 12 #include "src/regexp/regexp-stack.h" 13 #include "src/unicode.h" 14 15 namespace v8 { 16 namespace internal { 17 18 #ifndef V8_INTERPRETED_REGEXP 19 /* 20 * This assembler uses the following register assignment convention 21 * - edx : Current character. Must be loaded using LoadCurrentCharacter 22 * before using any of the dispatch methods. Temporarily stores the 23 * index of capture start after a matching pass for a global regexp. 24 * - edi : Current position in input, as negative offset from end of string. 25 * Please notice that this is the byte offset, not the character offset! 26 * - esi : end of input (points to byte after last character in input). 27 * - ebp : Frame pointer. Used to access arguments, local variables and 28 * RegExp registers. 29 * - esp : Points to tip of C stack. 30 * - ecx : Points to tip of backtrack stack 31 * 32 * The registers eax and ebx are free to use for computations. 33 * 34 * Each call to a public method should retain this convention. 35 * The stack will have the following structure: 36 * - Isolate* isolate (address of the current isolate) 37 * - direct_call (if 1, direct call from JavaScript code, if 0 38 * call through the runtime system) 39 * - stack_area_base (high end of the memory area to use as 40 * backtracking stack) 41 * - capture array size (may fit multiple sets of matches) 42 * - int* capture_array (int[num_saved_registers_], for output). 43 * - end of input (address of end of string) 44 * - start of input (address of first character in string) 45 * - start index (character index of start) 46 * - String* input_string (location of a handle containing the string) 47 * --- frame alignment (if applicable) --- 48 * - return address 49 * ebp-> - old ebp 50 * - backup of caller esi 51 * - backup of caller edi 52 * - backup of caller ebx 53 * - success counter (only for global regexps to count matches). 54 * - Offset of location before start of input (effectively character 55 * string start - 1). Used to initialize capture registers to a 56 * non-position. 57 * - register 0 ebp[-4] (only positions must be stored in the first 58 * - register 1 ebp[-8] num_saved_registers_ registers) 59 * - ... 60 * 61 * The first num_saved_registers_ registers are initialized to point to 62 * "character -1" in the string (i.e., char_size() bytes before the first 63 * character of the string). The remaining registers starts out as garbage. 64 * 65 * The data up to the return address must be placed there by the calling 66 * code, by calling the code entry as cast to a function with the signature: 67 * int (*match)(String* input_string, 68 * int start_index, 69 * Address start, 70 * Address end, 71 * int* capture_output_array, 72 * bool at_start, 73 * byte* stack_area_base, 74 * bool direct_call) 75 */ 76 77 #define __ ACCESS_MASM(masm_) 78 79 RegExpMacroAssemblerX87::RegExpMacroAssemblerX87(Isolate* isolate, Zone* zone, 80 Mode mode, 81 int registers_to_save) 82 : NativeRegExpMacroAssembler(isolate, zone), 83 masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize, 84 CodeObjectRequired::kYes)), 85 mode_(mode), 86 num_registers_(registers_to_save), 87 num_saved_registers_(registers_to_save), 88 entry_label_(), 89 start_label_(), 90 success_label_(), 91 backtrack_label_(), 92 exit_label_() { 93 DCHECK_EQ(0, registers_to_save % 2); 94 __ jmp(&entry_label_); // We'll write the entry code later. 95 __ bind(&start_label_); // And then continue from here. 96 } 97 98 99 RegExpMacroAssemblerX87::~RegExpMacroAssemblerX87() { 100 delete masm_; 101 // Unuse labels in case we throw away the assembler without calling GetCode. 102 entry_label_.Unuse(); 103 start_label_.Unuse(); 104 success_label_.Unuse(); 105 backtrack_label_.Unuse(); 106 exit_label_.Unuse(); 107 check_preempt_label_.Unuse(); 108 stack_overflow_label_.Unuse(); 109 } 110 111 112 int RegExpMacroAssemblerX87::stack_limit_slack() { 113 return RegExpStack::kStackLimitSlack; 114 } 115 116 117 void RegExpMacroAssemblerX87::AdvanceCurrentPosition(int by) { 118 if (by != 0) { 119 __ add(edi, Immediate(by * char_size())); 120 } 121 } 122 123 124 void RegExpMacroAssemblerX87::AdvanceRegister(int reg, int by) { 125 DCHECK(reg >= 0); 126 DCHECK(reg < num_registers_); 127 if (by != 0) { 128 __ add(register_location(reg), Immediate(by)); 129 } 130 } 131 132 133 void RegExpMacroAssemblerX87::Backtrack() { 134 CheckPreemption(); 135 // Pop Code* offset from backtrack stack, add Code* and jump to location. 136 Pop(ebx); 137 __ add(ebx, Immediate(masm_->CodeObject())); 138 __ jmp(ebx); 139 } 140 141 142 void RegExpMacroAssemblerX87::Bind(Label* label) { 143 __ bind(label); 144 } 145 146 147 void RegExpMacroAssemblerX87::CheckCharacter(uint32_t c, Label* on_equal) { 148 __ cmp(current_character(), c); 149 BranchOrBacktrack(equal, on_equal); 150 } 151 152 153 void RegExpMacroAssemblerX87::CheckCharacterGT(uc16 limit, Label* on_greater) { 154 __ cmp(current_character(), limit); 155 BranchOrBacktrack(greater, on_greater); 156 } 157 158 159 void RegExpMacroAssemblerX87::CheckAtStart(Label* on_at_start) { 160 __ lea(eax, Operand(edi, -char_size())); 161 __ cmp(eax, Operand(ebp, kStringStartMinusOne)); 162 BranchOrBacktrack(equal, on_at_start); 163 } 164 165 166 void RegExpMacroAssemblerX87::CheckNotAtStart(int cp_offset, 167 Label* on_not_at_start) { 168 __ lea(eax, Operand(edi, -char_size() + cp_offset * char_size())); 169 __ cmp(eax, Operand(ebp, kStringStartMinusOne)); 170 BranchOrBacktrack(not_equal, on_not_at_start); 171 } 172 173 174 void RegExpMacroAssemblerX87::CheckCharacterLT(uc16 limit, Label* on_less) { 175 __ cmp(current_character(), limit); 176 BranchOrBacktrack(less, on_less); 177 } 178 179 180 void RegExpMacroAssemblerX87::CheckGreedyLoop(Label* on_equal) { 181 Label fallthrough; 182 __ cmp(edi, Operand(backtrack_stackpointer(), 0)); 183 __ j(not_equal, &fallthrough); 184 __ add(backtrack_stackpointer(), Immediate(kPointerSize)); // Pop. 185 BranchOrBacktrack(no_condition, on_equal); 186 __ bind(&fallthrough); 187 } 188 189 void RegExpMacroAssemblerX87::CheckNotBackReferenceIgnoreCase( 190 int start_reg, bool read_backward, bool unicode, Label* on_no_match) { 191 Label fallthrough; 192 __ mov(edx, register_location(start_reg)); // Index of start of capture 193 __ mov(ebx, register_location(start_reg + 1)); // Index of end of capture 194 __ sub(ebx, edx); // Length of capture. 195 196 // At this point, the capture registers are either both set or both cleared. 197 // If the capture length is zero, then the capture is either empty or cleared. 198 // Fall through in both cases. 199 __ j(equal, &fallthrough); 200 201 // Check that there are sufficient characters left in the input. 202 if (read_backward) { 203 __ mov(eax, Operand(ebp, kStringStartMinusOne)); 204 __ add(eax, ebx); 205 __ cmp(edi, eax); 206 BranchOrBacktrack(less_equal, on_no_match); 207 } else { 208 __ mov(eax, edi); 209 __ add(eax, ebx); 210 BranchOrBacktrack(greater, on_no_match); 211 } 212 213 if (mode_ == LATIN1) { 214 Label success; 215 Label fail; 216 Label loop_increment; 217 // Save register contents to make the registers available below. 218 __ push(edi); 219 __ push(backtrack_stackpointer()); 220 // After this, the eax, ecx, and edi registers are available. 221 222 __ add(edx, esi); // Start of capture 223 __ add(edi, esi); // Start of text to match against capture. 224 if (read_backward) { 225 __ sub(edi, ebx); // Offset by length when matching backwards. 226 } 227 __ add(ebx, edi); // End of text to match against capture. 228 229 Label loop; 230 __ bind(&loop); 231 __ movzx_b(eax, Operand(edi, 0)); 232 __ cmpb_al(Operand(edx, 0)); 233 __ j(equal, &loop_increment); 234 235 // Mismatch, try case-insensitive match (converting letters to lower-case). 236 __ or_(eax, 0x20); // Convert match character to lower-case. 237 __ lea(ecx, Operand(eax, -'a')); 238 __ cmp(ecx, static_cast<int32_t>('z' - 'a')); // Is eax a lowercase letter? 239 Label convert_capture; 240 __ j(below_equal, &convert_capture); // In range 'a'-'z'. 241 // Latin-1: Check for values in range [224,254] but not 247. 242 __ sub(ecx, Immediate(224 - 'a')); 243 __ cmp(ecx, Immediate(254 - 224)); 244 __ j(above, &fail); // Weren't Latin-1 letters. 245 __ cmp(ecx, Immediate(247 - 224)); // Check for 247. 246 __ j(equal, &fail); 247 __ bind(&convert_capture); 248 // Also convert capture character. 249 __ movzx_b(ecx, Operand(edx, 0)); 250 __ or_(ecx, 0x20); 251 252 __ cmp(eax, ecx); 253 __ j(not_equal, &fail); 254 255 __ bind(&loop_increment); 256 // Increment pointers into match and capture strings. 257 __ add(edx, Immediate(1)); 258 __ add(edi, Immediate(1)); 259 // Compare to end of match, and loop if not done. 260 __ cmp(edi, ebx); 261 __ j(below, &loop); 262 __ jmp(&success); 263 264 __ bind(&fail); 265 // Restore original values before failing. 266 __ pop(backtrack_stackpointer()); 267 __ pop(edi); 268 BranchOrBacktrack(no_condition, on_no_match); 269 270 __ bind(&success); 271 // Restore original value before continuing. 272 __ pop(backtrack_stackpointer()); 273 // Drop original value of character position. 274 __ add(esp, Immediate(kPointerSize)); 275 // Compute new value of character position after the matched part. 276 __ sub(edi, esi); 277 if (read_backward) { 278 // Subtract match length if we matched backward. 279 __ add(edi, register_location(start_reg)); 280 __ sub(edi, register_location(start_reg + 1)); 281 } 282 } else { 283 DCHECK(mode_ == UC16); 284 // Save registers before calling C function. 285 __ push(esi); 286 __ push(edi); 287 __ push(backtrack_stackpointer()); 288 __ push(ebx); 289 290 static const int argument_count = 4; 291 __ PrepareCallCFunction(argument_count, ecx); 292 // Put arguments into allocated stack area, last argument highest on stack. 293 // Parameters are 294 // Address byte_offset1 - Address captured substring's start. 295 // Address byte_offset2 - Address of current character position. 296 // size_t byte_length - length of capture in bytes(!) 297 // Isolate* isolate or 0 if unicode flag. 298 299 // Set isolate. 300 #ifdef V8_I18N_SUPPORT 301 if (unicode) { 302 __ mov(Operand(esp, 3 * kPointerSize), Immediate(0)); 303 } else // NOLINT 304 #endif // V8_I18N_SUPPORT 305 { 306 __ mov(Operand(esp, 3 * kPointerSize), 307 Immediate(ExternalReference::isolate_address(isolate()))); 308 } 309 // Set byte_length. 310 __ mov(Operand(esp, 2 * kPointerSize), ebx); 311 // Set byte_offset2. 312 // Found by adding negative string-end offset of current position (edi) 313 // to end of string. 314 __ add(edi, esi); 315 if (read_backward) { 316 __ sub(edi, ebx); // Offset by length when matching backwards. 317 } 318 __ mov(Operand(esp, 1 * kPointerSize), edi); 319 // Set byte_offset1. 320 // Start of capture, where edx already holds string-end negative offset. 321 __ add(edx, esi); 322 __ mov(Operand(esp, 0 * kPointerSize), edx); 323 324 { 325 AllowExternalCallThatCantCauseGC scope(masm_); 326 ExternalReference compare = 327 ExternalReference::re_case_insensitive_compare_uc16(isolate()); 328 __ CallCFunction(compare, argument_count); 329 } 330 // Pop original values before reacting on result value. 331 __ pop(ebx); 332 __ pop(backtrack_stackpointer()); 333 __ pop(edi); 334 __ pop(esi); 335 336 // Check if function returned non-zero for success or zero for failure. 337 __ or_(eax, eax); 338 BranchOrBacktrack(zero, on_no_match); 339 // On success, advance position by length of capture. 340 if (read_backward) { 341 __ sub(edi, ebx); 342 } else { 343 __ add(edi, ebx); 344 } 345 } 346 __ bind(&fallthrough); 347 } 348 349 350 void RegExpMacroAssemblerX87::CheckNotBackReference(int start_reg, 351 bool read_backward, 352 Label* on_no_match) { 353 Label fallthrough; 354 Label success; 355 Label fail; 356 357 // Find length of back-referenced capture. 358 __ mov(edx, register_location(start_reg)); 359 __ mov(eax, register_location(start_reg + 1)); 360 __ sub(eax, edx); // Length to check. 361 362 // At this point, the capture registers are either both set or both cleared. 363 // If the capture length is zero, then the capture is either empty or cleared. 364 // Fall through in both cases. 365 __ j(equal, &fallthrough); 366 367 // Check that there are sufficient characters left in the input. 368 if (read_backward) { 369 __ mov(ebx, Operand(ebp, kStringStartMinusOne)); 370 __ add(ebx, eax); 371 __ cmp(edi, ebx); 372 BranchOrBacktrack(less_equal, on_no_match); 373 } else { 374 __ mov(ebx, edi); 375 __ add(ebx, eax); 376 BranchOrBacktrack(greater, on_no_match); 377 } 378 379 // Save register to make it available below. 380 __ push(backtrack_stackpointer()); 381 382 // Compute pointers to match string and capture string 383 __ add(edx, esi); // Start of capture. 384 __ lea(ebx, Operand(esi, edi, times_1, 0)); // Start of match. 385 if (read_backward) { 386 __ sub(ebx, eax); // Offset by length when matching backwards. 387 } 388 __ lea(ecx, Operand(eax, ebx, times_1, 0)); // End of match 389 390 Label loop; 391 __ bind(&loop); 392 if (mode_ == LATIN1) { 393 __ movzx_b(eax, Operand(edx, 0)); 394 __ cmpb_al(Operand(ebx, 0)); 395 } else { 396 DCHECK(mode_ == UC16); 397 __ movzx_w(eax, Operand(edx, 0)); 398 __ cmpw_ax(Operand(ebx, 0)); 399 } 400 __ j(not_equal, &fail); 401 // Increment pointers into capture and match string. 402 __ add(edx, Immediate(char_size())); 403 __ add(ebx, Immediate(char_size())); 404 // Check if we have reached end of match area. 405 __ cmp(ebx, ecx); 406 __ j(below, &loop); 407 __ jmp(&success); 408 409 __ bind(&fail); 410 // Restore backtrack stackpointer. 411 __ pop(backtrack_stackpointer()); 412 BranchOrBacktrack(no_condition, on_no_match); 413 414 __ bind(&success); 415 // Move current character position to position after match. 416 __ mov(edi, ecx); 417 __ sub(edi, esi); 418 if (read_backward) { 419 // Subtract match length if we matched backward. 420 __ add(edi, register_location(start_reg)); 421 __ sub(edi, register_location(start_reg + 1)); 422 } 423 // Restore backtrack stackpointer. 424 __ pop(backtrack_stackpointer()); 425 426 __ bind(&fallthrough); 427 } 428 429 430 void RegExpMacroAssemblerX87::CheckNotCharacter(uint32_t c, 431 Label* on_not_equal) { 432 __ cmp(current_character(), c); 433 BranchOrBacktrack(not_equal, on_not_equal); 434 } 435 436 437 void RegExpMacroAssemblerX87::CheckCharacterAfterAnd(uint32_t c, 438 uint32_t mask, 439 Label* on_equal) { 440 if (c == 0) { 441 __ test(current_character(), Immediate(mask)); 442 } else { 443 __ mov(eax, mask); 444 __ and_(eax, current_character()); 445 __ cmp(eax, c); 446 } 447 BranchOrBacktrack(equal, on_equal); 448 } 449 450 451 void RegExpMacroAssemblerX87::CheckNotCharacterAfterAnd(uint32_t c, 452 uint32_t mask, 453 Label* on_not_equal) { 454 if (c == 0) { 455 __ test(current_character(), Immediate(mask)); 456 } else { 457 __ mov(eax, mask); 458 __ and_(eax, current_character()); 459 __ cmp(eax, c); 460 } 461 BranchOrBacktrack(not_equal, on_not_equal); 462 } 463 464 465 void RegExpMacroAssemblerX87::CheckNotCharacterAfterMinusAnd( 466 uc16 c, 467 uc16 minus, 468 uc16 mask, 469 Label* on_not_equal) { 470 DCHECK(minus < String::kMaxUtf16CodeUnit); 471 __ lea(eax, Operand(current_character(), -minus)); 472 if (c == 0) { 473 __ test(eax, Immediate(mask)); 474 } else { 475 __ and_(eax, mask); 476 __ cmp(eax, c); 477 } 478 BranchOrBacktrack(not_equal, on_not_equal); 479 } 480 481 482 void RegExpMacroAssemblerX87::CheckCharacterInRange( 483 uc16 from, 484 uc16 to, 485 Label* on_in_range) { 486 __ lea(eax, Operand(current_character(), -from)); 487 __ cmp(eax, to - from); 488 BranchOrBacktrack(below_equal, on_in_range); 489 } 490 491 492 void RegExpMacroAssemblerX87::CheckCharacterNotInRange( 493 uc16 from, 494 uc16 to, 495 Label* on_not_in_range) { 496 __ lea(eax, Operand(current_character(), -from)); 497 __ cmp(eax, to - from); 498 BranchOrBacktrack(above, on_not_in_range); 499 } 500 501 502 void RegExpMacroAssemblerX87::CheckBitInTable( 503 Handle<ByteArray> table, 504 Label* on_bit_set) { 505 __ mov(eax, Immediate(table)); 506 Register index = current_character(); 507 if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) { 508 __ mov(ebx, kTableSize - 1); 509 __ and_(ebx, current_character()); 510 index = ebx; 511 } 512 __ cmpb(FieldOperand(eax, index, times_1, ByteArray::kHeaderSize), 513 Immediate(0)); 514 BranchOrBacktrack(not_equal, on_bit_set); 515 } 516 517 518 bool RegExpMacroAssemblerX87::CheckSpecialCharacterClass(uc16 type, 519 Label* on_no_match) { 520 // Range checks (c in min..max) are generally implemented by an unsigned 521 // (c - min) <= (max - min) check 522 switch (type) { 523 case 's': 524 // Match space-characters 525 if (mode_ == LATIN1) { 526 // One byte space characters are '\t'..'\r', ' ' and \u00a0. 527 Label success; 528 __ cmp(current_character(), ' '); 529 __ j(equal, &success, Label::kNear); 530 // Check range 0x09..0x0d 531 __ lea(eax, Operand(current_character(), -'\t')); 532 __ cmp(eax, '\r' - '\t'); 533 __ j(below_equal, &success, Label::kNear); 534 // \u00a0 (NBSP). 535 __ cmp(eax, 0x00a0 - '\t'); 536 BranchOrBacktrack(not_equal, on_no_match); 537 __ bind(&success); 538 return true; 539 } 540 return false; 541 case 'S': 542 // The emitted code for generic character classes is good enough. 543 return false; 544 case 'd': 545 // Match ASCII digits ('0'..'9') 546 __ lea(eax, Operand(current_character(), -'0')); 547 __ cmp(eax, '9' - '0'); 548 BranchOrBacktrack(above, on_no_match); 549 return true; 550 case 'D': 551 // Match non ASCII-digits 552 __ lea(eax, Operand(current_character(), -'0')); 553 __ cmp(eax, '9' - '0'); 554 BranchOrBacktrack(below_equal, on_no_match); 555 return true; 556 case '.': { 557 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 558 __ mov(eax, current_character()); 559 __ xor_(eax, Immediate(0x01)); 560 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c 561 __ sub(eax, Immediate(0x0b)); 562 __ cmp(eax, 0x0c - 0x0b); 563 BranchOrBacktrack(below_equal, on_no_match); 564 if (mode_ == UC16) { 565 // Compare original value to 0x2028 and 0x2029, using the already 566 // computed (current_char ^ 0x01 - 0x0b). I.e., check for 567 // 0x201d (0x2028 - 0x0b) or 0x201e. 568 __ sub(eax, Immediate(0x2028 - 0x0b)); 569 __ cmp(eax, 0x2029 - 0x2028); 570 BranchOrBacktrack(below_equal, on_no_match); 571 } 572 return true; 573 } 574 case 'w': { 575 if (mode_ != LATIN1) { 576 // Table is 256 entries, so all Latin1 characters can be tested. 577 __ cmp(current_character(), Immediate('z')); 578 BranchOrBacktrack(above, on_no_match); 579 } 580 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char. 581 ExternalReference word_map = ExternalReference::re_word_character_map(); 582 __ test_b(current_character(), 583 Operand::StaticArray(current_character(), times_1, word_map)); 584 BranchOrBacktrack(zero, on_no_match); 585 return true; 586 } 587 case 'W': { 588 Label done; 589 if (mode_ != LATIN1) { 590 // Table is 256 entries, so all Latin1 characters can be tested. 591 __ cmp(current_character(), Immediate('z')); 592 __ j(above, &done); 593 } 594 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char. 595 ExternalReference word_map = ExternalReference::re_word_character_map(); 596 __ test_b(current_character(), 597 Operand::StaticArray(current_character(), times_1, word_map)); 598 BranchOrBacktrack(not_zero, on_no_match); 599 if (mode_ != LATIN1) { 600 __ bind(&done); 601 } 602 return true; 603 } 604 // Non-standard classes (with no syntactic shorthand) used internally. 605 case '*': 606 // Match any character. 607 return true; 608 case 'n': { 609 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029). 610 // The opposite of '.'. 611 __ mov(eax, current_character()); 612 __ xor_(eax, Immediate(0x01)); 613 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c 614 __ sub(eax, Immediate(0x0b)); 615 __ cmp(eax, 0x0c - 0x0b); 616 if (mode_ == LATIN1) { 617 BranchOrBacktrack(above, on_no_match); 618 } else { 619 Label done; 620 BranchOrBacktrack(below_equal, &done); 621 DCHECK_EQ(UC16, mode_); 622 // Compare original value to 0x2028 and 0x2029, using the already 623 // computed (current_char ^ 0x01 - 0x0b). I.e., check for 624 // 0x201d (0x2028 - 0x0b) or 0x201e. 625 __ sub(eax, Immediate(0x2028 - 0x0b)); 626 __ cmp(eax, 1); 627 BranchOrBacktrack(above, on_no_match); 628 __ bind(&done); 629 } 630 return true; 631 } 632 // No custom implementation (yet): s(UC16), S(UC16). 633 default: 634 return false; 635 } 636 } 637 638 639 void RegExpMacroAssemblerX87::Fail() { 640 STATIC_ASSERT(FAILURE == 0); // Return value for failure is zero. 641 if (!global()) { 642 __ Move(eax, Immediate(FAILURE)); 643 } 644 __ jmp(&exit_label_); 645 } 646 647 648 Handle<HeapObject> RegExpMacroAssemblerX87::GetCode(Handle<String> source) { 649 Label return_eax; 650 // Finalize code - write the entry point code now we know how many 651 // registers we need. 652 653 // Entry code: 654 __ bind(&entry_label_); 655 656 // Tell the system that we have a stack frame. Because the type is MANUAL, no 657 // code is generated. 658 FrameScope scope(masm_, StackFrame::MANUAL); 659 660 // Actually emit code to start a new stack frame. 661 __ push(ebp); 662 __ mov(ebp, esp); 663 // Save callee-save registers. Order here should correspond to order of 664 // kBackup_ebx etc. 665 __ push(esi); 666 __ push(edi); 667 __ push(ebx); // Callee-save on MacOS. 668 __ push(Immediate(0)); // Number of successful matches in a global regexp. 669 __ push(Immediate(0)); // Make room for "string start - 1" constant. 670 671 // Check if we have space on the stack for registers. 672 Label stack_limit_hit; 673 Label stack_ok; 674 675 ExternalReference stack_limit = 676 ExternalReference::address_of_stack_limit(isolate()); 677 __ mov(ecx, esp); 678 __ sub(ecx, Operand::StaticVariable(stack_limit)); 679 // Handle it if the stack pointer is already below the stack limit. 680 __ j(below_equal, &stack_limit_hit); 681 // Check if there is room for the variable number of registers above 682 // the stack limit. 683 __ cmp(ecx, num_registers_ * kPointerSize); 684 __ j(above_equal, &stack_ok); 685 // Exit with OutOfMemory exception. There is not enough space on the stack 686 // for our working registers. 687 __ mov(eax, EXCEPTION); 688 __ jmp(&return_eax); 689 690 __ bind(&stack_limit_hit); 691 CallCheckStackGuardState(ebx); 692 __ or_(eax, eax); 693 // If returned value is non-zero, we exit with the returned value as result. 694 __ j(not_zero, &return_eax); 695 696 __ bind(&stack_ok); 697 // Load start index for later use. 698 __ mov(ebx, Operand(ebp, kStartIndex)); 699 700 // Allocate space on stack for registers. 701 __ sub(esp, Immediate(num_registers_ * kPointerSize)); 702 // Load string length. 703 __ mov(esi, Operand(ebp, kInputEnd)); 704 // Load input position. 705 __ mov(edi, Operand(ebp, kInputStart)); 706 // Set up edi to be negative offset from string end. 707 __ sub(edi, esi); 708 709 // Set eax to address of char before start of the string. 710 // (effectively string position -1). 711 __ neg(ebx); 712 if (mode_ == UC16) { 713 __ lea(eax, Operand(edi, ebx, times_2, -char_size())); 714 } else { 715 __ lea(eax, Operand(edi, ebx, times_1, -char_size())); 716 } 717 // Store this value in a local variable, for use when clearing 718 // position registers. 719 __ mov(Operand(ebp, kStringStartMinusOne), eax); 720 721 #if V8_OS_WIN 722 // Ensure that we write to each stack page, in order. Skipping a page 723 // on Windows can cause segmentation faults. Assuming page size is 4k. 724 const int kPageSize = 4096; 725 const int kRegistersPerPage = kPageSize / kPointerSize; 726 for (int i = num_saved_registers_ + kRegistersPerPage - 1; 727 i < num_registers_; 728 i += kRegistersPerPage) { 729 __ mov(register_location(i), eax); // One write every page. 730 } 731 #endif // V8_OS_WIN 732 733 Label load_char_start_regexp, start_regexp; 734 // Load newline if index is at start, previous character otherwise. 735 __ cmp(Operand(ebp, kStartIndex), Immediate(0)); 736 __ j(not_equal, &load_char_start_regexp, Label::kNear); 737 __ mov(current_character(), '\n'); 738 __ jmp(&start_regexp, Label::kNear); 739 740 // Global regexp restarts matching here. 741 __ bind(&load_char_start_regexp); 742 // Load previous char as initial value of current character register. 743 LoadCurrentCharacterUnchecked(-1, 1); 744 __ bind(&start_regexp); 745 746 // Initialize on-stack registers. 747 if (num_saved_registers_ > 0) { // Always is, if generated from a regexp. 748 // Fill saved registers with initial value = start offset - 1 749 // Fill in stack push order, to avoid accessing across an unwritten 750 // page (a problem on Windows). 751 if (num_saved_registers_ > 8) { 752 __ mov(ecx, kRegisterZero); 753 Label init_loop; 754 __ bind(&init_loop); 755 __ mov(Operand(ebp, ecx, times_1, 0), eax); 756 __ sub(ecx, Immediate(kPointerSize)); 757 __ cmp(ecx, kRegisterZero - num_saved_registers_ * kPointerSize); 758 __ j(greater, &init_loop); 759 } else { // Unroll the loop. 760 for (int i = 0; i < num_saved_registers_; i++) { 761 __ mov(register_location(i), eax); 762 } 763 } 764 } 765 766 // Initialize backtrack stack pointer. 767 __ mov(backtrack_stackpointer(), Operand(ebp, kStackHighEnd)); 768 769 __ jmp(&start_label_); 770 771 // Exit code: 772 if (success_label_.is_linked()) { 773 // Save captures when successful. 774 __ bind(&success_label_); 775 if (num_saved_registers_ > 0) { 776 // copy captures to output 777 __ mov(ebx, Operand(ebp, kRegisterOutput)); 778 __ mov(ecx, Operand(ebp, kInputEnd)); 779 __ mov(edx, Operand(ebp, kStartIndex)); 780 __ sub(ecx, Operand(ebp, kInputStart)); 781 if (mode_ == UC16) { 782 __ lea(ecx, Operand(ecx, edx, times_2, 0)); 783 } else { 784 __ add(ecx, edx); 785 } 786 for (int i = 0; i < num_saved_registers_; i++) { 787 __ mov(eax, register_location(i)); 788 if (i == 0 && global_with_zero_length_check()) { 789 // Keep capture start in edx for the zero-length check later. 790 __ mov(edx, eax); 791 } 792 // Convert to index from start of string, not end. 793 __ add(eax, ecx); 794 if (mode_ == UC16) { 795 __ sar(eax, 1); // Convert byte index to character index. 796 } 797 __ mov(Operand(ebx, i * kPointerSize), eax); 798 } 799 } 800 801 if (global()) { 802 // Restart matching if the regular expression is flagged as global. 803 // Increment success counter. 804 __ inc(Operand(ebp, kSuccessfulCaptures)); 805 // Capture results have been stored, so the number of remaining global 806 // output registers is reduced by the number of stored captures. 807 __ mov(ecx, Operand(ebp, kNumOutputRegisters)); 808 __ sub(ecx, Immediate(num_saved_registers_)); 809 // Check whether we have enough room for another set of capture results. 810 __ cmp(ecx, Immediate(num_saved_registers_)); 811 __ j(less, &exit_label_); 812 813 __ mov(Operand(ebp, kNumOutputRegisters), ecx); 814 // Advance the location for output. 815 __ add(Operand(ebp, kRegisterOutput), 816 Immediate(num_saved_registers_ * kPointerSize)); 817 818 // Prepare eax to initialize registers with its value in the next run. 819 __ mov(eax, Operand(ebp, kStringStartMinusOne)); 820 821 if (global_with_zero_length_check()) { 822 // Special case for zero-length matches. 823 // edx: capture start index 824 __ cmp(edi, edx); 825 // Not a zero-length match, restart. 826 __ j(not_equal, &load_char_start_regexp); 827 // edi (offset from the end) is zero if we already reached the end. 828 __ test(edi, edi); 829 __ j(zero, &exit_label_, Label::kNear); 830 // Advance current position after a zero-length match. 831 Label advance; 832 __ bind(&advance); 833 if (mode_ == UC16) { 834 __ add(edi, Immediate(2)); 835 } else { 836 __ inc(edi); 837 } 838 if (global_unicode()) CheckNotInSurrogatePair(0, &advance); 839 } 840 __ jmp(&load_char_start_regexp); 841 } else { 842 __ mov(eax, Immediate(SUCCESS)); 843 } 844 } 845 846 __ bind(&exit_label_); 847 if (global()) { 848 // Return the number of successful captures. 849 __ mov(eax, Operand(ebp, kSuccessfulCaptures)); 850 } 851 852 __ bind(&return_eax); 853 // Skip esp past regexp registers. 854 __ lea(esp, Operand(ebp, kBackup_ebx)); 855 // Restore callee-save registers. 856 __ pop(ebx); 857 __ pop(edi); 858 __ pop(esi); 859 // Exit function frame, restore previous one. 860 __ pop(ebp); 861 __ ret(0); 862 863 // Backtrack code (branch target for conditional backtracks). 864 if (backtrack_label_.is_linked()) { 865 __ bind(&backtrack_label_); 866 Backtrack(); 867 } 868 869 Label exit_with_exception; 870 871 // Preempt-code 872 if (check_preempt_label_.is_linked()) { 873 SafeCallTarget(&check_preempt_label_); 874 875 __ push(backtrack_stackpointer()); 876 __ push(edi); 877 878 CallCheckStackGuardState(ebx); 879 __ or_(eax, eax); 880 // If returning non-zero, we should end execution with the given 881 // result as return value. 882 __ j(not_zero, &return_eax); 883 884 __ pop(edi); 885 __ pop(backtrack_stackpointer()); 886 // String might have moved: Reload esi from frame. 887 __ mov(esi, Operand(ebp, kInputEnd)); 888 SafeReturn(); 889 } 890 891 // Backtrack stack overflow code. 892 if (stack_overflow_label_.is_linked()) { 893 SafeCallTarget(&stack_overflow_label_); 894 // Reached if the backtrack-stack limit has been hit. 895 896 Label grow_failed; 897 // Save registers before calling C function 898 __ push(esi); 899 __ push(edi); 900 901 // Call GrowStack(backtrack_stackpointer()) 902 static const int num_arguments = 3; 903 __ PrepareCallCFunction(num_arguments, ebx); 904 __ mov(Operand(esp, 2 * kPointerSize), 905 Immediate(ExternalReference::isolate_address(isolate()))); 906 __ lea(eax, Operand(ebp, kStackHighEnd)); 907 __ mov(Operand(esp, 1 * kPointerSize), eax); 908 __ mov(Operand(esp, 0 * kPointerSize), backtrack_stackpointer()); 909 ExternalReference grow_stack = 910 ExternalReference::re_grow_stack(isolate()); 911 __ CallCFunction(grow_stack, num_arguments); 912 // If return NULL, we have failed to grow the stack, and 913 // must exit with a stack-overflow exception. 914 __ or_(eax, eax); 915 __ j(equal, &exit_with_exception); 916 // Otherwise use return value as new stack pointer. 917 __ mov(backtrack_stackpointer(), eax); 918 // Restore saved registers and continue. 919 __ pop(edi); 920 __ pop(esi); 921 SafeReturn(); 922 } 923 924 if (exit_with_exception.is_linked()) { 925 // If any of the code above needed to exit with an exception. 926 __ bind(&exit_with_exception); 927 // Exit with Result EXCEPTION(-1) to signal thrown exception. 928 __ mov(eax, EXCEPTION); 929 __ jmp(&return_eax); 930 } 931 932 CodeDesc code_desc; 933 masm_->GetCode(&code_desc); 934 Handle<Code> code = 935 isolate()->factory()->NewCode(code_desc, 936 Code::ComputeFlags(Code::REGEXP), 937 masm_->CodeObject()); 938 PROFILE(masm_->isolate(), 939 RegExpCodeCreateEvent(AbstractCode::cast(*code), *source)); 940 return Handle<HeapObject>::cast(code); 941 } 942 943 944 void RegExpMacroAssemblerX87::GoTo(Label* to) { 945 BranchOrBacktrack(no_condition, to); 946 } 947 948 949 void RegExpMacroAssemblerX87::IfRegisterGE(int reg, 950 int comparand, 951 Label* if_ge) { 952 __ cmp(register_location(reg), Immediate(comparand)); 953 BranchOrBacktrack(greater_equal, if_ge); 954 } 955 956 957 void RegExpMacroAssemblerX87::IfRegisterLT(int reg, 958 int comparand, 959 Label* if_lt) { 960 __ cmp(register_location(reg), Immediate(comparand)); 961 BranchOrBacktrack(less, if_lt); 962 } 963 964 965 void RegExpMacroAssemblerX87::IfRegisterEqPos(int reg, 966 Label* if_eq) { 967 __ cmp(edi, register_location(reg)); 968 BranchOrBacktrack(equal, if_eq); 969 } 970 971 972 RegExpMacroAssembler::IrregexpImplementation 973 RegExpMacroAssemblerX87::Implementation() { 974 return kX87Implementation; 975 } 976 977 978 void RegExpMacroAssemblerX87::LoadCurrentCharacter(int cp_offset, 979 Label* on_end_of_input, 980 bool check_bounds, 981 int characters) { 982 DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works) 983 if (check_bounds) { 984 if (cp_offset >= 0) { 985 CheckPosition(cp_offset + characters - 1, on_end_of_input); 986 } else { 987 CheckPosition(cp_offset, on_end_of_input); 988 } 989 } 990 LoadCurrentCharacterUnchecked(cp_offset, characters); 991 } 992 993 994 void RegExpMacroAssemblerX87::PopCurrentPosition() { 995 Pop(edi); 996 } 997 998 999 void RegExpMacroAssemblerX87::PopRegister(int register_index) { 1000 Pop(eax); 1001 __ mov(register_location(register_index), eax); 1002 } 1003 1004 1005 void RegExpMacroAssemblerX87::PushBacktrack(Label* label) { 1006 Push(Immediate::CodeRelativeOffset(label)); 1007 CheckStackLimit(); 1008 } 1009 1010 1011 void RegExpMacroAssemblerX87::PushCurrentPosition() { 1012 Push(edi); 1013 } 1014 1015 1016 void RegExpMacroAssemblerX87::PushRegister(int register_index, 1017 StackCheckFlag check_stack_limit) { 1018 __ mov(eax, register_location(register_index)); 1019 Push(eax); 1020 if (check_stack_limit) CheckStackLimit(); 1021 } 1022 1023 1024 void RegExpMacroAssemblerX87::ReadCurrentPositionFromRegister(int reg) { 1025 __ mov(edi, register_location(reg)); 1026 } 1027 1028 1029 void RegExpMacroAssemblerX87::ReadStackPointerFromRegister(int reg) { 1030 __ mov(backtrack_stackpointer(), register_location(reg)); 1031 __ add(backtrack_stackpointer(), Operand(ebp, kStackHighEnd)); 1032 } 1033 1034 void RegExpMacroAssemblerX87::SetCurrentPositionFromEnd(int by) { 1035 Label after_position; 1036 __ cmp(edi, -by * char_size()); 1037 __ j(greater_equal, &after_position, Label::kNear); 1038 __ mov(edi, -by * char_size()); 1039 // On RegExp code entry (where this operation is used), the character before 1040 // the current position is expected to be already loaded. 1041 // We have advanced the position, so it's safe to read backwards. 1042 LoadCurrentCharacterUnchecked(-1, 1); 1043 __ bind(&after_position); 1044 } 1045 1046 1047 void RegExpMacroAssemblerX87::SetRegister(int register_index, int to) { 1048 DCHECK(register_index >= num_saved_registers_); // Reserved for positions! 1049 __ mov(register_location(register_index), Immediate(to)); 1050 } 1051 1052 1053 bool RegExpMacroAssemblerX87::Succeed() { 1054 __ jmp(&success_label_); 1055 return global(); 1056 } 1057 1058 1059 void RegExpMacroAssemblerX87::WriteCurrentPositionToRegister(int reg, 1060 int cp_offset) { 1061 if (cp_offset == 0) { 1062 __ mov(register_location(reg), edi); 1063 } else { 1064 __ lea(eax, Operand(edi, cp_offset * char_size())); 1065 __ mov(register_location(reg), eax); 1066 } 1067 } 1068 1069 1070 void RegExpMacroAssemblerX87::ClearRegisters(int reg_from, int reg_to) { 1071 DCHECK(reg_from <= reg_to); 1072 __ mov(eax, Operand(ebp, kStringStartMinusOne)); 1073 for (int reg = reg_from; reg <= reg_to; reg++) { 1074 __ mov(register_location(reg), eax); 1075 } 1076 } 1077 1078 1079 void RegExpMacroAssemblerX87::WriteStackPointerToRegister(int reg) { 1080 __ mov(eax, backtrack_stackpointer()); 1081 __ sub(eax, Operand(ebp, kStackHighEnd)); 1082 __ mov(register_location(reg), eax); 1083 } 1084 1085 1086 // Private methods: 1087 1088 void RegExpMacroAssemblerX87::CallCheckStackGuardState(Register scratch) { 1089 static const int num_arguments = 3; 1090 __ PrepareCallCFunction(num_arguments, scratch); 1091 // RegExp code frame pointer. 1092 __ mov(Operand(esp, 2 * kPointerSize), ebp); 1093 // Code* of self. 1094 __ mov(Operand(esp, 1 * kPointerSize), Immediate(masm_->CodeObject())); 1095 // Next address on the stack (will be address of return address). 1096 __ lea(eax, Operand(esp, -kPointerSize)); 1097 __ mov(Operand(esp, 0 * kPointerSize), eax); 1098 ExternalReference check_stack_guard = 1099 ExternalReference::re_check_stack_guard_state(isolate()); 1100 __ CallCFunction(check_stack_guard, num_arguments); 1101 } 1102 1103 1104 // Helper function for reading a value out of a stack frame. 1105 template <typename T> 1106 static T& frame_entry(Address re_frame, int frame_offset) { 1107 return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset)); 1108 } 1109 1110 1111 template <typename T> 1112 static T* frame_entry_address(Address re_frame, int frame_offset) { 1113 return reinterpret_cast<T*>(re_frame + frame_offset); 1114 } 1115 1116 1117 int RegExpMacroAssemblerX87::CheckStackGuardState(Address* return_address, 1118 Code* re_code, 1119 Address re_frame) { 1120 return NativeRegExpMacroAssembler::CheckStackGuardState( 1121 frame_entry<Isolate*>(re_frame, kIsolate), 1122 frame_entry<int>(re_frame, kStartIndex), 1123 frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code, 1124 frame_entry_address<String*>(re_frame, kInputString), 1125 frame_entry_address<const byte*>(re_frame, kInputStart), 1126 frame_entry_address<const byte*>(re_frame, kInputEnd)); 1127 } 1128 1129 1130 Operand RegExpMacroAssemblerX87::register_location(int register_index) { 1131 DCHECK(register_index < (1<<30)); 1132 if (num_registers_ <= register_index) { 1133 num_registers_ = register_index + 1; 1134 } 1135 return Operand(ebp, kRegisterZero - register_index * kPointerSize); 1136 } 1137 1138 1139 void RegExpMacroAssemblerX87::CheckPosition(int cp_offset, 1140 Label* on_outside_input) { 1141 if (cp_offset >= 0) { 1142 __ cmp(edi, -cp_offset * char_size()); 1143 BranchOrBacktrack(greater_equal, on_outside_input); 1144 } else { 1145 __ lea(eax, Operand(edi, cp_offset * char_size())); 1146 __ cmp(eax, Operand(ebp, kStringStartMinusOne)); 1147 BranchOrBacktrack(less_equal, on_outside_input); 1148 } 1149 } 1150 1151 1152 void RegExpMacroAssemblerX87::BranchOrBacktrack(Condition condition, 1153 Label* to) { 1154 if (condition < 0) { // No condition 1155 if (to == NULL) { 1156 Backtrack(); 1157 return; 1158 } 1159 __ jmp(to); 1160 return; 1161 } 1162 if (to == NULL) { 1163 __ j(condition, &backtrack_label_); 1164 return; 1165 } 1166 __ j(condition, to); 1167 } 1168 1169 1170 void RegExpMacroAssemblerX87::SafeCall(Label* to) { 1171 Label return_to; 1172 __ push(Immediate::CodeRelativeOffset(&return_to)); 1173 __ jmp(to); 1174 __ bind(&return_to); 1175 } 1176 1177 1178 void RegExpMacroAssemblerX87::SafeReturn() { 1179 __ pop(ebx); 1180 __ add(ebx, Immediate(masm_->CodeObject())); 1181 __ jmp(ebx); 1182 } 1183 1184 1185 void RegExpMacroAssemblerX87::SafeCallTarget(Label* name) { 1186 __ bind(name); 1187 } 1188 1189 1190 void RegExpMacroAssemblerX87::Push(Register source) { 1191 DCHECK(!source.is(backtrack_stackpointer())); 1192 // Notice: This updates flags, unlike normal Push. 1193 __ sub(backtrack_stackpointer(), Immediate(kPointerSize)); 1194 __ mov(Operand(backtrack_stackpointer(), 0), source); 1195 } 1196 1197 1198 void RegExpMacroAssemblerX87::Push(Immediate value) { 1199 // Notice: This updates flags, unlike normal Push. 1200 __ sub(backtrack_stackpointer(), Immediate(kPointerSize)); 1201 __ mov(Operand(backtrack_stackpointer(), 0), value); 1202 } 1203 1204 1205 void RegExpMacroAssemblerX87::Pop(Register target) { 1206 DCHECK(!target.is(backtrack_stackpointer())); 1207 __ mov(target, Operand(backtrack_stackpointer(), 0)); 1208 // Notice: This updates flags, unlike normal Pop. 1209 __ add(backtrack_stackpointer(), Immediate(kPointerSize)); 1210 } 1211 1212 1213 void RegExpMacroAssemblerX87::CheckPreemption() { 1214 // Check for preemption. 1215 Label no_preempt; 1216 ExternalReference stack_limit = 1217 ExternalReference::address_of_stack_limit(isolate()); 1218 __ cmp(esp, Operand::StaticVariable(stack_limit)); 1219 __ j(above, &no_preempt); 1220 1221 SafeCall(&check_preempt_label_); 1222 1223 __ bind(&no_preempt); 1224 } 1225 1226 1227 void RegExpMacroAssemblerX87::CheckStackLimit() { 1228 Label no_stack_overflow; 1229 ExternalReference stack_limit = 1230 ExternalReference::address_of_regexp_stack_limit(isolate()); 1231 __ cmp(backtrack_stackpointer(), Operand::StaticVariable(stack_limit)); 1232 __ j(above, &no_stack_overflow); 1233 1234 SafeCall(&stack_overflow_label_); 1235 1236 __ bind(&no_stack_overflow); 1237 } 1238 1239 1240 void RegExpMacroAssemblerX87::LoadCurrentCharacterUnchecked(int cp_offset, 1241 int characters) { 1242 if (mode_ == LATIN1) { 1243 if (characters == 4) { 1244 __ mov(current_character(), Operand(esi, edi, times_1, cp_offset)); 1245 } else if (characters == 2) { 1246 __ movzx_w(current_character(), Operand(esi, edi, times_1, cp_offset)); 1247 } else { 1248 DCHECK(characters == 1); 1249 __ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset)); 1250 } 1251 } else { 1252 DCHECK(mode_ == UC16); 1253 if (characters == 2) { 1254 __ mov(current_character(), 1255 Operand(esi, edi, times_1, cp_offset * sizeof(uc16))); 1256 } else { 1257 DCHECK(characters == 1); 1258 __ movzx_w(current_character(), 1259 Operand(esi, edi, times_1, cp_offset * sizeof(uc16))); 1260 } 1261 } 1262 } 1263 1264 1265 #undef __ 1266 1267 #endif // V8_INTERPRETED_REGEXP 1268 1269 } // namespace internal 1270 } // namespace v8 1271 1272 #endif // V8_TARGET_ARCH_X87 1273