1 // Copyright 2008-2009 the V8 project authors. All rights reserved. 2 // Redistribution and use in source and binary forms, with or without 3 // modification, are permitted provided that the following conditions are 4 // met: 5 // 6 // * Redistributions of source code must retain the above copyright 7 // notice, this list of conditions and the following disclaimer. 8 // * Redistributions in binary form must reproduce the above 9 // copyright notice, this list of conditions and the following 10 // disclaimer in the documentation and/or other materials provided 11 // with the distribution. 12 // * Neither the name of Google Inc. nor the names of its 13 // contributors may be used to endorse or promote products derived 14 // from this software without specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28 #include "v8.h" 29 #include "unicode.h" 30 #include "log.h" 31 #include "ast.h" 32 #include "regexp-stack.h" 33 #include "macro-assembler.h" 34 #include "regexp-macro-assembler.h" 35 #include "ia32/macro-assembler-ia32.h" 36 #include "ia32/regexp-macro-assembler-ia32.h" 37 38 namespace v8 { 39 namespace internal { 40 41 #ifdef V8_NATIVE_REGEXP 42 /* 43 * This assembler uses the following register assignment convention 44 * - edx : current character. Must be loaded using LoadCurrentCharacter 45 * before using any of the dispatch methods. 46 * - edi : current position in input, as negative offset from end of string. 47 * Please notice that this is the byte offset, not the character offset! 48 * - esi : end of input (points to byte after last character in input). 49 * - ebp : frame pointer. Used to access arguments, local variables and 50 * RegExp registers. 51 * - esp : points to tip of C stack. 52 * - ecx : points to tip of backtrack stack 53 * 54 * The registers eax, ebx and ecx are free to use for computations. 55 * 56 * Each call to a public method should retain this convention. 57 * The stack will have the following structure: 58 * - direct_call (if 1, direct call from JavaScript code, if 0 59 * call through the runtime system) 60 * - stack_area_base (High end of the memory area to use as 61 * backtracking stack) 62 * - int* capture_array (int[num_saved_registers_], for output). 63 * - end of input (Address of end of string) 64 * - start of input (Address of first character in string) 65 * - start index (character index of start) 66 * - String* input_string (location of a handle containing the string) 67 * --- frame alignment (if applicable) --- 68 * - return address 69 * ebp-> - old ebp 70 * - backup of caller esi 71 * - backup of caller edi 72 * - backup of caller ebx 73 * - Offset of location before start of input (effectively character 74 * position -1). Used to initialize capture registers to a non-position. 75 * - Boolean at start (if 1, we are starting at the start of the string, 76 * otherwise 0) 77 * - register 0 ebp[-4] (Only positions must be stored in the first 78 * - register 1 ebp[-8] num_saved_registers_ registers) 79 * - ... 80 * 81 * The first num_saved_registers_ registers are initialized to point to 82 * "character -1" in the string (i.e., char_size() bytes before the first 83 * character of the string). The remaining registers starts out as garbage. 84 * 85 * The data up to the return address must be placed there by the calling 86 * code, by calling the code entry as cast to a function with the signature: 87 * int (*match)(String* input_string, 88 * int start_index, 89 * Address start, 90 * Address end, 91 * int* capture_output_array, 92 * bool at_start, 93 * byte* stack_area_base, 94 * bool direct_call) 95 */ 96 97 #define __ ACCESS_MASM(masm_) 98 99 RegExpMacroAssemblerIA32::RegExpMacroAssemblerIA32( 100 Mode mode, 101 int registers_to_save) 102 : masm_(new MacroAssembler(NULL, kRegExpCodeSize)), 103 mode_(mode), 104 num_registers_(registers_to_save), 105 num_saved_registers_(registers_to_save), 106 entry_label_(), 107 start_label_(), 108 success_label_(), 109 backtrack_label_(), 110 exit_label_() { 111 ASSERT_EQ(0, registers_to_save % 2); 112 __ jmp(&entry_label_); // We'll write the entry code later. 113 __ bind(&start_label_); // And then continue from here. 114 } 115 116 117 RegExpMacroAssemblerIA32::~RegExpMacroAssemblerIA32() { 118 delete masm_; 119 // Unuse labels in case we throw away the assembler without calling GetCode. 120 entry_label_.Unuse(); 121 start_label_.Unuse(); 122 success_label_.Unuse(); 123 backtrack_label_.Unuse(); 124 exit_label_.Unuse(); 125 check_preempt_label_.Unuse(); 126 stack_overflow_label_.Unuse(); 127 } 128 129 130 int RegExpMacroAssemblerIA32::stack_limit_slack() { 131 return RegExpStack::kStackLimitSlack; 132 } 133 134 135 void RegExpMacroAssemblerIA32::AdvanceCurrentPosition(int by) { 136 if (by != 0) { 137 Label inside_string; 138 __ add(Operand(edi), Immediate(by * char_size())); 139 } 140 } 141 142 143 void RegExpMacroAssemblerIA32::AdvanceRegister(int reg, int by) { 144 ASSERT(reg >= 0); 145 ASSERT(reg < num_registers_); 146 if (by != 0) { 147 __ add(register_location(reg), Immediate(by)); 148 } 149 } 150 151 152 void RegExpMacroAssemblerIA32::Backtrack() { 153 CheckPreemption(); 154 // Pop Code* offset from backtrack stack, add Code* and jump to location. 155 Pop(ebx); 156 __ add(Operand(ebx), Immediate(masm_->CodeObject())); 157 __ jmp(Operand(ebx)); 158 } 159 160 161 void RegExpMacroAssemblerIA32::Bind(Label* label) { 162 __ bind(label); 163 } 164 165 166 void RegExpMacroAssemblerIA32::CheckCharacter(uint32_t c, Label* on_equal) { 167 __ cmp(current_character(), c); 168 BranchOrBacktrack(equal, on_equal); 169 } 170 171 172 void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) { 173 __ cmp(current_character(), limit); 174 BranchOrBacktrack(greater, on_greater); 175 } 176 177 178 void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) { 179 Label not_at_start; 180 // Did we start the match at the start of the string at all? 181 __ cmp(Operand(ebp, kAtStart), Immediate(0)); 182 BranchOrBacktrack(equal, ¬_at_start); 183 // If we did, are we still at the start of the input? 184 __ lea(eax, Operand(esi, edi, times_1, 0)); 185 __ cmp(eax, Operand(ebp, kInputStart)); 186 BranchOrBacktrack(equal, on_at_start); 187 __ bind(¬_at_start); 188 } 189 190 191 void RegExpMacroAssemblerIA32::CheckNotAtStart(Label* on_not_at_start) { 192 // Did we start the match at the start of the string at all? 193 __ cmp(Operand(ebp, kAtStart), Immediate(0)); 194 BranchOrBacktrack(equal, on_not_at_start); 195 // If we did, are we still at the start of the input? 196 __ lea(eax, Operand(esi, edi, times_1, 0)); 197 __ cmp(eax, Operand(ebp, kInputStart)); 198 BranchOrBacktrack(not_equal, on_not_at_start); 199 } 200 201 202 void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) { 203 __ cmp(current_character(), limit); 204 BranchOrBacktrack(less, on_less); 205 } 206 207 208 void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str, 209 int cp_offset, 210 Label* on_failure, 211 bool check_end_of_string) { 212 int byte_length = str.length() * char_size(); 213 int byte_offset = cp_offset * char_size(); 214 if (check_end_of_string) { 215 // Check that there are at least str.length() characters left in the input. 216 __ cmp(Operand(edi), Immediate(-(byte_offset + byte_length))); 217 BranchOrBacktrack(greater, on_failure); 218 } 219 220 if (on_failure == NULL) { 221 // Instead of inlining a backtrack, (re)use the global backtrack target. 222 on_failure = &backtrack_label_; 223 } 224 225 for (int i = 0; i < str.length(); i++) { 226 if (mode_ == ASCII) { 227 __ cmpb(Operand(esi, edi, times_1, byte_offset + i), 228 static_cast<int8_t>(str[i])); 229 } else { 230 ASSERT(mode_ == UC16); 231 __ cmpw(Operand(esi, edi, times_1, byte_offset + i * sizeof(uc16)), 232 Immediate(str[i])); 233 } 234 BranchOrBacktrack(not_equal, on_failure); 235 } 236 } 237 238 239 void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) { 240 Label fallthrough; 241 __ cmp(edi, Operand(backtrack_stackpointer(), 0)); 242 __ j(not_equal, &fallthrough); 243 __ add(Operand(backtrack_stackpointer()), Immediate(kPointerSize)); // Pop. 244 BranchOrBacktrack(no_condition, on_equal); 245 __ bind(&fallthrough); 246 } 247 248 249 void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase( 250 int start_reg, 251 Label* on_no_match) { 252 Label fallthrough; 253 __ mov(edx, register_location(start_reg)); // Index of start of capture 254 __ mov(ebx, register_location(start_reg + 1)); // Index of end of capture 255 __ sub(ebx, Operand(edx)); // Length of capture. 256 257 // The length of a capture should not be negative. This can only happen 258 // if the end of the capture is unrecorded, or at a point earlier than 259 // the start of the capture. 260 BranchOrBacktrack(less, on_no_match, not_taken); 261 262 // If length is zero, either the capture is empty or it is completely 263 // uncaptured. In either case succeed immediately. 264 __ j(equal, &fallthrough); 265 266 if (mode_ == ASCII) { 267 Label success; 268 Label fail; 269 Label loop_increment; 270 // Save register contents to make the registers available below. 271 __ push(edi); 272 __ push(backtrack_stackpointer()); 273 // After this, the eax, ecx, and edi registers are available. 274 275 __ add(edx, Operand(esi)); // Start of capture 276 __ add(edi, Operand(esi)); // Start of text to match against capture. 277 __ add(ebx, Operand(edi)); // End of text to match against capture. 278 279 Label loop; 280 __ bind(&loop); 281 __ movzx_b(eax, Operand(edi, 0)); 282 __ cmpb_al(Operand(edx, 0)); 283 __ j(equal, &loop_increment); 284 285 // Mismatch, try case-insensitive match (converting letters to lower-case). 286 __ or_(eax, 0x20); // Convert match character to lower-case. 287 __ lea(ecx, Operand(eax, -'a')); 288 __ cmp(ecx, static_cast<int32_t>('z' - 'a')); // Is eax a lowercase letter? 289 __ j(above, &fail); 290 // Also convert capture character. 291 __ movzx_b(ecx, Operand(edx, 0)); 292 __ or_(ecx, 0x20); 293 294 __ cmp(eax, Operand(ecx)); 295 __ j(not_equal, &fail); 296 297 __ bind(&loop_increment); 298 // Increment pointers into match and capture strings. 299 __ add(Operand(edx), Immediate(1)); 300 __ add(Operand(edi), Immediate(1)); 301 // Compare to end of match, and loop if not done. 302 __ cmp(edi, Operand(ebx)); 303 __ j(below, &loop, taken); 304 __ jmp(&success); 305 306 __ bind(&fail); 307 // Restore original values before failing. 308 __ pop(backtrack_stackpointer()); 309 __ pop(edi); 310 BranchOrBacktrack(no_condition, on_no_match); 311 312 __ bind(&success); 313 // Restore original value before continuing. 314 __ pop(backtrack_stackpointer()); 315 // Drop original value of character position. 316 __ add(Operand(esp), Immediate(kPointerSize)); 317 // Compute new value of character position after the matched part. 318 __ sub(edi, Operand(esi)); 319 } else { 320 ASSERT(mode_ == UC16); 321 // Save registers before calling C function. 322 __ push(esi); 323 __ push(edi); 324 __ push(backtrack_stackpointer()); 325 __ push(ebx); 326 327 const int argument_count = 3; 328 FrameAlign(argument_count, ecx); 329 // Put arguments into allocated stack area, last argument highest on stack. 330 // Parameters are 331 // Address byte_offset1 - Address captured substring's start. 332 // Address byte_offset2 - Address of current character position. 333 // size_t byte_length - length of capture in bytes(!) 334 335 // Set byte_length. 336 __ mov(Operand(esp, 2 * kPointerSize), ebx); 337 // Set byte_offset2. 338 // Found by adding negative string-end offset of current position (edi) 339 // to end of string. 340 __ add(edi, Operand(esi)); 341 __ mov(Operand(esp, 1 * kPointerSize), edi); 342 // Set byte_offset1. 343 // Start of capture, where edx already holds string-end negative offset. 344 __ add(edx, Operand(esi)); 345 __ mov(Operand(esp, 0 * kPointerSize), edx); 346 347 ExternalReference compare = 348 ExternalReference::re_case_insensitive_compare_uc16(); 349 CallCFunction(compare, argument_count); 350 // Pop original values before reacting on result value. 351 __ pop(ebx); 352 __ pop(backtrack_stackpointer()); 353 __ pop(edi); 354 __ pop(esi); 355 356 // Check if function returned non-zero for success or zero for failure. 357 __ or_(eax, Operand(eax)); 358 BranchOrBacktrack(zero, on_no_match); 359 // On success, increment position by length of capture. 360 __ add(edi, Operand(ebx)); 361 } 362 __ bind(&fallthrough); 363 } 364 365 366 void RegExpMacroAssemblerIA32::CheckNotBackReference( 367 int start_reg, 368 Label* on_no_match) { 369 Label fallthrough; 370 Label success; 371 Label fail; 372 373 // Find length of back-referenced capture. 374 __ mov(edx, register_location(start_reg)); 375 __ mov(eax, register_location(start_reg + 1)); 376 __ sub(eax, Operand(edx)); // Length to check. 377 // Fail on partial or illegal capture (start of capture after end of capture). 378 BranchOrBacktrack(less, on_no_match); 379 // Succeed on empty capture (including no capture) 380 __ j(equal, &fallthrough); 381 382 // Check that there are sufficient characters left in the input. 383 __ mov(ebx, edi); 384 __ add(ebx, Operand(eax)); 385 BranchOrBacktrack(greater, on_no_match); 386 387 // Save register to make it available below. 388 __ push(backtrack_stackpointer()); 389 390 // Compute pointers to match string and capture string 391 __ lea(ebx, Operand(esi, edi, times_1, 0)); // Start of match. 392 __ add(edx, Operand(esi)); // Start of capture. 393 __ lea(ecx, Operand(eax, ebx, times_1, 0)); // End of match 394 395 Label loop; 396 __ bind(&loop); 397 if (mode_ == ASCII) { 398 __ movzx_b(eax, Operand(edx, 0)); 399 __ cmpb_al(Operand(ebx, 0)); 400 } else { 401 ASSERT(mode_ == UC16); 402 __ movzx_w(eax, Operand(edx, 0)); 403 __ cmpw_ax(Operand(ebx, 0)); 404 } 405 __ j(not_equal, &fail); 406 // Increment pointers into capture and match string. 407 __ add(Operand(edx), Immediate(char_size())); 408 __ add(Operand(ebx), Immediate(char_size())); 409 // Check if we have reached end of match area. 410 __ cmp(ebx, Operand(ecx)); 411 __ j(below, &loop); 412 __ jmp(&success); 413 414 __ bind(&fail); 415 // Restore backtrack stackpointer. 416 __ pop(backtrack_stackpointer()); 417 BranchOrBacktrack(no_condition, on_no_match); 418 419 __ bind(&success); 420 // Move current character position to position after match. 421 __ mov(edi, ecx); 422 __ sub(Operand(edi), esi); 423 // Restore backtrack stackpointer. 424 __ pop(backtrack_stackpointer()); 425 426 __ bind(&fallthrough); 427 } 428 429 430 void RegExpMacroAssemblerIA32::CheckNotRegistersEqual(int reg1, 431 int reg2, 432 Label* on_not_equal) { 433 __ mov(eax, register_location(reg1)); 434 __ cmp(eax, register_location(reg2)); 435 BranchOrBacktrack(not_equal, on_not_equal); 436 } 437 438 439 void RegExpMacroAssemblerIA32::CheckNotCharacter(uint32_t c, 440 Label* on_not_equal) { 441 __ cmp(current_character(), c); 442 BranchOrBacktrack(not_equal, on_not_equal); 443 } 444 445 446 void RegExpMacroAssemblerIA32::CheckCharacterAfterAnd(uint32_t c, 447 uint32_t mask, 448 Label* on_equal) { 449 __ mov(eax, current_character()); 450 __ and_(eax, mask); 451 __ cmp(eax, c); 452 BranchOrBacktrack(equal, on_equal); 453 } 454 455 456 void RegExpMacroAssemblerIA32::CheckNotCharacterAfterAnd(uint32_t c, 457 uint32_t mask, 458 Label* on_not_equal) { 459 __ mov(eax, current_character()); 460 __ and_(eax, mask); 461 __ cmp(eax, c); 462 BranchOrBacktrack(not_equal, on_not_equal); 463 } 464 465 466 void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusAnd( 467 uc16 c, 468 uc16 minus, 469 uc16 mask, 470 Label* on_not_equal) { 471 ASSERT(minus < String::kMaxUC16CharCode); 472 __ lea(eax, Operand(current_character(), -minus)); 473 __ and_(eax, mask); 474 __ cmp(eax, c); 475 BranchOrBacktrack(not_equal, on_not_equal); 476 } 477 478 479 bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type, 480 Label* on_no_match) { 481 // Range checks (c in min..max) are generally implemented by an unsigned 482 // (c - min) <= (max - min) check 483 switch (type) { 484 case 's': 485 // Match space-characters 486 if (mode_ == ASCII) { 487 // ASCII space characters are '\t'..'\r' and ' '. 488 Label success; 489 __ cmp(current_character(), ' '); 490 __ j(equal, &success); 491 // Check range 0x09..0x0d 492 __ lea(eax, Operand(current_character(), -'\t')); 493 __ cmp(eax, '\r' - '\t'); 494 BranchOrBacktrack(above, on_no_match); 495 __ bind(&success); 496 return true; 497 } 498 return false; 499 case 'S': 500 // Match non-space characters. 501 if (mode_ == ASCII) { 502 // ASCII space characters are '\t'..'\r' and ' '. 503 __ cmp(current_character(), ' '); 504 BranchOrBacktrack(equal, on_no_match); 505 __ lea(eax, Operand(current_character(), -'\t')); 506 __ cmp(eax, '\r' - '\t'); 507 BranchOrBacktrack(below_equal, on_no_match); 508 return true; 509 } 510 return false; 511 case 'd': 512 // Match ASCII digits ('0'..'9') 513 __ lea(eax, Operand(current_character(), -'0')); 514 __ cmp(eax, '9' - '0'); 515 BranchOrBacktrack(above, on_no_match); 516 return true; 517 case 'D': 518 // Match non ASCII-digits 519 __ lea(eax, Operand(current_character(), -'0')); 520 __ cmp(eax, '9' - '0'); 521 BranchOrBacktrack(below_equal, on_no_match); 522 return true; 523 case '.': { 524 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 525 __ mov(Operand(eax), current_character()); 526 __ xor_(Operand(eax), Immediate(0x01)); 527 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c 528 __ sub(Operand(eax), Immediate(0x0b)); 529 __ cmp(eax, 0x0c - 0x0b); 530 BranchOrBacktrack(below_equal, on_no_match); 531 if (mode_ == UC16) { 532 // Compare original value to 0x2028 and 0x2029, using the already 533 // computed (current_char ^ 0x01 - 0x0b). I.e., check for 534 // 0x201d (0x2028 - 0x0b) or 0x201e. 535 __ sub(Operand(eax), Immediate(0x2028 - 0x0b)); 536 __ cmp(eax, 0x2029 - 0x2028); 537 BranchOrBacktrack(below_equal, on_no_match); 538 } 539 return true; 540 } 541 case 'w': { 542 if (mode_ != ASCII) { 543 // Table is 128 entries, so all ASCII characters can be tested. 544 __ cmp(Operand(current_character()), Immediate('z')); 545 BranchOrBacktrack(above, on_no_match); 546 } 547 ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char. 548 ExternalReference word_map = ExternalReference::re_word_character_map(); 549 __ test_b(current_character(), 550 Operand::StaticArray(current_character(), times_1, word_map)); 551 BranchOrBacktrack(zero, on_no_match); 552 return true; 553 } 554 case 'W': { 555 Label done; 556 if (mode_ != ASCII) { 557 // Table is 128 entries, so all ASCII characters can be tested. 558 __ cmp(Operand(current_character()), Immediate('z')); 559 __ j(above, &done); 560 } 561 ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char. 562 ExternalReference word_map = ExternalReference::re_word_character_map(); 563 __ test_b(current_character(), 564 Operand::StaticArray(current_character(), times_1, word_map)); 565 BranchOrBacktrack(not_zero, on_no_match); 566 if (mode_ != ASCII) { 567 __ bind(&done); 568 } 569 return true; 570 } 571 // Non-standard classes (with no syntactic shorthand) used internally. 572 case '*': 573 // Match any character. 574 return true; 575 case 'n': { 576 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029). 577 // The opposite of '.'. 578 __ mov(Operand(eax), current_character()); 579 __ xor_(Operand(eax), Immediate(0x01)); 580 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c 581 __ sub(Operand(eax), Immediate(0x0b)); 582 __ cmp(eax, 0x0c - 0x0b); 583 if (mode_ == ASCII) { 584 BranchOrBacktrack(above, on_no_match); 585 } else { 586 Label done; 587 BranchOrBacktrack(below_equal, &done); 588 ASSERT_EQ(UC16, mode_); 589 // Compare original value to 0x2028 and 0x2029, using the already 590 // computed (current_char ^ 0x01 - 0x0b). I.e., check for 591 // 0x201d (0x2028 - 0x0b) or 0x201e. 592 __ sub(Operand(eax), Immediate(0x2028 - 0x0b)); 593 __ cmp(eax, 1); 594 BranchOrBacktrack(above, on_no_match); 595 __ bind(&done); 596 } 597 return true; 598 } 599 // No custom implementation (yet): s(UC16), S(UC16). 600 default: 601 return false; 602 } 603 } 604 605 606 void RegExpMacroAssemblerIA32::Fail() { 607 ASSERT(FAILURE == 0); // Return value for failure is zero. 608 __ xor_(eax, Operand(eax)); // zero eax. 609 __ jmp(&exit_label_); 610 } 611 612 613 Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) { 614 // Finalize code - write the entry point code now we know how many 615 // registers we need. 616 617 // Entry code: 618 __ bind(&entry_label_); 619 // Start new stack frame. 620 __ push(ebp); 621 __ mov(ebp, esp); 622 // Save callee-save registers. Order here should correspond to order of 623 // kBackup_ebx etc. 624 __ push(esi); 625 __ push(edi); 626 __ push(ebx); // Callee-save on MacOS. 627 __ push(Immediate(0)); // Make room for "input start - 1" constant. 628 __ push(Immediate(0)); // Make room for "at start" constant. 629 630 // Check if we have space on the stack for registers. 631 Label stack_limit_hit; 632 Label stack_ok; 633 634 ExternalReference stack_limit = 635 ExternalReference::address_of_stack_limit(); 636 __ mov(ecx, esp); 637 __ sub(ecx, Operand::StaticVariable(stack_limit)); 638 // Handle it if the stack pointer is already below the stack limit. 639 __ j(below_equal, &stack_limit_hit, not_taken); 640 // Check if there is room for the variable number of registers above 641 // the stack limit. 642 __ cmp(ecx, num_registers_ * kPointerSize); 643 __ j(above_equal, &stack_ok, taken); 644 // Exit with OutOfMemory exception. There is not enough space on the stack 645 // for our working registers. 646 __ mov(eax, EXCEPTION); 647 __ jmp(&exit_label_); 648 649 __ bind(&stack_limit_hit); 650 CallCheckStackGuardState(ebx); 651 __ or_(eax, Operand(eax)); 652 // If returned value is non-zero, we exit with the returned value as result. 653 __ j(not_zero, &exit_label_); 654 655 __ bind(&stack_ok); 656 657 // Allocate space on stack for registers. 658 __ sub(Operand(esp), Immediate(num_registers_ * kPointerSize)); 659 // Load string length. 660 __ mov(esi, Operand(ebp, kInputEnd)); 661 // Load input position. 662 __ mov(edi, Operand(ebp, kInputStart)); 663 // Set up edi to be negative offset from string end. 664 __ sub(edi, Operand(esi)); 665 // Set eax to address of char before start of input 666 // (effectively string position -1). 667 __ lea(eax, Operand(edi, -char_size())); 668 // Store this value in a local variable, for use when clearing 669 // position registers. 670 __ mov(Operand(ebp, kInputStartMinusOne), eax); 671 672 // Determine whether the start index is zero, that is at the start of the 673 // string, and store that value in a local variable. 674 __ mov(ebx, Operand(ebp, kStartIndex)); 675 __ xor_(Operand(ecx), ecx); // setcc only operates on cl (lower byte of ecx). 676 __ test(ebx, Operand(ebx)); 677 __ setcc(zero, ecx); // 1 if 0 (start of string), 0 if positive. 678 __ mov(Operand(ebp, kAtStart), ecx); 679 680 if (num_saved_registers_ > 0) { // Always is, if generated from a regexp. 681 // Fill saved registers with initial value = start offset - 1 682 // Fill in stack push order, to avoid accessing across an unwritten 683 // page (a problem on Windows). 684 __ mov(ecx, kRegisterZero); 685 Label init_loop; 686 __ bind(&init_loop); 687 __ mov(Operand(ebp, ecx, times_1, +0), eax); 688 __ sub(Operand(ecx), Immediate(kPointerSize)); 689 __ cmp(ecx, kRegisterZero - num_saved_registers_ * kPointerSize); 690 __ j(greater, &init_loop); 691 } 692 // Ensure that we have written to each stack page, in order. Skipping a page 693 // on Windows can cause segmentation faults. Assuming page size is 4k. 694 const int kPageSize = 4096; 695 const int kRegistersPerPage = kPageSize / kPointerSize; 696 for (int i = num_saved_registers_ + kRegistersPerPage - 1; 697 i < num_registers_; 698 i += kRegistersPerPage) { 699 __ mov(register_location(i), eax); // One write every page. 700 } 701 702 703 // Initialize backtrack stack pointer. 704 __ mov(backtrack_stackpointer(), Operand(ebp, kStackHighEnd)); 705 // Load previous char as initial value of current-character. 706 Label at_start; 707 __ cmp(Operand(ebp, kAtStart), Immediate(0)); 708 __ j(not_equal, &at_start); 709 LoadCurrentCharacterUnchecked(-1, 1); // Load previous char. 710 __ jmp(&start_label_); 711 __ bind(&at_start); 712 __ mov(current_character(), '\n'); 713 __ jmp(&start_label_); 714 715 716 // Exit code: 717 if (success_label_.is_linked()) { 718 // Save captures when successful. 719 __ bind(&success_label_); 720 if (num_saved_registers_ > 0) { 721 // copy captures to output 722 __ mov(ebx, Operand(ebp, kRegisterOutput)); 723 __ mov(ecx, Operand(ebp, kInputEnd)); 724 __ sub(ecx, Operand(ebp, kInputStart)); 725 for (int i = 0; i < num_saved_registers_; i++) { 726 __ mov(eax, register_location(i)); 727 __ add(eax, Operand(ecx)); // Convert to index from start, not end. 728 if (mode_ == UC16) { 729 __ sar(eax, 1); // Convert byte index to character index. 730 } 731 __ mov(Operand(ebx, i * kPointerSize), eax); 732 } 733 } 734 __ mov(eax, Immediate(SUCCESS)); 735 } 736 // Exit and return eax 737 __ bind(&exit_label_); 738 // Skip esp past regexp registers. 739 __ lea(esp, Operand(ebp, kBackup_ebx)); 740 // Restore callee-save registers. 741 __ pop(ebx); 742 __ pop(edi); 743 __ pop(esi); 744 // Exit function frame, restore previous one. 745 __ pop(ebp); 746 __ ret(0); 747 748 // Backtrack code (branch target for conditional backtracks). 749 if (backtrack_label_.is_linked()) { 750 __ bind(&backtrack_label_); 751 Backtrack(); 752 } 753 754 Label exit_with_exception; 755 756 // Preempt-code 757 if (check_preempt_label_.is_linked()) { 758 SafeCallTarget(&check_preempt_label_); 759 760 __ push(backtrack_stackpointer()); 761 __ push(edi); 762 763 CallCheckStackGuardState(ebx); 764 __ or_(eax, Operand(eax)); 765 // If returning non-zero, we should end execution with the given 766 // result as return value. 767 __ j(not_zero, &exit_label_); 768 769 __ pop(edi); 770 __ pop(backtrack_stackpointer()); 771 // String might have moved: Reload esi from frame. 772 __ mov(esi, Operand(ebp, kInputEnd)); 773 SafeReturn(); 774 } 775 776 // Backtrack stack overflow code. 777 if (stack_overflow_label_.is_linked()) { 778 SafeCallTarget(&stack_overflow_label_); 779 // Reached if the backtrack-stack limit has been hit. 780 781 Label grow_failed; 782 // Save registers before calling C function 783 __ push(esi); 784 __ push(edi); 785 786 // Call GrowStack(backtrack_stackpointer()) 787 int num_arguments = 2; 788 FrameAlign(num_arguments, ebx); 789 __ lea(eax, Operand(ebp, kStackHighEnd)); 790 __ mov(Operand(esp, 1 * kPointerSize), eax); 791 __ mov(Operand(esp, 0 * kPointerSize), backtrack_stackpointer()); 792 ExternalReference grow_stack = ExternalReference::re_grow_stack(); 793 CallCFunction(grow_stack, num_arguments); 794 // If return NULL, we have failed to grow the stack, and 795 // must exit with a stack-overflow exception. 796 __ or_(eax, Operand(eax)); 797 __ j(equal, &exit_with_exception); 798 // Otherwise use return value as new stack pointer. 799 __ mov(backtrack_stackpointer(), eax); 800 // Restore saved registers and continue. 801 __ pop(edi); 802 __ pop(esi); 803 SafeReturn(); 804 } 805 806 if (exit_with_exception.is_linked()) { 807 // If any of the code above needed to exit with an exception. 808 __ bind(&exit_with_exception); 809 // Exit with Result EXCEPTION(-1) to signal thrown exception. 810 __ mov(eax, EXCEPTION); 811 __ jmp(&exit_label_); 812 } 813 814 CodeDesc code_desc; 815 masm_->GetCode(&code_desc); 816 Handle<Code> code = Factory::NewCode(code_desc, 817 NULL, 818 Code::ComputeFlags(Code::REGEXP), 819 masm_->CodeObject()); 820 LOG(RegExpCodeCreateEvent(*code, *source)); 821 return Handle<Object>::cast(code); 822 } 823 824 825 void RegExpMacroAssemblerIA32::GoTo(Label* to) { 826 BranchOrBacktrack(no_condition, to); 827 } 828 829 830 void RegExpMacroAssemblerIA32::IfRegisterGE(int reg, 831 int comparand, 832 Label* if_ge) { 833 __ cmp(register_location(reg), Immediate(comparand)); 834 BranchOrBacktrack(greater_equal, if_ge); 835 } 836 837 838 void RegExpMacroAssemblerIA32::IfRegisterLT(int reg, 839 int comparand, 840 Label* if_lt) { 841 __ cmp(register_location(reg), Immediate(comparand)); 842 BranchOrBacktrack(less, if_lt); 843 } 844 845 846 void RegExpMacroAssemblerIA32::IfRegisterEqPos(int reg, 847 Label* if_eq) { 848 __ cmp(edi, register_location(reg)); 849 BranchOrBacktrack(equal, if_eq); 850 } 851 852 853 RegExpMacroAssembler::IrregexpImplementation 854 RegExpMacroAssemblerIA32::Implementation() { 855 return kIA32Implementation; 856 } 857 858 859 void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset, 860 Label* on_end_of_input, 861 bool check_bounds, 862 int characters) { 863 ASSERT(cp_offset >= -1); // ^ and \b can look behind one character. 864 ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works) 865 if (check_bounds) { 866 CheckPosition(cp_offset + characters - 1, on_end_of_input); 867 } 868 LoadCurrentCharacterUnchecked(cp_offset, characters); 869 } 870 871 872 void RegExpMacroAssemblerIA32::PopCurrentPosition() { 873 Pop(edi); 874 } 875 876 877 void RegExpMacroAssemblerIA32::PopRegister(int register_index) { 878 Pop(eax); 879 __ mov(register_location(register_index), eax); 880 } 881 882 883 void RegExpMacroAssemblerIA32::PushBacktrack(Label* label) { 884 Push(Immediate::CodeRelativeOffset(label)); 885 CheckStackLimit(); 886 } 887 888 889 void RegExpMacroAssemblerIA32::PushCurrentPosition() { 890 Push(edi); 891 } 892 893 894 void RegExpMacroAssemblerIA32::PushRegister(int register_index, 895 StackCheckFlag check_stack_limit) { 896 __ mov(eax, register_location(register_index)); 897 Push(eax); 898 if (check_stack_limit) CheckStackLimit(); 899 } 900 901 902 void RegExpMacroAssemblerIA32::ReadCurrentPositionFromRegister(int reg) { 903 __ mov(edi, register_location(reg)); 904 } 905 906 907 void RegExpMacroAssemblerIA32::ReadStackPointerFromRegister(int reg) { 908 __ mov(backtrack_stackpointer(), register_location(reg)); 909 __ add(backtrack_stackpointer(), Operand(ebp, kStackHighEnd)); 910 } 911 912 913 void RegExpMacroAssemblerIA32::SetRegister(int register_index, int to) { 914 ASSERT(register_index >= num_saved_registers_); // Reserved for positions! 915 __ mov(register_location(register_index), Immediate(to)); 916 } 917 918 919 void RegExpMacroAssemblerIA32::Succeed() { 920 __ jmp(&success_label_); 921 } 922 923 924 void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg, 925 int cp_offset) { 926 if (cp_offset == 0) { 927 __ mov(register_location(reg), edi); 928 } else { 929 __ lea(eax, Operand(edi, cp_offset * char_size())); 930 __ mov(register_location(reg), eax); 931 } 932 } 933 934 935 void RegExpMacroAssemblerIA32::ClearRegisters(int reg_from, int reg_to) { 936 ASSERT(reg_from <= reg_to); 937 __ mov(eax, Operand(ebp, kInputStartMinusOne)); 938 for (int reg = reg_from; reg <= reg_to; reg++) { 939 __ mov(register_location(reg), eax); 940 } 941 } 942 943 944 void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) { 945 __ mov(eax, backtrack_stackpointer()); 946 __ sub(eax, Operand(ebp, kStackHighEnd)); 947 __ mov(register_location(reg), eax); 948 } 949 950 951 // Private methods: 952 953 void RegExpMacroAssemblerIA32::CallCheckStackGuardState(Register scratch) { 954 int num_arguments = 3; 955 FrameAlign(num_arguments, scratch); 956 // RegExp code frame pointer. 957 __ mov(Operand(esp, 2 * kPointerSize), ebp); 958 // Code* of self. 959 __ mov(Operand(esp, 1 * kPointerSize), Immediate(masm_->CodeObject())); 960 // Next address on the stack (will be address of return address). 961 __ lea(eax, Operand(esp, -kPointerSize)); 962 __ mov(Operand(esp, 0 * kPointerSize), eax); 963 ExternalReference check_stack_guard = 964 ExternalReference::re_check_stack_guard_state(); 965 CallCFunction(check_stack_guard, num_arguments); 966 } 967 968 969 // Helper function for reading a value out of a stack frame. 970 template <typename T> 971 static T& frame_entry(Address re_frame, int frame_offset) { 972 return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset)); 973 } 974 975 976 int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address, 977 Code* re_code, 978 Address re_frame) { 979 if (StackGuard::IsStackOverflow()) { 980 Top::StackOverflow(); 981 return EXCEPTION; 982 } 983 984 // If not real stack overflow the stack guard was used to interrupt 985 // execution for another purpose. 986 987 // If this is a direct call from JavaScript retry the RegExp forcing the call 988 // through the runtime system. Currently the direct call cannot handle a GC. 989 if (frame_entry<int>(re_frame, kDirectCall) == 1) { 990 return RETRY; 991 } 992 993 // Prepare for possible GC. 994 HandleScope handles; 995 Handle<Code> code_handle(re_code); 996 997 Handle<String> subject(frame_entry<String*>(re_frame, kInputString)); 998 // Current string. 999 bool is_ascii = subject->IsAsciiRepresentation(); 1000 1001 ASSERT(re_code->instruction_start() <= *return_address); 1002 ASSERT(*return_address <= 1003 re_code->instruction_start() + re_code->instruction_size()); 1004 1005 Object* result = Execution::HandleStackGuardInterrupt(); 1006 1007 if (*code_handle != re_code) { // Return address no longer valid 1008 int delta = *code_handle - re_code; 1009 // Overwrite the return address on the stack. 1010 *return_address += delta; 1011 } 1012 1013 if (result->IsException()) { 1014 return EXCEPTION; 1015 } 1016 1017 // String might have changed. 1018 if (subject->IsAsciiRepresentation() != is_ascii) { 1019 // If we changed between an ASCII and an UC16 string, the specialized 1020 // code cannot be used, and we need to restart regexp matching from 1021 // scratch (including, potentially, compiling a new version of the code). 1022 return RETRY; 1023 } 1024 1025 // Otherwise, the content of the string might have moved. It must still 1026 // be a sequential or external string with the same content. 1027 // Update the start and end pointers in the stack frame to the current 1028 // location (whether it has actually moved or not). 1029 ASSERT(StringShape(*subject).IsSequential() || 1030 StringShape(*subject).IsExternal()); 1031 1032 // The original start address of the characters to match. 1033 const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart); 1034 1035 // Find the current start address of the same character at the current string 1036 // position. 1037 int start_index = frame_entry<int>(re_frame, kStartIndex); 1038 const byte* new_address = StringCharacterPosition(*subject, start_index); 1039 1040 if (start_address != new_address) { 1041 // If there is a difference, update the object pointer and start and end 1042 // addresses in the RegExp stack frame to match the new value. 1043 const byte* end_address = frame_entry<const byte* >(re_frame, kInputEnd); 1044 int byte_length = end_address - start_address; 1045 frame_entry<const String*>(re_frame, kInputString) = *subject; 1046 frame_entry<const byte*>(re_frame, kInputStart) = new_address; 1047 frame_entry<const byte*>(re_frame, kInputEnd) = new_address + byte_length; 1048 } 1049 1050 return 0; 1051 } 1052 1053 1054 Operand RegExpMacroAssemblerIA32::register_location(int register_index) { 1055 ASSERT(register_index < (1<<30)); 1056 if (num_registers_ <= register_index) { 1057 num_registers_ = register_index + 1; 1058 } 1059 return Operand(ebp, kRegisterZero - register_index * kPointerSize); 1060 } 1061 1062 1063 void RegExpMacroAssemblerIA32::CheckPosition(int cp_offset, 1064 Label* on_outside_input) { 1065 __ cmp(edi, -cp_offset * char_size()); 1066 BranchOrBacktrack(greater_equal, on_outside_input); 1067 } 1068 1069 1070 void RegExpMacroAssemblerIA32::BranchOrBacktrack(Condition condition, 1071 Label* to, 1072 Hint hint) { 1073 if (condition < 0) { // No condition 1074 if (to == NULL) { 1075 Backtrack(); 1076 return; 1077 } 1078 __ jmp(to); 1079 return; 1080 } 1081 if (to == NULL) { 1082 __ j(condition, &backtrack_label_, hint); 1083 return; 1084 } 1085 __ j(condition, to, hint); 1086 } 1087 1088 1089 void RegExpMacroAssemblerIA32::SafeCall(Label* to) { 1090 __ call(to); 1091 } 1092 1093 1094 void RegExpMacroAssemblerIA32::SafeReturn() { 1095 __ add(Operand(esp, 0), Immediate(masm_->CodeObject())); 1096 __ ret(0); 1097 } 1098 1099 1100 void RegExpMacroAssemblerIA32::SafeCallTarget(Label* name) { 1101 __ bind(name); 1102 __ sub(Operand(esp, 0), Immediate(masm_->CodeObject())); 1103 } 1104 1105 1106 void RegExpMacroAssemblerIA32::Push(Register source) { 1107 ASSERT(!source.is(backtrack_stackpointer())); 1108 // Notice: This updates flags, unlike normal Push. 1109 __ sub(Operand(backtrack_stackpointer()), Immediate(kPointerSize)); 1110 __ mov(Operand(backtrack_stackpointer(), 0), source); 1111 } 1112 1113 1114 void RegExpMacroAssemblerIA32::Push(Immediate value) { 1115 // Notice: This updates flags, unlike normal Push. 1116 __ sub(Operand(backtrack_stackpointer()), Immediate(kPointerSize)); 1117 __ mov(Operand(backtrack_stackpointer(), 0), value); 1118 } 1119 1120 1121 void RegExpMacroAssemblerIA32::Pop(Register target) { 1122 ASSERT(!target.is(backtrack_stackpointer())); 1123 __ mov(target, Operand(backtrack_stackpointer(), 0)); 1124 // Notice: This updates flags, unlike normal Pop. 1125 __ add(Operand(backtrack_stackpointer()), Immediate(kPointerSize)); 1126 } 1127 1128 1129 void RegExpMacroAssemblerIA32::CheckPreemption() { 1130 // Check for preemption. 1131 Label no_preempt; 1132 ExternalReference stack_limit = 1133 ExternalReference::address_of_stack_limit(); 1134 __ cmp(esp, Operand::StaticVariable(stack_limit)); 1135 __ j(above, &no_preempt, taken); 1136 1137 SafeCall(&check_preempt_label_); 1138 1139 __ bind(&no_preempt); 1140 } 1141 1142 1143 void RegExpMacroAssemblerIA32::CheckStackLimit() { 1144 Label no_stack_overflow; 1145 ExternalReference stack_limit = 1146 ExternalReference::address_of_regexp_stack_limit(); 1147 __ cmp(backtrack_stackpointer(), Operand::StaticVariable(stack_limit)); 1148 __ j(above, &no_stack_overflow); 1149 1150 SafeCall(&stack_overflow_label_); 1151 1152 __ bind(&no_stack_overflow); 1153 } 1154 1155 1156 void RegExpMacroAssemblerIA32::FrameAlign(int num_arguments, Register scratch) { 1157 // TODO(lrn): Since we no longer use the system stack arbitrarily (but we do 1158 // use it, e.g., for SafeCall), we know the number of elements on the stack 1159 // since the last frame alignment. We might be able to do this simpler then. 1160 int frameAlignment = OS::ActivationFrameAlignment(); 1161 if (frameAlignment != 0) { 1162 // Make stack end at alignment and make room for num_arguments words 1163 // and the original value of esp. 1164 __ mov(scratch, esp); 1165 __ sub(Operand(esp), Immediate((num_arguments + 1) * kPointerSize)); 1166 ASSERT(IsPowerOf2(frameAlignment)); 1167 __ and_(esp, -frameAlignment); 1168 __ mov(Operand(esp, num_arguments * kPointerSize), scratch); 1169 } else { 1170 __ sub(Operand(esp), Immediate(num_arguments * kPointerSize)); 1171 } 1172 } 1173 1174 1175 void RegExpMacroAssemblerIA32::CallCFunction(ExternalReference function, 1176 int num_arguments) { 1177 __ mov(Operand(eax), Immediate(function)); 1178 __ call(Operand(eax)); 1179 if (OS::ActivationFrameAlignment() != 0) { 1180 __ mov(esp, Operand(esp, num_arguments * kPointerSize)); 1181 } else { 1182 __ add(Operand(esp), Immediate(num_arguments * sizeof(int32_t))); 1183 } 1184 } 1185 1186 1187 void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset, 1188 int characters) { 1189 if (mode_ == ASCII) { 1190 if (characters == 4) { 1191 __ mov(current_character(), Operand(esi, edi, times_1, cp_offset)); 1192 } else if (characters == 2) { 1193 __ movzx_w(current_character(), Operand(esi, edi, times_1, cp_offset)); 1194 } else { 1195 ASSERT(characters == 1); 1196 __ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset)); 1197 } 1198 } else { 1199 ASSERT(mode_ == UC16); 1200 if (characters == 2) { 1201 __ mov(current_character(), 1202 Operand(esi, edi, times_1, cp_offset * sizeof(uc16))); 1203 } else { 1204 ASSERT(characters == 1); 1205 __ movzx_w(current_character(), 1206 Operand(esi, edi, times_1, cp_offset * sizeof(uc16))); 1207 } 1208 } 1209 } 1210 1211 1212 #undef __ 1213 1214 #endif // V8_NATIVE_REGEXP 1215 1216 }} // namespace v8::internal 1217