1 // Copyright 2012 the V8 project authors. All rights reserved. 2 // Redistribution and use in source and binary forms, with or without 3 // modification, are permitted provided that the following conditions are 4 // met: 5 // 6 // * Redistributions of source code must retain the above copyright 7 // notice, this list of conditions and the following disclaimer. 8 // * Redistributions in binary form must reproduce the above 9 // copyright notice, this list of conditions and the following 10 // disclaimer in the documentation and/or other materials provided 11 // with the distribution. 12 // * Neither the name of Google Inc. nor the names of its 13 // contributors may be used to endorse or promote products derived 14 // from this software without specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 28 #include "v8.h" 29 30 #if V8_TARGET_ARCH_X64 31 32 #include "cpu-profiler.h" 33 #include "serialize.h" 34 #include "unicode.h" 35 #include "log.h" 36 #include "regexp-stack.h" 37 #include "macro-assembler.h" 38 #include "regexp-macro-assembler.h" 39 #include "x64/regexp-macro-assembler-x64.h" 40 41 namespace v8 { 42 namespace internal { 43 44 #ifndef V8_INTERPRETED_REGEXP 45 46 /* 47 * This assembler uses the following register assignment convention 48 * - rdx : Currently loaded character(s) as ASCII or UC16. Must be loaded 49 * using LoadCurrentCharacter before using any of the dispatch methods. 50 * Temporarily stores the index of capture start after a matching pass 51 * for a global regexp. 52 * - rdi : Current position in input, as negative offset from end of string. 53 * Please notice that this is the byte offset, not the character 54 * offset! Is always a 32-bit signed (negative) offset, but must be 55 * maintained sign-extended to 64 bits, since it is used as index. 56 * - rsi : End of input (points to byte after last character in input), 57 * so that rsi+rdi points to the current character. 58 * - rbp : Frame pointer. Used to access arguments, local variables and 59 * RegExp registers. 60 * - rsp : Points to tip of C stack. 61 * - rcx : Points to tip of backtrack stack. The backtrack stack contains 62 * only 32-bit values. Most are offsets from some base (e.g., character 63 * positions from end of string or code location from Code* pointer). 64 * - r8 : Code object pointer. Used to convert between absolute and 65 * code-object-relative addresses. 66 * 67 * The registers rax, rbx, r9 and r11 are free to use for computations. 68 * If changed to use r12+, they should be saved as callee-save registers. 
69 * The macro assembler special registers r12 and r13 (kSmiConstantRegister, 70 * kRootRegister) aren't special during execution of RegExp code (they don't 71 * hold the values assumed when creating JS code), so no Smi or Root related 72 * macro operations can be used. 73 * 74 * Each call to a C++ method should retain these registers. 75 * 76 * The stack will have the following content, in some order, indexable from the 77 * frame pointer (see, e.g., kStackHighEnd): 78 * - Isolate* isolate (address of the current isolate) 79 * - direct_call (if 1, direct call from JavaScript code, if 0 call 80 * through the runtime system) 81 * - stack_area_base (high end of the memory area to use as 82 * backtracking stack) 83 * - capture array size (may fit multiple sets of matches) 84 * - int* capture_array (int[num_saved_registers_], for output). 85 * - end of input (address of end of string) 86 * - start of input (address of first character in string) 87 * - start index (character index of start) 88 * - String* input_string (input string) 89 * - return address 90 * - backup of callee save registers (rbx, possibly rsi and rdi). 91 * - success counter (only useful for global regexp to count matches) 92 * - Offset of location before start of input (effectively character 93 * position -1). Used to initialize capture registers to a non-position. 94 * - At start of string (if 1, we are starting at the start of the 95 * string, otherwise 0) 96 * - register 0 rbp[-n] (Only positions must be stored in the first 97 * - register 1 rbp[-n-8] num_saved_registers_ registers) 98 * - ... 99 * 100 * The first num_saved_registers_ registers are initialized to point to 101 * "character -1" in the string (i.e., char_size() bytes before the first 102 * character of the string). The remaining registers starts out uninitialized. 
103 * 104 * The first seven values must be provided by the calling code by 105 * calling the code's entry address cast to a function pointer with the 106 * following signature: 107 * int (*match)(String* input_string, 108 * int start_index, 109 * Address start, 110 * Address end, 111 * int* capture_output_array, 112 * bool at_start, 113 * byte* stack_area_base, 114 * bool direct_call) 115 */ 116 117 #define __ ACCESS_MASM((&masm_)) 118 119 RegExpMacroAssemblerX64::RegExpMacroAssemblerX64( 120 Mode mode, 121 int registers_to_save, 122 Zone* zone) 123 : NativeRegExpMacroAssembler(zone), 124 masm_(zone->isolate(), NULL, kRegExpCodeSize), 125 no_root_array_scope_(&masm_), 126 code_relative_fixup_positions_(4, zone), 127 mode_(mode), 128 num_registers_(registers_to_save), 129 num_saved_registers_(registers_to_save), 130 entry_label_(), 131 start_label_(), 132 success_label_(), 133 backtrack_label_(), 134 exit_label_() { 135 ASSERT_EQ(0, registers_to_save % 2); 136 __ jmp(&entry_label_); // We'll write the entry code when we know more. 137 __ bind(&start_label_); // And then continue from here. 138 } 139 140 141 RegExpMacroAssemblerX64::~RegExpMacroAssemblerX64() { 142 // Unuse labels in case we throw away the assembler without calling GetCode. 
143 entry_label_.Unuse(); 144 start_label_.Unuse(); 145 success_label_.Unuse(); 146 backtrack_label_.Unuse(); 147 exit_label_.Unuse(); 148 check_preempt_label_.Unuse(); 149 stack_overflow_label_.Unuse(); 150 } 151 152 153 int RegExpMacroAssemblerX64::stack_limit_slack() { 154 return RegExpStack::kStackLimitSlack; 155 } 156 157 158 void RegExpMacroAssemblerX64::AdvanceCurrentPosition(int by) { 159 if (by != 0) { 160 __ addq(rdi, Immediate(by * char_size())); 161 } 162 } 163 164 165 void RegExpMacroAssemblerX64::AdvanceRegister(int reg, int by) { 166 ASSERT(reg >= 0); 167 ASSERT(reg < num_registers_); 168 if (by != 0) { 169 __ addq(register_location(reg), Immediate(by)); 170 } 171 } 172 173 174 void RegExpMacroAssemblerX64::Backtrack() { 175 CheckPreemption(); 176 // Pop Code* offset from backtrack stack, add Code* and jump to location. 177 Pop(rbx); 178 __ addq(rbx, code_object_pointer()); 179 __ jmp(rbx); 180 } 181 182 183 void RegExpMacroAssemblerX64::Bind(Label* label) { 184 __ bind(label); 185 } 186 187 188 void RegExpMacroAssemblerX64::CheckCharacter(uint32_t c, Label* on_equal) { 189 __ cmpl(current_character(), Immediate(c)); 190 BranchOrBacktrack(equal, on_equal); 191 } 192 193 194 void RegExpMacroAssemblerX64::CheckCharacterGT(uc16 limit, Label* on_greater) { 195 __ cmpl(current_character(), Immediate(limit)); 196 BranchOrBacktrack(greater, on_greater); 197 } 198 199 200 void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) { 201 Label not_at_start; 202 // Did we start the match at the start of the string at all? 203 __ cmpl(Operand(rbp, kStartIndex), Immediate(0)); 204 BranchOrBacktrack(not_equal, ¬_at_start); 205 // If we did, are we still at the start of the input? 
206 __ lea(rax, Operand(rsi, rdi, times_1, 0)); 207 __ cmpq(rax, Operand(rbp, kInputStart)); 208 BranchOrBacktrack(equal, on_at_start); 209 __ bind(¬_at_start); 210 } 211 212 213 void RegExpMacroAssemblerX64::CheckNotAtStart(Label* on_not_at_start) { 214 // Did we start the match at the start of the string at all? 215 __ cmpl(Operand(rbp, kStartIndex), Immediate(0)); 216 BranchOrBacktrack(not_equal, on_not_at_start); 217 // If we did, are we still at the start of the input? 218 __ lea(rax, Operand(rsi, rdi, times_1, 0)); 219 __ cmpq(rax, Operand(rbp, kInputStart)); 220 BranchOrBacktrack(not_equal, on_not_at_start); 221 } 222 223 224 void RegExpMacroAssemblerX64::CheckCharacterLT(uc16 limit, Label* on_less) { 225 __ cmpl(current_character(), Immediate(limit)); 226 BranchOrBacktrack(less, on_less); 227 } 228 229 230 void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) { 231 Label fallthrough; 232 __ cmpl(rdi, Operand(backtrack_stackpointer(), 0)); 233 __ j(not_equal, &fallthrough); 234 Drop(); 235 BranchOrBacktrack(no_condition, on_equal); 236 __ bind(&fallthrough); 237 } 238 239 240 void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase( 241 int start_reg, 242 Label* on_no_match) { 243 Label fallthrough; 244 __ movq(rdx, register_location(start_reg)); // Offset of start of capture 245 __ movq(rbx, register_location(start_reg + 1)); // Offset of end of capture 246 __ subq(rbx, rdx); // Length of capture. 247 248 // ----------------------- 249 // rdx = Start offset of capture. 250 // rbx = Length of capture 251 252 // If length is negative, this code will fail (it's a symptom of a partial or 253 // illegal capture where start of capture after end of capture). 254 // This must not happen (no back-reference can reference a capture that wasn't 255 // closed before in the reg-exp, and we must not generate code that can cause 256 // this condition). 257 258 // If length is zero, either the capture is empty or it is nonparticipating. 
259 // In either case succeed immediately. 260 __ j(equal, &fallthrough); 261 262 // ----------------------- 263 // rdx - Start of capture 264 // rbx - length of capture 265 // Check that there are sufficient characters left in the input. 266 __ movl(rax, rdi); 267 __ addl(rax, rbx); 268 BranchOrBacktrack(greater, on_no_match); 269 270 if (mode_ == ASCII) { 271 Label loop_increment; 272 if (on_no_match == NULL) { 273 on_no_match = &backtrack_label_; 274 } 275 276 __ lea(r9, Operand(rsi, rdx, times_1, 0)); 277 __ lea(r11, Operand(rsi, rdi, times_1, 0)); 278 __ addq(rbx, r9); // End of capture 279 // --------------------- 280 // r11 - current input character address 281 // r9 - current capture character address 282 // rbx - end of capture 283 284 Label loop; 285 __ bind(&loop); 286 __ movzxbl(rdx, Operand(r9, 0)); 287 __ movzxbl(rax, Operand(r11, 0)); 288 // al - input character 289 // dl - capture character 290 __ cmpb(rax, rdx); 291 __ j(equal, &loop_increment); 292 293 // Mismatch, try case-insensitive match (converting letters to lower-case). 294 // I.e., if or-ing with 0x20 makes values equal and in range 'a'-'z', it's 295 // a match. 296 __ or_(rax, Immediate(0x20)); // Convert match character to lower-case. 297 __ or_(rdx, Immediate(0x20)); // Convert capture character to lower-case. 298 __ cmpb(rax, rdx); 299 __ j(not_equal, on_no_match); // Definitely not equal. 300 __ subb(rax, Immediate('a')); 301 __ cmpb(rax, Immediate('z' - 'a')); 302 __ j(below_equal, &loop_increment); // In range 'a'-'z'. 303 // Latin-1: Check for values in range [224,254] but not 247. 304 __ subb(rax, Immediate(224 - 'a')); 305 __ cmpb(rax, Immediate(254 - 224)); 306 __ j(above, on_no_match); // Weren't Latin-1 letters. 307 __ cmpb(rax, Immediate(247 - 224)); // Check for 247. 308 __ j(equal, on_no_match); 309 __ bind(&loop_increment); 310 // Increment pointers into match and capture strings. 
311 __ addq(r11, Immediate(1)); 312 __ addq(r9, Immediate(1)); 313 // Compare to end of capture, and loop if not done. 314 __ cmpq(r9, rbx); 315 __ j(below, &loop); 316 317 // Compute new value of character position after the matched part. 318 __ movq(rdi, r11); 319 __ subq(rdi, rsi); 320 } else { 321 ASSERT(mode_ == UC16); 322 // Save important/volatile registers before calling C function. 323 #ifndef _WIN64 324 // Caller save on Linux and callee save in Windows. 325 __ push(rsi); 326 __ push(rdi); 327 #endif 328 __ push(backtrack_stackpointer()); 329 330 static const int num_arguments = 4; 331 __ PrepareCallCFunction(num_arguments); 332 333 // Put arguments into parameter registers. Parameters are 334 // Address byte_offset1 - Address captured substring's start. 335 // Address byte_offset2 - Address of current character position. 336 // size_t byte_length - length of capture in bytes(!) 337 // Isolate* isolate 338 #ifdef _WIN64 339 // Compute and set byte_offset1 (start of capture). 340 __ lea(rcx, Operand(rsi, rdx, times_1, 0)); 341 // Set byte_offset2. 342 __ lea(rdx, Operand(rsi, rdi, times_1, 0)); 343 // Set byte_length. 344 __ movq(r8, rbx); 345 // Isolate. 346 __ LoadAddress(r9, ExternalReference::isolate_address(isolate())); 347 #else // AMD64 calling convention 348 // Compute byte_offset2 (current position = rsi+rdi). 349 __ lea(rax, Operand(rsi, rdi, times_1, 0)); 350 // Compute and set byte_offset1 (start of capture). 351 __ lea(rdi, Operand(rsi, rdx, times_1, 0)); 352 // Set byte_offset2. 353 __ movq(rsi, rax); 354 // Set byte_length. 355 __ movq(rdx, rbx); 356 // Isolate. 357 __ LoadAddress(rcx, ExternalReference::isolate_address(isolate())); 358 #endif 359 360 { // NOLINT: Can't find a way to open this scope without confusing the 361 // linter. 
362 AllowExternalCallThatCantCauseGC scope(&masm_); 363 ExternalReference compare = 364 ExternalReference::re_case_insensitive_compare_uc16(isolate()); 365 __ CallCFunction(compare, num_arguments); 366 } 367 368 // Restore original values before reacting on result value. 369 __ Move(code_object_pointer(), masm_.CodeObject()); 370 __ pop(backtrack_stackpointer()); 371 #ifndef _WIN64 372 __ pop(rdi); 373 __ pop(rsi); 374 #endif 375 376 // Check if function returned non-zero for success or zero for failure. 377 __ testq(rax, rax); 378 BranchOrBacktrack(zero, on_no_match); 379 // On success, increment position by length of capture. 380 // Requires that rbx is callee save (true for both Win64 and AMD64 ABIs). 381 __ addq(rdi, rbx); 382 } 383 __ bind(&fallthrough); 384 } 385 386 387 void RegExpMacroAssemblerX64::CheckNotBackReference( 388 int start_reg, 389 Label* on_no_match) { 390 Label fallthrough; 391 392 // Find length of back-referenced capture. 393 __ movq(rdx, register_location(start_reg)); 394 __ movq(rax, register_location(start_reg + 1)); 395 __ subq(rax, rdx); // Length to check. 396 397 // Fail on partial or illegal capture (start of capture after end of capture). 398 // This must not happen (no back-reference can reference a capture that wasn't 399 // closed before in the reg-exp). 400 __ Check(greater_equal, kInvalidCaptureReferenced); 401 402 // Succeed on empty capture (including non-participating capture) 403 __ j(equal, &fallthrough); 404 405 // ----------------------- 406 // rdx - Start of capture 407 // rax - length of capture 408 409 // Check that there are sufficient characters left in the input. 410 __ movl(rbx, rdi); 411 __ addl(rbx, rax); 412 BranchOrBacktrack(greater, on_no_match); 413 414 // Compute pointers to match string and capture string 415 __ lea(rbx, Operand(rsi, rdi, times_1, 0)); // Start of match. 416 __ addq(rdx, rsi); // Start of capture. 
417 __ lea(r9, Operand(rdx, rax, times_1, 0)); // End of capture 418 419 // ----------------------- 420 // rbx - current capture character address. 421 // rbx - current input character address . 422 // r9 - end of input to match (capture length after rbx). 423 424 Label loop; 425 __ bind(&loop); 426 if (mode_ == ASCII) { 427 __ movzxbl(rax, Operand(rdx, 0)); 428 __ cmpb(rax, Operand(rbx, 0)); 429 } else { 430 ASSERT(mode_ == UC16); 431 __ movzxwl(rax, Operand(rdx, 0)); 432 __ cmpw(rax, Operand(rbx, 0)); 433 } 434 BranchOrBacktrack(not_equal, on_no_match); 435 // Increment pointers into capture and match string. 436 __ addq(rbx, Immediate(char_size())); 437 __ addq(rdx, Immediate(char_size())); 438 // Check if we have reached end of match area. 439 __ cmpq(rdx, r9); 440 __ j(below, &loop); 441 442 // Success. 443 // Set current character position to position after match. 444 __ movq(rdi, rbx); 445 __ subq(rdi, rsi); 446 447 __ bind(&fallthrough); 448 } 449 450 451 void RegExpMacroAssemblerX64::CheckNotCharacter(uint32_t c, 452 Label* on_not_equal) { 453 __ cmpl(current_character(), Immediate(c)); 454 BranchOrBacktrack(not_equal, on_not_equal); 455 } 456 457 458 void RegExpMacroAssemblerX64::CheckCharacterAfterAnd(uint32_t c, 459 uint32_t mask, 460 Label* on_equal) { 461 if (c == 0) { 462 __ testl(current_character(), Immediate(mask)); 463 } else { 464 __ movl(rax, Immediate(mask)); 465 __ and_(rax, current_character()); 466 __ cmpl(rax, Immediate(c)); 467 } 468 BranchOrBacktrack(equal, on_equal); 469 } 470 471 472 void RegExpMacroAssemblerX64::CheckNotCharacterAfterAnd(uint32_t c, 473 uint32_t mask, 474 Label* on_not_equal) { 475 if (c == 0) { 476 __ testl(current_character(), Immediate(mask)); 477 } else { 478 __ movl(rax, Immediate(mask)); 479 __ and_(rax, current_character()); 480 __ cmpl(rax, Immediate(c)); 481 } 482 BranchOrBacktrack(not_equal, on_not_equal); 483 } 484 485 486 void RegExpMacroAssemblerX64::CheckNotCharacterAfterMinusAnd( 487 uc16 c, 488 uc16 
minus, 489 uc16 mask, 490 Label* on_not_equal) { 491 ASSERT(minus < String::kMaxUtf16CodeUnit); 492 __ lea(rax, Operand(current_character(), -minus)); 493 __ and_(rax, Immediate(mask)); 494 __ cmpl(rax, Immediate(c)); 495 BranchOrBacktrack(not_equal, on_not_equal); 496 } 497 498 499 void RegExpMacroAssemblerX64::CheckCharacterInRange( 500 uc16 from, 501 uc16 to, 502 Label* on_in_range) { 503 __ leal(rax, Operand(current_character(), -from)); 504 __ cmpl(rax, Immediate(to - from)); 505 BranchOrBacktrack(below_equal, on_in_range); 506 } 507 508 509 void RegExpMacroAssemblerX64::CheckCharacterNotInRange( 510 uc16 from, 511 uc16 to, 512 Label* on_not_in_range) { 513 __ leal(rax, Operand(current_character(), -from)); 514 __ cmpl(rax, Immediate(to - from)); 515 BranchOrBacktrack(above, on_not_in_range); 516 } 517 518 519 void RegExpMacroAssemblerX64::CheckBitInTable( 520 Handle<ByteArray> table, 521 Label* on_bit_set) { 522 __ Move(rax, table); 523 Register index = current_character(); 524 if (mode_ != ASCII || kTableMask != String::kMaxOneByteCharCode) { 525 __ movq(rbx, current_character()); 526 __ and_(rbx, Immediate(kTableMask)); 527 index = rbx; 528 } 529 __ cmpb(FieldOperand(rax, index, times_1, ByteArray::kHeaderSize), 530 Immediate(0)); 531 BranchOrBacktrack(not_equal, on_bit_set); 532 } 533 534 535 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, 536 Label* on_no_match) { 537 // Range checks (c in min..max) are generally implemented by an unsigned 538 // (c - min) <= (max - min) check, using the sequence: 539 // lea(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min)) 540 // cmp(rax, Immediate(max - min)) 541 switch (type) { 542 case 's': 543 // Match space-characters 544 if (mode_ == ASCII) { 545 // One byte space characters are '\t'..'\r', ' ' and \u00a0. 
546 Label success; 547 __ cmpl(current_character(), Immediate(' ')); 548 __ j(equal, &success, Label::kNear); 549 // Check range 0x09..0x0d 550 __ lea(rax, Operand(current_character(), -'\t')); 551 __ cmpl(rax, Immediate('\r' - '\t')); 552 __ j(below_equal, &success, Label::kNear); 553 // \u00a0 (NBSP). 554 __ cmpl(rax, Immediate(0x00a0 - '\t')); 555 BranchOrBacktrack(not_equal, on_no_match); 556 __ bind(&success); 557 return true; 558 } 559 return false; 560 case 'S': 561 // The emitted code for generic character classes is good enough. 562 return false; 563 case 'd': 564 // Match ASCII digits ('0'..'9') 565 __ lea(rax, Operand(current_character(), -'0')); 566 __ cmpl(rax, Immediate('9' - '0')); 567 BranchOrBacktrack(above, on_no_match); 568 return true; 569 case 'D': 570 // Match non ASCII-digits 571 __ lea(rax, Operand(current_character(), -'0')); 572 __ cmpl(rax, Immediate('9' - '0')); 573 BranchOrBacktrack(below_equal, on_no_match); 574 return true; 575 case '.': { 576 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 577 __ movl(rax, current_character()); 578 __ xor_(rax, Immediate(0x01)); 579 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c 580 __ subl(rax, Immediate(0x0b)); 581 __ cmpl(rax, Immediate(0x0c - 0x0b)); 582 BranchOrBacktrack(below_equal, on_no_match); 583 if (mode_ == UC16) { 584 // Compare original value to 0x2028 and 0x2029, using the already 585 // computed (current_char ^ 0x01 - 0x0b). I.e., check for 586 // 0x201d (0x2028 - 0x0b) or 0x201e. 
587 __ subl(rax, Immediate(0x2028 - 0x0b)); 588 __ cmpl(rax, Immediate(0x2029 - 0x2028)); 589 BranchOrBacktrack(below_equal, on_no_match); 590 } 591 return true; 592 } 593 case 'n': { 594 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 595 __ movl(rax, current_character()); 596 __ xor_(rax, Immediate(0x01)); 597 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c 598 __ subl(rax, Immediate(0x0b)); 599 __ cmpl(rax, Immediate(0x0c - 0x0b)); 600 if (mode_ == ASCII) { 601 BranchOrBacktrack(above, on_no_match); 602 } else { 603 Label done; 604 BranchOrBacktrack(below_equal, &done); 605 // Compare original value to 0x2028 and 0x2029, using the already 606 // computed (current_char ^ 0x01 - 0x0b). I.e., check for 607 // 0x201d (0x2028 - 0x0b) or 0x201e. 608 __ subl(rax, Immediate(0x2028 - 0x0b)); 609 __ cmpl(rax, Immediate(0x2029 - 0x2028)); 610 BranchOrBacktrack(above, on_no_match); 611 __ bind(&done); 612 } 613 return true; 614 } 615 case 'w': { 616 if (mode_ != ASCII) { 617 // Table is 128 entries, so all ASCII characters can be tested. 618 __ cmpl(current_character(), Immediate('z')); 619 BranchOrBacktrack(above, on_no_match); 620 } 621 __ Move(rbx, ExternalReference::re_word_character_map()); 622 ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char. 623 __ testb(Operand(rbx, current_character(), times_1, 0), 624 current_character()); 625 BranchOrBacktrack(zero, on_no_match); 626 return true; 627 } 628 case 'W': { 629 Label done; 630 if (mode_ != ASCII) { 631 // Table is 128 entries, so all ASCII characters can be tested. 632 __ cmpl(current_character(), Immediate('z')); 633 __ j(above, &done); 634 } 635 __ Move(rbx, ExternalReference::re_word_character_map()); 636 ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char. 
637 __ testb(Operand(rbx, current_character(), times_1, 0), 638 current_character()); 639 BranchOrBacktrack(not_zero, on_no_match); 640 if (mode_ != ASCII) { 641 __ bind(&done); 642 } 643 return true; 644 } 645 646 case '*': 647 // Match any character. 648 return true; 649 // No custom implementation (yet): s(UC16), S(UC16). 650 default: 651 return false; 652 } 653 } 654 655 656 void RegExpMacroAssemblerX64::Fail() { 657 STATIC_ASSERT(FAILURE == 0); // Return value for failure is zero. 658 if (!global()) { 659 __ Set(rax, FAILURE); 660 } 661 __ jmp(&exit_label_); 662 } 663 664 665 Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { 666 Label return_rax; 667 // Finalize code - write the entry point code now we know how many 668 // registers we need. 669 // Entry code: 670 __ bind(&entry_label_); 671 672 // Tell the system that we have a stack frame. Because the type is MANUAL, no 673 // is generated. 674 FrameScope scope(&masm_, StackFrame::MANUAL); 675 676 // Actually emit code to start a new stack frame. 677 __ push(rbp); 678 __ movq(rbp, rsp); 679 // Save parameters and callee-save registers. Order here should correspond 680 // to order of kBackup_ebx etc. 681 #ifdef _WIN64 682 // MSVC passes arguments in rcx, rdx, r8, r9, with backing stack slots. 683 // Store register parameters in pre-allocated stack slots, 684 __ movq(Operand(rbp, kInputString), rcx); 685 __ movq(Operand(rbp, kStartIndex), rdx); // Passed as int32 in edx. 686 __ movq(Operand(rbp, kInputStart), r8); 687 __ movq(Operand(rbp, kInputEnd), r9); 688 // Callee-save on Win64. 689 __ push(rsi); 690 __ push(rdi); 691 __ push(rbx); 692 #else 693 // GCC passes arguments in rdi, rsi, rdx, rcx, r8, r9 (and then on stack). 694 // Push register parameters on stack for reference. 
695 ASSERT_EQ(kInputString, -1 * kPointerSize); 696 ASSERT_EQ(kStartIndex, -2 * kPointerSize); 697 ASSERT_EQ(kInputStart, -3 * kPointerSize); 698 ASSERT_EQ(kInputEnd, -4 * kPointerSize); 699 ASSERT_EQ(kRegisterOutput, -5 * kPointerSize); 700 ASSERT_EQ(kNumOutputRegisters, -6 * kPointerSize); 701 __ push(rdi); 702 __ push(rsi); 703 __ push(rdx); 704 __ push(rcx); 705 __ push(r8); 706 __ push(r9); 707 708 __ push(rbx); // Callee-save 709 #endif 710 711 __ push(Immediate(0)); // Number of successful matches in a global regexp. 712 __ push(Immediate(0)); // Make room for "input start - 1" constant. 713 714 // Check if we have space on the stack for registers. 715 Label stack_limit_hit; 716 Label stack_ok; 717 718 ExternalReference stack_limit = 719 ExternalReference::address_of_stack_limit(isolate()); 720 __ movq(rcx, rsp); 721 __ Move(kScratchRegister, stack_limit); 722 __ subq(rcx, Operand(kScratchRegister, 0)); 723 // Handle it if the stack pointer is already below the stack limit. 724 __ j(below_equal, &stack_limit_hit); 725 // Check if there is room for the variable number of registers above 726 // the stack limit. 727 __ cmpq(rcx, Immediate(num_registers_ * kPointerSize)); 728 __ j(above_equal, &stack_ok); 729 // Exit with OutOfMemory exception. There is not enough space on the stack 730 // for our working registers. 731 __ Set(rax, EXCEPTION); 732 __ jmp(&return_rax); 733 734 __ bind(&stack_limit_hit); 735 __ Move(code_object_pointer(), masm_.CodeObject()); 736 CallCheckStackGuardState(); // Preserves no registers beside rbp and rsp. 737 __ testq(rax, rax); 738 // If returned value is non-zero, we exit with the returned value as result. 739 __ j(not_zero, &return_rax); 740 741 __ bind(&stack_ok); 742 743 // Allocate space on stack for registers. 744 __ subq(rsp, Immediate(num_registers_ * kPointerSize)); 745 // Load string length. 746 __ movq(rsi, Operand(rbp, kInputEnd)); 747 // Load input position. 
748 __ movq(rdi, Operand(rbp, kInputStart)); 749 // Set up rdi to be negative offset from string end. 750 __ subq(rdi, rsi); 751 // Set rax to address of char before start of the string 752 // (effectively string position -1). 753 __ movq(rbx, Operand(rbp, kStartIndex)); 754 __ neg(rbx); 755 if (mode_ == UC16) { 756 __ lea(rax, Operand(rdi, rbx, times_2, -char_size())); 757 } else { 758 __ lea(rax, Operand(rdi, rbx, times_1, -char_size())); 759 } 760 // Store this value in a local variable, for use when clearing 761 // position registers. 762 __ movq(Operand(rbp, kInputStartMinusOne), rax); 763 764 #if V8_OS_WIN 765 // Ensure that we have written to each stack page, in order. Skipping a page 766 // on Windows can cause segmentation faults. Assuming page size is 4k. 767 const int kPageSize = 4096; 768 const int kRegistersPerPage = kPageSize / kPointerSize; 769 for (int i = num_saved_registers_ + kRegistersPerPage - 1; 770 i < num_registers_; 771 i += kRegistersPerPage) { 772 __ movq(register_location(i), rax); // One write every page. 773 } 774 #endif // V8_OS_WIN 775 776 // Initialize code object pointer. 777 __ Move(code_object_pointer(), masm_.CodeObject()); 778 779 Label load_char_start_regexp, start_regexp; 780 // Load newline if index is at start, previous character otherwise. 781 __ cmpl(Operand(rbp, kStartIndex), Immediate(0)); 782 __ j(not_equal, &load_char_start_regexp, Label::kNear); 783 __ Set(current_character(), '\n'); 784 __ jmp(&start_regexp, Label::kNear); 785 786 // Global regexp restarts matching here. 787 __ bind(&load_char_start_regexp); 788 // Load previous char as initial value of current character register. 789 LoadCurrentCharacterUnchecked(-1, 1); 790 __ bind(&start_regexp); 791 792 // Initialize on-stack registers. 793 if (num_saved_registers_ > 0) { 794 // Fill saved registers with initial value = start offset - 1 795 // Fill in stack push order, to avoid accessing across an unwritten 796 // page (a problem on Windows). 
797 if (num_saved_registers_ > 8) { 798 __ Set(rcx, kRegisterZero); 799 Label init_loop; 800 __ bind(&init_loop); 801 __ movq(Operand(rbp, rcx, times_1, 0), rax); 802 __ subq(rcx, Immediate(kPointerSize)); 803 __ cmpq(rcx, 804 Immediate(kRegisterZero - num_saved_registers_ * kPointerSize)); 805 __ j(greater, &init_loop); 806 } else { // Unroll the loop. 807 for (int i = 0; i < num_saved_registers_; i++) { 808 __ movq(register_location(i), rax); 809 } 810 } 811 } 812 813 // Initialize backtrack stack pointer. 814 __ movq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd)); 815 816 __ jmp(&start_label_); 817 818 // Exit code: 819 if (success_label_.is_linked()) { 820 // Save captures when successful. 821 __ bind(&success_label_); 822 if (num_saved_registers_ > 0) { 823 // copy captures to output 824 __ movq(rdx, Operand(rbp, kStartIndex)); 825 __ movq(rbx, Operand(rbp, kRegisterOutput)); 826 __ movq(rcx, Operand(rbp, kInputEnd)); 827 __ subq(rcx, Operand(rbp, kInputStart)); 828 if (mode_ == UC16) { 829 __ lea(rcx, Operand(rcx, rdx, times_2, 0)); 830 } else { 831 __ addq(rcx, rdx); 832 } 833 for (int i = 0; i < num_saved_registers_; i++) { 834 __ movq(rax, register_location(i)); 835 if (i == 0 && global_with_zero_length_check()) { 836 // Keep capture start in rdx for the zero-length check later. 837 __ movq(rdx, rax); 838 } 839 __ addq(rax, rcx); // Convert to index from start, not end. 840 if (mode_ == UC16) { 841 __ sar(rax, Immediate(1)); // Convert byte index to character index. 842 } 843 __ movl(Operand(rbx, i * kIntSize), rax); 844 } 845 } 846 847 if (global()) { 848 // Restart matching if the regular expression is flagged as global. 849 // Increment success counter. 850 __ incq(Operand(rbp, kSuccessfulCaptures)); 851 // Capture results have been stored, so the number of remaining global 852 // output registers is reduced by the number of stored captures. 
853 __ movsxlq(rcx, Operand(rbp, kNumOutputRegisters)); 854 __ subq(rcx, Immediate(num_saved_registers_)); 855 // Check whether we have enough room for another set of capture results. 856 __ cmpq(rcx, Immediate(num_saved_registers_)); 857 __ j(less, &exit_label_); 858 859 __ movq(Operand(rbp, kNumOutputRegisters), rcx); 860 // Advance the location for output. 861 __ addq(Operand(rbp, kRegisterOutput), 862 Immediate(num_saved_registers_ * kIntSize)); 863 864 // Prepare rax to initialize registers with its value in the next run. 865 __ movq(rax, Operand(rbp, kInputStartMinusOne)); 866 867 if (global_with_zero_length_check()) { 868 // Special case for zero-length matches. 869 // rdx: capture start index 870 __ cmpq(rdi, rdx); 871 // Not a zero-length match, restart. 872 __ j(not_equal, &load_char_start_regexp); 873 // rdi (offset from the end) is zero if we already reached the end. 874 __ testq(rdi, rdi); 875 __ j(zero, &exit_label_, Label::kNear); 876 // Advance current position after a zero-length match. 877 if (mode_ == UC16) { 878 __ addq(rdi, Immediate(2)); 879 } else { 880 __ incq(rdi); 881 } 882 } 883 884 __ jmp(&load_char_start_regexp); 885 } else { 886 __ movq(rax, Immediate(SUCCESS)); 887 } 888 } 889 890 __ bind(&exit_label_); 891 if (global()) { 892 // Return the number of successful captures. 893 __ movq(rax, Operand(rbp, kSuccessfulCaptures)); 894 } 895 896 __ bind(&return_rax); 897 #ifdef _WIN64 898 // Restore callee save registers. 899 __ lea(rsp, Operand(rbp, kLastCalleeSaveRegister)); 900 __ pop(rbx); 901 __ pop(rdi); 902 __ pop(rsi); 903 // Stack now at rbp. 904 #else 905 // Restore callee save register. 906 __ movq(rbx, Operand(rbp, kBackup_rbx)); 907 // Skip rsp to rbp. 908 __ movq(rsp, rbp); 909 #endif 910 // Exit function frame, restore previous one. 911 __ pop(rbp); 912 __ ret(0); 913 914 // Backtrack code (branch target for conditional backtracks). 
915 if (backtrack_label_.is_linked()) { 916 __ bind(&backtrack_label_); 917 Backtrack(); 918 } 919 920 Label exit_with_exception; 921 922 // Preempt-code 923 if (check_preempt_label_.is_linked()) { 924 SafeCallTarget(&check_preempt_label_); 925 926 __ push(backtrack_stackpointer()); 927 __ push(rdi); 928 929 CallCheckStackGuardState(); 930 __ testq(rax, rax); 931 // If returning non-zero, we should end execution with the given 932 // result as return value. 933 __ j(not_zero, &return_rax); 934 935 // Restore registers. 936 __ Move(code_object_pointer(), masm_.CodeObject()); 937 __ pop(rdi); 938 __ pop(backtrack_stackpointer()); 939 // String might have moved: Reload esi from frame. 940 __ movq(rsi, Operand(rbp, kInputEnd)); 941 SafeReturn(); 942 } 943 944 // Backtrack stack overflow code. 945 if (stack_overflow_label_.is_linked()) { 946 SafeCallTarget(&stack_overflow_label_); 947 // Reached if the backtrack-stack limit has been hit. 948 949 Label grow_failed; 950 // Save registers before calling C function 951 #ifndef _WIN64 952 // Callee-save in Microsoft 64-bit ABI, but not in AMD64 ABI. 953 __ push(rsi); 954 __ push(rdi); 955 #endif 956 957 // Call GrowStack(backtrack_stackpointer()) 958 static const int num_arguments = 3; 959 __ PrepareCallCFunction(num_arguments); 960 #ifdef _WIN64 961 // Microsoft passes parameters in rcx, rdx, r8. 962 // First argument, backtrack stackpointer, is already in rcx. 963 __ lea(rdx, Operand(rbp, kStackHighEnd)); // Second argument 964 __ LoadAddress(r8, ExternalReference::isolate_address(isolate())); 965 #else 966 // AMD64 ABI passes parameters in rdi, rsi, rdx. 967 __ movq(rdi, backtrack_stackpointer()); // First argument. 968 __ lea(rsi, Operand(rbp, kStackHighEnd)); // Second argument. 
// NOTE(review): the statements down to the first closing brace at column 0
// are the tail of a code-generation method that starts before this chunk.
// Only their line layout has been restored; the code is unchanged.
    __ LoadAddress(rdx, ExternalReference::isolate_address(isolate()));
#endif
    ExternalReference grow_stack =
        ExternalReference::re_grow_stack(isolate());
    __ CallCFunction(grow_stack, num_arguments);
    // If return NULL, we have failed to grow the stack, and
    // must exit with a stack-overflow exception.
    __ testq(rax, rax);
    __ j(equal, &exit_with_exception);
    // Otherwise use return value as new stack pointer.
    __ movq(backtrack_stackpointer(), rax);
    // Restore saved registers and continue.
    __ Move(code_object_pointer(), masm_.CodeObject());
#ifndef _WIN64
    __ pop(rdi);
    __ pop(rsi);
#endif
    SafeReturn();
  }

  if (exit_with_exception.is_linked()) {
    // If any of the code above needed to exit with an exception.
    __ bind(&exit_with_exception);
    // Exit with Result EXCEPTION(-1) to signal thrown exception.
    __ Set(rax, EXCEPTION);
    __ jmp(&return_rax);
  }

  // Rewrite the recorded label offsets so that they are relative to the
  // Code object pointer (see FixupCodeRelativePositions below).
  FixupCodeRelativePositions();

  CodeDesc code_desc;
  masm_.GetCode(&code_desc);
  Isolate* isolate = this->isolate();
  Handle<Code> code = isolate->factory()->NewCode(
      code_desc, Code::ComputeFlags(Code::REGEXP),
      masm_.CodeObject());
  PROFILE(isolate, RegExpCodeCreateEvent(*code, *source));
  return Handle<HeapObject>::cast(code);
}


// Emits an unconditional transfer to |to|. Delegates to BranchOrBacktrack,
// which backtracks instead of jumping when |to| is NULL.
void RegExpMacroAssemblerX64::GoTo(Label* to) {
  BranchOrBacktrack(no_condition, to);
}


// Emits a comparison of RegExp register |reg| against the constant
// |comparand| and a branch to |if_ge| (or a backtrack if |if_ge| is NULL)
// taken on the greater_equal condition.
void RegExpMacroAssemblerX64::IfRegisterGE(int reg,
                                           int comparand,
                                           Label* if_ge) {
  __ cmpq(register_location(reg), Immediate(comparand));
  BranchOrBacktrack(greater_equal, if_ge);
}


// Emits a comparison of RegExp register |reg| against the constant
// |comparand| and a branch to |if_lt| (or a backtrack if |if_lt| is NULL)
// taken on the less condition.
void RegExpMacroAssemblerX64::IfRegisterLT(int reg,
                                           int comparand,
                                           Label* if_lt) {
  __ cmpq(register_location(reg), Immediate(comparand));
  BranchOrBacktrack(less, if_lt);
}


// Emits a comparison of the current input position (rdi) with the value
// stored in RegExp register |reg| and a branch to |if_eq| (or a backtrack if
// |if_eq| is NULL) when they are equal.
void RegExpMacroAssemblerX64::IfRegisterEqPos(int reg,
                                              Label* if_eq) {
  __ cmpq(rdi, register_location(reg));
  BranchOrBacktrack(equal, if_eq);
}


// Identifies this macro assembler as the x64 implementation.
RegExpMacroAssembler::IrregexpImplementation
    RegExpMacroAssemblerX64::Implementation() {
  return kX64Implementation;
}


// Emits code that loads |characters| characters starting |cp_offset|
// characters from the current position. When |check_bounds| is set, the
// position of the last character to be loaded is checked first and control
// goes to |on_end_of_input| if it lies outside the input.
void RegExpMacroAssemblerX64::LoadCurrentCharacter(int cp_offset,
                                                   Label* on_end_of_input,
                                                   bool check_bounds,
                                                   int characters) {
  ASSERT(cp_offset >= -1);      // ^ and \b can look behind one character.
  ASSERT(cp_offset < (1<<30));  // Be sane! (And ensure negation works)
  if (check_bounds) {
    CheckPosition(cp_offset + characters - 1, on_end_of_input);
  }
  LoadCurrentCharacterUnchecked(cp_offset, characters);
}


// Emits code that pops the top backtrack-stack entry into the current
// position register (rdi).
void RegExpMacroAssemblerX64::PopCurrentPosition() {
  Pop(rdi);
}


// Emits code that pops the top backtrack-stack entry (via rax) into RegExp
// register |register_index|.
void RegExpMacroAssemblerX64::PopRegister(int register_index) {
  Pop(rax);
  __ movq(register_location(register_index), rax);
}


// Emits code that pushes |label| as a backtrack target and then checks the
// backtrack stack limit.
void RegExpMacroAssemblerX64::PushBacktrack(Label* label) {
  Push(label);
  CheckStackLimit();
}


// Emits code that pushes the current position (rdi) on the backtrack stack.
void RegExpMacroAssemblerX64::PushCurrentPosition() {
  Push(rdi);
}


// Emits code that pushes RegExp register |register_index| (via rax) on the
// backtrack stack. The backtrack stack limit is checked only when
// |check_stack_limit| is non-zero.
void RegExpMacroAssemblerX64::PushRegister(int register_index,
                                           StackCheckFlag check_stack_limit) {
  __ movq(rax, register_location(register_index));
  Push(rax);
  if (check_stack_limit) CheckStackLimit();
}


// Emits code that sets the current position (rdi) from RegExp register |reg|.
void RegExpMacroAssemblerX64::ReadCurrentPositionFromRegister(int reg) {
  __ movq(rdi, register_location(reg));
}


// Emits code that restores the backtrack stack pointer from RegExp register
// |reg|. The register holds an offset relative to the value in the
// kStackHighEnd frame slot (the inverse of WriteStackPointerToRegister), so
// that value is added back.
void RegExpMacroAssemblerX64::ReadStackPointerFromRegister(int reg) {
  __ movq(backtrack_stackpointer(), register_location(reg));
  __ addq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
}


// Emits code that moves the current position forward to |by| characters
// before the end of input, unless it is already at or past that point.
void RegExpMacroAssemblerX64::SetCurrentPositionFromEnd(int by) {
  Label after_position;
  // rdi is a negative byte offset from the end of the input, hence the
  // negated, char_size()-scaled bound.
  __ cmpq(rdi, Immediate(-by * char_size()));
  __ j(greater_equal, &after_position, Label::kNear);
  __ movq(rdi, Immediate(-by * char_size()));
  // On RegExp code entry (where this operation is used), the character before
  // the current position is expected to be already loaded.
  // We have advanced the position, so it's safe to read backwards.
  LoadCurrentCharacterUnchecked(-1, 1);
  __ bind(&after_position);
}


// Emits code that stores the constant |to| in RegExp register
// |register_index|. Only registers at or above num_saved_registers_ may be
// written this way.
void RegExpMacroAssemblerX64::SetRegister(int register_index, int to) {
  ASSERT(register_index >= num_saved_registers_);  // Reserved for positions!
  __ movq(register_location(register_index), Immediate(to));
}


// Emits a jump to the success exit code. Returns the value of global().
bool RegExpMacroAssemblerX64::Succeed() {
  __ jmp(&success_label_);
  return global();
}


// Emits code that stores the current position, advanced by |cp_offset|
// characters, in RegExp register |reg|.
void RegExpMacroAssemblerX64::WriteCurrentPositionToRegister(int reg,
                                                             int cp_offset) {
  if (cp_offset == 0) {
    // No adjustment needed: store rdi directly.
    __ movq(register_location(reg), rdi);
  } else {
    // Compute the adjusted byte offset in rax, leaving rdi untouched.
    __ lea(rax, Operand(rdi, cp_offset * char_size()));
    __ movq(register_location(reg), rax);
  }
}


// Emits code that fills RegExp registers |reg_from| through |reg_to|
// (inclusive) with the value held in the kInputStartMinusOne frame slot.
void RegExpMacroAssemblerX64::ClearRegisters(int reg_from, int reg_to) {
  ASSERT(reg_from <= reg_to);
  __ movq(rax, Operand(rbp, kInputStartMinusOne));
  for (int reg = reg_from; reg <= reg_to; reg++) {
    __ movq(register_location(reg), rax);
  }
}


// Emits code that saves the backtrack stack pointer in RegExp register |reg|
// as an offset relative to the value in the kStackHighEnd frame slot.
void RegExpMacroAssemblerX64::WriteStackPointerToRegister(int reg) {
  __ movq(rax, backtrack_stackpointer());
  __ subq(rax, Operand(rbp, kStackHighEnd));
  __ movq(register_location(reg), rax);
}


// Private methods:

// Emits a call to the C function referenced by
// ExternalReference::re_check_stack_guard_state, passing the address just
// below rsp, the Code object pointer and the frame pointer as arguments.
void RegExpMacroAssemblerX64::CallCheckStackGuardState() {
  // This function call preserves no register values. Caller should
  // store anything volatile in a C call or overwritten by this function.
  static const int num_arguments = 3;
  __ PrepareCallCFunction(num_arguments);
#ifdef _WIN64
  // Second argument: Code* of self. (Do this before overwriting r8).
  __ movq(rdx, code_object_pointer());
  // Third argument: RegExp code frame pointer.
  __ movq(r8, rbp);
  // First argument: Next address on the stack (will be address of
  // return address).
  __ lea(rcx, Operand(rsp, -kPointerSize));
#else
  // Third argument: RegExp code frame pointer.
  __ movq(rdx, rbp);
  // Second argument: Code* of self.
  __ movq(rsi, code_object_pointer());
  // First argument: Next address on the stack (will be address of
  // return address).
  __ lea(rdi, Operand(rsp, -kPointerSize));
#endif
  ExternalReference stack_check =
      ExternalReference::re_check_stack_guard_state(isolate());
  __ CallCFunction(stack_check, num_arguments);
}


// Helper function for reading a value out of a stack frame.
// Returns a reference of type T to the memory at |re_frame| + |frame_offset|,
// so the result can be both read and assigned through.
template <typename T>
static T& frame_entry(Address re_frame, int frame_offset) {
  return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
}


// Handles a stack-guard hit on behalf of RegExp code.
// NOTE(review): presumably this is the function reached through
// ExternalReference::re_check_stack_guard_state (the argument order matches
// CallCheckStackGuardState) - confirm.
//
//   return_address: location holding the return address into |re_code|; it is
//                   adjusted in place if the code object moves.
//   re_code:        the RegExp code object that was executing.
//   re_frame:       the frame pointer of the RegExp code frame.
//
// Returns EXCEPTION on a real stack overflow or when interrupt handling
// produced an exception, RETRY when matching has to be restarted (direct call,
// or the subject changed between one-byte and two-byte representation), and 0
// when execution can continue.
int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
                                                  Code* re_code,
                                                  Address re_frame) {
  Isolate* isolate = frame_entry<Isolate*>(re_frame, kIsolate);
  if (isolate->stack_guard()->IsStackOverflow()) {
    isolate->StackOverflow();
    return EXCEPTION;
  }

  // If not real stack overflow the stack guard was used to interrupt
  // execution for another purpose.

  // If this is a direct call from JavaScript retry the RegExp forcing the call
  // through the runtime system. Currently the direct call cannot handle a GC.
  if (frame_entry<int>(re_frame, kDirectCall) == 1) {
    return RETRY;
  }

  // Prepare for possible GC.
  HandleScope handles(isolate);
  Handle<Code> code_handle(re_code);

  Handle<String> subject(frame_entry<String*>(re_frame, kInputString));

  // Current string.
  // Remember the representation before the interrupt is handled so that a
  // change can be detected afterwards.
  bool is_ascii = subject->IsOneByteRepresentationUnderneath();

  ASSERT(re_code->instruction_start() <= *return_address);
  ASSERT(*return_address <=
      re_code->instruction_start() + re_code->instruction_size());

  MaybeObject* result = Execution::HandleStackGuardInterrupt(isolate);

  if (*code_handle != re_code) {  // Return address no longer valid
    intptr_t delta = code_handle->address() - re_code->address();
    // Overwrite the return address on the stack.
    *return_address += delta;
  }

  if (result->IsException()) {
    return EXCEPTION;
  }

  Handle<String> subject_tmp = subject;
  int slice_offset = 0;

  // Extract the underlying string and the slice offset.
  if (StringShape(*subject_tmp).IsCons()) {
    subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
  } else if (StringShape(*subject_tmp).IsSliced()) {
    SlicedString* slice = SlicedString::cast(*subject_tmp);
    subject_tmp = Handle<String>(slice->parent());
    slice_offset = slice->offset();
  }

  // String might have changed.
  if (subject_tmp->IsOneByteRepresentation() != is_ascii) {
    // If we changed between an ASCII and an UC16 string, the specialized
    // code cannot be used, and we need to restart regexp matching from
    // scratch (including, potentially, compiling a new version of the code).
    return RETRY;
  }

  // Otherwise, the content of the string might have moved. It must still
  // be a sequential or external string with the same content.
  // Update the start and end pointers in the stack frame to the current
  // location (whether it has actually moved or not).
  ASSERT(StringShape(*subject_tmp).IsSequential() ||
      StringShape(*subject_tmp).IsExternal());

  // The original start address of the characters to match.
  const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);

  // Find the current start address of the same character at the current string
  // position.
  int start_index = frame_entry<int>(re_frame, kStartIndex);
  const byte* new_address = StringCharacterPosition(*subject_tmp,
                                                    start_index + slice_offset);

  if (start_address != new_address) {
    // If there is a difference, update the object pointer and start and end
    // addresses in the RegExp stack frame to match the new value.
    const byte* end_address = frame_entry<const byte* >(re_frame, kInputEnd);
    // The distance between start and end is kept; only the base changes.
    int byte_length = static_cast<int>(end_address - start_address);
    frame_entry<const String*>(re_frame, kInputString) = *subject;
    frame_entry<const byte*>(re_frame, kInputStart) = new_address;
    frame_entry<const byte*>(re_frame, kInputEnd) = new_address + byte_length;
  } else if (frame_entry<const String*>(re_frame, kInputString) != *subject) {
    // Subject string might have been a ConsString that underwent
    // short-circuiting during GC. That will not change start_address but
    // will change pointer inside the subject handle.
    frame_entry<const String*>(re_frame, kInputString) = *subject;
  }

  return 0;
}


// Returns the frame operand for RegExp register |register_index|. Registers
// are laid out at decreasing addresses starting at kRegisterZero, one
// kPointerSize slot each. As a side effect, num_registers_ is raised so that
// it covers |register_index|.
Operand RegExpMacroAssemblerX64::register_location(int register_index) {
  ASSERT(register_index < (1<<30));
  if (num_registers_ <= register_index) {
    num_registers_ = register_index + 1;
  }
  return Operand(rbp, kRegisterZero - register_index * kPointerSize);
}


// Emits a check of the position |cp_offset| characters from the current one:
// control goes to |on_outside_input| (or backtracks if it is NULL) when
// rdi >= -cp_offset * char_size().
void RegExpMacroAssemblerX64::CheckPosition(int cp_offset,
                                            Label* on_outside_input) {
  __ cmpl(rdi, Immediate(-cp_offset * char_size()));
  BranchOrBacktrack(greater_equal, on_outside_input);
}


// Emits a jump to |to| under |condition|. A negative |condition| means an
// unconditional transfer; a NULL |to| means "backtrack" (inline Backtrack()
// when unconditional, a conditional jump to backtrack_label_ otherwise).
void RegExpMacroAssemblerX64::BranchOrBacktrack(Condition condition,
                                                Label* to) {
  if (condition < 0) {  // No condition
    if (to == NULL) {
      Backtrack();
      return;
    }
    __ jmp(to);
    return;
  }
  if (to == NULL) {
    __ j(condition, &backtrack_label_);
    return;
  }
  __ j(condition, to);
}


// Emits a call to |to|. The target is expected to start with SafeCallTarget
// and to return with SafeReturn.
void RegExpMacroAssemblerX64::SafeCall(Label* to) {
  __ call(to);
}


// Binds |label| and emits code that subtracts the Code object pointer from
// the value at the top of the C stack (the slot at rsp), leaving a value
// relative to the Code object.
void RegExpMacroAssemblerX64::SafeCallTarget(Label* label) {
  __ bind(label);
  __ subq(Operand(rsp, 0), code_object_pointer());
}


// Emits the counterpart of SafeCallTarget: adds the Code object pointer back
// to the value at the top of the C stack and returns.
void RegExpMacroAssemblerX64::SafeReturn() {
  __ addq(Operand(rsp, 0), code_object_pointer());
  __ ret(0);
}


// Emits code that pushes |source| on the backtrack stack: the backtrack stack
// pointer is lowered by kIntSize and the value is stored with movl.
void RegExpMacroAssemblerX64::Push(Register source) {
  ASSERT(!source.is(backtrack_stackpointer()));
  // Notice: This updates flags, unlike normal Push.
  __ subq(backtrack_stackpointer(), Immediate(kIntSize));
  __ movl(Operand(backtrack_stackpointer(), 0), source);
}


// Emits code that pushes the immediate |value| on the backtrack stack.
void RegExpMacroAssemblerX64::Push(Immediate value) {
  // Notice: This updates flags, unlike normal Push.
  __ subq(backtrack_stackpointer(), Immediate(kIntSize));
  __ movl(Operand(backtrack_stackpointer(), 0), value);
}


// Patches every position recorded in code_relative_fixup_positions_ so that
// the label offset emitted there becomes relative to the Code object pointer,
// then clears the list.
void RegExpMacroAssemblerX64::FixupCodeRelativePositions() {
  for (int i = 0, n = code_relative_fixup_positions_.length(); i < n; i++) {
    int position = code_relative_fixup_positions_[i];
    // The position succeeds a relative label offset from position.
    // Patch the relative offset to be relative to the Code object pointer
    // instead.
    int patch_position = position - kIntSize;
    int offset = masm_.long_at(patch_position);
    masm_.long_at_put(patch_position,
                       offset
                       + position
                       + Code::kHeaderSize
                       - kHeapObjectTag);
  }
  code_relative_fixup_positions_.Clear();
}


// Emits code that pushes |backtrack_target| on the backtrack stack and
// records the position for FixupCodeRelativePositions.
void RegExpMacroAssemblerX64::Push(Label* backtrack_target) {
  __ subq(backtrack_stackpointer(), Immediate(kIntSize));
  __ movl(Operand(backtrack_stackpointer(), 0), backtrack_target);
  MarkPositionForCodeRelativeFixup();
}


// Emits code that pops the top backtrack-stack entry into |target|. The
// entry is loaded with movsxlq and the stack pointer is raised by kIntSize.
void RegExpMacroAssemblerX64::Pop(Register target) {
  ASSERT(!target.is(backtrack_stackpointer()));
  __ movsxlq(target, Operand(backtrack_stackpointer(), 0));
  // Notice: This updates flags, unlike normal Pop.
  __ addq(backtrack_stackpointer(), Immediate(kIntSize));
}


// Emits code that discards the top backtrack-stack entry.
void RegExpMacroAssemblerX64::Drop() {
  __ addq(backtrack_stackpointer(), Immediate(kIntSize));
}


// Emits a comparison of rsp with the isolate's stack limit and, unless rsp is
// above the limit, a call to the preemption handler at check_preempt_label_.
void RegExpMacroAssemblerX64::CheckPreemption() {
  // Check for preemption.
  Label no_preempt;
  ExternalReference stack_limit =
      ExternalReference::address_of_stack_limit(isolate());
  __ load_rax(stack_limit);
  __ cmpq(rsp, rax);
  __ j(above, &no_preempt);

  SafeCall(&check_preempt_label_);

  __ bind(&no_preempt);
}


// Emits a comparison of the backtrack stack pointer with the RegExp stack
// limit and, unless the pointer is above the limit, a call to the handler at
// stack_overflow_label_.
void RegExpMacroAssemblerX64::CheckStackLimit() {
  Label no_stack_overflow;
  ExternalReference stack_limit =
      ExternalReference::address_of_regexp_stack_limit(isolate());
  __ load_rax(stack_limit);
  __ cmpq(backtrack_stackpointer(), rax);
  __ j(above, &no_stack_overflow);

  SafeCall(&stack_overflow_label_);

  __ bind(&no_stack_overflow);
}


// Emits the load of |characters| characters located |cp_offset| characters
// from the current position into current_character(), without any bounds
// check. The load instruction is selected from the mode and the character
// count: 1, 2 or 4 characters in ASCII mode, 1 or 2 in UC16 mode.
void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset,
                                                            int characters) {
  if (mode_ == ASCII) {
    if (characters == 4) {
      __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
    } else if (characters == 2) {
      __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
    } else {
      ASSERT(characters == 1);
      __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
    }
  } else {
    ASSERT(mode_ == UC16);
    // cp_offset counts characters; scale it to a byte displacement.
    if (characters == 2) {
      __ movl(current_character(),
              Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
    } else {
      ASSERT(characters == 1);
      __ movzxwl(current_character(),
                 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
    }
  }
}

#undef __

#endif  // V8_INTERPRETED_REGEXP

}}  // namespace v8::internal

#endif  // V8_TARGET_ARCH_X64