1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Redistribution and use in source and binary forms, with or without 3 // modification, are permitted provided that the following conditions are 4 // met: 5 // 6 // * Redistributions of source code must retain the above copyright 7 // notice, this list of conditions and the following disclaimer. 8 // * Redistributions in binary form must reproduce the above 9 // copyright notice, this list of conditions and the following 10 // disclaimer in the documentation and/or other materials provided 11 // with the distribution. 12 // * Neither the name of Google Inc. nor the names of its 13 // contributors may be used to endorse or promote products derived 14 // from this software without specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 28 #include "v8.h" 29 30 #if defined(V8_TARGET_ARCH_X64) 31 32 #include "serialize.h" 33 #include "unicode.h" 34 #include "log.h" 35 #include "regexp-stack.h" 36 #include "macro-assembler.h" 37 #include "regexp-macro-assembler.h" 38 #include "x64/regexp-macro-assembler-x64.h" 39 40 namespace v8 { 41 namespace internal { 42 43 #ifndef V8_INTERPRETED_REGEXP 44 45 /* 46 * This assembler uses the following register assignment convention 47 * - rdx : currently loaded character(s) as ASCII or UC16. Must be loaded using 48 * LoadCurrentCharacter before using any of the dispatch methods. 49 * - rdi : current position in input, as negative offset from end of string. 50 * Please notice that this is the byte offset, not the character 51 * offset! Is always a 32-bit signed (negative) offset, but must be 52 * maintained sign-extended to 64 bits, since it is used as index. 53 * - rsi : end of input (points to byte after last character in input), 54 * so that rsi+rdi points to the current character. 55 * - rbp : frame pointer. Used to access arguments, local variables and 56 * RegExp registers. 57 * - rsp : points to tip of C stack. 58 * - rcx : points to tip of backtrack stack. The backtrack stack contains 59 * only 32-bit values. Most are offsets from some base (e.g., character 60 * positions from end of string or code location from Code* pointer). 61 * - r8 : code object pointer. Used to convert between absolute and 62 * code-object-relative addresses. 63 * 64 * The registers rax, rbx, r9 and r11 are free to use for computations. 65 * If changed to use r12+, they should be saved as callee-save registers. 66 * The macro assembler special registers r12 and r13 (kSmiConstantRegister, 67 * kRootRegister) aren't special during execution of RegExp code (they don't 68 * hold the values assumed when creating JS code), so no Smi or Root related 69 * macro operations can be used. 70 * 71 * Each call to a C++ method should retain these registers. 
72 * 73 * The stack will have the following content, in some order, indexable from the 74 * frame pointer (see, e.g., kStackHighEnd): 75 * - Isolate* isolate (Address of the current isolate) 76 * - direct_call (if 1, direct call from JavaScript code, if 0 call 77 * through the runtime system) 78 * - stack_area_base (High end of the memory area to use as 79 * backtracking stack) 80 * - int* capture_array (int[num_saved_registers_], for output). 81 * - end of input (Address of end of string) 82 * - start of input (Address of first character in string) 83 * - start index (character index of start) 84 * - String* input_string (input string) 85 * - return address 86 * - backup of callee save registers (rbx, possibly rsi and rdi). 87 * - Offset of location before start of input (effectively character 88 * position -1). Used to initialize capture registers to a non-position. 89 * - At start of string (if 1, we are starting at the start of the 90 * string, otherwise 0) 91 * - register 0 rbp[-n] (Only positions must be stored in the first 92 * - register 1 rbp[-n-8] num_saved_registers_ registers) 93 * - ... 94 * 95 * The first num_saved_registers_ registers are initialized to point to 96 * "character -1" in the string (i.e., char_size() bytes before the first 97 * character of the string). The remaining registers starts out uninitialized. 
98 * 99 * The first seven values must be provided by the calling code by 100 * calling the code's entry address cast to a function pointer with the 101 * following signature: 102 * int (*match)(String* input_string, 103 * int start_index, 104 * Address start, 105 * Address end, 106 * int* capture_output_array, 107 * bool at_start, 108 * byte* stack_area_base, 109 * bool direct_call) 110 */ 111 112 #define __ ACCESS_MASM((&masm_)) 113 114 RegExpMacroAssemblerX64::RegExpMacroAssemblerX64( 115 Mode mode, 116 int registers_to_save) 117 : masm_(Isolate::Current(), NULL, kRegExpCodeSize), 118 no_root_array_scope_(&masm_), 119 code_relative_fixup_positions_(4), 120 mode_(mode), 121 num_registers_(registers_to_save), 122 num_saved_registers_(registers_to_save), 123 entry_label_(), 124 start_label_(), 125 success_label_(), 126 backtrack_label_(), 127 exit_label_() { 128 ASSERT_EQ(0, registers_to_save % 2); 129 __ jmp(&entry_label_); // We'll write the entry code when we know more. 130 __ bind(&start_label_); // And then continue from here. 131 } 132 133 134 RegExpMacroAssemblerX64::~RegExpMacroAssemblerX64() { 135 // Unuse labels in case we throw away the assembler without calling GetCode. 
136 entry_label_.Unuse(); 137 start_label_.Unuse(); 138 success_label_.Unuse(); 139 backtrack_label_.Unuse(); 140 exit_label_.Unuse(); 141 check_preempt_label_.Unuse(); 142 stack_overflow_label_.Unuse(); 143 } 144 145 146 int RegExpMacroAssemblerX64::stack_limit_slack() { 147 return RegExpStack::kStackLimitSlack; 148 } 149 150 151 void RegExpMacroAssemblerX64::AdvanceCurrentPosition(int by) { 152 if (by != 0) { 153 __ addq(rdi, Immediate(by * char_size())); 154 } 155 } 156 157 158 void RegExpMacroAssemblerX64::AdvanceRegister(int reg, int by) { 159 ASSERT(reg >= 0); 160 ASSERT(reg < num_registers_); 161 if (by != 0) { 162 __ addq(register_location(reg), Immediate(by)); 163 } 164 } 165 166 167 void RegExpMacroAssemblerX64::Backtrack() { 168 CheckPreemption(); 169 // Pop Code* offset from backtrack stack, add Code* and jump to location. 170 Pop(rbx); 171 __ addq(rbx, code_object_pointer()); 172 __ jmp(rbx); 173 } 174 175 176 void RegExpMacroAssemblerX64::Bind(Label* label) { 177 __ bind(label); 178 } 179 180 181 void RegExpMacroAssemblerX64::CheckCharacter(uint32_t c, Label* on_equal) { 182 __ cmpl(current_character(), Immediate(c)); 183 BranchOrBacktrack(equal, on_equal); 184 } 185 186 187 void RegExpMacroAssemblerX64::CheckCharacterGT(uc16 limit, Label* on_greater) { 188 __ cmpl(current_character(), Immediate(limit)); 189 BranchOrBacktrack(greater, on_greater); 190 } 191 192 193 void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) { 194 Label not_at_start; 195 // Did we start the match at the start of the string at all? 196 __ cmpl(Operand(rbp, kStartIndex), Immediate(0)); 197 BranchOrBacktrack(not_equal, ¬_at_start); 198 // If we did, are we still at the start of the input? 
199 __ lea(rax, Operand(rsi, rdi, times_1, 0)); 200 __ cmpq(rax, Operand(rbp, kInputStart)); 201 BranchOrBacktrack(equal, on_at_start); 202 __ bind(¬_at_start); 203 } 204 205 206 void RegExpMacroAssemblerX64::CheckNotAtStart(Label* on_not_at_start) { 207 // Did we start the match at the start of the string at all? 208 __ cmpl(Operand(rbp, kStartIndex), Immediate(0)); 209 BranchOrBacktrack(not_equal, on_not_at_start); 210 // If we did, are we still at the start of the input? 211 __ lea(rax, Operand(rsi, rdi, times_1, 0)); 212 __ cmpq(rax, Operand(rbp, kInputStart)); 213 BranchOrBacktrack(not_equal, on_not_at_start); 214 } 215 216 217 void RegExpMacroAssemblerX64::CheckCharacterLT(uc16 limit, Label* on_less) { 218 __ cmpl(current_character(), Immediate(limit)); 219 BranchOrBacktrack(less, on_less); 220 } 221 222 223 void RegExpMacroAssemblerX64::CheckCharacters(Vector<const uc16> str, 224 int cp_offset, 225 Label* on_failure, 226 bool check_end_of_string) { 227 #ifdef DEBUG 228 // If input is ASCII, don't even bother calling here if the string to 229 // match contains a non-ASCII character. 230 if (mode_ == ASCII) { 231 ASSERT(String::IsAscii(str.start(), str.length())); 232 } 233 #endif 234 int byte_length = str.length() * char_size(); 235 int byte_offset = cp_offset * char_size(); 236 if (check_end_of_string) { 237 // Check that there are at least str.length() characters left in the input. 238 __ cmpl(rdi, Immediate(-(byte_offset + byte_length))); 239 BranchOrBacktrack(greater, on_failure); 240 } 241 242 if (on_failure == NULL) { 243 // Instead of inlining a backtrack, (re)use the global backtrack target. 244 on_failure = &backtrack_label_; 245 } 246 247 // Do one character test first to minimize loading for the case that 248 // we don't match at all (loading more than one character introduces that 249 // chance of reading unaligned and reading across cache boundaries). 
250 // If the first character matches, expect a larger chance of matching the 251 // string, and start loading more characters at a time. 252 if (mode_ == ASCII) { 253 __ cmpb(Operand(rsi, rdi, times_1, byte_offset), 254 Immediate(static_cast<int8_t>(str[0]))); 255 } else { 256 // Don't use 16-bit immediate. The size changing prefix throws off 257 // pre-decoding. 258 __ movzxwl(rax, 259 Operand(rsi, rdi, times_1, byte_offset)); 260 __ cmpl(rax, Immediate(static_cast<int32_t>(str[0]))); 261 } 262 BranchOrBacktrack(not_equal, on_failure); 263 264 __ lea(rbx, Operand(rsi, rdi, times_1, 0)); 265 for (int i = 1, n = str.length(); i < n; ) { 266 if (mode_ == ASCII) { 267 if (i + 8 <= n) { 268 uint64_t combined_chars = 269 (static_cast<uint64_t>(str[i + 0]) << 0) || 270 (static_cast<uint64_t>(str[i + 1]) << 8) || 271 (static_cast<uint64_t>(str[i + 2]) << 16) || 272 (static_cast<uint64_t>(str[i + 3]) << 24) || 273 (static_cast<uint64_t>(str[i + 4]) << 32) || 274 (static_cast<uint64_t>(str[i + 5]) << 40) || 275 (static_cast<uint64_t>(str[i + 6]) << 48) || 276 (static_cast<uint64_t>(str[i + 7]) << 56); 277 __ movq(rax, combined_chars, RelocInfo::NONE); 278 __ cmpq(rax, Operand(rbx, byte_offset + i)); 279 i += 8; 280 } else if (i + 4 <= n) { 281 uint32_t combined_chars = 282 (static_cast<uint32_t>(str[i + 0]) << 0) || 283 (static_cast<uint32_t>(str[i + 1]) << 8) || 284 (static_cast<uint32_t>(str[i + 2]) << 16) || 285 (static_cast<uint32_t>(str[i + 3]) << 24); 286 __ cmpl(Operand(rbx, byte_offset + i), Immediate(combined_chars)); 287 i += 4; 288 } else { 289 __ cmpb(Operand(rbx, byte_offset + i), 290 Immediate(static_cast<int8_t>(str[i]))); 291 i++; 292 } 293 } else { 294 ASSERT(mode_ == UC16); 295 if (i + 4 <= n) { 296 uint64_t combined_chars = *reinterpret_cast<const uint64_t*>(&str[i]); 297 __ movq(rax, combined_chars, RelocInfo::NONE); 298 __ cmpq(rax, 299 Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16))); 300 i += 4; 301 } else if (i + 2 <= n) { 302 uint32_t 
combined_chars = *reinterpret_cast<const uint32_t*>(&str[i]); 303 __ cmpl(Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)), 304 Immediate(combined_chars)); 305 i += 2; 306 } else { 307 __ movzxwl(rax, 308 Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16))); 309 __ cmpl(rax, Immediate(str[i])); 310 i++; 311 } 312 } 313 BranchOrBacktrack(not_equal, on_failure); 314 } 315 } 316 317 318 void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) { 319 Label fallthrough; 320 __ cmpl(rdi, Operand(backtrack_stackpointer(), 0)); 321 __ j(not_equal, &fallthrough); 322 Drop(); 323 BranchOrBacktrack(no_condition, on_equal); 324 __ bind(&fallthrough); 325 } 326 327 328 void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase( 329 int start_reg, 330 Label* on_no_match) { 331 Label fallthrough; 332 __ movq(rdx, register_location(start_reg)); // Offset of start of capture 333 __ movq(rbx, register_location(start_reg + 1)); // Offset of end of capture 334 __ subq(rbx, rdx); // Length of capture. 335 336 // ----------------------- 337 // rdx = Start offset of capture. 338 // rbx = Length of capture 339 340 // If length is negative, this code will fail (it's a symptom of a partial or 341 // illegal capture where start of capture after end of capture). 342 // This must not happen (no back-reference can reference a capture that wasn't 343 // closed before in the reg-exp, and we must not generate code that can cause 344 // this condition). 345 346 // If length is zero, either the capture is empty or it is nonparticipating. 347 // In either case succeed immediately. 
348 __ j(equal, &fallthrough); 349 350 if (mode_ == ASCII) { 351 Label loop_increment; 352 if (on_no_match == NULL) { 353 on_no_match = &backtrack_label_; 354 } 355 356 __ lea(r9, Operand(rsi, rdx, times_1, 0)); 357 __ lea(r11, Operand(rsi, rdi, times_1, 0)); 358 __ addq(rbx, r9); // End of capture 359 // --------------------- 360 // r11 - current input character address 361 // r9 - current capture character address 362 // rbx - end of capture 363 364 Label loop; 365 __ bind(&loop); 366 __ movzxbl(rdx, Operand(r9, 0)); 367 __ movzxbl(rax, Operand(r11, 0)); 368 // al - input character 369 // dl - capture character 370 __ cmpb(rax, rdx); 371 __ j(equal, &loop_increment); 372 373 // Mismatch, try case-insensitive match (converting letters to lower-case). 374 // I.e., if or-ing with 0x20 makes values equal and in range 'a'-'z', it's 375 // a match. 376 __ or_(rax, Immediate(0x20)); // Convert match character to lower-case. 377 __ or_(rdx, Immediate(0x20)); // Convert capture character to lower-case. 378 __ cmpb(rax, rdx); 379 __ j(not_equal, on_no_match); // Definitely not equal. 380 __ subb(rax, Immediate('a')); 381 __ cmpb(rax, Immediate('z' - 'a')); 382 __ j(above, on_no_match); // Weren't letters anyway. 383 384 __ bind(&loop_increment); 385 // Increment pointers into match and capture strings. 386 __ addq(r11, Immediate(1)); 387 __ addq(r9, Immediate(1)); 388 // Compare to end of capture, and loop if not done. 389 __ cmpq(r9, rbx); 390 __ j(below, &loop); 391 392 // Compute new value of character position after the matched part. 393 __ movq(rdi, r11); 394 __ subq(rdi, rsi); 395 } else { 396 ASSERT(mode_ == UC16); 397 // Save important/volatile registers before calling C function. 398 #ifndef _WIN64 399 // Caller save on Linux and callee save in Windows. 
400 __ push(rsi); 401 __ push(rdi); 402 #endif 403 __ push(backtrack_stackpointer()); 404 405 static const int num_arguments = 4; 406 __ PrepareCallCFunction(num_arguments); 407 408 // Put arguments into parameter registers. Parameters are 409 // Address byte_offset1 - Address captured substring's start. 410 // Address byte_offset2 - Address of current character position. 411 // size_t byte_length - length of capture in bytes(!) 412 // Isolate* isolate 413 #ifdef _WIN64 414 // Compute and set byte_offset1 (start of capture). 415 __ lea(rcx, Operand(rsi, rdx, times_1, 0)); 416 // Set byte_offset2. 417 __ lea(rdx, Operand(rsi, rdi, times_1, 0)); 418 // Set byte_length. 419 __ movq(r8, rbx); 420 // Isolate. 421 __ LoadAddress(r9, ExternalReference::isolate_address()); 422 #else // AMD64 calling convention 423 // Compute byte_offset2 (current position = rsi+rdi). 424 __ lea(rax, Operand(rsi, rdi, times_1, 0)); 425 // Compute and set byte_offset1 (start of capture). 426 __ lea(rdi, Operand(rsi, rdx, times_1, 0)); 427 // Set byte_offset2. 428 __ movq(rsi, rax); 429 // Set byte_length. 430 __ movq(rdx, rbx); 431 // Isolate. 432 __ LoadAddress(rcx, ExternalReference::isolate_address()); 433 #endif 434 435 { // NOLINT: Can't find a way to open this scope without confusing the 436 // linter. 437 AllowExternalCallThatCantCauseGC scope(&masm_); 438 ExternalReference compare = 439 ExternalReference::re_case_insensitive_compare_uc16(masm_.isolate()); 440 __ CallCFunction(compare, num_arguments); 441 } 442 443 // Restore original values before reacting on result value. 444 __ Move(code_object_pointer(), masm_.CodeObject()); 445 __ pop(backtrack_stackpointer()); 446 #ifndef _WIN64 447 __ pop(rdi); 448 __ pop(rsi); 449 #endif 450 451 // Check if function returned non-zero for success or zero for failure. 452 __ testq(rax, rax); 453 BranchOrBacktrack(zero, on_no_match); 454 // On success, increment position by length of capture. 
455 // Requires that rbx is callee save (true for both Win64 and AMD64 ABIs). 456 __ addq(rdi, rbx); 457 } 458 __ bind(&fallthrough); 459 } 460 461 462 void RegExpMacroAssemblerX64::CheckNotBackReference( 463 int start_reg, 464 Label* on_no_match) { 465 Label fallthrough; 466 467 // Find length of back-referenced capture. 468 __ movq(rdx, register_location(start_reg)); 469 __ movq(rax, register_location(start_reg + 1)); 470 __ subq(rax, rdx); // Length to check. 471 472 // Fail on partial or illegal capture (start of capture after end of capture). 473 // This must not happen (no back-reference can reference a capture that wasn't 474 // closed before in the reg-exp). 475 __ Check(greater_equal, "Invalid capture referenced"); 476 477 // Succeed on empty capture (including non-participating capture) 478 __ j(equal, &fallthrough); 479 480 // ----------------------- 481 // rdx - Start of capture 482 // rax - length of capture 483 484 // Check that there are sufficient characters left in the input. 485 __ movl(rbx, rdi); 486 __ addl(rbx, rax); 487 BranchOrBacktrack(greater, on_no_match); 488 489 // Compute pointers to match string and capture string 490 __ lea(rbx, Operand(rsi, rdi, times_1, 0)); // Start of match. 491 __ addq(rdx, rsi); // Start of capture. 492 __ lea(r9, Operand(rdx, rax, times_1, 0)); // End of capture 493 494 // ----------------------- 495 // rbx - current capture character address. 496 // rbx - current input character address . 497 // r9 - end of input to match (capture length after rbx). 498 499 Label loop; 500 __ bind(&loop); 501 if (mode_ == ASCII) { 502 __ movzxbl(rax, Operand(rdx, 0)); 503 __ cmpb(rax, Operand(rbx, 0)); 504 } else { 505 ASSERT(mode_ == UC16); 506 __ movzxwl(rax, Operand(rdx, 0)); 507 __ cmpw(rax, Operand(rbx, 0)); 508 } 509 BranchOrBacktrack(not_equal, on_no_match); 510 // Increment pointers into capture and match string. 
511 __ addq(rbx, Immediate(char_size())); 512 __ addq(rdx, Immediate(char_size())); 513 // Check if we have reached end of match area. 514 __ cmpq(rdx, r9); 515 __ j(below, &loop); 516 517 // Success. 518 // Set current character position to position after match. 519 __ movq(rdi, rbx); 520 __ subq(rdi, rsi); 521 522 __ bind(&fallthrough); 523 } 524 525 526 void RegExpMacroAssemblerX64::CheckNotRegistersEqual(int reg1, 527 int reg2, 528 Label* on_not_equal) { 529 __ movq(rax, register_location(reg1)); 530 __ cmpq(rax, register_location(reg2)); 531 BranchOrBacktrack(not_equal, on_not_equal); 532 } 533 534 535 void RegExpMacroAssemblerX64::CheckNotCharacter(uint32_t c, 536 Label* on_not_equal) { 537 __ cmpl(current_character(), Immediate(c)); 538 BranchOrBacktrack(not_equal, on_not_equal); 539 } 540 541 542 void RegExpMacroAssemblerX64::CheckCharacterAfterAnd(uint32_t c, 543 uint32_t mask, 544 Label* on_equal) { 545 __ movl(rax, current_character()); 546 __ and_(rax, Immediate(mask)); 547 __ cmpl(rax, Immediate(c)); 548 BranchOrBacktrack(equal, on_equal); 549 } 550 551 552 void RegExpMacroAssemblerX64::CheckNotCharacterAfterAnd(uint32_t c, 553 uint32_t mask, 554 Label* on_not_equal) { 555 __ movl(rax, current_character()); 556 __ and_(rax, Immediate(mask)); 557 __ cmpl(rax, Immediate(c)); 558 BranchOrBacktrack(not_equal, on_not_equal); 559 } 560 561 562 void RegExpMacroAssemblerX64::CheckNotCharacterAfterMinusAnd( 563 uc16 c, 564 uc16 minus, 565 uc16 mask, 566 Label* on_not_equal) { 567 ASSERT(minus < String::kMaxUtf16CodeUnit); 568 __ lea(rax, Operand(current_character(), -minus)); 569 __ and_(rax, Immediate(mask)); 570 __ cmpl(rax, Immediate(c)); 571 BranchOrBacktrack(not_equal, on_not_equal); 572 } 573 574 575 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, 576 Label* on_no_match) { 577 // Range checks (c in min..max) are generally implemented by an unsigned 578 // (c - min) <= (max - min) check, using the sequence: 579 // lea(rax, 
Operand(current_character(), -min)) or sub(rax, Immediate(min)) 580 // cmp(rax, Immediate(max - min)) 581 switch (type) { 582 case 's': 583 // Match space-characters 584 if (mode_ == ASCII) { 585 // ASCII space characters are '\t'..'\r' and ' '. 586 Label success; 587 __ cmpl(current_character(), Immediate(' ')); 588 __ j(equal, &success); 589 // Check range 0x09..0x0d 590 __ lea(rax, Operand(current_character(), -'\t')); 591 __ cmpl(rax, Immediate('\r' - '\t')); 592 BranchOrBacktrack(above, on_no_match); 593 __ bind(&success); 594 return true; 595 } 596 return false; 597 case 'S': 598 // Match non-space characters. 599 if (mode_ == ASCII) { 600 // ASCII space characters are '\t'..'\r' and ' '. 601 __ cmpl(current_character(), Immediate(' ')); 602 BranchOrBacktrack(equal, on_no_match); 603 __ lea(rax, Operand(current_character(), -'\t')); 604 __ cmpl(rax, Immediate('\r' - '\t')); 605 BranchOrBacktrack(below_equal, on_no_match); 606 return true; 607 } 608 return false; 609 case 'd': 610 // Match ASCII digits ('0'..'9') 611 __ lea(rax, Operand(current_character(), -'0')); 612 __ cmpl(rax, Immediate('9' - '0')); 613 BranchOrBacktrack(above, on_no_match); 614 return true; 615 case 'D': 616 // Match non ASCII-digits 617 __ lea(rax, Operand(current_character(), -'0')); 618 __ cmpl(rax, Immediate('9' - '0')); 619 BranchOrBacktrack(below_equal, on_no_match); 620 return true; 621 case '.': { 622 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 623 __ movl(rax, current_character()); 624 __ xor_(rax, Immediate(0x01)); 625 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c 626 __ subl(rax, Immediate(0x0b)); 627 __ cmpl(rax, Immediate(0x0c - 0x0b)); 628 BranchOrBacktrack(below_equal, on_no_match); 629 if (mode_ == UC16) { 630 // Compare original value to 0x2028 and 0x2029, using the already 631 // computed (current_char ^ 0x01 - 0x0b). I.e., check for 632 // 0x201d (0x2028 - 0x0b) or 0x201e. 
633 __ subl(rax, Immediate(0x2028 - 0x0b)); 634 __ cmpl(rax, Immediate(0x2029 - 0x2028)); 635 BranchOrBacktrack(below_equal, on_no_match); 636 } 637 return true; 638 } 639 case 'n': { 640 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 641 __ movl(rax, current_character()); 642 __ xor_(rax, Immediate(0x01)); 643 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c 644 __ subl(rax, Immediate(0x0b)); 645 __ cmpl(rax, Immediate(0x0c - 0x0b)); 646 if (mode_ == ASCII) { 647 BranchOrBacktrack(above, on_no_match); 648 } else { 649 Label done; 650 BranchOrBacktrack(below_equal, &done); 651 // Compare original value to 0x2028 and 0x2029, using the already 652 // computed (current_char ^ 0x01 - 0x0b). I.e., check for 653 // 0x201d (0x2028 - 0x0b) or 0x201e. 654 __ subl(rax, Immediate(0x2028 - 0x0b)); 655 __ cmpl(rax, Immediate(0x2029 - 0x2028)); 656 BranchOrBacktrack(above, on_no_match); 657 __ bind(&done); 658 } 659 return true; 660 } 661 case 'w': { 662 if (mode_ != ASCII) { 663 // Table is 128 entries, so all ASCII characters can be tested. 664 __ cmpl(current_character(), Immediate('z')); 665 BranchOrBacktrack(above, on_no_match); 666 } 667 __ movq(rbx, ExternalReference::re_word_character_map()); 668 ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char. 669 __ testb(Operand(rbx, current_character(), times_1, 0), 670 current_character()); 671 BranchOrBacktrack(zero, on_no_match); 672 return true; 673 } 674 case 'W': { 675 Label done; 676 if (mode_ != ASCII) { 677 // Table is 128 entries, so all ASCII characters can be tested. 678 __ cmpl(current_character(), Immediate('z')); 679 __ j(above, &done); 680 } 681 __ movq(rbx, ExternalReference::re_word_character_map()); 682 ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char. 
683 __ testb(Operand(rbx, current_character(), times_1, 0), 684 current_character()); 685 BranchOrBacktrack(not_zero, on_no_match); 686 if (mode_ != ASCII) { 687 __ bind(&done); 688 } 689 return true; 690 } 691 692 case '*': 693 // Match any character. 694 return true; 695 // No custom implementation (yet): s(UC16), S(UC16). 696 default: 697 return false; 698 } 699 } 700 701 702 void RegExpMacroAssemblerX64::Fail() { 703 ASSERT(FAILURE == 0); // Return value for failure is zero. 704 __ Set(rax, 0); 705 __ jmp(&exit_label_); 706 } 707 708 709 Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { 710 // Finalize code - write the entry point code now we know how many 711 // registers we need. 712 // Entry code: 713 __ bind(&entry_label_); 714 715 // Tell the system that we have a stack frame. Because the type is MANUAL, no 716 // is generated. 717 FrameScope scope(&masm_, StackFrame::MANUAL); 718 719 // Actually emit code to start a new stack frame. 720 __ push(rbp); 721 __ movq(rbp, rsp); 722 // Save parameters and callee-save registers. Order here should correspond 723 // to order of kBackup_ebx etc. 724 #ifdef _WIN64 725 // MSVC passes arguments in rcx, rdx, r8, r9, with backing stack slots. 726 // Store register parameters in pre-allocated stack slots, 727 __ movq(Operand(rbp, kInputString), rcx); 728 __ movq(Operand(rbp, kStartIndex), rdx); // Passed as int32 in edx. 729 __ movq(Operand(rbp, kInputStart), r8); 730 __ movq(Operand(rbp, kInputEnd), r9); 731 // Callee-save on Win64. 732 __ push(rsi); 733 __ push(rdi); 734 __ push(rbx); 735 #else 736 // GCC passes arguments in rdi, rsi, rdx, rcx, r8, r9 (and then on stack). 737 // Push register parameters on stack for reference. 
738 ASSERT_EQ(kInputString, -1 * kPointerSize); 739 ASSERT_EQ(kStartIndex, -2 * kPointerSize); 740 ASSERT_EQ(kInputStart, -3 * kPointerSize); 741 ASSERT_EQ(kInputEnd, -4 * kPointerSize); 742 ASSERT_EQ(kRegisterOutput, -5 * kPointerSize); 743 ASSERT_EQ(kStackHighEnd, -6 * kPointerSize); 744 __ push(rdi); 745 __ push(rsi); 746 __ push(rdx); 747 __ push(rcx); 748 __ push(r8); 749 __ push(r9); 750 751 __ push(rbx); // Callee-save 752 #endif 753 754 __ push(Immediate(0)); // Make room for "at start" constant. 755 756 // Check if we have space on the stack for registers. 757 Label stack_limit_hit; 758 Label stack_ok; 759 760 ExternalReference stack_limit = 761 ExternalReference::address_of_stack_limit(masm_.isolate()); 762 __ movq(rcx, rsp); 763 __ movq(kScratchRegister, stack_limit); 764 __ subq(rcx, Operand(kScratchRegister, 0)); 765 // Handle it if the stack pointer is already below the stack limit. 766 __ j(below_equal, &stack_limit_hit); 767 // Check if there is room for the variable number of registers above 768 // the stack limit. 769 __ cmpq(rcx, Immediate(num_registers_ * kPointerSize)); 770 __ j(above_equal, &stack_ok); 771 // Exit with OutOfMemory exception. There is not enough space on the stack 772 // for our working registers. 773 __ Set(rax, EXCEPTION); 774 __ jmp(&exit_label_); 775 776 __ bind(&stack_limit_hit); 777 __ Move(code_object_pointer(), masm_.CodeObject()); 778 CallCheckStackGuardState(); // Preserves no registers beside rbp and rsp. 779 __ testq(rax, rax); 780 // If returned value is non-zero, we exit with the returned value as result. 781 __ j(not_zero, &exit_label_); 782 783 __ bind(&stack_ok); 784 785 // Allocate space on stack for registers. 786 __ subq(rsp, Immediate(num_registers_ * kPointerSize)); 787 // Load string length. 788 __ movq(rsi, Operand(rbp, kInputEnd)); 789 // Load input position. 790 __ movq(rdi, Operand(rbp, kInputStart)); 791 // Set up rdi to be negative offset from string end. 
792 __ subq(rdi, rsi); 793 // Set rax to address of char before start of the string 794 // (effectively string position -1). 795 __ movq(rbx, Operand(rbp, kStartIndex)); 796 __ neg(rbx); 797 if (mode_ == UC16) { 798 __ lea(rax, Operand(rdi, rbx, times_2, -char_size())); 799 } else { 800 __ lea(rax, Operand(rdi, rbx, times_1, -char_size())); 801 } 802 // Store this value in a local variable, for use when clearing 803 // position registers. 804 __ movq(Operand(rbp, kInputStartMinusOne), rax); 805 806 if (num_saved_registers_ > 0) { 807 // Fill saved registers with initial value = start offset - 1 808 // Fill in stack push order, to avoid accessing across an unwritten 809 // page (a problem on Windows). 810 __ Set(rcx, kRegisterZero); 811 Label init_loop; 812 __ bind(&init_loop); 813 __ movq(Operand(rbp, rcx, times_1, 0), rax); 814 __ subq(rcx, Immediate(kPointerSize)); 815 __ cmpq(rcx, 816 Immediate(kRegisterZero - num_saved_registers_ * kPointerSize)); 817 __ j(greater, &init_loop); 818 } 819 // Ensure that we have written to each stack page, in order. Skipping a page 820 // on Windows can cause segmentation faults. Assuming page size is 4k. 821 const int kPageSize = 4096; 822 const int kRegistersPerPage = kPageSize / kPointerSize; 823 for (int i = num_saved_registers_ + kRegistersPerPage - 1; 824 i < num_registers_; 825 i += kRegistersPerPage) { 826 __ movq(register_location(i), rax); // One write every page. 827 } 828 829 // Initialize backtrack stack pointer. 830 __ movq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd)); 831 // Initialize code object pointer. 832 __ Move(code_object_pointer(), masm_.CodeObject()); 833 // Load previous char as initial value of current-character. 834 Label at_start; 835 __ cmpb(Operand(rbp, kStartIndex), Immediate(0)); 836 __ j(equal, &at_start); 837 LoadCurrentCharacterUnchecked(-1, 1); // Load previous char. 
838 __ jmp(&start_label_); 839 __ bind(&at_start); 840 __ Set(current_character(), '\n'); 841 __ jmp(&start_label_); 842 843 844 // Exit code: 845 if (success_label_.is_linked()) { 846 // Save captures when successful. 847 __ bind(&success_label_); 848 if (num_saved_registers_ > 0) { 849 // copy captures to output 850 __ movq(rdx, Operand(rbp, kStartIndex)); 851 __ movq(rbx, Operand(rbp, kRegisterOutput)); 852 __ movq(rcx, Operand(rbp, kInputEnd)); 853 __ subq(rcx, Operand(rbp, kInputStart)); 854 if (mode_ == UC16) { 855 __ lea(rcx, Operand(rcx, rdx, times_2, 0)); 856 } else { 857 __ addq(rcx, rdx); 858 } 859 for (int i = 0; i < num_saved_registers_; i++) { 860 __ movq(rax, register_location(i)); 861 __ addq(rax, rcx); // Convert to index from start, not end. 862 if (mode_ == UC16) { 863 __ sar(rax, Immediate(1)); // Convert byte index to character index. 864 } 865 __ movl(Operand(rbx, i * kIntSize), rax); 866 } 867 } 868 __ Set(rax, SUCCESS); 869 } 870 871 // Exit and return rax 872 __ bind(&exit_label_); 873 874 #ifdef _WIN64 875 // Restore callee save registers. 876 __ lea(rsp, Operand(rbp, kLastCalleeSaveRegister)); 877 __ pop(rbx); 878 __ pop(rdi); 879 __ pop(rsi); 880 // Stack now at rbp. 881 #else 882 // Restore callee save register. 883 __ movq(rbx, Operand(rbp, kBackup_rbx)); 884 // Skip rsp to rbp. 885 __ movq(rsp, rbp); 886 #endif 887 // Exit function frame, restore previous one. 888 __ pop(rbp); 889 __ ret(0); 890 891 // Backtrack code (branch target for conditional backtracks). 892 if (backtrack_label_.is_linked()) { 893 __ bind(&backtrack_label_); 894 Backtrack(); 895 } 896 897 Label exit_with_exception; 898 899 // Preempt-code 900 if (check_preempt_label_.is_linked()) { 901 SafeCallTarget(&check_preempt_label_); 902 903 __ push(backtrack_stackpointer()); 904 __ push(rdi); 905 906 CallCheckStackGuardState(); 907 __ testq(rax, rax); 908 // If returning non-zero, we should end execution with the given 909 // result as return value. 
910 __ j(not_zero, &exit_label_); 911 912 // Restore registers. 913 __ Move(code_object_pointer(), masm_.CodeObject()); 914 __ pop(rdi); 915 __ pop(backtrack_stackpointer()); 916 // String might have moved: Reload esi from frame. 917 __ movq(rsi, Operand(rbp, kInputEnd)); 918 SafeReturn(); 919 } 920 921 // Backtrack stack overflow code. 922 if (stack_overflow_label_.is_linked()) { 923 SafeCallTarget(&stack_overflow_label_); 924 // Reached if the backtrack-stack limit has been hit. 925 926 Label grow_failed; 927 // Save registers before calling C function 928 #ifndef _WIN64 929 // Callee-save in Microsoft 64-bit ABI, but not in AMD64 ABI. 930 __ push(rsi); 931 __ push(rdi); 932 #endif 933 934 // Call GrowStack(backtrack_stackpointer()) 935 static const int num_arguments = 3; 936 __ PrepareCallCFunction(num_arguments); 937 #ifdef _WIN64 938 // Microsoft passes parameters in rcx, rdx, r8. 939 // First argument, backtrack stackpointer, is already in rcx. 940 __ lea(rdx, Operand(rbp, kStackHighEnd)); // Second argument 941 __ LoadAddress(r8, ExternalReference::isolate_address()); 942 #else 943 // AMD64 ABI passes parameters in rdi, rsi, rdx. 944 __ movq(rdi, backtrack_stackpointer()); // First argument. 945 __ lea(rsi, Operand(rbp, kStackHighEnd)); // Second argument. 946 __ LoadAddress(rdx, ExternalReference::isolate_address()); 947 #endif 948 ExternalReference grow_stack = 949 ExternalReference::re_grow_stack(masm_.isolate()); 950 __ CallCFunction(grow_stack, num_arguments); 951 // If return NULL, we have failed to grow the stack, and 952 // must exit with a stack-overflow exception. 953 __ testq(rax, rax); 954 __ j(equal, &exit_with_exception); 955 // Otherwise use return value as new stack pointer. 956 __ movq(backtrack_stackpointer(), rax); 957 // Restore saved registers and continue. 
__ Move(code_object_pointer(), masm_.CodeObject());
#ifndef _WIN64
    __ pop(rdi);
    __ pop(rsi);
#endif
    SafeReturn();
  }

  if (exit_with_exception.is_linked()) {
    // If any of the code above needed to exit with an exception.
    __ bind(&exit_with_exception);
    // Exit with Result EXCEPTION(-1) to signal thrown exception.
    __ Set(rax, EXCEPTION);
    __ jmp(&exit_label_);
  }

  FixupCodeRelativePositions();

  CodeDesc code_desc;
  masm_.GetCode(&code_desc);
  Isolate* isolate = ISOLATE;
  Handle<Code> code = isolate->factory()->NewCode(
      code_desc, Code::ComputeFlags(Code::REGEXP),
      masm_.CodeObject());
  PROFILE(isolate, RegExpCodeCreateEvent(*code, *source));
  return Handle<HeapObject>::cast(code);
}


// Emits an unconditional transfer of control to |to|. Delegates to
// BranchOrBacktrack with no_condition, so a NULL |to| produces a backtrack
// instead of a jump.
void RegExpMacroAssemblerX64::GoTo(Label* to) {
  BranchOrBacktrack(no_condition, to);
}


// Emits a 64-bit compare of RegExp register |reg| (its slot in the frame)
// with the immediate |comparand|, followed by a greater_equal branch to
// |if_ge| (or to the backtrack code when |if_ge| is NULL).
void RegExpMacroAssemblerX64::IfRegisterGE(int reg,
                                           int comparand,
                                           Label* if_ge) {
  __ cmpq(register_location(reg), Immediate(comparand));
  BranchOrBacktrack(greater_equal, if_ge);
}


// Same comparison as IfRegisterGE, but branches to |if_lt| (or backtracks
// when it is NULL) on the "less" condition.
void RegExpMacroAssemblerX64::IfRegisterLT(int reg,
                                           int comparand,
                                           Label* if_lt) {
  __ cmpq(register_location(reg), Immediate(comparand));
  BranchOrBacktrack(less, if_lt);
}


// Emits a compare of rdi (the current input position, see the register
// convention at the top of this file) with the value stored in RegExp
// register |reg| and branches to |if_eq| (or backtracks) when they are equal.
void RegExpMacroAssemblerX64::IfRegisterEqPos(int reg,
                                              Label* if_eq) {
  __ cmpq(rdi, register_location(reg));
  BranchOrBacktrack(equal, if_eq);
}


// Identifies this macro assembler as the x64 native implementation.
RegExpMacroAssembler::IrregexpImplementation
    RegExpMacroAssemblerX64::Implementation() {
  return kX64Implementation;
}


// Emits code that loads |characters| characters, starting |cp_offset|
// characters from the current position, into the current-character register.
// When |check_bounds| is true, a position check for the last character to be
// loaded (cp_offset + characters - 1) is emitted first; it branches to
// |on_end_of_input| (or backtracks when that is NULL) if the check fails.
void RegExpMacroAssemblerX64::LoadCurrentCharacter(int cp_offset,
                                                   Label* on_end_of_input,
                                                   bool check_bounds,
                                                   int characters) {
  ASSERT(cp_offset >= -1);      // ^ and \b can look behind one character.
  ASSERT(cp_offset < (1<<30));  // Be sane! (And ensure negation works)
  if (check_bounds) {
    CheckPosition(cp_offset + characters - 1, on_end_of_input);
  }
  LoadCurrentCharacterUnchecked(cp_offset, characters);
}


// Emits code that pops the top backtrack-stack entry into rdi, the current
// input position. Pop() sign-extends the 32-bit entry to 64 bits.
void RegExpMacroAssemblerX64::PopCurrentPosition() {
  Pop(rdi);
}


// Emits code that pops the top backtrack-stack entry (via rax) into the
// frame slot of RegExp register |register_index|.
void RegExpMacroAssemblerX64::PopRegister(int register_index) {
  Pop(rax);
  __ movq(register_location(register_index), rax);
}


// Emits code that pushes the code-relative position of |label| on the
// backtrack stack, followed by a check of the backtrack-stack limit.
void RegExpMacroAssemblerX64::PushBacktrack(Label* label) {
  Push(label);
  CheckStackLimit();
}


// Emits code that pushes rdi (the current input position) on the backtrack
// stack. No stack-limit check is emitted here.
void RegExpMacroAssemblerX64::PushCurrentPosition() {
  Push(rdi);
}


// Emits code that pushes the value of RegExp register |register_index| on the
// backtrack stack (through rax). A backtrack-stack limit check is emitted
// only when |check_stack_limit| is non-zero.
void RegExpMacroAssemblerX64::PushRegister(int register_index,
                                           StackCheckFlag check_stack_limit) {
  __ movq(rax, register_location(register_index));
  Push(rax);
  if (check_stack_limit) CheckStackLimit();
}


// Emits code that sets rdi (the current input position) from RegExp register
// |reg|.
void RegExpMacroAssemblerX64::ReadCurrentPositionFromRegister(int reg) {
  __ movq(rdi, register_location(reg));
}


// Emits code that restores the backtrack stack pointer from RegExp register
// |reg|. The register holds an offset relative to the value stored at
// kStackHighEnd (see WriteStackPointerToRegister), so that value is added
// back to obtain an absolute pointer.
void RegExpMacroAssemblerX64::ReadStackPointerFromRegister(int reg) {
  __ movq(backtrack_stackpointer(), register_location(reg));
  __ addq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
}


// Emits code that makes sure the current position is at most |by| characters
// before the end of input. rdi is a negative byte offset from the end, so if
// rdi is already >= -by * char_size() nothing is changed. Otherwise rdi is
// set to exactly that offset and the character before the new position is
// reloaded.
void RegExpMacroAssemblerX64::SetCurrentPositionFromEnd(int by) {
  Label after_position;
  __ cmpq(rdi, Immediate(-by * char_size()));
  __ j(greater_equal, &after_position, Label::kNear);
  __ movq(rdi, Immediate(-by * char_size()));
  // On RegExp code entry (where this operation is used), the character before
  // the current position is expected to be already loaded.
  // We have advanced the position, so it's safe to read backwards.
  LoadCurrentCharacterUnchecked(-1, 1);
  __ bind(&after_position);
}


// Emits code that stores the immediate |to| into RegExp register
// |register_index|. Only registers at or above num_saved_registers_ may be
// written this way; the lower ones hold capture positions.
void RegExpMacroAssemblerX64::SetRegister(int register_index, int to) {
  ASSERT(register_index >= num_saved_registers_);  // Reserved for positions!
  __ movq(register_location(register_index), Immediate(to));
}


// Emits a jump to the success exit code bound at success_label_.
void RegExpMacroAssemblerX64::Succeed() {
  __ jmp(&success_label_);
}


// Emits code that stores the current input position, displaced by
// |cp_offset| characters, into RegExp register |reg|. The displacement is
// converted to bytes with char_size(); rax is used as scratch when the
// offset is non-zero.
void RegExpMacroAssemblerX64::WriteCurrentPositionToRegister(int reg,
                                                             int cp_offset) {
  if (cp_offset == 0) {
    __ movq(register_location(reg), rdi);
  } else {
    __ lea(rax, Operand(rdi, cp_offset * char_size()));
    __ movq(register_location(reg), rax);
  }
}


// Emits code that resets the RegExp registers |reg_from|..|reg_to|
// (inclusive) to the value saved in the frame slot kInputStartMinusOne.
// Clobbers rax.
void RegExpMacroAssemblerX64::ClearRegisters(int reg_from, int reg_to) {
  ASSERT(reg_from <= reg_to);
  __ movq(rax, Operand(rbp, kInputStartMinusOne));
  for (int reg = reg_from; reg <= reg_to; reg++) {
    __ movq(register_location(reg), rax);
  }
}


// Emits code that saves the backtrack stack pointer in RegExp register |reg|
// as an offset from the value stored at kStackHighEnd rather than as an
// absolute address (ReadStackPointerFromRegister performs the inverse).
// Clobbers rax.
void RegExpMacroAssemblerX64::WriteStackPointerToRegister(int reg) {
  __ movq(rax, backtrack_stackpointer());
  __ subq(rax, Operand(rbp, kStackHighEnd));
  __ movq(register_location(reg), rax);
}


// Private methods:

// Emits a three-argument C call to the function referenced by
// ExternalReference::re_check_stack_guard_state (presumably
// CheckStackGuardState below). The arguments are set up for either the
// Microsoft x64 or the AMD64 calling convention:
//   1. the address just below rsp, where the call will place its return
//      address,
//   2. the code object pointer,
//   3. the RegExp frame pointer (rbp).
void RegExpMacroAssemblerX64::CallCheckStackGuardState() {
  // This function call preserves no register values. Caller should
  // store anything volatile in a C call or overwritten by this function.
  static const int num_arguments = 3;
  __ PrepareCallCFunction(num_arguments);
#ifdef _WIN64
  // Second argument: Code* of self. (Do this before overwriting r8).
  __ movq(rdx, code_object_pointer());
  // Third argument: RegExp code frame pointer.
  __ movq(r8, rbp);
  // First argument: Next address on the stack (will be address of
  // return address).
  __ lea(rcx, Operand(rsp, -kPointerSize));
#else
  // Third argument: RegExp code frame pointer.
  __ movq(rdx, rbp);
  // Second argument: Code* of self.
  __ movq(rsi, code_object_pointer());
  // First argument: Next address on the stack (will be address of
  // return address).
  __ lea(rdi, Operand(rsp, -kPointerSize));
#endif
  ExternalReference stack_check =
      ExternalReference::re_check_stack_guard_state(masm_.isolate());
  __ CallCFunction(stack_check, num_arguments);
}


// Helper function for reading a value out of a stack frame.
// Returns a reference of type T to the memory at re_frame + frame_offset, so
// the entry can be both read and assigned. The reference is produced by
// reinterpreting the int32 reference from Memory::int32_at; T is also
// instantiated with pointer types below.
template <typename T>
static T& frame_entry(Address re_frame, int frame_offset) {
  return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
}


// Runtime (C++) part of the stack guard check made by generated RegExp code.
//
//   return_address: location of the return address into |re_code|; it is
//                   patched in place if the code object moves.
//   re_code:        the executing RegExp code object (raw pointer).
//   re_frame:       frame pointer of the RegExp frame, used to read and
//                   update frame entries through frame_entry<>.
//
// Returns EXCEPTION on a real stack overflow or when handling the interrupt
// produced an exception, RETRY when matching has to be restarted (direct call
// from JavaScript, or the subject changed between ASCII and UC16), and 0 when
// execution of the generated code can continue.
int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
                                                  Code* re_code,
                                                  Address re_frame) {
  Isolate* isolate = frame_entry<Isolate*>(re_frame, kIsolate);
  ASSERT(isolate == Isolate::Current());
  if (isolate->stack_guard()->IsStackOverflow()) {
    isolate->StackOverflow();
    return EXCEPTION;
  }

  // If not real stack overflow the stack guard was used to interrupt
  // execution for another purpose.

  // If this is a direct call from JavaScript retry the RegExp forcing the call
  // through the runtime system. Currently the direct call cannot handle a GC.
  if (frame_entry<int>(re_frame, kDirectCall) == 1) {
    return RETRY;
  }

  // Prepare for possible GC.
  // The code object and the subject string are held in handles from here on;
  // |re_code| itself stays a raw pointer and is only compared against the
  // handle afterwards to detect that the code object has moved.
  HandleScope handles(isolate);
  Handle<Code> code_handle(re_code);

  Handle<String> subject(frame_entry<String*>(re_frame, kInputString));

  // Current string.
  // Remember the representation before the interrupt is handled so that a
  // change can be detected afterwards.
  bool is_ascii = subject->IsAsciiRepresentationUnderneath();

  ASSERT(re_code->instruction_start() <= *return_address);
  ASSERT(*return_address <=
      re_code->instruction_start() + re_code->instruction_size());

  MaybeObject* result = Execution::HandleStackGuardInterrupt(isolate);

  if (*code_handle != re_code) {  // Return address no longer valid
    intptr_t delta = code_handle->address() - re_code->address();
    // Overwrite the return address on the stack.
    *return_address += delta;
  }

  if (result->IsException()) {
    return EXCEPTION;
  }

  Handle<String> subject_tmp = subject;
  int slice_offset = 0;

  // Extract the underlying string and the slice offset.
  if (StringShape(*subject_tmp).IsCons()) {
    subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
  } else if (StringShape(*subject_tmp).IsSliced()) {
    SlicedString* slice = SlicedString::cast(*subject_tmp);
    subject_tmp = Handle<String>(slice->parent());
    slice_offset = slice->offset();
  }

  // String might have changed.
  if (subject_tmp->IsAsciiRepresentation() != is_ascii) {
    // If we changed between an ASCII and an UC16 string, the specialized
    // code cannot be used, and we need to restart regexp matching from
    // scratch (including, potentially, compiling a new version of the code).
    return RETRY;
  }

  // Otherwise, the content of the string might have moved. It must still
  // be a sequential or external string with the same content.
  // Update the start and end pointers in the stack frame to the current
  // location (whether it has actually moved or not).
  ASSERT(StringShape(*subject_tmp).IsSequential() ||
      StringShape(*subject_tmp).IsExternal());

  // The original start address of the characters to match.
  const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);

  // Find the current start address of the same character at the current string
  // position.
  int start_index = frame_entry<int>(re_frame, kStartIndex);
  const byte* new_address = StringCharacterPosition(*subject_tmp,
                                                    start_index + slice_offset);

  if (start_address != new_address) {
    // If there is a difference, update the object pointer and start and end
    // addresses in the RegExp stack frame to match the new value.
    // The byte length of the input is unchanged, so the new end address is
    // the new start address plus the old length.
    const byte* end_address = frame_entry<const byte* >(re_frame, kInputEnd);
    int byte_length = static_cast<int>(end_address - start_address);
    frame_entry<const String*>(re_frame, kInputString) = *subject;
    frame_entry<const byte*>(re_frame, kInputStart) = new_address;
    frame_entry<const byte*>(re_frame, kInputEnd) = new_address + byte_length;
  } else if (frame_entry<const String*>(re_frame, kInputString) != *subject) {
    // Subject string might have been a ConsString that underwent
    // short-circuiting during GC. That will not change start_address but
    // will change pointer inside the subject handle.
    frame_entry<const String*>(re_frame, kInputString) = *subject;
  }

  return 0;
}


// Returns the rbp-relative operand of RegExp register |register_index|.
// Registers are laid out downwards from kRegisterZero, one pointer-sized slot
// each. As a side effect, num_registers_ is raised so that it always exceeds
// the highest register index requested so far.
Operand RegExpMacroAssemblerX64::register_location(int register_index) {
  ASSERT(register_index < (1<<30));
  if (num_registers_ <= register_index) {
    num_registers_ = register_index + 1;
  }
  return Operand(rbp, kRegisterZero - register_index * kPointerSize);
}


// Emits a check that the character |cp_offset| characters from the current
// position is inside the input. rdi is a negative byte offset from the end of
// input, so the position is outside when rdi >= -cp_offset * char_size(); in
// that case control goes to |on_outside_input| (or to the backtrack code when
// it is NULL). The compare is done on the low 32 bits of rdi.
void RegExpMacroAssemblerX64::CheckPosition(int cp_offset,
                                            Label* on_outside_input) {
  __ cmpl(rdi, Immediate(-cp_offset * char_size()));
  BranchOrBacktrack(greater_equal, on_outside_input);
}


// Emits a branch to |to| taken when |condition| holds. A negative |condition|
// means "no condition" and produces an unconditional transfer. A NULL |to|
// means "backtrack": unconditionally this inlines Backtrack(), conditionally
// it jumps to the shared backtrack_label_.
void RegExpMacroAssemblerX64::BranchOrBacktrack(Condition condition,
                                                Label* to) {
  if (condition < 0) {  // No condition
    if (to == NULL) {
      Backtrack();
      return;
    }
    __ jmp(to);
    return;
  }
  if (to == NULL) {
    __ j(condition, &backtrack_label_);
    return;
  }
  __ j(condition, to);
}


// Emits a call to |to|. The target is expected to start with SafeCallTarget
// and to return with SafeReturn, as the handlers for check_preempt_label_
// and stack_overflow_label_ do.
void RegExpMacroAssemblerX64::SafeCall(Label* to) {
  __ call(to);
}


// Binds |label| as the entry of a SafeCall target and emits code that
// subtracts the code object pointer from the return address on top of the
// stack, leaving a code-object-relative return address there.
void RegExpMacroAssemblerX64::SafeCallTarget(Label* label) {
  __ bind(label);
  __ subq(Operand(rsp, 0), code_object_pointer());
}


// Emits the return matching SafeCallTarget: adds the (current) code object
// pointer back onto the relative return address on top of the stack and
// returns.
void RegExpMacroAssemblerX64::SafeReturn() {
  __ addq(Operand(rsp, 0), code_object_pointer());
  __ ret(0);
}


// Emits code that pushes the low 32 bits of |source| on the backtrack stack,
// which grows downwards in kIntSize steps. |source| must not be the backtrack
// stack pointer itself.
void RegExpMacroAssemblerX64::Push(Register source) {
  ASSERT(!source.is(backtrack_stackpointer()));
  // Notice: This updates flags, unlike normal Push.
  __ subq(backtrack_stackpointer(), Immediate(kIntSize));
  __ movl(Operand(backtrack_stackpointer(), 0), source);
}


// Emits code that pushes the 32-bit immediate |value| on the backtrack stack.
void RegExpMacroAssemblerX64::Push(Immediate value) {
  // Notice: This updates flags, unlike normal Push.
  __ subq(backtrack_stackpointer(), Immediate(kIntSize));
  __ movl(Operand(backtrack_stackpointer(), 0), value);
}


// Patches every 32-bit label offset recorded in
// code_relative_fixup_positions_ so that it becomes relative to the (tagged)
// Code object pointer, then clears the list of recorded positions.
void RegExpMacroAssemblerX64::FixupCodeRelativePositions() {
  for (int i = 0, n = code_relative_fixup_positions_.length(); i < n; i++) {
    int position = code_relative_fixup_positions_[i];
    // The position succeeds a relative label offset from position.
    // Patch the relative offset to be relative to the Code object pointer
    // instead.
    int patch_position = position - kIntSize;
    int offset = masm_.long_at(patch_position);
    masm_.long_at_put(patch_position,
                      offset
                      + position
                      + Code::kHeaderSize
                      - kHeapObjectTag);
  }
  code_relative_fixup_positions_.Clear();
}


// Emits code that pushes the position of |backtrack_target| on the backtrack
// stack. The emitted 32-bit value is recorded through
// MarkPositionForCodeRelativeFixup(); FixupCodeRelativePositions() patches
// the positions it iterates over to be relative to the Code object pointer.
void RegExpMacroAssemblerX64::Push(Label* backtrack_target) {
  __ subq(backtrack_stackpointer(), Immediate(kIntSize));
  __ movl(Operand(backtrack_stackpointer(), 0), backtrack_target);
  MarkPositionForCodeRelativeFixup();
}


// Emits code that pops the top 32-bit backtrack-stack entry into |target|,
// sign-extending it to 64 bits. |target| must not be the backtrack stack
// pointer itself.
void RegExpMacroAssemblerX64::Pop(Register target) {
  ASSERT(!target.is(backtrack_stackpointer()));
  __ movsxlq(target, Operand(backtrack_stackpointer(), 0));
  // Notice: This updates flags, unlike normal Pop.
  __ addq(backtrack_stackpointer(), Immediate(kIntSize));
}


// Emits code that discards the top backtrack-stack entry without reading it.
void RegExpMacroAssemblerX64::Drop() {
  __ addq(backtrack_stackpointer(), Immediate(kIntSize));
}


// Emits a check of the C stack pointer against the isolate's stack limit.
// If rsp is above the limit execution continues inline; otherwise the
// preemption handler at check_preempt_label_ is called. Clobbers rax.
void RegExpMacroAssemblerX64::CheckPreemption() {
  // Check for preemption.
  Label no_preempt;
  ExternalReference stack_limit =
      ExternalReference::address_of_stack_limit(masm_.isolate());
  __ load_rax(stack_limit);
  __ cmpq(rsp, rax);
  __ j(above, &no_preempt);

  SafeCall(&check_preempt_label_);

  __ bind(&no_preempt);
}


// Emits a check of the backtrack stack pointer against the RegExp stack
// limit. If the pointer is above the limit execution continues inline;
// otherwise the overflow handler at stack_overflow_label_ is called.
// Clobbers rax.
void RegExpMacroAssemblerX64::CheckStackLimit() {
  Label no_stack_overflow;
  ExternalReference stack_limit =
      ExternalReference::address_of_regexp_stack_limit(masm_.isolate());
  __ load_rax(stack_limit);
  __ cmpq(backtrack_stackpointer(), rax);
  __ j(above, &no_stack_overflow);

  SafeCall(&stack_overflow_label_);

  __ bind(&no_stack_overflow);
}


// Emits the load of |characters| characters, starting |cp_offset| characters
// from the current position, into the current-character register without any
// bounds check. The address is rsi + rdi plus the byte displacement of
// |cp_offset|. Supported widths are 1, 2 or 4 characters in ASCII mode and
// 1 or 2 characters in UC16 mode; loads narrower than 32 bits are
// zero-extended.
void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset,
                                                            int characters) {
  if (mode_ == ASCII) {
    if (characters == 4) {
      __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
    } else if (characters == 2) {
      __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
    } else {
      ASSERT(characters == 1);
      __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
    }
  } else {
    ASSERT(mode_ == UC16);
    // NOTE(review): cp_offset * sizeof(uc16) is computed in an unsigned type;
    // for cp_offset == -1 the result relies on the conversion back to the
    // operand's displacement type - confirm that parameter is a signed
    // 32-bit integer.
    if (characters == 2) {
      __ movl(current_character(),
              Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
    } else {
      ASSERT(characters == 1);
      __ movzxwl(current_character(),
                 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
    }
  }
}

#undef __

#endif  // V8_INTERPRETED_REGEXP

}}  // namespace v8::internal

#endif  // V8_TARGET_ARCH_X64