1 // Copyright 2013 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #if V8_TARGET_ARCH_ARM64 6 7 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h" 8 9 #include "src/code-stubs.h" 10 #include "src/log.h" 11 #include "src/macro-assembler.h" 12 #include "src/regexp/regexp-macro-assembler.h" 13 #include "src/regexp/regexp-stack.h" 14 #include "src/unicode.h" 15 16 namespace v8 { 17 namespace internal { 18 19 #ifndef V8_INTERPRETED_REGEXP 20 /* 21 * This assembler uses the following register assignment convention: 22 * - w19 : Used to temporarely store a value before a call to C code. 23 * See CheckNotBackReferenceIgnoreCase. 24 * - x20 : Pointer to the current code object (Code*), 25 * it includes the heap object tag. 26 * - w21 : Current position in input, as negative offset from 27 * the end of the string. Please notice that this is 28 * the byte offset, not the character offset! 29 * - w22 : Currently loaded character. Must be loaded using 30 * LoadCurrentCharacter before using any of the dispatch methods. 31 * - x23 : Points to tip of backtrack stack. 32 * - w24 : Position of the first character minus one: non_position_value. 33 * Used to initialize capture registers. 34 * - x25 : Address at the end of the input string: input_end. 35 * Points to byte after last character in input. 36 * - x26 : Address at the start of the input string: input_start. 37 * - w27 : Where to start in the input string. 38 * - x28 : Output array pointer. 39 * - x29/fp : Frame pointer. Used to access arguments, local variables and 40 * RegExp registers. 41 * - x16/x17 : IP registers, used by assembler. Very volatile. 42 * - csp : Points to tip of C stack. 43 * 44 * - x0-x7 : Used as a cache to store 32 bit capture registers. These 45 * registers need to be retained every time a call to C code 46 * is done. 47 * 48 * The remaining registers are free for computations. 49 * Each call to a public method should retain this convention. 50 * 51 * The stack will have the following structure: 52 * 53 * Location Name Description 54 * (as referred to in 55 * the code) 56 * 57 * - fp[104] isolate Address of the current isolate. 58 * - fp[96] return_address Secondary link/return address 59 * used by an exit frame if this is a 60 * native call. 61 * ^^^ csp when called ^^^ 62 * - fp[88] lr Return from the RegExp code. 63 * - fp[80] r29 Old frame pointer (CalleeSaved). 64 * - fp[0..72] r19-r28 Backup of CalleeSaved registers. 65 * - fp[-8] direct_call 1 => Direct call from JavaScript code. 66 * 0 => Call through the runtime system. 67 * - fp[-16] stack_base High end of the memory area to use as 68 * the backtracking stack. 69 * - fp[-24] output_size Output may fit multiple sets of matches. 70 * - fp[-32] input Handle containing the input string. 71 * - fp[-40] success_counter 72 * ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^ 73 * - fp[-44] register N Capture registers initialized with 74 * - fp[-48] register N + 1 non_position_value. 75 * ... The first kNumCachedRegisters (N) registers 76 * ... are cached in x0 to x7. 77 * ... Only positions must be stored in the first 78 * - ... num_saved_registers_ registers. 79 * - ... 80 * - register N + num_registers - 1 81 * ^^^^^^^^^ csp ^^^^^^^^^ 82 * 83 * The first num_saved_registers_ registers are initialized to point to 84 * "character -1" in the string (i.e., char_size() bytes before the first 85 * character of the string). The remaining registers start out as garbage. 86 * 87 * The data up to the return address must be placed there by the calling 88 * code and the remaining arguments are passed in registers, e.g. by calling the 89 * code entry as cast to a function with the signature: 90 * int (*match)(String* input, 91 * int start_offset, 92 * Address input_start, 93 * Address input_end, 94 * int* output, 95 * int output_size, 96 * Address stack_base, 97 * bool direct_call = false, 98 * Address secondary_return_address, // Only used by native call. 99 * Isolate* isolate) 100 * The call is performed by NativeRegExpMacroAssembler::Execute() 101 * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro 102 * in arm64/simulator-arm64.h. 103 * When calling as a non-direct call (i.e., from C++ code), the return address 104 * area is overwritten with the LR register by the RegExp code. When doing a 105 * direct call from generated code, the return address is placed there by 106 * the calling code, as in a normal exit frame. 107 */ 108 109 #define __ ACCESS_MASM(masm_) 110 111 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate, 112 Zone* zone, Mode mode, 113 int registers_to_save) 114 : NativeRegExpMacroAssembler(isolate, zone), 115 masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize, 116 CodeObjectRequired::kYes)), 117 mode_(mode), 118 num_registers_(registers_to_save), 119 num_saved_registers_(registers_to_save), 120 entry_label_(), 121 start_label_(), 122 success_label_(), 123 backtrack_label_(), 124 exit_label_() { 125 __ SetStackPointer(csp); 126 DCHECK_EQ(0, registers_to_save % 2); 127 // We can cache at most 16 W registers in x0-x7. 128 STATIC_ASSERT(kNumCachedRegisters <= 16); 129 STATIC_ASSERT((kNumCachedRegisters % 2) == 0); 130 __ B(&entry_label_); // We'll write the entry code later. 131 __ Bind(&start_label_); // And then continue from here. 132 } 133 134 135 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() { 136 delete masm_; 137 // Unuse labels in case we throw away the assembler without calling GetCode. 138 entry_label_.Unuse(); 139 start_label_.Unuse(); 140 success_label_.Unuse(); 141 backtrack_label_.Unuse(); 142 exit_label_.Unuse(); 143 check_preempt_label_.Unuse(); 144 stack_overflow_label_.Unuse(); 145 } 146 147 int RegExpMacroAssemblerARM64::stack_limit_slack() { 148 return RegExpStack::kStackLimitSlack; 149 } 150 151 152 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) { 153 if (by != 0) { 154 __ Add(current_input_offset(), 155 current_input_offset(), by * char_size()); 156 } 157 } 158 159 160 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) { 161 DCHECK((reg >= 0) && (reg < num_registers_)); 162 if (by != 0) { 163 Register to_advance; 164 RegisterState register_state = GetRegisterState(reg); 165 switch (register_state) { 166 case STACKED: 167 __ Ldr(w10, register_location(reg)); 168 __ Add(w10, w10, by); 169 __ Str(w10, register_location(reg)); 170 break; 171 case CACHED_LSW: 172 to_advance = GetCachedRegister(reg); 173 __ Add(to_advance, to_advance, by); 174 break; 175 case CACHED_MSW: 176 to_advance = GetCachedRegister(reg); 177 __ Add(to_advance, to_advance, 178 static_cast<int64_t>(by) << kWRegSizeInBits); 179 break; 180 default: 181 UNREACHABLE(); 182 break; 183 } 184 } 185 } 186 187 188 void RegExpMacroAssemblerARM64::Backtrack() { 189 CheckPreemption(); 190 Pop(w10); 191 __ Add(x10, code_pointer(), Operand(w10, UXTW)); 192 __ Br(x10); 193 } 194 195 196 void RegExpMacroAssemblerARM64::Bind(Label* label) { 197 __ Bind(label); 198 } 199 200 201 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) { 202 CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal); 203 } 204 205 206 void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit, 207 Label* on_greater) { 208 CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater); 209 } 210 211 212 void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) { 213 __ Add(w10, current_input_offset(), Operand(-char_size())); 214 __ Cmp(w10, string_start_minus_one()); 215 BranchOrBacktrack(eq, on_at_start); 216 } 217 218 219 void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset, 220 Label* on_not_at_start) { 221 __ Add(w10, current_input_offset(), 222 Operand(-char_size() + cp_offset * char_size())); 223 __ Cmp(w10, string_start_minus_one()); 224 BranchOrBacktrack(ne, on_not_at_start); 225 } 226 227 228 void RegExpMacroAssemblerARM64::CheckCharacterLT(uc16 limit, Label* on_less) { 229 CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less); 230 } 231 232 233 void RegExpMacroAssemblerARM64::CheckCharacters(Vector<const uc16> str, 234 int cp_offset, 235 Label* on_failure, 236 bool check_end_of_string) { 237 // This method is only ever called from the cctests. 238 239 if (check_end_of_string) { 240 // Is last character of required match inside string. 241 CheckPosition(cp_offset + str.length() - 1, on_failure); 242 } 243 244 Register characters_address = x11; 245 246 __ Add(characters_address, 247 input_end(), 248 Operand(current_input_offset(), SXTW)); 249 if (cp_offset != 0) { 250 __ Add(characters_address, characters_address, cp_offset * char_size()); 251 } 252 253 for (int i = 0; i < str.length(); i++) { 254 if (mode_ == LATIN1) { 255 __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex)); 256 DCHECK(str[i] <= String::kMaxOneByteCharCode); 257 } else { 258 __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex)); 259 } 260 CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure); 261 } 262 } 263 264 265 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) { 266 __ Ldr(w10, MemOperand(backtrack_stackpointer())); 267 __ Cmp(current_input_offset(), w10); 268 __ Cset(x11, eq); 269 __ Add(backtrack_stackpointer(), 270 backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2)); 271 BranchOrBacktrack(eq, on_equal); 272 } 273 274 275 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase( 276 int start_reg, bool read_backward, bool unicode, Label* on_no_match) { 277 Label fallthrough; 278 279 Register capture_start_offset = w10; 280 // Save the capture length in a callee-saved register so it will 281 // be preserved if we call a C helper. 282 Register capture_length = w19; 283 DCHECK(kCalleeSaved.IncludesAliasOf(capture_length)); 284 285 // Find length of back-referenced capture. 286 DCHECK((start_reg % 2) == 0); 287 if (start_reg < kNumCachedRegisters) { 288 __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg)); 289 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits); 290 } else { 291 __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10)); 292 } 293 __ Sub(capture_length, w11, capture_start_offset); // Length to check. 294 295 // At this point, the capture registers are either both set or both cleared. 296 // If the capture length is zero, then the capture is either empty or cleared. 297 // Fall through in both cases. 298 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough); 299 300 // Check that there are enough characters left in the input. 301 if (read_backward) { 302 __ Add(w12, string_start_minus_one(), capture_length); 303 __ Cmp(current_input_offset(), w12); 304 BranchOrBacktrack(le, on_no_match); 305 } else { 306 __ Cmn(capture_length, current_input_offset()); 307 BranchOrBacktrack(gt, on_no_match); 308 } 309 310 if (mode_ == LATIN1) { 311 Label success; 312 Label fail; 313 Label loop_check; 314 315 Register capture_start_address = x12; 316 Register capture_end_addresss = x13; 317 Register current_position_address = x14; 318 319 __ Add(capture_start_address, 320 input_end(), 321 Operand(capture_start_offset, SXTW)); 322 __ Add(capture_end_addresss, 323 capture_start_address, 324 Operand(capture_length, SXTW)); 325 __ Add(current_position_address, 326 input_end(), 327 Operand(current_input_offset(), SXTW)); 328 if (read_backward) { 329 // Offset by length when matching backwards. 330 __ Sub(current_position_address, current_position_address, 331 Operand(capture_length, SXTW)); 332 } 333 334 Label loop; 335 __ Bind(&loop); 336 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex)); 337 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex)); 338 __ Cmp(w10, w11); 339 __ B(eq, &loop_check); 340 341 // Mismatch, try case-insensitive match (converting letters to lower-case). 342 __ Orr(w10, w10, 0x20); // Convert capture character to lower-case. 343 __ Orr(w11, w11, 0x20); // Also convert input character. 344 __ Cmp(w11, w10); 345 __ B(ne, &fail); 346 __ Sub(w10, w10, 'a'); 347 __ Cmp(w10, 'z' - 'a'); // Is w10 a lowercase letter? 348 __ B(ls, &loop_check); // In range 'a'-'z'. 349 // Latin-1: Check for values in range [224,254] but not 247. 350 __ Sub(w10, w10, 224 - 'a'); 351 __ Cmp(w10, 254 - 224); 352 __ Ccmp(w10, 247 - 224, ZFlag, ls); // Check for 247. 353 __ B(eq, &fail); // Weren't Latin-1 letters. 354 355 __ Bind(&loop_check); 356 __ Cmp(capture_start_address, capture_end_addresss); 357 __ B(lt, &loop); 358 __ B(&success); 359 360 __ Bind(&fail); 361 BranchOrBacktrack(al, on_no_match); 362 363 __ Bind(&success); 364 // Compute new value of character position after the matched part. 365 __ Sub(current_input_offset().X(), current_position_address, input_end()); 366 if (read_backward) { 367 __ Sub(current_input_offset().X(), current_input_offset().X(), 368 Operand(capture_length, SXTW)); 369 } 370 if (masm_->emit_debug_code()) { 371 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW)); 372 __ Ccmp(current_input_offset(), 0, NoFlag, eq); 373 // The current input offset should be <= 0, and fit in a W register. 374 __ Check(le, kOffsetOutOfRange); 375 } 376 } else { 377 DCHECK(mode_ == UC16); 378 int argument_count = 4; 379 380 // The cached registers need to be retained. 381 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7); 382 DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters); 383 __ PushCPURegList(cached_registers); 384 385 // Put arguments into arguments registers. 386 // Parameters are 387 // x0: Address byte_offset1 - Address captured substring's start. 388 // x1: Address byte_offset2 - Address of current character position. 389 // w2: size_t byte_length - length of capture in bytes(!) 390 // x3: Isolate* isolate or 0 if unicode flag 391 392 // Address of start of capture. 393 __ Add(x0, input_end(), Operand(capture_start_offset, SXTW)); 394 // Length of capture. 395 __ Mov(w2, capture_length); 396 // Address of current input position. 397 __ Add(x1, input_end(), Operand(current_input_offset(), SXTW)); 398 if (read_backward) { 399 __ Sub(x1, x1, Operand(capture_length, SXTW)); 400 } 401 // Isolate. 402 #ifdef V8_I18N_SUPPORT 403 if (unicode) { 404 __ Mov(x3, Operand(0)); 405 } else // NOLINT 406 #endif // V8_I18N_SUPPORT 407 { 408 __ Mov(x3, ExternalReference::isolate_address(isolate())); 409 } 410 411 { 412 AllowExternalCallThatCantCauseGC scope(masm_); 413 ExternalReference function = 414 ExternalReference::re_case_insensitive_compare_uc16(isolate()); 415 __ CallCFunction(function, argument_count); 416 } 417 418 // Check if function returned non-zero for success or zero for failure. 419 // x0 is one of the registers used as a cache so it must be tested before 420 // the cache is restored. 421 __ Cmp(x0, 0); 422 __ PopCPURegList(cached_registers); 423 BranchOrBacktrack(eq, on_no_match); 424 425 // On success, advance position by length of capture. 426 if (read_backward) { 427 __ Sub(current_input_offset(), current_input_offset(), capture_length); 428 } else { 429 __ Add(current_input_offset(), current_input_offset(), capture_length); 430 } 431 } 432 433 __ Bind(&fallthrough); 434 } 435 436 void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg, 437 bool read_backward, 438 Label* on_no_match) { 439 Label fallthrough; 440 441 Register capture_start_address = x12; 442 Register capture_end_address = x13; 443 Register current_position_address = x14; 444 Register capture_length = w15; 445 446 // Find length of back-referenced capture. 447 DCHECK((start_reg % 2) == 0); 448 if (start_reg < kNumCachedRegisters) { 449 __ Mov(x10, GetCachedRegister(start_reg)); 450 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits); 451 } else { 452 __ Ldp(w11, w10, capture_location(start_reg, x10)); 453 } 454 __ Sub(capture_length, w11, w10); // Length to check. 455 456 // At this point, the capture registers are either both set or both cleared. 457 // If the capture length is zero, then the capture is either empty or cleared. 458 // Fall through in both cases. 459 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough); 460 461 // Check that there are enough characters left in the input. 462 if (read_backward) { 463 __ Add(w12, string_start_minus_one(), capture_length); 464 __ Cmp(current_input_offset(), w12); 465 BranchOrBacktrack(le, on_no_match); 466 } else { 467 __ Cmn(capture_length, current_input_offset()); 468 BranchOrBacktrack(gt, on_no_match); 469 } 470 471 // Compute pointers to match string and capture string 472 __ Add(capture_start_address, input_end(), Operand(w10, SXTW)); 473 __ Add(capture_end_address, 474 capture_start_address, 475 Operand(capture_length, SXTW)); 476 __ Add(current_position_address, 477 input_end(), 478 Operand(current_input_offset(), SXTW)); 479 if (read_backward) { 480 // Offset by length when matching backwards. 481 __ Sub(current_position_address, current_position_address, 482 Operand(capture_length, SXTW)); 483 } 484 485 Label loop; 486 __ Bind(&loop); 487 if (mode_ == LATIN1) { 488 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex)); 489 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex)); 490 } else { 491 DCHECK(mode_ == UC16); 492 __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex)); 493 __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex)); 494 } 495 __ Cmp(w10, w11); 496 BranchOrBacktrack(ne, on_no_match); 497 __ Cmp(capture_start_address, capture_end_address); 498 __ B(lt, &loop); 499 500 // Move current character position to position after match. 501 __ Sub(current_input_offset().X(), current_position_address, input_end()); 502 if (read_backward) { 503 __ Sub(current_input_offset().X(), current_input_offset().X(), 504 Operand(capture_length, SXTW)); 505 } 506 507 if (masm_->emit_debug_code()) { 508 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW)); 509 __ Ccmp(current_input_offset(), 0, NoFlag, eq); 510 // The current input offset should be <= 0, and fit in a W register. 511 __ Check(le, kOffsetOutOfRange); 512 } 513 __ Bind(&fallthrough); 514 } 515 516 517 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c, 518 Label* on_not_equal) { 519 CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal); 520 } 521 522 523 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c, 524 uint32_t mask, 525 Label* on_equal) { 526 __ And(w10, current_character(), mask); 527 CompareAndBranchOrBacktrack(w10, c, eq, on_equal); 528 } 529 530 531 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c, 532 unsigned mask, 533 Label* on_not_equal) { 534 __ And(w10, current_character(), mask); 535 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal); 536 } 537 538 539 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd( 540 uc16 c, 541 uc16 minus, 542 uc16 mask, 543 Label* on_not_equal) { 544 DCHECK(minus < String::kMaxUtf16CodeUnit); 545 __ Sub(w10, current_character(), minus); 546 __ And(w10, w10, mask); 547 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal); 548 } 549 550 551 void RegExpMacroAssemblerARM64::CheckCharacterInRange( 552 uc16 from, 553 uc16 to, 554 Label* on_in_range) { 555 __ Sub(w10, current_character(), from); 556 // Unsigned lower-or-same condition. 557 CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range); 558 } 559 560 561 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange( 562 uc16 from, 563 uc16 to, 564 Label* on_not_in_range) { 565 __ Sub(w10, current_character(), from); 566 // Unsigned higher condition. 567 CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range); 568 } 569 570 571 void RegExpMacroAssemblerARM64::CheckBitInTable( 572 Handle<ByteArray> table, 573 Label* on_bit_set) { 574 __ Mov(x11, Operand(table)); 575 if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) { 576 __ And(w10, current_character(), kTableMask); 577 __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag); 578 } else { 579 __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag); 580 } 581 __ Ldrb(w11, MemOperand(x11, w10, UXTW)); 582 CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set); 583 } 584 585 586 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type, 587 Label* on_no_match) { 588 // Range checks (c in min..max) are generally implemented by an unsigned 589 // (c - min) <= (max - min) check 590 switch (type) { 591 case 's': 592 // Match space-characters 593 if (mode_ == LATIN1) { 594 // One byte space characters are '\t'..'\r', ' ' and \u00a0. 595 Label success; 596 // Check for ' ' or 0x00a0. 597 __ Cmp(current_character(), ' '); 598 __ Ccmp(current_character(), 0x00a0, ZFlag, ne); 599 __ B(eq, &success); 600 // Check range 0x09..0x0d. 601 __ Sub(w10, current_character(), '\t'); 602 CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match); 603 __ Bind(&success); 604 return true; 605 } 606 return false; 607 case 'S': 608 // The emitted code for generic character classes is good enough. 609 return false; 610 case 'd': 611 // Match ASCII digits ('0'..'9'). 612 __ Sub(w10, current_character(), '0'); 613 CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match); 614 return true; 615 case 'D': 616 // Match ASCII non-digits. 617 __ Sub(w10, current_character(), '0'); 618 CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match); 619 return true; 620 case '.': { 621 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 622 // Here we emit the conditional branch only once at the end to make branch 623 // prediction more efficient, even though we could branch out of here 624 // as soon as a character matches. 625 __ Cmp(current_character(), 0x0a); 626 __ Ccmp(current_character(), 0x0d, ZFlag, ne); 627 if (mode_ == UC16) { 628 __ Sub(w10, current_character(), 0x2028); 629 // If the Z flag was set we clear the flags to force a branch. 630 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne); 631 // ls -> !((C==1) && (Z==0)) 632 BranchOrBacktrack(ls, on_no_match); 633 } else { 634 BranchOrBacktrack(eq, on_no_match); 635 } 636 return true; 637 } 638 case 'n': { 639 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 640 // We have to check all 4 newline characters before emitting 641 // the conditional branch. 642 __ Cmp(current_character(), 0x0a); 643 __ Ccmp(current_character(), 0x0d, ZFlag, ne); 644 if (mode_ == UC16) { 645 __ Sub(w10, current_character(), 0x2028); 646 // If the Z flag was set we clear the flags to force a fall-through. 647 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne); 648 // hi -> (C==1) && (Z==0) 649 BranchOrBacktrack(hi, on_no_match); 650 } else { 651 BranchOrBacktrack(ne, on_no_match); 652 } 653 return true; 654 } 655 case 'w': { 656 if (mode_ != LATIN1) { 657 // Table is 256 entries, so all Latin1 characters can be tested. 658 CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match); 659 } 660 ExternalReference map = ExternalReference::re_word_character_map(); 661 __ Mov(x10, map); 662 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW)); 663 CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match); 664 return true; 665 } 666 case 'W': { 667 Label done; 668 if (mode_ != LATIN1) { 669 // Table is 256 entries, so all Latin1 characters can be tested. 670 __ Cmp(current_character(), 'z'); 671 __ B(hi, &done); 672 } 673 ExternalReference map = ExternalReference::re_word_character_map(); 674 __ Mov(x10, map); 675 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW)); 676 CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match); 677 __ Bind(&done); 678 return true; 679 } 680 case '*': 681 // Match any character. 682 return true; 683 // No custom implementation (yet): s(UC16), S(UC16). 684 default: 685 return false; 686 } 687 } 688 689 690 void RegExpMacroAssemblerARM64::Fail() { 691 __ Mov(w0, FAILURE); 692 __ B(&exit_label_); 693 } 694 695 696 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) { 697 Label return_w0; 698 // Finalize code - write the entry point code now we know how many 699 // registers we need. 700 701 // Entry code: 702 __ Bind(&entry_label_); 703 704 // Arguments on entry: 705 // x0: String* input 706 // x1: int start_offset 707 // x2: byte* input_start 708 // x3: byte* input_end 709 // x4: int* output array 710 // x5: int output array size 711 // x6: Address stack_base 712 // x7: int direct_call 713 714 // The stack pointer should be csp on entry. 715 // csp[8]: address of the current isolate 716 // csp[0]: secondary link/return address used by native call 717 718 // Tell the system that we have a stack frame. Because the type is MANUAL, no 719 // code is generated. 720 FrameScope scope(masm_, StackFrame::MANUAL); 721 722 // Push registers on the stack, only push the argument registers that we need. 723 CPURegList argument_registers(x0, x5, x6, x7); 724 725 CPURegList registers_to_retain = kCalleeSaved; 726 DCHECK(kCalleeSaved.Count() == 11); 727 registers_to_retain.Combine(lr); 728 729 DCHECK(csp.Is(__ StackPointer())); 730 __ PushCPURegList(registers_to_retain); 731 __ PushCPURegList(argument_registers); 732 733 // Set frame pointer in place. 734 __ Add(frame_pointer(), csp, argument_registers.Count() * kPointerSize); 735 736 // Initialize callee-saved registers. 737 __ Mov(start_offset(), w1); 738 __ Mov(input_start(), x2); 739 __ Mov(input_end(), x3); 740 __ Mov(output_array(), x4); 741 742 // Set the number of registers we will need to allocate, that is: 743 // - success_counter (X register) 744 // - (num_registers_ - kNumCachedRegisters) (W registers) 745 int num_wreg_to_allocate = num_registers_ - kNumCachedRegisters; 746 // Do not allocate registers on the stack if they can all be cached. 747 if (num_wreg_to_allocate < 0) { num_wreg_to_allocate = 0; } 748 // Make room for the success_counter. 749 num_wreg_to_allocate += 2; 750 751 // Make sure the stack alignment will be respected. 752 int alignment = masm_->ActivationFrameAlignment(); 753 DCHECK_EQ(alignment % 16, 0); 754 int align_mask = (alignment / kWRegSize) - 1; 755 num_wreg_to_allocate = (num_wreg_to_allocate + align_mask) & ~align_mask; 756 757 // Check if we have space on the stack. 758 Label stack_limit_hit; 759 Label stack_ok; 760 761 ExternalReference stack_limit = 762 ExternalReference::address_of_stack_limit(isolate()); 763 __ Mov(x10, stack_limit); 764 __ Ldr(x10, MemOperand(x10)); 765 __ Subs(x10, csp, x10); 766 767 // Handle it if the stack pointer is already below the stack limit. 768 __ B(ls, &stack_limit_hit); 769 770 // Check if there is room for the variable number of registers above 771 // the stack limit. 772 __ Cmp(x10, num_wreg_to_allocate * kWRegSize); 773 __ B(hs, &stack_ok); 774 775 // Exit with OutOfMemory exception. There is not enough space on the stack 776 // for our working registers. 777 __ Mov(w0, EXCEPTION); 778 __ B(&return_w0); 779 780 __ Bind(&stack_limit_hit); 781 CallCheckStackGuardState(x10); 782 // If returned value is non-zero, we exit with the returned value as result. 783 __ Cbnz(w0, &return_w0); 784 785 __ Bind(&stack_ok); 786 787 // Allocate space on stack. 788 __ Claim(num_wreg_to_allocate, kWRegSize); 789 790 // Initialize success_counter with 0. 791 __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter)); 792 793 // Find negative length (offset of start relative to end). 794 __ Sub(x10, input_start(), input_end()); 795 if (masm_->emit_debug_code()) { 796 // Check that the input string length is < 2^30. 797 __ Neg(x11, x10); 798 __ Cmp(x11, (1<<30) - 1); 799 __ Check(ls, kInputStringTooLong); 800 } 801 __ Mov(current_input_offset(), w10); 802 803 // The non-position value is used as a clearing value for the 804 // capture registers, it corresponds to the position of the first character 805 // minus one. 806 __ Sub(string_start_minus_one(), current_input_offset(), char_size()); 807 __ Sub(string_start_minus_one(), string_start_minus_one(), 808 Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0)); 809 // We can store this value twice in an X register for initializing 810 // on-stack registers later. 811 __ Orr(twice_non_position_value(), string_start_minus_one().X(), 812 Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits)); 813 814 // Initialize code pointer register. 815 __ Mov(code_pointer(), Operand(masm_->CodeObject())); 816 817 Label load_char_start_regexp, start_regexp; 818 // Load newline if index is at start, previous character otherwise. 819 __ Cbnz(start_offset(), &load_char_start_regexp); 820 __ Mov(current_character(), '\n'); 821 __ B(&start_regexp); 822 823 // Global regexp restarts matching here. 824 __ Bind(&load_char_start_regexp); 825 // Load previous char as initial value of current character register. 826 LoadCurrentCharacterUnchecked(-1, 1); 827 __ Bind(&start_regexp); 828 // Initialize on-stack registers. 829 if (num_saved_registers_ > 0) { 830 ClearRegisters(0, num_saved_registers_ - 1); 831 } 832 833 // Initialize backtrack stack pointer. 834 __ Ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackBase)); 835 836 // Execute 837 __ B(&start_label_); 838 839 if (backtrack_label_.is_linked()) { 840 __ Bind(&backtrack_label_); 841 Backtrack(); 842 } 843 844 if (success_label_.is_linked()) { 845 Register first_capture_start = w15; 846 847 // Save captures when successful. 848 __ Bind(&success_label_); 849 850 if (num_saved_registers_ > 0) { 851 // V8 expects the output to be an int32_t array. 852 Register capture_start = w12; 853 Register capture_end = w13; 854 Register input_length = w14; 855 856 // Copy captures to output. 857 858 // Get string length. 859 __ Sub(x10, input_end(), input_start()); 860 if (masm_->emit_debug_code()) { 861 // Check that the input string length is < 2^30. 862 __ Cmp(x10, (1<<30) - 1); 863 __ Check(ls, kInputStringTooLong); 864 } 865 // input_start has a start_offset offset on entry. We need to include 866 // it when computing the length of the whole string. 867 if (mode_ == UC16) { 868 __ Add(input_length, start_offset(), Operand(w10, LSR, 1)); 869 } else { 870 __ Add(input_length, start_offset(), w10); 871 } 872 873 // Copy the results to the output array from the cached registers first. 874 for (int i = 0; 875 (i < num_saved_registers_) && (i < kNumCachedRegisters); 876 i += 2) { 877 __ Mov(capture_start.X(), GetCachedRegister(i)); 878 __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits); 879 if ((i == 0) && global_with_zero_length_check()) { 880 // Keep capture start for the zero-length check later. 881 __ Mov(first_capture_start, capture_start); 882 } 883 // Offsets need to be relative to the start of the string. 884 if (mode_ == UC16) { 885 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1)); 886 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 887 } else { 888 __ Add(capture_start, input_length, capture_start); 889 __ Add(capture_end, input_length, capture_end); 890 } 891 // The output pointer advances for a possible global match. 892 __ Stp(capture_start, 893 capture_end, 894 MemOperand(output_array(), kPointerSize, PostIndex)); 895 } 896 897 // Only carry on if there are more than kNumCachedRegisters capture 898 // registers. 899 int num_registers_left_on_stack = 900 num_saved_registers_ - kNumCachedRegisters; 901 if (num_registers_left_on_stack > 0) { 902 Register base = x10; 903 // There are always an even number of capture registers. A couple of 904 // registers determine one match with two offsets. 905 DCHECK_EQ(0, num_registers_left_on_stack % 2); 906 __ Add(base, frame_pointer(), kFirstCaptureOnStack); 907 908 // We can unroll the loop here, we should not unroll for less than 2 909 // registers. 910 STATIC_ASSERT(kNumRegistersToUnroll > 2); 911 if (num_registers_left_on_stack <= kNumRegistersToUnroll) { 912 for (int i = 0; i < num_registers_left_on_stack / 2; i++) { 913 __ Ldp(capture_end, 914 capture_start, 915 MemOperand(base, -kPointerSize, PostIndex)); 916 if ((i == 0) && global_with_zero_length_check()) { 917 // Keep capture start for the zero-length check later. 918 __ Mov(first_capture_start, capture_start); 919 } 920 // Offsets need to be relative to the start of the string. 921 if (mode_ == UC16) { 922 __ Add(capture_start, 923 input_length, 924 Operand(capture_start, ASR, 1)); 925 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 926 } else { 927 __ Add(capture_start, input_length, capture_start); 928 __ Add(capture_end, input_length, capture_end); 929 } 930 // The output pointer advances for a possible global match. 931 __ Stp(capture_start, 932 capture_end, 933 MemOperand(output_array(), kPointerSize, PostIndex)); 934 } 935 } else { 936 Label loop, start; 937 __ Mov(x11, num_registers_left_on_stack); 938 939 __ Ldp(capture_end, 940 capture_start, 941 MemOperand(base, -kPointerSize, PostIndex)); 942 if (global_with_zero_length_check()) { 943 __ Mov(first_capture_start, capture_start); 944 } 945 __ B(&start); 946 947 __ Bind(&loop); 948 __ Ldp(capture_end, 949 capture_start, 950 MemOperand(base, -kPointerSize, PostIndex)); 951 __ Bind(&start); 952 if (mode_ == UC16) { 953 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1)); 954 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 955 } else { 956 __ Add(capture_start, input_length, capture_start); 957 __ Add(capture_end, input_length, capture_end); 958 } 959 // The output pointer advances for a possible global match. 960 __ Stp(capture_start, 961 capture_end, 962 MemOperand(output_array(), kPointerSize, PostIndex)); 963 __ Sub(x11, x11, 2); 964 __ Cbnz(x11, &loop); 965 } 966 } 967 } 968 969 if (global()) { 970 Register success_counter = w0; 971 Register output_size = x10; 972 // Restart matching if the regular expression is flagged as global. 973 974 // Increment success counter. 975 __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter)); 976 __ Add(success_counter, success_counter, 1); 977 __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter)); 978 979 // Capture results have been stored, so the number of remaining global 980 // output registers is reduced by the number of stored captures. 981 __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize)); 982 __ Sub(output_size, output_size, num_saved_registers_); 983 // Check whether we have enough room for another set of capture results. 984 __ Cmp(output_size, num_saved_registers_); 985 __ B(lt, &return_w0); 986 987 // The output pointer is already set to the next field in the output 988 // array. 989 // Update output size on the frame before we restart matching. 990 __ Str(output_size, MemOperand(frame_pointer(), kOutputSize)); 991 992 if (global_with_zero_length_check()) { 993 // Special case for zero-length matches. 994 __ Cmp(current_input_offset(), first_capture_start); 995 // Not a zero-length match, restart. 996 __ B(ne, &load_char_start_regexp); 997 // Offset from the end is zero if we already reached the end. 998 __ Cbz(current_input_offset(), &return_w0); 999 // Advance current position after a zero-length match. 1000 Label advance; 1001 __ bind(&advance); 1002 __ Add(current_input_offset(), 1003 current_input_offset(), 1004 Operand((mode_ == UC16) ? 2 : 1)); 1005 if (global_unicode()) CheckNotInSurrogatePair(0, &advance); 1006 } 1007 1008 __ B(&load_char_start_regexp); 1009 } else { 1010 __ Mov(w0, SUCCESS); 1011 } 1012 } 1013 1014 if (exit_label_.is_linked()) { 1015 // Exit and return w0 1016 __ Bind(&exit_label_); 1017 if (global()) { 1018 __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter)); 1019 } 1020 } 1021 1022 __ Bind(&return_w0); 1023 1024 // Set stack pointer back to first register to retain 1025 DCHECK(csp.Is(__ StackPointer())); 1026 __ Mov(csp, fp); 1027 __ AssertStackConsistency(); 1028 1029 // Restore registers. 1030 __ PopCPURegList(registers_to_retain); 1031 1032 __ Ret(); 1033 1034 Label exit_with_exception; 1035 // Registers x0 to x7 are used to store the first captures, they need to be 1036 // retained over calls to C++ code. 1037 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7); 1038 DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters); 1039 1040 if (check_preempt_label_.is_linked()) { 1041 __ Bind(&check_preempt_label_); 1042 SaveLinkRegister(); 1043 // The cached registers need to be retained. 1044 __ PushCPURegList(cached_registers); 1045 CallCheckStackGuardState(x10); 1046 // Returning from the regexp code restores the stack (csp <- fp) 1047 // so we don't need to drop the link register from it before exiting. 1048 __ Cbnz(w0, &return_w0); 1049 // Reset the cached registers. 1050 __ PopCPURegList(cached_registers); 1051 RestoreLinkRegister(); 1052 __ Ret(); 1053 } 1054 1055 if (stack_overflow_label_.is_linked()) { 1056 __ Bind(&stack_overflow_label_); 1057 SaveLinkRegister(); 1058 // The cached registers need to be retained. 1059 __ PushCPURegList(cached_registers); 1060 // Call GrowStack(backtrack_stackpointer(), &stack_base) 1061 __ Mov(x2, ExternalReference::isolate_address(isolate())); 1062 __ Add(x1, frame_pointer(), kStackBase); 1063 __ Mov(x0, backtrack_stackpointer()); 1064 ExternalReference grow_stack = 1065 ExternalReference::re_grow_stack(isolate()); 1066 __ CallCFunction(grow_stack, 3); 1067 // If return NULL, we have failed to grow the stack, and 1068 // must exit with a stack-overflow exception. 1069 // Returning from the regexp code restores the stack (csp <- fp) 1070 // so we don't need to drop the link register from it before exiting. 1071 __ Cbz(w0, &exit_with_exception); 1072 // Otherwise use return value as new stack pointer. 1073 __ Mov(backtrack_stackpointer(), x0); 1074 // Reset the cached registers. 1075 __ PopCPURegList(cached_registers); 1076 RestoreLinkRegister(); 1077 __ Ret(); 1078 } 1079 1080 if (exit_with_exception.is_linked()) { 1081 __ Bind(&exit_with_exception); 1082 __ Mov(w0, EXCEPTION); 1083 __ B(&return_w0); 1084 } 1085 1086 CodeDesc code_desc; 1087 masm_->GetCode(&code_desc); 1088 Handle<Code> code = isolate()->factory()->NewCode( 1089 code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject()); 1090 PROFILE(masm_->isolate(), 1091 RegExpCodeCreateEvent(AbstractCode::cast(*code), *source)); 1092 return Handle<HeapObject>::cast(code); 1093 } 1094 1095 1096 void RegExpMacroAssemblerARM64::GoTo(Label* to) { 1097 BranchOrBacktrack(al, to); 1098 } 1099 1100 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand, 1101 Label* if_ge) { 1102 Register to_compare = GetRegister(reg, w10); 1103 CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge); 1104 } 1105 1106 1107 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand, 1108 Label* if_lt) { 1109 Register to_compare = GetRegister(reg, w10); 1110 CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt); 1111 } 1112 1113 1114 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) { 1115 Register to_compare = GetRegister(reg, w10); 1116 __ Cmp(to_compare, current_input_offset()); 1117 BranchOrBacktrack(eq, if_eq); 1118 } 1119 1120 RegExpMacroAssembler::IrregexpImplementation 1121 RegExpMacroAssemblerARM64::Implementation() { 1122 return kARM64Implementation; 1123 } 1124 1125 1126 void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset, 1127 Label* on_end_of_input, 1128 bool check_bounds, 1129 int characters) { 1130 // TODO(pielan): Make sure long strings are caught before this, and not 1131 // just asserted in debug mode. 1132 // Be sane! (And ensure that an int32_t can be used to index the string) 1133 DCHECK(cp_offset < (1<<30)); 1134 if (check_bounds) { 1135 if (cp_offset >= 0) { 1136 CheckPosition(cp_offset + characters - 1, on_end_of_input); 1137 } else { 1138 CheckPosition(cp_offset, on_end_of_input); 1139 } 1140 } 1141 LoadCurrentCharacterUnchecked(cp_offset, characters); 1142 } 1143 1144 1145 void RegExpMacroAssemblerARM64::PopCurrentPosition() { 1146 Pop(current_input_offset()); 1147 } 1148 1149 1150 void RegExpMacroAssemblerARM64::PopRegister(int register_index) { 1151 Pop(w10); 1152 StoreRegister(register_index, w10); 1153 } 1154 1155 1156 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) { 1157 if (label->is_bound()) { 1158 int target = label->pos(); 1159 __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag); 1160 } else { 1161 __ Adr(x10, label, MacroAssembler::kAdrFar); 1162 __ Sub(x10, x10, code_pointer()); 1163 if (masm_->emit_debug_code()) { 1164 __ Cmp(x10, kWRegMask); 1165 // The code offset has to fit in a W register. 1166 __ Check(ls, kOffsetOutOfRange); 1167 } 1168 } 1169 Push(w10); 1170 CheckStackLimit(); 1171 } 1172 1173 1174 void RegExpMacroAssemblerARM64::PushCurrentPosition() { 1175 Push(current_input_offset()); 1176 } 1177 1178 1179 void RegExpMacroAssemblerARM64::PushRegister(int register_index, 1180 StackCheckFlag check_stack_limit) { 1181 Register to_push = GetRegister(register_index, w10); 1182 Push(to_push); 1183 if (check_stack_limit) CheckStackLimit(); 1184 } 1185 1186 1187 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) { 1188 Register cached_register; 1189 RegisterState register_state = GetRegisterState(reg); 1190 switch (register_state) { 1191 case STACKED: 1192 __ Ldr(current_input_offset(), register_location(reg)); 1193 break; 1194 case CACHED_LSW: 1195 cached_register = GetCachedRegister(reg); 1196 __ Mov(current_input_offset(), cached_register.W()); 1197 break; 1198 case CACHED_MSW: 1199 cached_register = GetCachedRegister(reg); 1200 __ Lsr(current_input_offset().X(), cached_register, kWRegSizeInBits); 1201 break; 1202 default: 1203 UNREACHABLE(); 1204 break; 1205 } 1206 } 1207 1208 1209 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) { 1210 Register read_from = GetRegister(reg, w10); 1211 __ Ldr(x11, MemOperand(frame_pointer(), kStackBase)); 1212 __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW)); 1213 } 1214 1215 1216 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) { 1217 Label after_position; 1218 __ Cmp(current_input_offset(), -by * char_size()); 1219 __ B(ge, &after_position); 1220 __ Mov(current_input_offset(), -by * char_size()); 1221 // On RegExp code entry (where this operation is used), the character before 1222 // the current position is expected to be already loaded. 1223 // We have advanced the position, so it's safe to read backwards. 1224 LoadCurrentCharacterUnchecked(-1, 1); 1225 __ Bind(&after_position); 1226 } 1227 1228 1229 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) { 1230 DCHECK(register_index >= num_saved_registers_); // Reserved for positions! 1231 Register set_to = wzr; 1232 if (to != 0) { 1233 set_to = w10; 1234 __ Mov(set_to, to); 1235 } 1236 StoreRegister(register_index, set_to); 1237 } 1238 1239 1240 bool RegExpMacroAssemblerARM64::Succeed() { 1241 __ B(&success_label_); 1242 return global(); 1243 } 1244 1245 1246 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg, 1247 int cp_offset) { 1248 Register position = current_input_offset(); 1249 if (cp_offset != 0) { 1250 position = w10; 1251 __ Add(position, current_input_offset(), cp_offset * char_size()); 1252 } 1253 StoreRegister(reg, position); 1254 } 1255 1256 1257 void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) { 1258 DCHECK(reg_from <= reg_to); 1259 int num_registers = reg_to - reg_from + 1; 1260 1261 // If the first capture register is cached in a hardware register but not 1262 // aligned on a 64-bit one, we need to clear the first one specifically. 1263 if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) { 1264 StoreRegister(reg_from, string_start_minus_one()); 1265 num_registers--; 1266 reg_from++; 1267 } 1268 1269 // Clear cached registers in pairs as far as possible. 1270 while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) { 1271 DCHECK(GetRegisterState(reg_from) == CACHED_LSW); 1272 __ Mov(GetCachedRegister(reg_from), twice_non_position_value()); 1273 reg_from += 2; 1274 num_registers -= 2; 1275 } 1276 1277 if ((num_registers % 2) == 1) { 1278 StoreRegister(reg_from, string_start_minus_one()); 1279 num_registers--; 1280 reg_from++; 1281 } 1282 1283 if (num_registers > 0) { 1284 // If there are some remaining registers, they are stored on the stack. 1285 DCHECK(reg_from >= kNumCachedRegisters); 1286 1287 // Move down the indexes of the registers on stack to get the correct offset 1288 // in memory. 1289 reg_from -= kNumCachedRegisters; 1290 reg_to -= kNumCachedRegisters; 1291 // We should not unroll the loop for less than 2 registers. 1292 STATIC_ASSERT(kNumRegistersToUnroll > 2); 1293 // We position the base pointer to (reg_from + 1). 1294 int base_offset = kFirstRegisterOnStack - 1295 kWRegSize - (kWRegSize * reg_from); 1296 if (num_registers > kNumRegistersToUnroll) { 1297 Register base = x10; 1298 __ Add(base, frame_pointer(), base_offset); 1299 1300 Label loop; 1301 __ Mov(x11, num_registers); 1302 __ Bind(&loop); 1303 __ Str(twice_non_position_value(), 1304 MemOperand(base, -kPointerSize, PostIndex)); 1305 __ Sub(x11, x11, 2); 1306 __ Cbnz(x11, &loop); 1307 } else { 1308 for (int i = reg_from; i <= reg_to; i += 2) { 1309 __ Str(twice_non_position_value(), 1310 MemOperand(frame_pointer(), base_offset)); 1311 base_offset -= kWRegSize * 2; 1312 } 1313 } 1314 } 1315 } 1316 1317 1318 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) { 1319 __ Ldr(x10, MemOperand(frame_pointer(), kStackBase)); 1320 __ Sub(x10, backtrack_stackpointer(), x10); 1321 if (masm_->emit_debug_code()) { 1322 __ Cmp(x10, Operand(w10, SXTW)); 1323 // The stack offset needs to fit in a W register. 1324 __ Check(eq, kOffsetOutOfRange); 1325 } 1326 StoreRegister(reg, w10); 1327 } 1328 1329 1330 // Helper function for reading a value out of a stack frame. 1331 template <typename T> 1332 static T& frame_entry(Address re_frame, int frame_offset) { 1333 return *reinterpret_cast<T*>(re_frame + frame_offset); 1334 } 1335 1336 1337 template <typename T> 1338 static T* frame_entry_address(Address re_frame, int frame_offset) { 1339 return reinterpret_cast<T*>(re_frame + frame_offset); 1340 } 1341 1342 1343 int RegExpMacroAssemblerARM64::CheckStackGuardState( 1344 Address* return_address, Code* re_code, Address re_frame, int start_index, 1345 const byte** input_start, const byte** input_end) { 1346 return NativeRegExpMacroAssembler::CheckStackGuardState( 1347 frame_entry<Isolate*>(re_frame, kIsolate), start_index, 1348 frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code, 1349 frame_entry_address<String*>(re_frame, kInput), input_start, input_end); 1350 } 1351 1352 1353 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset, 1354 Label* on_outside_input) { 1355 if (cp_offset >= 0) { 1356 CompareAndBranchOrBacktrack(current_input_offset(), 1357 -cp_offset * char_size(), ge, on_outside_input); 1358 } else { 1359 __ Add(w12, current_input_offset(), Operand(cp_offset * char_size())); 1360 __ Cmp(w12, string_start_minus_one()); 1361 BranchOrBacktrack(le, on_outside_input); 1362 } 1363 } 1364 1365 1366 // Private methods: 1367 1368 void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) { 1369 // Allocate space on the stack to store the return address. The 1370 // CheckStackGuardState C++ function will override it if the code 1371 // moved. Allocate extra space for 2 arguments passed by pointers. 1372 // AAPCS64 requires the stack to be 16 byte aligned. 1373 int alignment = masm_->ActivationFrameAlignment(); 1374 DCHECK_EQ(alignment % 16, 0); 1375 int align_mask = (alignment / kXRegSize) - 1; 1376 int xreg_to_claim = (3 + align_mask) & ~align_mask; 1377 1378 DCHECK(csp.Is(__ StackPointer())); 1379 __ Claim(xreg_to_claim); 1380 1381 // CheckStackGuardState needs the end and start addresses of the input string. 1382 __ Poke(input_end(), 2 * kPointerSize); 1383 __ Add(x5, csp, 2 * kPointerSize); 1384 __ Poke(input_start(), kPointerSize); 1385 __ Add(x4, csp, kPointerSize); 1386 1387 __ Mov(w3, start_offset()); 1388 // RegExp code frame pointer. 1389 __ Mov(x2, frame_pointer()); 1390 // Code* of self. 1391 __ Mov(x1, Operand(masm_->CodeObject())); 1392 1393 // We need to pass a pointer to the return address as first argument. 1394 // The DirectCEntry stub will place the return address on the stack before 1395 // calling so the stack pointer will point to it. 1396 __ Mov(x0, csp); 1397 1398 ExternalReference check_stack_guard_state = 1399 ExternalReference::re_check_stack_guard_state(isolate()); 1400 __ Mov(scratch, check_stack_guard_state); 1401 DirectCEntryStub stub(isolate()); 1402 stub.GenerateCall(masm_, scratch); 1403 1404 // The input string may have been moved in memory, we need to reload it. 1405 __ Peek(input_start(), kPointerSize); 1406 __ Peek(input_end(), 2 * kPointerSize); 1407 1408 DCHECK(csp.Is(__ StackPointer())); 1409 __ Drop(xreg_to_claim); 1410 1411 // Reload the Code pointer. 1412 __ Mov(code_pointer(), Operand(masm_->CodeObject())); 1413 } 1414 1415 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition, 1416 Label* to) { 1417 if (condition == al) { // Unconditional. 1418 if (to == NULL) { 1419 Backtrack(); 1420 return; 1421 } 1422 __ B(to); 1423 return; 1424 } 1425 if (to == NULL) { 1426 to = &backtrack_label_; 1427 } 1428 __ B(condition, to); 1429 } 1430 1431 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg, 1432 int immediate, 1433 Condition condition, 1434 Label* to) { 1435 if ((immediate == 0) && ((condition == eq) || (condition == ne))) { 1436 if (to == NULL) { 1437 to = &backtrack_label_; 1438 } 1439 if (condition == eq) { 1440 __ Cbz(reg, to); 1441 } else { 1442 __ Cbnz(reg, to); 1443 } 1444 } else { 1445 __ Cmp(reg, immediate); 1446 BranchOrBacktrack(condition, to); 1447 } 1448 } 1449 1450 1451 void RegExpMacroAssemblerARM64::CheckPreemption() { 1452 // Check for preemption. 1453 ExternalReference stack_limit = 1454 ExternalReference::address_of_stack_limit(isolate()); 1455 __ Mov(x10, stack_limit); 1456 __ Ldr(x10, MemOperand(x10)); 1457 DCHECK(csp.Is(__ StackPointer())); 1458 __ Cmp(csp, x10); 1459 CallIf(&check_preempt_label_, ls); 1460 } 1461 1462 1463 void RegExpMacroAssemblerARM64::CheckStackLimit() { 1464 ExternalReference stack_limit = 1465 ExternalReference::address_of_regexp_stack_limit(isolate()); 1466 __ Mov(x10, stack_limit); 1467 __ Ldr(x10, MemOperand(x10)); 1468 __ Cmp(backtrack_stackpointer(), x10); 1469 CallIf(&stack_overflow_label_, ls); 1470 } 1471 1472 1473 void RegExpMacroAssemblerARM64::Push(Register source) { 1474 DCHECK(source.Is32Bits()); 1475 DCHECK(!source.is(backtrack_stackpointer())); 1476 __ Str(source, 1477 MemOperand(backtrack_stackpointer(), 1478 -static_cast<int>(kWRegSize), 1479 PreIndex)); 1480 } 1481 1482 1483 void RegExpMacroAssemblerARM64::Pop(Register target) { 1484 DCHECK(target.Is32Bits()); 1485 DCHECK(!target.is(backtrack_stackpointer())); 1486 __ Ldr(target, 1487 MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex)); 1488 } 1489 1490 1491 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) { 1492 DCHECK(register_index < kNumCachedRegisters); 1493 return Register::Create(register_index / 2, kXRegSizeInBits); 1494 } 1495 1496 1497 Register RegExpMacroAssemblerARM64::GetRegister(int register_index, 1498 Register maybe_result) { 1499 DCHECK(maybe_result.Is32Bits()); 1500 DCHECK(register_index >= 0); 1501 if (num_registers_ <= register_index) { 1502 num_registers_ = register_index + 1; 1503 } 1504 Register result; 1505 RegisterState register_state = GetRegisterState(register_index); 1506 switch (register_state) { 1507 case STACKED: 1508 __ Ldr(maybe_result, register_location(register_index)); 1509 result = maybe_result; 1510 break; 1511 case CACHED_LSW: 1512 result = GetCachedRegister(register_index).W(); 1513 break; 1514 case CACHED_MSW: 1515 __ Lsr(maybe_result.X(), GetCachedRegister(register_index), 1516 kWRegSizeInBits); 1517 result = maybe_result; 1518 break; 1519 default: 1520 UNREACHABLE(); 1521 break; 1522 } 1523 DCHECK(result.Is32Bits()); 1524 return result; 1525 } 1526 1527 1528 void RegExpMacroAssemblerARM64::StoreRegister(int register_index, 1529 Register source) { 1530 DCHECK(source.Is32Bits()); 1531 DCHECK(register_index >= 0); 1532 if (num_registers_ <= register_index) { 1533 num_registers_ = register_index + 1; 1534 } 1535 1536 Register cached_register; 1537 RegisterState register_state = GetRegisterState(register_index); 1538 switch (register_state) { 1539 case STACKED: 1540 __ Str(source, register_location(register_index)); 1541 break; 1542 case CACHED_LSW: 1543 cached_register = GetCachedRegister(register_index); 1544 if (!source.Is(cached_register.W())) { 1545 __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits); 1546 } 1547 break; 1548 case CACHED_MSW: 1549 cached_register = GetCachedRegister(register_index); 1550 __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits); 1551 break; 1552 default: 1553 UNREACHABLE(); 1554 break; 1555 } 1556 } 1557 1558 1559 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) { 1560 Label skip_call; 1561 if (condition != al) __ B(&skip_call, NegateCondition(condition)); 1562 __ Bl(to); 1563 __ Bind(&skip_call); 1564 } 1565 1566 1567 void RegExpMacroAssemblerARM64::RestoreLinkRegister() { 1568 DCHECK(csp.Is(__ StackPointer())); 1569 __ Pop(lr, xzr); 1570 __ Add(lr, lr, Operand(masm_->CodeObject())); 1571 } 1572 1573 1574 void RegExpMacroAssemblerARM64::SaveLinkRegister() { 1575 DCHECK(csp.Is(__ StackPointer())); 1576 __ Sub(lr, lr, Operand(masm_->CodeObject())); 1577 __ Push(xzr, lr); 1578 } 1579 1580 1581 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) { 1582 DCHECK(register_index < (1<<30)); 1583 DCHECK(register_index >= kNumCachedRegisters); 1584 if (num_registers_ <= register_index) { 1585 num_registers_ = register_index + 1; 1586 } 1587 register_index -= kNumCachedRegisters; 1588 int offset = kFirstRegisterOnStack - register_index * kWRegSize; 1589 return MemOperand(frame_pointer(), offset); 1590 } 1591 1592 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index, 1593 Register scratch) { 1594 DCHECK(register_index < (1<<30)); 1595 DCHECK(register_index < num_saved_registers_); 1596 DCHECK(register_index >= kNumCachedRegisters); 1597 DCHECK_EQ(register_index % 2, 0); 1598 register_index -= kNumCachedRegisters; 1599 int offset = kFirstCaptureOnStack - register_index * kWRegSize; 1600 // capture_location is used with Stp instructions to load/store 2 registers. 1601 // The immediate field in the encoding is limited to 7 bits (signed). 1602 if (is_int7(offset)) { 1603 return MemOperand(frame_pointer(), offset); 1604 } else { 1605 __ Add(scratch, frame_pointer(), offset); 1606 return MemOperand(scratch); 1607 } 1608 } 1609 1610 void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset, 1611 int characters) { 1612 Register offset = current_input_offset(); 1613 1614 // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU 1615 // and the operating system running on the target allow it. 1616 // If unaligned load/stores are not supported then this function must only 1617 // be used to load a single character at a time. 1618 1619 // ARMv8 supports unaligned accesses but V8 or the kernel can decide to 1620 // disable it. 1621 // TODO(pielan): See whether or not we should disable unaligned accesses. 1622 if (!CanReadUnaligned()) { 1623 DCHECK(characters == 1); 1624 } 1625 1626 if (cp_offset != 0) { 1627 if (masm_->emit_debug_code()) { 1628 __ Mov(x10, cp_offset * char_size()); 1629 __ Add(x10, x10, Operand(current_input_offset(), SXTW)); 1630 __ Cmp(x10, Operand(w10, SXTW)); 1631 // The offset needs to fit in a W register. 1632 __ Check(eq, kOffsetOutOfRange); 1633 } else { 1634 __ Add(w10, current_input_offset(), cp_offset * char_size()); 1635 } 1636 offset = w10; 1637 } 1638 1639 if (mode_ == LATIN1) { 1640 if (characters == 4) { 1641 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW)); 1642 } else if (characters == 2) { 1643 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW)); 1644 } else { 1645 DCHECK(characters == 1); 1646 __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW)); 1647 } 1648 } else { 1649 DCHECK(mode_ == UC16); 1650 if (characters == 2) { 1651 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW)); 1652 } else { 1653 DCHECK(characters == 1); 1654 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW)); 1655 } 1656 } 1657 } 1658 1659 #endif // V8_INTERPRETED_REGEXP 1660 1661 } // namespace internal 1662 } // namespace v8 1663 1664 #endif // V8_TARGET_ARCH_ARM64 1665