1 // Copyright 2013 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #if V8_TARGET_ARCH_ARM64 6 7 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h" 8 9 #include "src/arm64/macro-assembler-arm64-inl.h" 10 #include "src/code-stubs.h" 11 #include "src/log.h" 12 #include "src/macro-assembler.h" 13 #include "src/objects-inl.h" 14 #include "src/regexp/regexp-macro-assembler.h" 15 #include "src/regexp/regexp-stack.h" 16 #include "src/unicode.h" 17 18 namespace v8 { 19 namespace internal { 20 21 #ifndef V8_INTERPRETED_REGEXP 22 /* 23 * This assembler uses the following register assignment convention: 24 * - w19 : Used to temporarily store a value before a call to C code. 25 * See CheckNotBackReferenceIgnoreCase. 26 * - x20 : Pointer to the current code object (Code*), 27 * it includes the heap object tag. 28 * - w21 : Current position in input, as negative offset from 29 * the end of the string. Please notice that this is 30 * the byte offset, not the character offset! 31 * - w22 : Currently loaded character. Must be loaded using 32 * LoadCurrentCharacter before using any of the dispatch methods. 33 * - x23 : Points to tip of backtrack stack. 34 * - w24 : Position of the first character minus one: non_position_value. 35 * Used to initialize capture registers. 36 * - x25 : Address at the end of the input string: input_end. 37 * Points to byte after last character in input. 38 * - x26 : Address at the start of the input string: input_start. 39 * - w27 : Where to start in the input string. 40 * - x28 : Output array pointer. 41 * - x29/fp : Frame pointer. Used to access arguments, local variables and 42 * RegExp registers. 43 * - x16/x17 : IP registers, used by assembler. Very volatile. 44 * - sp : Points to tip of C stack. 45 * 46 * - x0-x7 : Used as a cache to store 32 bit capture registers.
These 47 * registers need to be retained every time a call to C code 48 * is done. 49 * 50 * The remaining registers are free for computations. 51 * Each call to a public method should retain this convention. 52 * 53 * The stack will have the following structure: 54 * 55 * Location Name Description 56 * (as referred to in 57 * the code) 58 * 59 * - fp[96] isolate Address of the current isolate. 60 * ^^^ sp when called ^^^ 61 * - fp[88] lr Return from the RegExp code. 62 * - fp[80] r29 Old frame pointer (CalleeSaved). 63 * - fp[0..72] r19-r28 Backup of CalleeSaved registers. 64 * - fp[-8] direct_call 1 => Direct call from JavaScript code. 65 * 0 => Call through the runtime system. 66 * - fp[-16] stack_base High end of the memory area to use as 67 * the backtracking stack. 68 * - fp[-24] output_size Output may fit multiple sets of matches. 69 * - fp[-32] input Handle containing the input string. 70 * - fp[-40] success_counter 71 * ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^ 72 * - fp[-44] register N Capture registers initialized with 73 * - fp[-48] register N + 1 non_position_value. 74 * ... The first kNumCachedRegisters (N) registers 75 * ... are cached in x0 to x7. 76 * ... Only positions must be stored in the first 77 * - ... num_saved_registers_ registers. 78 * - ... 79 * - register N + num_registers - 1 80 * ^^^^^^^^^ sp ^^^^^^^^^ 81 * 82 * The first num_saved_registers_ registers are initialized to point to 83 * "character -1" in the string (i.e., char_size() bytes before the first 84 * character of the string). The remaining registers start out as garbage. 85 * 86 * The data up to the return address must be placed there by the calling 87 * code and the remaining arguments are passed in registers, e.g. 
by calling the 88 * code entry as cast to a function with the signature: 89 * int (*match)(String* input_string, 90 * int start_index, 91 * Address start, 92 * Address end, 93 * int* capture_output_array, 94 * int num_capture_registers, 95 * byte* stack_area_base, 96 * bool direct_call = false, 97 * Isolate* isolate); 98 * The call is performed by NativeRegExpMacroAssembler::Execute() 99 * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper. 100 */ 101 102 #define __ ACCESS_MASM(masm_) 103 104 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate, 105 Zone* zone, Mode mode, 106 int registers_to_save) 107 : NativeRegExpMacroAssembler(isolate, zone), 108 masm_(new MacroAssembler(isolate, nullptr, kRegExpCodeSize, 109 CodeObjectRequired::kYes)), 110 mode_(mode), 111 num_registers_(registers_to_save), 112 num_saved_registers_(registers_to_save), 113 entry_label_(), 114 start_label_(), 115 success_label_(), 116 backtrack_label_(), 117 exit_label_() { 118 DCHECK_EQ(0, registers_to_save % 2); 119 // We can cache at most 16 W registers in x0-x7. 120 STATIC_ASSERT(kNumCachedRegisters <= 16); 121 STATIC_ASSERT((kNumCachedRegisters % 2) == 0); 122 __ B(&entry_label_); // We'll write the entry code later. 123 __ Bind(&start_label_); // And then continue from here. 124 } 125 126 127 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() { 128 delete masm_; 129 // Unuse labels in case we throw away the assembler without calling GetCode. 
130 entry_label_.Unuse(); 131 start_label_.Unuse(); 132 success_label_.Unuse(); 133 backtrack_label_.Unuse(); 134 exit_label_.Unuse(); 135 check_preempt_label_.Unuse(); 136 stack_overflow_label_.Unuse(); 137 } 138 139 int RegExpMacroAssemblerARM64::stack_limit_slack() { 140 return RegExpStack::kStackLimitSlack; 141 } 142 143 144 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) { 145 if (by != 0) { 146 __ Add(current_input_offset(), 147 current_input_offset(), by * char_size()); 148 } 149 } 150 151 152 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) { 153 DCHECK((reg >= 0) && (reg < num_registers_)); 154 if (by != 0) { 155 RegisterState register_state = GetRegisterState(reg); 156 switch (register_state) { 157 case STACKED: 158 __ Ldr(w10, register_location(reg)); 159 __ Add(w10, w10, by); 160 __ Str(w10, register_location(reg)); 161 break; 162 case CACHED_LSW: { 163 Register to_advance = GetCachedRegister(reg); 164 __ Add(to_advance, to_advance, by); 165 break; 166 } 167 case CACHED_MSW: { 168 Register to_advance = GetCachedRegister(reg); 169 __ Add(to_advance, to_advance, 170 static_cast<int64_t>(by) << kWRegSizeInBits); 171 break; 172 } 173 default: 174 UNREACHABLE(); 175 break; 176 } 177 } 178 } 179 180 181 void RegExpMacroAssemblerARM64::Backtrack() { 182 CheckPreemption(); 183 Pop(w10); 184 __ Add(x10, code_pointer(), Operand(w10, UXTW)); 185 __ Br(x10); 186 } 187 188 189 void RegExpMacroAssemblerARM64::Bind(Label* label) { 190 __ Bind(label); 191 } 192 193 194 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) { 195 CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal); 196 } 197 198 199 void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit, 200 Label* on_greater) { 201 CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater); 202 } 203 204 205 void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) { 206 __ Add(w10, current_input_offset(), Operand(-char_size())); 
207 __ Cmp(w10, string_start_minus_one()); 208 BranchOrBacktrack(eq, on_at_start); 209 } 210 211 212 void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset, 213 Label* on_not_at_start) { 214 __ Add(w10, current_input_offset(), 215 Operand(-char_size() + cp_offset * char_size())); 216 __ Cmp(w10, string_start_minus_one()); 217 BranchOrBacktrack(ne, on_not_at_start); 218 } 219 220 221 void RegExpMacroAssemblerARM64::CheckCharacterLT(uc16 limit, Label* on_less) { 222 CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less); 223 } 224 225 226 void RegExpMacroAssemblerARM64::CheckCharacters(Vector<const uc16> str, 227 int cp_offset, 228 Label* on_failure, 229 bool check_end_of_string) { 230 // This method is only ever called from the cctests. 231 232 if (check_end_of_string) { 233 // Is last character of required match inside string. 234 CheckPosition(cp_offset + str.length() - 1, on_failure); 235 } 236 237 Register characters_address = x11; 238 239 __ Add(characters_address, 240 input_end(), 241 Operand(current_input_offset(), SXTW)); 242 if (cp_offset != 0) { 243 __ Add(characters_address, characters_address, cp_offset * char_size()); 244 } 245 246 for (int i = 0; i < str.length(); i++) { 247 if (mode_ == LATIN1) { 248 __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex)); 249 DCHECK_GE(String::kMaxOneByteCharCode, str[i]); 250 } else { 251 __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex)); 252 } 253 CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure); 254 } 255 } 256 257 258 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) { 259 __ Ldr(w10, MemOperand(backtrack_stackpointer())); 260 __ Cmp(current_input_offset(), w10); 261 __ Cset(x11, eq); 262 __ Add(backtrack_stackpointer(), 263 backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2)); 264 BranchOrBacktrack(eq, on_equal); 265 } 266 267 268 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase( 269 int start_reg, bool read_backward, bool unicode, 
Label* on_no_match) { 270 Label fallthrough; 271 272 Register capture_start_offset = w10; 273 // Save the capture length in a callee-saved register so it will 274 // be preserved if we call a C helper. 275 Register capture_length = w19; 276 DCHECK(kCalleeSaved.IncludesAliasOf(capture_length)); 277 278 // Find length of back-referenced capture. 279 DCHECK_EQ(0, start_reg % 2); 280 if (start_reg < kNumCachedRegisters) { 281 __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg)); 282 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits); 283 } else { 284 __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10)); 285 } 286 __ Sub(capture_length, w11, capture_start_offset); // Length to check. 287 288 // At this point, the capture registers are either both set or both cleared. 289 // If the capture length is zero, then the capture is either empty or cleared. 290 // Fall through in both cases. 291 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough); 292 293 // Check that there are enough characters left in the input. 294 if (read_backward) { 295 __ Add(w12, string_start_minus_one(), capture_length); 296 __ Cmp(current_input_offset(), w12); 297 BranchOrBacktrack(le, on_no_match); 298 } else { 299 __ Cmn(capture_length, current_input_offset()); 300 BranchOrBacktrack(gt, on_no_match); 301 } 302 303 if (mode_ == LATIN1) { 304 Label success; 305 Label fail; 306 Label loop_check; 307 308 Register capture_start_address = x12; 309 Register capture_end_addresss = x13; 310 Register current_position_address = x14; 311 312 __ Add(capture_start_address, 313 input_end(), 314 Operand(capture_start_offset, SXTW)); 315 __ Add(capture_end_addresss, 316 capture_start_address, 317 Operand(capture_length, SXTW)); 318 __ Add(current_position_address, 319 input_end(), 320 Operand(current_input_offset(), SXTW)); 321 if (read_backward) { 322 // Offset by length when matching backwards. 
323 __ Sub(current_position_address, current_position_address, 324 Operand(capture_length, SXTW)); 325 } 326 327 Label loop; 328 __ Bind(&loop); 329 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex)); 330 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex)); 331 __ Cmp(w10, w11); 332 __ B(eq, &loop_check); 333 334 // Mismatch, try case-insensitive match (converting letters to lower-case). 335 __ Orr(w10, w10, 0x20); // Convert capture character to lower-case. 336 __ Orr(w11, w11, 0x20); // Also convert input character. 337 __ Cmp(w11, w10); 338 __ B(ne, &fail); 339 __ Sub(w10, w10, 'a'); 340 __ Cmp(w10, 'z' - 'a'); // Is w10 a lowercase letter? 341 __ B(ls, &loop_check); // In range 'a'-'z'. 342 // Latin-1: Check for values in range [224,254] but not 247. 343 __ Sub(w10, w10, 224 - 'a'); 344 __ Cmp(w10, 254 - 224); 345 __ Ccmp(w10, 247 - 224, ZFlag, ls); // Check for 247. 346 __ B(eq, &fail); // Weren't Latin-1 letters. 347 348 __ Bind(&loop_check); 349 __ Cmp(capture_start_address, capture_end_addresss); 350 __ B(lt, &loop); 351 __ B(&success); 352 353 __ Bind(&fail); 354 BranchOrBacktrack(al, on_no_match); 355 356 __ Bind(&success); 357 // Compute new value of character position after the matched part. 358 __ Sub(current_input_offset().X(), current_position_address, input_end()); 359 if (read_backward) { 360 __ Sub(current_input_offset().X(), current_input_offset().X(), 361 Operand(capture_length, SXTW)); 362 } 363 if (masm_->emit_debug_code()) { 364 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW)); 365 __ Ccmp(current_input_offset(), 0, NoFlag, eq); 366 // The current input offset should be <= 0, and fit in a W register. 367 __ Check(le, AbortReason::kOffsetOutOfRange); 368 } 369 } else { 370 DCHECK(mode_ == UC16); 371 int argument_count = 4; 372 373 // The cached registers need to be retained. 
374 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7); 375 DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2); 376 __ PushCPURegList(cached_registers); 377 378 // Put arguments into arguments registers. 379 // Parameters are 380 // x0: Address byte_offset1 - Address captured substring's start. 381 // x1: Address byte_offset2 - Address of current character position. 382 // w2: size_t byte_length - length of capture in bytes(!) 383 // x3: Isolate* isolate or 0 if unicode flag 384 385 // Address of start of capture. 386 __ Add(x0, input_end(), Operand(capture_start_offset, SXTW)); 387 // Length of capture. 388 __ Mov(w2, capture_length); 389 // Address of current input position. 390 __ Add(x1, input_end(), Operand(current_input_offset(), SXTW)); 391 if (read_backward) { 392 __ Sub(x1, x1, Operand(capture_length, SXTW)); 393 } 394 // Isolate. 395 #ifdef V8_INTL_SUPPORT 396 if (unicode) { 397 __ Mov(x3, Operand(0)); 398 } else // NOLINT 399 #endif // V8_INTL_SUPPORT 400 { 401 __ Mov(x3, ExternalReference::isolate_address(isolate())); 402 } 403 404 { 405 AllowExternalCallThatCantCauseGC scope(masm_); 406 ExternalReference function = 407 ExternalReference::re_case_insensitive_compare_uc16(isolate()); 408 __ CallCFunction(function, argument_count); 409 } 410 411 // Check if function returned non-zero for success or zero for failure. 412 // x0 is one of the registers used as a cache so it must be tested before 413 // the cache is restored. 414 __ Cmp(x0, 0); 415 __ PopCPURegList(cached_registers); 416 BranchOrBacktrack(eq, on_no_match); 417 418 // On success, advance position by length of capture. 
419 if (read_backward) { 420 __ Sub(current_input_offset(), current_input_offset(), capture_length); 421 } else { 422 __ Add(current_input_offset(), current_input_offset(), capture_length); 423 } 424 } 425 426 __ Bind(&fallthrough); 427 } 428 429 void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg, 430 bool read_backward, 431 Label* on_no_match) { 432 Label fallthrough; 433 434 Register capture_start_address = x12; 435 Register capture_end_address = x13; 436 Register current_position_address = x14; 437 Register capture_length = w15; 438 439 // Find length of back-referenced capture. 440 DCHECK_EQ(0, start_reg % 2); 441 if (start_reg < kNumCachedRegisters) { 442 __ Mov(x10, GetCachedRegister(start_reg)); 443 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits); 444 } else { 445 __ Ldp(w11, w10, capture_location(start_reg, x10)); 446 } 447 __ Sub(capture_length, w11, w10); // Length to check. 448 449 // At this point, the capture registers are either both set or both cleared. 450 // If the capture length is zero, then the capture is either empty or cleared. 451 // Fall through in both cases. 452 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough); 453 454 // Check that there are enough characters left in the input. 455 if (read_backward) { 456 __ Add(w12, string_start_minus_one(), capture_length); 457 __ Cmp(current_input_offset(), w12); 458 BranchOrBacktrack(le, on_no_match); 459 } else { 460 __ Cmn(capture_length, current_input_offset()); 461 BranchOrBacktrack(gt, on_no_match); 462 } 463 464 // Compute pointers to match string and capture string 465 __ Add(capture_start_address, input_end(), Operand(w10, SXTW)); 466 __ Add(capture_end_address, 467 capture_start_address, 468 Operand(capture_length, SXTW)); 469 __ Add(current_position_address, 470 input_end(), 471 Operand(current_input_offset(), SXTW)); 472 if (read_backward) { 473 // Offset by length when matching backwards. 
474 __ Sub(current_position_address, current_position_address, 475 Operand(capture_length, SXTW)); 476 } 477 478 Label loop; 479 __ Bind(&loop); 480 if (mode_ == LATIN1) { 481 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex)); 482 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex)); 483 } else { 484 DCHECK(mode_ == UC16); 485 __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex)); 486 __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex)); 487 } 488 __ Cmp(w10, w11); 489 BranchOrBacktrack(ne, on_no_match); 490 __ Cmp(capture_start_address, capture_end_address); 491 __ B(lt, &loop); 492 493 // Move current character position to position after match. 494 __ Sub(current_input_offset().X(), current_position_address, input_end()); 495 if (read_backward) { 496 __ Sub(current_input_offset().X(), current_input_offset().X(), 497 Operand(capture_length, SXTW)); 498 } 499 500 if (masm_->emit_debug_code()) { 501 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW)); 502 __ Ccmp(current_input_offset(), 0, NoFlag, eq); 503 // The current input offset should be <= 0, and fit in a W register. 
504 __ Check(le, AbortReason::kOffsetOutOfRange); 505 } 506 __ Bind(&fallthrough); 507 } 508 509 510 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c, 511 Label* on_not_equal) { 512 CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal); 513 } 514 515 516 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c, 517 uint32_t mask, 518 Label* on_equal) { 519 __ And(w10, current_character(), mask); 520 CompareAndBranchOrBacktrack(w10, c, eq, on_equal); 521 } 522 523 524 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c, 525 unsigned mask, 526 Label* on_not_equal) { 527 __ And(w10, current_character(), mask); 528 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal); 529 } 530 531 532 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd( 533 uc16 c, 534 uc16 minus, 535 uc16 mask, 536 Label* on_not_equal) { 537 DCHECK_GT(String::kMaxUtf16CodeUnit, minus); 538 __ Sub(w10, current_character(), minus); 539 __ And(w10, w10, mask); 540 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal); 541 } 542 543 544 void RegExpMacroAssemblerARM64::CheckCharacterInRange( 545 uc16 from, 546 uc16 to, 547 Label* on_in_range) { 548 __ Sub(w10, current_character(), from); 549 // Unsigned lower-or-same condition. 550 CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range); 551 } 552 553 554 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange( 555 uc16 from, 556 uc16 to, 557 Label* on_not_in_range) { 558 __ Sub(w10, current_character(), from); 559 // Unsigned higher condition. 
560 CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range); 561 } 562 563 564 void RegExpMacroAssemblerARM64::CheckBitInTable( 565 Handle<ByteArray> table, 566 Label* on_bit_set) { 567 __ Mov(x11, Operand(table)); 568 if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) { 569 __ And(w10, current_character(), kTableMask); 570 __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag); 571 } else { 572 __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag); 573 } 574 __ Ldrb(w11, MemOperand(x11, w10, UXTW)); 575 CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set); 576 } 577 578 579 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type, 580 Label* on_no_match) { 581 // Range checks (c in min..max) are generally implemented by an unsigned 582 // (c - min) <= (max - min) check 583 switch (type) { 584 case 's': 585 // Match space-characters 586 if (mode_ == LATIN1) { 587 // One byte space characters are '\t'..'\r', ' ' and \u00a0. 588 Label success; 589 // Check for ' ' or 0x00A0. 590 __ Cmp(current_character(), ' '); 591 __ Ccmp(current_character(), 0x00A0, ZFlag, ne); 592 __ B(eq, &success); 593 // Check range 0x09..0x0D. 594 __ Sub(w10, current_character(), '\t'); 595 CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match); 596 __ Bind(&success); 597 return true; 598 } 599 return false; 600 case 'S': 601 // The emitted code for generic character classes is good enough. 602 return false; 603 case 'd': 604 // Match ASCII digits ('0'..'9'). 605 __ Sub(w10, current_character(), '0'); 606 CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match); 607 return true; 608 case 'D': 609 // Match ASCII non-digits. 
610 __ Sub(w10, current_character(), '0'); 611 CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match); 612 return true; 613 case '.': { 614 // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) 615 // Here we emit the conditional branch only once at the end to make branch 616 // prediction more efficient, even though we could branch out of here 617 // as soon as a character matches. 618 __ Cmp(current_character(), 0x0A); 619 __ Ccmp(current_character(), 0x0D, ZFlag, ne); 620 if (mode_ == UC16) { 621 __ Sub(w10, current_character(), 0x2028); 622 // If the Z flag was set we clear the flags to force a branch. 623 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne); 624 // ls -> !((C==1) && (Z==0)) 625 BranchOrBacktrack(ls, on_no_match); 626 } else { 627 BranchOrBacktrack(eq, on_no_match); 628 } 629 return true; 630 } 631 case 'n': { 632 // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) 633 // We have to check all 4 newline characters before emitting 634 // the conditional branch. 635 __ Cmp(current_character(), 0x0A); 636 __ Ccmp(current_character(), 0x0D, ZFlag, ne); 637 if (mode_ == UC16) { 638 __ Sub(w10, current_character(), 0x2028); 639 // If the Z flag was set we clear the flags to force a fall-through. 640 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne); 641 // hi -> (C==1) && (Z==0) 642 BranchOrBacktrack(hi, on_no_match); 643 } else { 644 BranchOrBacktrack(ne, on_no_match); 645 } 646 return true; 647 } 648 case 'w': { 649 if (mode_ != LATIN1) { 650 // Table is 256 entries, so all Latin1 characters can be tested. 
651 CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match); 652 } 653 ExternalReference map = ExternalReference::re_word_character_map(isolate()); 654 __ Mov(x10, map); 655 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW)); 656 CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match); 657 return true; 658 } 659 case 'W': { 660 Label done; 661 if (mode_ != LATIN1) { 662 // Table is 256 entries, so all Latin1 characters can be tested. 663 __ Cmp(current_character(), 'z'); 664 __ B(hi, &done); 665 } 666 ExternalReference map = ExternalReference::re_word_character_map(isolate()); 667 __ Mov(x10, map); 668 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW)); 669 CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match); 670 __ Bind(&done); 671 return true; 672 } 673 case '*': 674 // Match any character. 675 return true; 676 // No custom implementation (yet): s(UC16), S(UC16). 677 default: 678 return false; 679 } 680 } 681 682 683 void RegExpMacroAssemblerARM64::Fail() { 684 __ Mov(w0, FAILURE); 685 __ B(&exit_label_); 686 } 687 688 689 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) { 690 Label return_w0; 691 // Finalize code - write the entry point code now we know how many 692 // registers we need. 693 694 // Entry code: 695 __ Bind(&entry_label_); 696 697 // Arguments on entry: 698 // x0: String* input 699 // x1: int start_offset 700 // x2: byte* input_start 701 // x3: byte* input_end 702 // x4: int* output array 703 // x5: int output array size 704 // x6: Address stack_base 705 // x7: int direct_call 706 707 // sp[8]: address of the current isolate 708 // sp[0]: secondary link/return address used by native call 709 710 // Tell the system that we have a stack frame. Because the type is MANUAL, no 711 // code is generated. 712 FrameScope scope(masm_, StackFrame::MANUAL); 713 714 // Push registers on the stack, only push the argument registers that we need. 
715 CPURegList argument_registers(x0, x5, x6, x7); 716 717 CPURegList registers_to_retain = kCalleeSaved; 718 DCHECK_EQ(11, kCalleeSaved.Count()); 719 registers_to_retain.Combine(lr); 720 721 __ PushCPURegList(registers_to_retain); 722 __ PushCPURegList(argument_registers); 723 724 // Set frame pointer in place. 725 __ Add(frame_pointer(), sp, argument_registers.Count() * kPointerSize); 726 727 // Initialize callee-saved registers. 728 __ Mov(start_offset(), w1); 729 __ Mov(input_start(), x2); 730 __ Mov(input_end(), x3); 731 __ Mov(output_array(), x4); 732 733 // Set the number of registers we will need to allocate, that is: 734 // - success_counter (X register) 735 // - (num_registers_ - kNumCachedRegisters) (W registers) 736 int num_wreg_to_allocate = num_registers_ - kNumCachedRegisters; 737 // Do not allocate registers on the stack if they can all be cached. 738 if (num_wreg_to_allocate < 0) { num_wreg_to_allocate = 0; } 739 // Make room for the success_counter. 740 num_wreg_to_allocate += 2; 741 742 // Make sure the stack alignment will be respected. 743 int alignment = masm_->ActivationFrameAlignment(); 744 DCHECK_EQ(alignment % 16, 0); 745 int align_mask = (alignment / kWRegSize) - 1; 746 num_wreg_to_allocate = (num_wreg_to_allocate + align_mask) & ~align_mask; 747 748 // Check if we have space on the stack. 749 Label stack_limit_hit; 750 Label stack_ok; 751 752 ExternalReference stack_limit = 753 ExternalReference::address_of_stack_limit(isolate()); 754 __ Mov(x10, stack_limit); 755 __ Ldr(x10, MemOperand(x10)); 756 __ Subs(x10, sp, x10); 757 758 // Handle it if the stack pointer is already below the stack limit. 759 __ B(ls, &stack_limit_hit); 760 761 // Check if there is room for the variable number of registers above 762 // the stack limit. 763 __ Cmp(x10, num_wreg_to_allocate * kWRegSize); 764 __ B(hs, &stack_ok); 765 766 // Exit with OutOfMemory exception. There is not enough space on the stack 767 // for our working registers. 
768 __ Mov(w0, EXCEPTION); 769 __ B(&return_w0); 770 771 __ Bind(&stack_limit_hit); 772 CallCheckStackGuardState(x10); 773 // If returned value is non-zero, we exit with the returned value as result. 774 __ Cbnz(w0, &return_w0); 775 776 __ Bind(&stack_ok); 777 778 // Allocate space on stack. 779 __ Claim(num_wreg_to_allocate, kWRegSize); 780 781 // Initialize success_counter with 0. 782 __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter)); 783 784 // Find negative length (offset of start relative to end). 785 __ Sub(x10, input_start(), input_end()); 786 if (masm_->emit_debug_code()) { 787 // Check that the size of the input string chars is in range. 788 __ Neg(x11, x10); 789 __ Cmp(x11, SeqTwoByteString::kMaxCharsSize); 790 __ Check(ls, AbortReason::kInputStringTooLong); 791 } 792 __ Mov(current_input_offset(), w10); 793 794 // The non-position value is used as a clearing value for the 795 // capture registers, it corresponds to the position of the first character 796 // minus one. 797 __ Sub(string_start_minus_one(), current_input_offset(), char_size()); 798 __ Sub(string_start_minus_one(), string_start_minus_one(), 799 Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0)); 800 // We can store this value twice in an X register for initializing 801 // on-stack registers later. 802 __ Orr(twice_non_position_value(), string_start_minus_one().X(), 803 Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits)); 804 805 // Initialize code pointer register. 806 __ Mov(code_pointer(), Operand(masm_->CodeObject())); 807 808 Label load_char_start_regexp, start_regexp; 809 // Load newline if index is at start, previous character otherwise. 810 __ Cbnz(start_offset(), &load_char_start_regexp); 811 __ Mov(current_character(), '\n'); 812 __ B(&start_regexp); 813 814 // Global regexp restarts matching here. 815 __ Bind(&load_char_start_regexp); 816 // Load previous char as initial value of current character register. 
817 LoadCurrentCharacterUnchecked(-1, 1); 818 __ Bind(&start_regexp); 819 // Initialize on-stack registers. 820 if (num_saved_registers_ > 0) { 821 ClearRegisters(0, num_saved_registers_ - 1); 822 } 823 824 // Initialize backtrack stack pointer. 825 __ Ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackBase)); 826 827 // Execute 828 __ B(&start_label_); 829 830 if (backtrack_label_.is_linked()) { 831 __ Bind(&backtrack_label_); 832 Backtrack(); 833 } 834 835 if (success_label_.is_linked()) { 836 Register first_capture_start = w15; 837 838 // Save captures when successful. 839 __ Bind(&success_label_); 840 841 if (num_saved_registers_ > 0) { 842 // V8 expects the output to be an int32_t array. 843 Register capture_start = w12; 844 Register capture_end = w13; 845 Register input_length = w14; 846 847 // Copy captures to output. 848 849 // Get string length. 850 __ Sub(x10, input_end(), input_start()); 851 if (masm_->emit_debug_code()) { 852 // Check that the size of the input string chars is in range. 853 __ Cmp(x10, SeqTwoByteString::kMaxCharsSize); 854 __ Check(ls, AbortReason::kInputStringTooLong); 855 } 856 // input_start has a start_offset offset on entry. We need to include 857 // it when computing the length of the whole string. 858 if (mode_ == UC16) { 859 __ Add(input_length, start_offset(), Operand(w10, LSR, 1)); 860 } else { 861 __ Add(input_length, start_offset(), w10); 862 } 863 864 // Copy the results to the output array from the cached registers first. 865 for (int i = 0; 866 (i < num_saved_registers_) && (i < kNumCachedRegisters); 867 i += 2) { 868 __ Mov(capture_start.X(), GetCachedRegister(i)); 869 __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits); 870 if ((i == 0) && global_with_zero_length_check()) { 871 // Keep capture start for the zero-length check later. 872 __ Mov(first_capture_start, capture_start); 873 } 874 // Offsets need to be relative to the start of the string. 
875 if (mode_ == UC16) { 876 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1)); 877 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 878 } else { 879 __ Add(capture_start, input_length, capture_start); 880 __ Add(capture_end, input_length, capture_end); 881 } 882 // The output pointer advances for a possible global match. 883 __ Stp(capture_start, 884 capture_end, 885 MemOperand(output_array(), kPointerSize, PostIndex)); 886 } 887 888 // Only carry on if there are more than kNumCachedRegisters capture 889 // registers. 890 int num_registers_left_on_stack = 891 num_saved_registers_ - kNumCachedRegisters; 892 if (num_registers_left_on_stack > 0) { 893 Register base = x10; 894 // There are always an even number of capture registers. A couple of 895 // registers determine one match with two offsets. 896 DCHECK_EQ(0, num_registers_left_on_stack % 2); 897 __ Add(base, frame_pointer(), kFirstCaptureOnStack); 898 899 // We can unroll the loop here, we should not unroll for less than 2 900 // registers. 901 STATIC_ASSERT(kNumRegistersToUnroll > 2); 902 if (num_registers_left_on_stack <= kNumRegistersToUnroll) { 903 for (int i = 0; i < num_registers_left_on_stack / 2; i++) { 904 __ Ldp(capture_end, 905 capture_start, 906 MemOperand(base, -kPointerSize, PostIndex)); 907 if ((i == 0) && global_with_zero_length_check()) { 908 // Keep capture start for the zero-length check later. 909 __ Mov(first_capture_start, capture_start); 910 } 911 // Offsets need to be relative to the start of the string. 912 if (mode_ == UC16) { 913 __ Add(capture_start, 914 input_length, 915 Operand(capture_start, ASR, 1)); 916 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 917 } else { 918 __ Add(capture_start, input_length, capture_start); 919 __ Add(capture_end, input_length, capture_end); 920 } 921 // The output pointer advances for a possible global match. 
922 __ Stp(capture_start, 923 capture_end, 924 MemOperand(output_array(), kPointerSize, PostIndex)); 925 } 926 } else { 927 Label loop, start; 928 __ Mov(x11, num_registers_left_on_stack); 929 930 __ Ldp(capture_end, 931 capture_start, 932 MemOperand(base, -kPointerSize, PostIndex)); 933 if (global_with_zero_length_check()) { 934 __ Mov(first_capture_start, capture_start); 935 } 936 __ B(&start); 937 938 __ Bind(&loop); 939 __ Ldp(capture_end, 940 capture_start, 941 MemOperand(base, -kPointerSize, PostIndex)); 942 __ Bind(&start); 943 if (mode_ == UC16) { 944 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1)); 945 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 946 } else { 947 __ Add(capture_start, input_length, capture_start); 948 __ Add(capture_end, input_length, capture_end); 949 } 950 // The output pointer advances for a possible global match. 951 __ Stp(capture_start, 952 capture_end, 953 MemOperand(output_array(), kPointerSize, PostIndex)); 954 __ Sub(x11, x11, 2); 955 __ Cbnz(x11, &loop); 956 } 957 } 958 } 959 960 if (global()) { 961 Register success_counter = w0; 962 Register output_size = x10; 963 // Restart matching if the regular expression is flagged as global. 964 965 // Increment success counter. 966 __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter)); 967 __ Add(success_counter, success_counter, 1); 968 __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter)); 969 970 // Capture results have been stored, so the number of remaining global 971 // output registers is reduced by the number of stored captures. 972 __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize)); 973 __ Sub(output_size, output_size, num_saved_registers_); 974 // Check whether we have enough room for another set of capture results. 975 __ Cmp(output_size, num_saved_registers_); 976 __ B(lt, &return_w0); 977 978 // The output pointer is already set to the next field in the output 979 // array. 
980 // Update output size on the frame before we restart matching. 981 __ Str(output_size, MemOperand(frame_pointer(), kOutputSize)); 982 983 if (global_with_zero_length_check()) { 984 // Special case for zero-length matches. 985 __ Cmp(current_input_offset(), first_capture_start); 986 // Not a zero-length match, restart. 987 __ B(ne, &load_char_start_regexp); 988 // Offset from the end is zero if we already reached the end. 989 __ Cbz(current_input_offset(), &return_w0); 990 // Advance current position after a zero-length match. 991 Label advance; 992 __ bind(&advance); 993 __ Add(current_input_offset(), 994 current_input_offset(), 995 Operand((mode_ == UC16) ? 2 : 1)); 996 if (global_unicode()) CheckNotInSurrogatePair(0, &advance); 997 } 998 999 __ B(&load_char_start_regexp); 1000 } else { 1001 __ Mov(w0, SUCCESS); 1002 } 1003 } 1004 1005 if (exit_label_.is_linked()) { 1006 // Exit and return w0 1007 __ Bind(&exit_label_); 1008 if (global()) { 1009 __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter)); 1010 } 1011 } 1012 1013 __ Bind(&return_w0); 1014 1015 // Set stack pointer back to first register to retain 1016 __ Mov(sp, fp); 1017 1018 // Restore registers. 1019 __ PopCPURegList(registers_to_retain); 1020 1021 __ Ret(); 1022 1023 Label exit_with_exception; 1024 // Registers x0 to x7 are used to store the first captures, they need to be 1025 // retained over calls to C++ code. 1026 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7); 1027 DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2); 1028 1029 if (check_preempt_label_.is_linked()) { 1030 __ Bind(&check_preempt_label_); 1031 SaveLinkRegister(); 1032 // The cached registers need to be retained. 1033 __ PushCPURegList(cached_registers); 1034 CallCheckStackGuardState(x10); 1035 // Returning from the regexp code restores the stack (sp <- fp) 1036 // so we don't need to drop the link register from it before exiting. 
1037 __ Cbnz(w0, &return_w0); 1038 // Reset the cached registers. 1039 __ PopCPURegList(cached_registers); 1040 RestoreLinkRegister(); 1041 __ Ret(); 1042 } 1043 1044 if (stack_overflow_label_.is_linked()) { 1045 __ Bind(&stack_overflow_label_); 1046 SaveLinkRegister(); 1047 // The cached registers need to be retained. 1048 __ PushCPURegList(cached_registers); 1049 // Call GrowStack(backtrack_stackpointer(), &stack_base) 1050 __ Mov(x2, ExternalReference::isolate_address(isolate())); 1051 __ Add(x1, frame_pointer(), kStackBase); 1052 __ Mov(x0, backtrack_stackpointer()); 1053 ExternalReference grow_stack = 1054 ExternalReference::re_grow_stack(isolate()); 1055 __ CallCFunction(grow_stack, 3); 1056 // If return nullptr, we have failed to grow the stack, and 1057 // must exit with a stack-overflow exception. 1058 // Returning from the regexp code restores the stack (sp <- fp) 1059 // so we don't need to drop the link register from it before exiting. 1060 __ Cbz(w0, &exit_with_exception); 1061 // Otherwise use return value as new stack pointer. 1062 __ Mov(backtrack_stackpointer(), x0); 1063 // Reset the cached registers. 
1064 __ PopCPURegList(cached_registers); 1065 RestoreLinkRegister(); 1066 __ Ret(); 1067 } 1068 1069 if (exit_with_exception.is_linked()) { 1070 __ Bind(&exit_with_exception); 1071 __ Mov(w0, EXCEPTION); 1072 __ B(&return_w0); 1073 } 1074 1075 CodeDesc code_desc; 1076 masm_->GetCode(isolate(), &code_desc); 1077 Handle<Code> code = isolate()->factory()->NewCode(code_desc, Code::REGEXP, 1078 masm_->CodeObject()); 1079 PROFILE(masm_->isolate(), 1080 RegExpCodeCreateEvent(AbstractCode::cast(*code), *source)); 1081 return Handle<HeapObject>::cast(code); 1082 } 1083 1084 1085 void RegExpMacroAssemblerARM64::GoTo(Label* to) { 1086 BranchOrBacktrack(al, to); 1087 } 1088 1089 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand, 1090 Label* if_ge) { 1091 Register to_compare = GetRegister(reg, w10); 1092 CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge); 1093 } 1094 1095 1096 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand, 1097 Label* if_lt) { 1098 Register to_compare = GetRegister(reg, w10); 1099 CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt); 1100 } 1101 1102 1103 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) { 1104 Register to_compare = GetRegister(reg, w10); 1105 __ Cmp(to_compare, current_input_offset()); 1106 BranchOrBacktrack(eq, if_eq); 1107 } 1108 1109 RegExpMacroAssembler::IrregexpImplementation 1110 RegExpMacroAssemblerARM64::Implementation() { 1111 return kARM64Implementation; 1112 } 1113 1114 1115 void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset, 1116 Label* on_end_of_input, 1117 bool check_bounds, 1118 int characters) { 1119 // TODO(pielan): Make sure long strings are caught before this, and not 1120 // just asserted in debug mode. 1121 // Be sane! 
(And ensure that an int32_t can be used to index the string) 1122 DCHECK(cp_offset < (1<<30)); 1123 if (check_bounds) { 1124 if (cp_offset >= 0) { 1125 CheckPosition(cp_offset + characters - 1, on_end_of_input); 1126 } else { 1127 CheckPosition(cp_offset, on_end_of_input); 1128 } 1129 } 1130 LoadCurrentCharacterUnchecked(cp_offset, characters); 1131 } 1132 1133 1134 void RegExpMacroAssemblerARM64::PopCurrentPosition() { 1135 Pop(current_input_offset()); 1136 } 1137 1138 1139 void RegExpMacroAssemblerARM64::PopRegister(int register_index) { 1140 Pop(w10); 1141 StoreRegister(register_index, w10); 1142 } 1143 1144 1145 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) { 1146 if (label->is_bound()) { 1147 int target = label->pos(); 1148 __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag); 1149 } else { 1150 __ Adr(x10, label, MacroAssembler::kAdrFar); 1151 __ Sub(x10, x10, code_pointer()); 1152 if (masm_->emit_debug_code()) { 1153 __ Cmp(x10, kWRegMask); 1154 // The code offset has to fit in a W register. 
1155 __ Check(ls, AbortReason::kOffsetOutOfRange); 1156 } 1157 } 1158 Push(w10); 1159 CheckStackLimit(); 1160 } 1161 1162 1163 void RegExpMacroAssemblerARM64::PushCurrentPosition() { 1164 Push(current_input_offset()); 1165 } 1166 1167 1168 void RegExpMacroAssemblerARM64::PushRegister(int register_index, 1169 StackCheckFlag check_stack_limit) { 1170 Register to_push = GetRegister(register_index, w10); 1171 Push(to_push); 1172 if (check_stack_limit) CheckStackLimit(); 1173 } 1174 1175 1176 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) { 1177 RegisterState register_state = GetRegisterState(reg); 1178 switch (register_state) { 1179 case STACKED: 1180 __ Ldr(current_input_offset(), register_location(reg)); 1181 break; 1182 case CACHED_LSW: 1183 __ Mov(current_input_offset(), GetCachedRegister(reg).W()); 1184 break; 1185 case CACHED_MSW: 1186 __ Lsr(current_input_offset().X(), GetCachedRegister(reg), 1187 kWRegSizeInBits); 1188 break; 1189 default: 1190 UNREACHABLE(); 1191 break; 1192 } 1193 } 1194 1195 1196 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) { 1197 Register read_from = GetRegister(reg, w10); 1198 __ Ldr(x11, MemOperand(frame_pointer(), kStackBase)); 1199 __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW)); 1200 } 1201 1202 1203 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) { 1204 Label after_position; 1205 __ Cmp(current_input_offset(), -by * char_size()); 1206 __ B(ge, &after_position); 1207 __ Mov(current_input_offset(), -by * char_size()); 1208 // On RegExp code entry (where this operation is used), the character before 1209 // the current position is expected to be already loaded. 1210 // We have advanced the position, so it's safe to read backwards. 
1211 LoadCurrentCharacterUnchecked(-1, 1); 1212 __ Bind(&after_position); 1213 } 1214 1215 1216 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) { 1217 DCHECK(register_index >= num_saved_registers_); // Reserved for positions! 1218 Register set_to = wzr; 1219 if (to != 0) { 1220 set_to = w10; 1221 __ Mov(set_to, to); 1222 } 1223 StoreRegister(register_index, set_to); 1224 } 1225 1226 1227 bool RegExpMacroAssemblerARM64::Succeed() { 1228 __ B(&success_label_); 1229 return global(); 1230 } 1231 1232 1233 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg, 1234 int cp_offset) { 1235 Register position = current_input_offset(); 1236 if (cp_offset != 0) { 1237 position = w10; 1238 __ Add(position, current_input_offset(), cp_offset * char_size()); 1239 } 1240 StoreRegister(reg, position); 1241 } 1242 1243 1244 void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) { 1245 DCHECK(reg_from <= reg_to); 1246 int num_registers = reg_to - reg_from + 1; 1247 1248 // If the first capture register is cached in a hardware register but not 1249 // aligned on a 64-bit one, we need to clear the first one specifically. 1250 if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) { 1251 StoreRegister(reg_from, string_start_minus_one()); 1252 num_registers--; 1253 reg_from++; 1254 } 1255 1256 // Clear cached registers in pairs as far as possible. 1257 while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) { 1258 DCHECK(GetRegisterState(reg_from) == CACHED_LSW); 1259 __ Mov(GetCachedRegister(reg_from), twice_non_position_value()); 1260 reg_from += 2; 1261 num_registers -= 2; 1262 } 1263 1264 if ((num_registers % 2) == 1) { 1265 StoreRegister(reg_from, string_start_minus_one()); 1266 num_registers--; 1267 reg_from++; 1268 } 1269 1270 if (num_registers > 0) { 1271 // If there are some remaining registers, they are stored on the stack. 
1272 DCHECK_LE(kNumCachedRegisters, reg_from); 1273 1274 // Move down the indexes of the registers on stack to get the correct offset 1275 // in memory. 1276 reg_from -= kNumCachedRegisters; 1277 reg_to -= kNumCachedRegisters; 1278 // We should not unroll the loop for less than 2 registers. 1279 STATIC_ASSERT(kNumRegistersToUnroll > 2); 1280 // We position the base pointer to (reg_from + 1). 1281 int base_offset = kFirstRegisterOnStack - 1282 kWRegSize - (kWRegSize * reg_from); 1283 if (num_registers > kNumRegistersToUnroll) { 1284 Register base = x10; 1285 __ Add(base, frame_pointer(), base_offset); 1286 1287 Label loop; 1288 __ Mov(x11, num_registers); 1289 __ Bind(&loop); 1290 __ Str(twice_non_position_value(), 1291 MemOperand(base, -kPointerSize, PostIndex)); 1292 __ Sub(x11, x11, 2); 1293 __ Cbnz(x11, &loop); 1294 } else { 1295 for (int i = reg_from; i <= reg_to; i += 2) { 1296 __ Str(twice_non_position_value(), 1297 MemOperand(frame_pointer(), base_offset)); 1298 base_offset -= kWRegSize * 2; 1299 } 1300 } 1301 } 1302 } 1303 1304 1305 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) { 1306 __ Ldr(x10, MemOperand(frame_pointer(), kStackBase)); 1307 __ Sub(x10, backtrack_stackpointer(), x10); 1308 if (masm_->emit_debug_code()) { 1309 __ Cmp(x10, Operand(w10, SXTW)); 1310 // The stack offset needs to fit in a W register. 1311 __ Check(eq, AbortReason::kOffsetOutOfRange); 1312 } 1313 StoreRegister(reg, w10); 1314 } 1315 1316 1317 // Helper function for reading a value out of a stack frame. 
1318 template <typename T> 1319 static T& frame_entry(Address re_frame, int frame_offset) { 1320 return *reinterpret_cast<T*>(re_frame + frame_offset); 1321 } 1322 1323 1324 template <typename T> 1325 static T* frame_entry_address(Address re_frame, int frame_offset) { 1326 return reinterpret_cast<T*>(re_frame + frame_offset); 1327 } 1328 1329 1330 int RegExpMacroAssemblerARM64::CheckStackGuardState( 1331 Address* return_address, Code* re_code, Address re_frame, int start_index, 1332 const byte** input_start, const byte** input_end) { 1333 return NativeRegExpMacroAssembler::CheckStackGuardState( 1334 frame_entry<Isolate*>(re_frame, kIsolate), start_index, 1335 frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code, 1336 frame_entry_address<String*>(re_frame, kInput), input_start, input_end); 1337 } 1338 1339 1340 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset, 1341 Label* on_outside_input) { 1342 if (cp_offset >= 0) { 1343 CompareAndBranchOrBacktrack(current_input_offset(), 1344 -cp_offset * char_size(), ge, on_outside_input); 1345 } else { 1346 __ Add(w12, current_input_offset(), Operand(cp_offset * char_size())); 1347 __ Cmp(w12, string_start_minus_one()); 1348 BranchOrBacktrack(le, on_outside_input); 1349 } 1350 } 1351 1352 1353 // Private methods: 1354 1355 void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) { 1356 // Allocate space on the stack to store the return address. The 1357 // CheckStackGuardState C++ function will override it if the code 1358 // moved. Allocate extra space for 2 arguments passed by pointers. 1359 // AAPCS64 requires the stack to be 16 byte aligned. 1360 int alignment = masm_->ActivationFrameAlignment(); 1361 DCHECK_EQ(alignment % 16, 0); 1362 int align_mask = (alignment / kXRegSize) - 1; 1363 int xreg_to_claim = (3 + align_mask) & ~align_mask; 1364 1365 __ Claim(xreg_to_claim); 1366 1367 // CheckStackGuardState needs the end and start addresses of the input string. 
1368 __ Poke(input_end(), 2 * kPointerSize); 1369 __ Add(x5, sp, 2 * kPointerSize); 1370 __ Poke(input_start(), kPointerSize); 1371 __ Add(x4, sp, kPointerSize); 1372 1373 __ Mov(w3, start_offset()); 1374 // RegExp code frame pointer. 1375 __ Mov(x2, frame_pointer()); 1376 // Code* of self. 1377 __ Mov(x1, Operand(masm_->CodeObject())); 1378 1379 // We need to pass a pointer to the return address as first argument. 1380 // The DirectCEntry stub will place the return address on the stack before 1381 // calling so the stack pointer will point to it. 1382 __ Mov(x0, sp); 1383 1384 ExternalReference check_stack_guard_state = 1385 ExternalReference::re_check_stack_guard_state(isolate()); 1386 __ Mov(scratch, check_stack_guard_state); 1387 DirectCEntryStub stub(isolate()); 1388 stub.GenerateCall(masm_, scratch); 1389 1390 // The input string may have been moved in memory, we need to reload it. 1391 __ Peek(input_start(), kPointerSize); 1392 __ Peek(input_end(), 2 * kPointerSize); 1393 1394 __ Drop(xreg_to_claim); 1395 1396 // Reload the Code pointer. 1397 __ Mov(code_pointer(), Operand(masm_->CodeObject())); 1398 } 1399 1400 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition, 1401 Label* to) { 1402 if (condition == al) { // Unconditional. 
1403 if (to == nullptr) { 1404 Backtrack(); 1405 return; 1406 } 1407 __ B(to); 1408 return; 1409 } 1410 if (to == nullptr) { 1411 to = &backtrack_label_; 1412 } 1413 __ B(condition, to); 1414 } 1415 1416 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg, 1417 int immediate, 1418 Condition condition, 1419 Label* to) { 1420 if ((immediate == 0) && ((condition == eq) || (condition == ne))) { 1421 if (to == nullptr) { 1422 to = &backtrack_label_; 1423 } 1424 if (condition == eq) { 1425 __ Cbz(reg, to); 1426 } else { 1427 __ Cbnz(reg, to); 1428 } 1429 } else { 1430 __ Cmp(reg, immediate); 1431 BranchOrBacktrack(condition, to); 1432 } 1433 } 1434 1435 1436 void RegExpMacroAssemblerARM64::CheckPreemption() { 1437 // Check for preemption. 1438 ExternalReference stack_limit = 1439 ExternalReference::address_of_stack_limit(isolate()); 1440 __ Mov(x10, stack_limit); 1441 __ Ldr(x10, MemOperand(x10)); 1442 __ Cmp(sp, x10); 1443 CallIf(&check_preempt_label_, ls); 1444 } 1445 1446 1447 void RegExpMacroAssemblerARM64::CheckStackLimit() { 1448 ExternalReference stack_limit = 1449 ExternalReference::address_of_regexp_stack_limit(isolate()); 1450 __ Mov(x10, stack_limit); 1451 __ Ldr(x10, MemOperand(x10)); 1452 __ Cmp(backtrack_stackpointer(), x10); 1453 CallIf(&stack_overflow_label_, ls); 1454 } 1455 1456 1457 void RegExpMacroAssemblerARM64::Push(Register source) { 1458 DCHECK(source.Is32Bits()); 1459 DCHECK(!source.is(backtrack_stackpointer())); 1460 __ Str(source, 1461 MemOperand(backtrack_stackpointer(), 1462 -static_cast<int>(kWRegSize), 1463 PreIndex)); 1464 } 1465 1466 1467 void RegExpMacroAssemblerARM64::Pop(Register target) { 1468 DCHECK(target.Is32Bits()); 1469 DCHECK(!target.is(backtrack_stackpointer())); 1470 __ Ldr(target, 1471 MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex)); 1472 } 1473 1474 1475 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) { 1476 DCHECK_GT(kNumCachedRegisters, register_index); 1477 return 
Register::Create(register_index / 2, kXRegSizeInBits); 1478 } 1479 1480 1481 Register RegExpMacroAssemblerARM64::GetRegister(int register_index, 1482 Register maybe_result) { 1483 DCHECK(maybe_result.Is32Bits()); 1484 DCHECK_LE(0, register_index); 1485 if (num_registers_ <= register_index) { 1486 num_registers_ = register_index + 1; 1487 } 1488 Register result = NoReg; 1489 RegisterState register_state = GetRegisterState(register_index); 1490 switch (register_state) { 1491 case STACKED: 1492 __ Ldr(maybe_result, register_location(register_index)); 1493 result = maybe_result; 1494 break; 1495 case CACHED_LSW: 1496 result = GetCachedRegister(register_index).W(); 1497 break; 1498 case CACHED_MSW: 1499 __ Lsr(maybe_result.X(), GetCachedRegister(register_index), 1500 kWRegSizeInBits); 1501 result = maybe_result; 1502 break; 1503 default: 1504 UNREACHABLE(); 1505 break; 1506 } 1507 DCHECK(result.Is32Bits()); 1508 return result; 1509 } 1510 1511 1512 void RegExpMacroAssemblerARM64::StoreRegister(int register_index, 1513 Register source) { 1514 DCHECK(source.Is32Bits()); 1515 DCHECK_LE(0, register_index); 1516 if (num_registers_ <= register_index) { 1517 num_registers_ = register_index + 1; 1518 } 1519 1520 RegisterState register_state = GetRegisterState(register_index); 1521 switch (register_state) { 1522 case STACKED: 1523 __ Str(source, register_location(register_index)); 1524 break; 1525 case CACHED_LSW: { 1526 Register cached_register = GetCachedRegister(register_index); 1527 if (!source.Is(cached_register.W())) { 1528 __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits); 1529 } 1530 break; 1531 } 1532 case CACHED_MSW: { 1533 Register cached_register = GetCachedRegister(register_index); 1534 __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits); 1535 break; 1536 } 1537 default: 1538 UNREACHABLE(); 1539 break; 1540 } 1541 } 1542 1543 1544 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) { 1545 Label skip_call; 1546 if (condition 
!= al) __ B(&skip_call, NegateCondition(condition)); 1547 __ Bl(to); 1548 __ Bind(&skip_call); 1549 } 1550 1551 1552 void RegExpMacroAssemblerARM64::RestoreLinkRegister() { 1553 __ Pop(lr, xzr); 1554 __ Add(lr, lr, Operand(masm_->CodeObject())); 1555 } 1556 1557 1558 void RegExpMacroAssemblerARM64::SaveLinkRegister() { 1559 __ Sub(lr, lr, Operand(masm_->CodeObject())); 1560 __ Push(xzr, lr); 1561 } 1562 1563 1564 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) { 1565 DCHECK(register_index < (1<<30)); 1566 DCHECK_LE(kNumCachedRegisters, register_index); 1567 if (num_registers_ <= register_index) { 1568 num_registers_ = register_index + 1; 1569 } 1570 register_index -= kNumCachedRegisters; 1571 int offset = kFirstRegisterOnStack - register_index * kWRegSize; 1572 return MemOperand(frame_pointer(), offset); 1573 } 1574 1575 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index, 1576 Register scratch) { 1577 DCHECK(register_index < (1<<30)); 1578 DCHECK(register_index < num_saved_registers_); 1579 DCHECK_LE(kNumCachedRegisters, register_index); 1580 DCHECK_EQ(register_index % 2, 0); 1581 register_index -= kNumCachedRegisters; 1582 int offset = kFirstCaptureOnStack - register_index * kWRegSize; 1583 // capture_location is used with Stp instructions to load/store 2 registers. 1584 // The immediate field in the encoding is limited to 7 bits (signed). 1585 if (is_int7(offset)) { 1586 return MemOperand(frame_pointer(), offset); 1587 } else { 1588 __ Add(scratch, frame_pointer(), offset); 1589 return MemOperand(scratch); 1590 } 1591 } 1592 1593 void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset, 1594 int characters) { 1595 Register offset = current_input_offset(); 1596 1597 // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU 1598 // and the operating system running on the target allow it. 
1599 // If unaligned load/stores are not supported then this function must only 1600 // be used to load a single character at a time. 1601 1602 // ARMv8 supports unaligned accesses but V8 or the kernel can decide to 1603 // disable it. 1604 // TODO(pielan): See whether or not we should disable unaligned accesses. 1605 if (!CanReadUnaligned()) { 1606 DCHECK_EQ(1, characters); 1607 } 1608 1609 if (cp_offset != 0) { 1610 if (masm_->emit_debug_code()) { 1611 __ Mov(x10, cp_offset * char_size()); 1612 __ Add(x10, x10, Operand(current_input_offset(), SXTW)); 1613 __ Cmp(x10, Operand(w10, SXTW)); 1614 // The offset needs to fit in a W register. 1615 __ Check(eq, AbortReason::kOffsetOutOfRange); 1616 } else { 1617 __ Add(w10, current_input_offset(), cp_offset * char_size()); 1618 } 1619 offset = w10; 1620 } 1621 1622 if (mode_ == LATIN1) { 1623 if (characters == 4) { 1624 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW)); 1625 } else if (characters == 2) { 1626 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW)); 1627 } else { 1628 DCHECK_EQ(1, characters); 1629 __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW)); 1630 } 1631 } else { 1632 DCHECK(mode_ == UC16); 1633 if (characters == 2) { 1634 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW)); 1635 } else { 1636 DCHECK_EQ(1, characters); 1637 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW)); 1638 } 1639 } 1640 } 1641 1642 #endif // V8_INTERPRETED_REGEXP 1643 1644 } // namespace internal 1645 } // namespace v8 1646 1647 #undef __ 1648 1649 #endif // V8_TARGET_ARCH_ARM64 1650