// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#if V8_TARGET_ARCH_ARM64

#include "src/regexp/arm64/regexp-macro-assembler-arm64.h"

#include "src/code-stubs.h"
#include "src/log.h"
#include "src/macro-assembler.h"
#include "src/profiler/cpu-profiler.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-stack.h"
#include "src/unicode.h"

namespace v8 {
namespace internal {

#ifndef V8_INTERPRETED_REGEXP
/*
 * This assembler uses the following register assignment convention:
 * - w19     : Used to temporarily store a value before a call to C code.
 *             See CheckNotBackReferenceIgnoreCase.
 * - x20     : Pointer to the current code object (Code*),
 *             it includes the heap object tag.
 * - w21     : Current position in input, as negative offset from
 *             the end of the string. Please notice that this is
 *             the byte offset, not the character offset!
 * - w22     : Currently loaded character. Must be loaded using
 *             LoadCurrentCharacter before using any of the dispatch methods.
 * - x23     : Points to tip of backtrack stack.
 * - w24     : Position of the first character minus one: non_position_value.
 *             Used to initialize capture registers.
 * - x25     : Address at the end of the input string: input_end.
 *             Points to byte after last character in input.
 * - x26     : Address at the start of the input string: input_start.
 * - w27     : Where to start in the input string.
 * - x28     : Output array pointer.
 * - x29/fp  : Frame pointer. Used to access arguments, local variables and
 *             RegExp registers.
 * - x16/x17 : IP registers, used by assembler. Very volatile.
 * - csp     : Points to tip of C stack.
 *
 * - x0-x7   : Used as a cache to store 32 bit capture registers. These
 *             registers need to be retained every time a call to C code
 *             is done.
 *
 * The remaining registers are free for computations.
 * Each call to a public method should retain this convention.
 *
 * The stack will have the following structure:
 *
 *  Location    Name               Description
 *              (as referred to in
 *              the code)
 *
 *  - fp[104]   isolate            Address of the current isolate.
 *  - fp[96]    return_address     Secondary link/return address
 *                                 used by an exit frame if this is a
 *                                 native call.
 *  ^^^ csp when called ^^^
 *  - fp[88]    lr                 Return from the RegExp code.
 *  - fp[80]    r29                Old frame pointer (CalleeSaved).
 *  - fp[0..72] r19-r28            Backup of CalleeSaved registers.
 *  - fp[-8]    direct_call        1 => Direct call from JavaScript code.
 *                                 0 => Call through the runtime system.
 *  - fp[-16]   stack_base         High end of the memory area to use as
 *                                 the backtracking stack.
 *  - fp[-24]   output_size        Output may fit multiple sets of matches.
 *  - fp[-32]   input              Handle containing the input string.
 *  - fp[-40]   success_counter
 *  ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^
 *  - fp[-44]   register N         Capture registers initialized with
 *  - fp[-48]   register N + 1     non_position_value.
 *              ...                The first kNumCachedRegisters (N) registers
 *              ...                are cached in x0 to x7.
 *              ...                Only positions must be stored in the first
 *  -           ...                num_saved_registers_ registers.
 *  -           ...
 *  -           register N + num_registers - 1
 *  ^^^^^^^^^ csp ^^^^^^^^^
 *
 * The first num_saved_registers_ registers are initialized to point to
 * "character -1" in the string (i.e., char_size() bytes before the first
 * character of the string). The remaining registers start out as garbage.
 *
 * The data up to the return address must be placed there by the calling
 * code and the remaining arguments are passed in registers, e.g. by calling the
 * code entry as cast to a function with the signature:
 * int (*match)(String* input,
 *              int start_offset,
 *              Address input_start,
 *              Address input_end,
 *              int* output,
 *              int output_size,
 *              Address stack_base,
 *              bool direct_call = false,
 *              Address secondary_return_address,  // Only used by native call.
 *              Isolate* isolate)
 * The call is performed by NativeRegExpMacroAssembler::Execute()
 * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
 * in arm64/simulator-arm64.h.
 * When calling as a non-direct call (i.e., from C++ code), the return address
 * area is overwritten with the LR register by the RegExp code. When doing a
 * direct call from generated code, the return address is placed there by
 * the calling code, as in a normal exit frame.
 */

// Shorthand used throughout this file to emit an instruction through masm_.
#define __ ACCESS_MASM(masm_)

// Creates the underlying MacroAssembler and emits the very first instruction:
// a branch to entry_label_, which is still unbound at this point. start_label_
// is bound right after it, so everything emitted by the methods below follows
// start_label_. Both num_registers_ and num_saved_registers_ start out equal
// to registers_to_save, which must be even.
RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate,
                                                     Zone* zone, Mode mode,
                                                     int registers_to_save)
    : NativeRegExpMacroAssembler(isolate, zone),
      masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
                               CodeObjectRequired::kYes)),
      mode_(mode),
      num_registers_(registers_to_save),
      num_saved_registers_(registers_to_save),
      entry_label_(),
      start_label_(),
      success_label_(),
      backtrack_label_(),
      exit_label_() {
  __ SetStackPointer(csp);
  DCHECK_EQ(0, registers_to_save % 2);
  // We can cache at most 16 W registers in x0-x7.
  STATIC_ASSERT(kNumCachedRegisters <= 16);
  STATIC_ASSERT((kNumCachedRegisters % 2) == 0);
  __ B(&entry_label_);   // We'll write the entry code later.
  __ Bind(&start_label_);  // And then continue from here.
}


// Deletes the owned MacroAssembler and marks every label member as unused.
RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() {
  delete masm_;
  // Unuse labels in case we throw away the assembler without calling GetCode.
  entry_label_.Unuse();
  start_label_.Unuse();
  success_label_.Unuse();
  backtrack_label_.Unuse();
  exit_label_.Unuse();
  check_preempt_label_.Unuse();
  stack_overflow_label_.Unuse();
}

// Returns RegExpStack::kStackLimitSlack.
int RegExpMacroAssemblerARM64::stack_limit_slack()  {
  return RegExpStack::kStackLimitSlack;
}


// Emits code that moves the current input offset by `by` characters
// (by * char_size() bytes). Nothing is emitted when `by` is zero.
void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) {
  if (by != 0) {
    __ Add(current_input_offset(),
           current_input_offset(), by * char_size());
  }
}


// Emits code that adds `by` to regexp register `reg`. How the addition is
// done depends on where GetRegisterState() says the register lives: in a
// stack slot, or in the low or high 32-bit word of a cached X register.
// Nothing is emitted when `by` is zero.
void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
  DCHECK((reg >= 0) && (reg < num_registers_));
  if (by != 0) {
    Register to_advance;
    RegisterState register_state = GetRegisterState(reg);
    switch (register_state) {
      case STACKED:
        // Read-modify-write of the stack slot through scratch register w10.
        __ Ldr(w10, register_location(reg));
        __ Add(w10, w10, by);
        __ Str(w10, register_location(reg));
        break;
      case CACHED_LSW:
        to_advance = GetCachedRegister(reg);
        __ Add(to_advance, to_advance, by);
        break;
      case CACHED_MSW:
        // The value occupies the upper word, so the increment is shifted up by
        // the width of a W register before being added.
        to_advance = GetCachedRegister(reg);
        __ Add(to_advance, to_advance,
               static_cast<int64_t>(by) << kWRegSizeInBits);
        break;
      default:
        UNREACHABLE();
        break;
    }
  }
}


// Emits a preemption check, then pops a 32-bit value from the backtrack
// stack, adds it (zero-extended) to code_pointer() and branches there.
void RegExpMacroAssemblerARM64::Backtrack() {
  CheckPreemption();
  Pop(w10);
  __ Add(x10, code_pointer(), Operand(w10, UXTW));
  __ Br(x10);
}


// Binds `label` at the current position of the underlying assembler.
void RegExpMacroAssemblerARM64::Bind(Label* label) {
  __ Bind(label);
}


// Compares the current character with `c` and hands on_equal to
// CompareAndBranchOrBacktrack with condition eq.
void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) {
  CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal);
}


// Same as CheckCharacter but with condition hi (unsigned greater than limit).
void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit,
                                                 Label* on_greater) {
  CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
}


// Takes on_at_start when the current input offset minus one character equals
// string_start_minus_one().
void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) {
  __ Add(w10, current_input_offset(), Operand(-char_size()));
  __ Cmp(w10, string_start_minus_one());
  BranchOrBacktrack(eq, on_at_start);
}


// Takes on_not_at_start when the position `cp_offset` characters from the
// current one, minus one character, differs from string_start_minus_one().
void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset,
                                                Label* on_not_at_start) {
  __ Add(w10, current_input_offset(),
         Operand(-char_size() + cp_offset * char_size()));
  __ Cmp(w10, string_start_minus_one());
  BranchOrBacktrack(ne, on_not_at_start);
}


// Same as CheckCharacter but with condition lo (unsigned less than limit).
void RegExpMacroAssemblerARM64::CheckCharacterLT(uc16 limit, Label* on_less) {
  CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less);
}


// Emits an unrolled comparison of the input at `cp_offset` against every
// character of `str`, taking on_failure at the first mismatch. When
// check_end_of_string is set, the position of the last character of `str` is
// range-checked first through CheckPosition().
void RegExpMacroAssemblerARM64::CheckCharacters(Vector<const uc16> str,
                                                int cp_offset,
                                                Label* on_failure,
                                                bool check_end_of_string) {
  // This method is only ever called from the cctests.

  if (check_end_of_string) {
    // Is last character of required match inside string.
    CheckPosition(cp_offset + str.length() - 1, on_failure);
  }

  Register characters_address = x11;

  // Absolute address of the current position: input_end() plus the
  // sign-extended current input offset.
  __ Add(characters_address,
         input_end(),
         Operand(current_input_offset(), SXTW));
  if (cp_offset != 0) {
    __ Add(characters_address, characters_address, cp_offset * char_size());
  }

  for (int i = 0; i < str.length(); i++) {
    // Post-indexed loads advance characters_address by one character each.
    if (mode_ == LATIN1) {
      __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
      DCHECK(str[i] <= String::kMaxOneByteCharCode);
    } else {
      __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
    }
    CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure);
  }
}


// Compares the current input offset with the 32-bit value at the top of the
// backtrack stack. On equality x11 is set to 1, so the backtrack stack pointer
// is advanced by one W-sized entry before on_equal is taken; otherwise x11 is
// 0 and the stack pointer is left unchanged.
void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
  __ Ldr(w10, MemOperand(backtrack_stackpointer()));
  __ Cmp(current_input_offset(), w10);
  __ Cset(x11, eq);
  __ Add(backtrack_stackpointer(),
         backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2));
  BranchOrBacktrack(eq, on_equal);
}


// Emits a case-insensitive comparison of the input at the current position
// with the capture stored in registers start_reg and start_reg + 1, taking
// on_no_match on failure. An empty or cleared capture falls through without
// consuming input. With read_backward the text compared is the one ending at
// the current position, and the position moves backwards on success.
// LATIN1 mode is handled by an inline loop; UC16 mode calls the C function
// re_case_insensitive_compare_uc16.
void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
    int start_reg, bool read_backward, Label* on_no_match) {
  Label fallthrough;

  Register capture_start_offset = w10;
  // Save the capture length in a callee-saved register so it will
  // be preserved if we call a C helper.
  Register capture_length = w19;
  DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));

  // Find length of back-referenced capture.
  DCHECK((start_reg % 2) == 0);
  if (start_reg < kNumCachedRegisters) {
    // A cached X register holds the capture start in its low word and the
    // capture end in its high word.
    __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
    __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
  } else {
    __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
  }
  __ Sub(capture_length, w11, capture_start_offset);  // Length to check.

  // At this point, the capture registers are either both set or both cleared.
  // If the capture length is zero, then the capture is either empty or cleared.
  // Fall through in both cases.
  __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);

  // Check that there are enough characters left in the input.
  if (read_backward) {
    __ Add(w12, string_start_minus_one(), capture_length);
    __ Cmp(current_input_offset(), w12);
    BranchOrBacktrack(le, on_no_match);
  } else {
    // Cmn sets the flags for capture_length + current_input_offset; the
    // offset is negative (relative to the end of the input), so a positive
    // sum means the capture would not fit.
    __ Cmn(capture_length, current_input_offset());
    BranchOrBacktrack(gt, on_no_match);
  }

  if (mode_ == LATIN1) {
    Label success;
    Label fail;
    Label loop_check;

    Register capture_start_address = x12;
    Register capture_end_addresss = x13;
    Register current_position_address = x14;

    __ Add(capture_start_address,
           input_end(),
           Operand(capture_start_offset, SXTW));
    __ Add(capture_end_addresss,
           capture_start_address,
           Operand(capture_length, SXTW));
    __ Add(current_position_address,
           input_end(),
           Operand(current_input_offset(), SXTW));
    if (read_backward) {
      // Offset by length when matching backwards.
      __ Sub(current_position_address, current_position_address,
             Operand(capture_length, SXTW));
    }

    Label loop;
    __ Bind(&loop);
    __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
    __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
    __ Cmp(w10, w11);
    __ B(eq, &loop_check);

    // Mismatch, try case-insensitive match (converting letters to lower-case).
    __ Orr(w10, w10, 0x20);  // Convert capture character to lower-case.
    __ Orr(w11, w11, 0x20);  // Also convert input character.
    __ Cmp(w11, w10);
    __ B(ne, &fail);
    __ Sub(w10, w10, 'a');
    __ Cmp(w10, 'z' - 'a');  // Is w10 a lowercase letter?
    __ B(ls, &loop_check);  // In range 'a'-'z'.
    // Latin-1: Check for values in range [224,254] but not 247.
    __ Sub(w10, w10, 224 - 'a');
    __ Cmp(w10, 254 - 224);
    __ Ccmp(w10, 247 - 224, ZFlag, ls);  // Check for 247.
    __ B(eq, &fail);  // Weren't Latin-1 letters.

    __ Bind(&loop_check);
    __ Cmp(capture_start_address, capture_end_addresss);
    __ B(lt, &loop);
    __ B(&success);

    __ Bind(&fail);
    BranchOrBacktrack(al, on_no_match);

    __ Bind(&success);
    // Compute new value of character position after the matched part.
    __ Sub(current_input_offset().X(), current_position_address, input_end());
    if (read_backward) {
      __ Sub(current_input_offset().X(), current_input_offset().X(),
             Operand(capture_length, SXTW));
    }
    if (masm_->emit_debug_code()) {
      __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
      __ Ccmp(current_input_offset(), 0, NoFlag, eq);
      // The current input offset should be <= 0, and fit in a W register.
      __ Check(le, kOffsetOutOfRange);
    }
  } else {
    DCHECK(mode_ == UC16);
    int argument_count = 4;

    // The cached registers need to be retained.
    CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
    DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
    __ PushCPURegList(cached_registers);

    // Put arguments into arguments registers.
    // Parameters are
    //   x0: Address byte_offset1 - Address captured substring's start.
    //   x1: Address byte_offset2 - Address of current character position.
    //   w2: size_t byte_length - length of capture in bytes(!)
    //   x3: Isolate* isolate

    // Address of start of capture.
    __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
    // Length of capture.
    __ Mov(w2, capture_length);
    // Address of current input position.
    __ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
    if (read_backward) {
      __ Sub(x1, x1, Operand(capture_length, SXTW));
    }
    // Isolate.
    __ Mov(x3, ExternalReference::isolate_address(isolate()));

    {
      AllowExternalCallThatCantCauseGC scope(masm_);
      ExternalReference function =
          ExternalReference::re_case_insensitive_compare_uc16(isolate());
      __ CallCFunction(function, argument_count);
    }

    // Check if function returned non-zero for success or zero for failure.
    // x0 is one of the registers used as a cache so it must be tested before
    // the cache is restored.
    __ Cmp(x0, 0);
    __ PopCPURegList(cached_registers);
    BranchOrBacktrack(eq, on_no_match);

    // On success, advance position by length of capture.
    if (read_backward) {
      __ Sub(current_input_offset(), current_input_offset(), capture_length);
    } else {
      __ Add(current_input_offset(), current_input_offset(), capture_length);
    }
  }

  __ Bind(&fallthrough);
}

// Emits an exact (case-sensitive) comparison of the input at the current
// position with the capture stored in registers start_reg and start_reg + 1,
// taking on_no_match at the first differing character. An empty or cleared
// capture falls through without consuming input. With read_backward the text
// compared is the one ending at the current position, and the position moves
// backwards on success.
void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg,
                                                      bool read_backward,
                                                      Label* on_no_match) {
  Label fallthrough;

  Register capture_start_address = x12;
  Register capture_end_address = x13;
  Register current_position_address = x14;
  Register capture_length = w15;

  // Find length of back-referenced capture.
  DCHECK((start_reg % 2) == 0);
  if (start_reg < kNumCachedRegisters) {
    // Low word of the cached register is the capture start (w10), high word
    // the capture end (w11).
    __ Mov(x10, GetCachedRegister(start_reg));
    __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
  } else {
    __ Ldp(w11, w10, capture_location(start_reg, x10));
  }
  __ Sub(capture_length, w11, w10);  // Length to check.

  // At this point, the capture registers are either both set or both cleared.
  // If the capture length is zero, then the capture is either empty or cleared.
  // Fall through in both cases.
  __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);

  // Check that there are enough characters left in the input.
  if (read_backward) {
    __ Add(w12, string_start_minus_one(), capture_length);
    __ Cmp(current_input_offset(), w12);
    BranchOrBacktrack(le, on_no_match);
  } else {
    __ Cmn(capture_length, current_input_offset());
    BranchOrBacktrack(gt, on_no_match);
  }

  // Compute pointers to match string and capture string
  __ Add(capture_start_address, input_end(), Operand(w10, SXTW));
  __ Add(capture_end_address,
         capture_start_address,
         Operand(capture_length, SXTW));
  __ Add(current_position_address,
         input_end(),
         Operand(current_input_offset(), SXTW));
  if (read_backward) {
    // Offset by length when matching backwards.
    __ Sub(current_position_address, current_position_address,
           Operand(capture_length, SXTW));
  }

  Label loop;
  __ Bind(&loop);
  // Load one character from each string; the post-indexed loads advance both
  // pointers by the character size of the current mode.
  if (mode_ == LATIN1) {
    __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
    __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
  } else {
    DCHECK(mode_ == UC16);
    __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex));
    __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex));
  }
  __ Cmp(w10, w11);
  BranchOrBacktrack(ne, on_no_match);
  __ Cmp(capture_start_address, capture_end_address);
  __ B(lt, &loop);

  // Move current character position to position after match.
  __ Sub(current_input_offset().X(), current_position_address, input_end());
  if (read_backward) {
    __ Sub(current_input_offset().X(), current_input_offset().X(),
           Operand(capture_length, SXTW));
  }

  if (masm_->emit_debug_code()) {
    __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
    __ Ccmp(current_input_offset(), 0, NoFlag, eq);
    // The current input offset should be <= 0, and fit in a W register.
    __ Check(le, kOffsetOutOfRange);
  }
  __ Bind(&fallthrough);
}


// Compares the current character with `c` and hands on_not_equal to
// CompareAndBranchOrBacktrack with condition ne.
void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c,
                                                  Label* on_not_equal) {
  CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal);
}


// Takes on_equal when (current character & mask) == c.
void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c,
                                                       uint32_t mask,
                                                       Label* on_equal) {
  __ And(w10, current_character(), mask);
  CompareAndBranchOrBacktrack(w10, c, eq, on_equal);
}


// Takes on_not_equal when (current character & mask) != c.
void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c,
                                                          unsigned mask,
                                                          Label* on_not_equal) {
  __ And(w10, current_character(), mask);
  CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
}


// Takes on_not_equal when ((current character - minus) & mask) != c.
void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd(
    uc16 c,
    uc16 minus,
    uc16 mask,
    Label* on_not_equal) {
  DCHECK(minus < String::kMaxUtf16CodeUnit);
  __ Sub(w10, current_character(), minus);
  __ And(w10, w10, mask);
  CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
}


// Takes on_in_range when the current character lies in [from, to], using a
// single unsigned comparison of (character - from) against (to - from).
void RegExpMacroAssemblerARM64::CheckCharacterInRange(
    uc16 from,
    uc16 to,
    Label* on_in_range) {
  __ Sub(w10, current_character(), from);
  // Unsigned lower-or-same condition.
  CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range);
}


// Takes on_not_in_range when the current character lies outside [from, to],
// using a single unsigned comparison of (character - from) against
// (to - from).
void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
    uc16 from,
    uc16 to,
    Label* on_not_in_range) {
  __ Sub(w10, current_character(), from);
  // Unsigned higher condition.
  CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
}


// Loads the byte of `table` indexed by the current character and takes
// on_bit_set when that byte is non-zero. The character is masked with
// kTableMask first, except in LATIN1 mode when kTableMask equals
// String::kMaxOneByteCharCode.
void RegExpMacroAssemblerARM64::CheckBitInTable(
    Handle<ByteArray> table,
    Label* on_bit_set) {
  __ Mov(x11, Operand(table));
  if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) {
    __ And(w10, current_character(), kTableMask);
    __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag);
  } else {
    __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag);
  }
  __ Ldrb(w11, MemOperand(x11, w10, UXTW));
  CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
}


// Emits a specialised test of the current character against the character
// class identified by `type`, taking on_no_match when the character is not in
// the class. Returns true when the class was handled here (for '*' that means
// emitting no code at all) and false when this function has no custom
// implementation for it.
bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type,
                                                           Label* on_no_match) {
  // Range checks (c in min..max) are generally implemented by an unsigned
  // (c - min) <= (max - min) check
  switch (type) {
  case 's':
    // Match space-characters
    if (mode_ == LATIN1) {
      // One byte space characters are '\t'..'\r', ' ' and \u00a0.
      Label success;
      // Check for ' ' or 0x00a0.
      __ Cmp(current_character(), ' ');
      __ Ccmp(current_character(), 0x00a0, ZFlag, ne);
      __ B(eq, &success);
      // Check range 0x09..0x0d.
      __ Sub(w10, current_character(), '\t');
      CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
      __ Bind(&success);
      return true;
    }
    return false;
  case 'S':
    // The emitted code for generic character classes is good enough.
    return false;
  case 'd':
    // Match ASCII digits ('0'..'9').
    __ Sub(w10, current_character(), '0');
    CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
    return true;
  case 'D':
    // Match ASCII non-digits.
    __ Sub(w10, current_character(), '0');
    CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
    return true;
  case '.': {
    // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
    // Here we emit the conditional branch only once at the end to make branch
    // prediction more efficient, even though we could branch out of here
    // as soon as a character matches.
    __ Cmp(current_character(), 0x0a);
    __ Ccmp(current_character(), 0x0d, ZFlag, ne);
    if (mode_ == UC16) {
      __ Sub(w10, current_character(), 0x2028);
      // If the Z flag was set we clear the flags to force a branch.
      __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
      // ls -> !((C==1) && (Z==0))
      BranchOrBacktrack(ls, on_no_match);
    } else {
      BranchOrBacktrack(eq, on_no_match);
    }
    return true;
  }
  case 'n': {
    // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
    // We have to check all 4 newline characters before emitting
    // the conditional branch.
    __ Cmp(current_character(), 0x0a);
    __ Ccmp(current_character(), 0x0d, ZFlag, ne);
    if (mode_ == UC16) {
      __ Sub(w10, current_character(), 0x2028);
      // If the Z flag was set we clear the flags to force a fall-through.
      __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
      // hi -> (C==1) && (Z==0)
      BranchOrBacktrack(hi, on_no_match);
    } else {
      BranchOrBacktrack(ne, on_no_match);
    }
    return true;
  }
  case 'w': {
    if (mode_ != LATIN1) {
      // Table is 256 entries, so all Latin1 characters can be tested.
      CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
    }
    // A zero entry in the word character map means "not a word character".
    ExternalReference map = ExternalReference::re_word_character_map();
    __ Mov(x10, map);
    __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
    CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
    return true;
  }
  case 'W': {
    Label done;
    if (mode_ != LATIN1) {
      // Table is 256 entries, so all Latin1 characters can be tested.
      __ Cmp(current_character(), 'z');
      __ B(hi, &done);
    }
    ExternalReference map = ExternalReference::re_word_character_map();
    __ Mov(x10, map);
    __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
    CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
    __ Bind(&done);
    return true;
  }
  case '*':
    // Match any character.
    return true;
  // No custom implementation (yet): s(UC16), S(UC16).
  default:
    return false;
  }
}


// Emits code that loads FAILURE into w0 and branches to exit_label_.
void RegExpMacroAssemblerARM64::Fail() {
  __ Mov(w0, FAILURE);
  __ B(&exit_label_);
}


Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
  Label return_w0;
  // Finalize code - write the entry point code now we know how many
  // registers we need.

  // Entry code:
  __ Bind(&entry_label_);

  // Arguments on entry:
  // x0:  String*   input
  // x1:  int       start_offset
  // x2:  byte*     input_start
  // x3:  byte*     input_end
  // x4:  int*      output array
  // x5:  int       output array size
  // x6:  Address   stack_base
  // x7:  int       direct_call

  // The stack pointer should be csp on entry.
  //  csp[8]:  address of the current isolate
  //  csp[0]:  secondary link/return address used by native call

  // Tell the system that we have a stack frame.  Because the type is MANUAL, no
  // code is generated.
  FrameScope scope(masm_, StackFrame::MANUAL);

  // Push registers on the stack, only push the argument registers that we need.
717 CPURegList argument_registers(x0, x5, x6, x7); 718 719 CPURegList registers_to_retain = kCalleeSaved; 720 DCHECK(kCalleeSaved.Count() == 11); 721 registers_to_retain.Combine(lr); 722 723 DCHECK(csp.Is(__ StackPointer())); 724 __ PushCPURegList(registers_to_retain); 725 __ PushCPURegList(argument_registers); 726 727 // Set frame pointer in place. 728 __ Add(frame_pointer(), csp, argument_registers.Count() * kPointerSize); 729 730 // Initialize callee-saved registers. 731 __ Mov(start_offset(), w1); 732 __ Mov(input_start(), x2); 733 __ Mov(input_end(), x3); 734 __ Mov(output_array(), x4); 735 736 // Set the number of registers we will need to allocate, that is: 737 // - success_counter (X register) 738 // - (num_registers_ - kNumCachedRegisters) (W registers) 739 int num_wreg_to_allocate = num_registers_ - kNumCachedRegisters; 740 // Do not allocate registers on the stack if they can all be cached. 741 if (num_wreg_to_allocate < 0) { num_wreg_to_allocate = 0; } 742 // Make room for the success_counter. 743 num_wreg_to_allocate += 2; 744 745 // Make sure the stack alignment will be respected. 746 int alignment = masm_->ActivationFrameAlignment(); 747 DCHECK_EQ(alignment % 16, 0); 748 int align_mask = (alignment / kWRegSize) - 1; 749 num_wreg_to_allocate = (num_wreg_to_allocate + align_mask) & ~align_mask; 750 751 // Check if we have space on the stack. 752 Label stack_limit_hit; 753 Label stack_ok; 754 755 ExternalReference stack_limit = 756 ExternalReference::address_of_stack_limit(isolate()); 757 __ Mov(x10, stack_limit); 758 __ Ldr(x10, MemOperand(x10)); 759 __ Subs(x10, csp, x10); 760 761 // Handle it if the stack pointer is already below the stack limit. 762 __ B(ls, &stack_limit_hit); 763 764 // Check if there is room for the variable number of registers above 765 // the stack limit. 766 __ Cmp(x10, num_wreg_to_allocate * kWRegSize); 767 __ B(hs, &stack_ok); 768 769 // Exit with OutOfMemory exception. 
There is not enough space on the stack 770 // for our working registers. 771 __ Mov(w0, EXCEPTION); 772 __ B(&return_w0); 773 774 __ Bind(&stack_limit_hit); 775 CallCheckStackGuardState(x10); 776 // If returned value is non-zero, we exit with the returned value as result. 777 __ Cbnz(w0, &return_w0); 778 779 __ Bind(&stack_ok); 780 781 // Allocate space on stack. 782 __ Claim(num_wreg_to_allocate, kWRegSize); 783 784 // Initialize success_counter with 0. 785 __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter)); 786 787 // Find negative length (offset of start relative to end). 788 __ Sub(x10, input_start(), input_end()); 789 if (masm_->emit_debug_code()) { 790 // Check that the input string length is < 2^30. 791 __ Neg(x11, x10); 792 __ Cmp(x11, (1<<30) - 1); 793 __ Check(ls, kInputStringTooLong); 794 } 795 __ Mov(current_input_offset(), w10); 796 797 // The non-position value is used as a clearing value for the 798 // capture registers, it corresponds to the position of the first character 799 // minus one. 800 __ Sub(string_start_minus_one(), current_input_offset(), char_size()); 801 __ Sub(string_start_minus_one(), string_start_minus_one(), 802 Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0)); 803 // We can store this value twice in an X register for initializing 804 // on-stack registers later. 805 __ Orr(twice_non_position_value(), string_start_minus_one().X(), 806 Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits)); 807 808 // Initialize code pointer register. 809 __ Mov(code_pointer(), Operand(masm_->CodeObject())); 810 811 Label load_char_start_regexp, start_regexp; 812 // Load newline if index is at start, previous character otherwise. 813 __ Cbnz(start_offset(), &load_char_start_regexp); 814 __ Mov(current_character(), '\n'); 815 __ B(&start_regexp); 816 817 // Global regexp restarts matching here. 818 __ Bind(&load_char_start_regexp); 819 // Load previous char as initial value of current character register. 
820 LoadCurrentCharacterUnchecked(-1, 1); 821 __ Bind(&start_regexp); 822 // Initialize on-stack registers. 823 if (num_saved_registers_ > 0) { 824 ClearRegisters(0, num_saved_registers_ - 1); 825 } 826 827 // Initialize backtrack stack pointer. 828 __ Ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackBase)); 829 830 // Execute 831 __ B(&start_label_); 832 833 if (backtrack_label_.is_linked()) { 834 __ Bind(&backtrack_label_); 835 Backtrack(); 836 } 837 838 if (success_label_.is_linked()) { 839 Register first_capture_start = w15; 840 841 // Save captures when successful. 842 __ Bind(&success_label_); 843 844 if (num_saved_registers_ > 0) { 845 // V8 expects the output to be an int32_t array. 846 Register capture_start = w12; 847 Register capture_end = w13; 848 Register input_length = w14; 849 850 // Copy captures to output. 851 852 // Get string length. 853 __ Sub(x10, input_end(), input_start()); 854 if (masm_->emit_debug_code()) { 855 // Check that the input string length is < 2^30. 856 __ Cmp(x10, (1<<30) - 1); 857 __ Check(ls, kInputStringTooLong); 858 } 859 // input_start has a start_offset offset on entry. We need to include 860 // it when computing the length of the whole string. 861 if (mode_ == UC16) { 862 __ Add(input_length, start_offset(), Operand(w10, LSR, 1)); 863 } else { 864 __ Add(input_length, start_offset(), w10); 865 } 866 867 // Copy the results to the output array from the cached registers first. 868 for (int i = 0; 869 (i < num_saved_registers_) && (i < kNumCachedRegisters); 870 i += 2) { 871 __ Mov(capture_start.X(), GetCachedRegister(i)); 872 __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits); 873 if ((i == 0) && global_with_zero_length_check()) { 874 // Keep capture start for the zero-length check later. 875 __ Mov(first_capture_start, capture_start); 876 } 877 // Offsets need to be relative to the start of the string. 
878 if (mode_ == UC16) { 879 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1)); 880 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 881 } else { 882 __ Add(capture_start, input_length, capture_start); 883 __ Add(capture_end, input_length, capture_end); 884 } 885 // The output pointer advances for a possible global match. 886 __ Stp(capture_start, 887 capture_end, 888 MemOperand(output_array(), kPointerSize, PostIndex)); 889 } 890 891 // Only carry on if there are more than kNumCachedRegisters capture 892 // registers. 893 int num_registers_left_on_stack = 894 num_saved_registers_ - kNumCachedRegisters; 895 if (num_registers_left_on_stack > 0) { 896 Register base = x10; 897 // There are always an even number of capture registers. A couple of 898 // registers determine one match with two offsets. 899 DCHECK_EQ(0, num_registers_left_on_stack % 2); 900 __ Add(base, frame_pointer(), kFirstCaptureOnStack); 901 902 // We can unroll the loop here, we should not unroll for less than 2 903 // registers. 904 STATIC_ASSERT(kNumRegistersToUnroll > 2); 905 if (num_registers_left_on_stack <= kNumRegistersToUnroll) { 906 for (int i = 0; i < num_registers_left_on_stack / 2; i++) { 907 __ Ldp(capture_end, 908 capture_start, 909 MemOperand(base, -kPointerSize, PostIndex)); 910 if ((i == 0) && global_with_zero_length_check()) { 911 // Keep capture start for the zero-length check later. 912 __ Mov(first_capture_start, capture_start); 913 } 914 // Offsets need to be relative to the start of the string. 915 if (mode_ == UC16) { 916 __ Add(capture_start, 917 input_length, 918 Operand(capture_start, ASR, 1)); 919 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 920 } else { 921 __ Add(capture_start, input_length, capture_start); 922 __ Add(capture_end, input_length, capture_end); 923 } 924 // The output pointer advances for a possible global match. 
925 __ Stp(capture_start, 926 capture_end, 927 MemOperand(output_array(), kPointerSize, PostIndex)); 928 } 929 } else { 930 Label loop, start; 931 __ Mov(x11, num_registers_left_on_stack); 932 933 __ Ldp(capture_end, 934 capture_start, 935 MemOperand(base, -kPointerSize, PostIndex)); 936 if (global_with_zero_length_check()) { 937 __ Mov(first_capture_start, capture_start); 938 } 939 __ B(&start); 940 941 __ Bind(&loop); 942 __ Ldp(capture_end, 943 capture_start, 944 MemOperand(base, -kPointerSize, PostIndex)); 945 __ Bind(&start); 946 if (mode_ == UC16) { 947 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1)); 948 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 949 } else { 950 __ Add(capture_start, input_length, capture_start); 951 __ Add(capture_end, input_length, capture_end); 952 } 953 // The output pointer advances for a possible global match. 954 __ Stp(capture_start, 955 capture_end, 956 MemOperand(output_array(), kPointerSize, PostIndex)); 957 __ Sub(x11, x11, 2); 958 __ Cbnz(x11, &loop); 959 } 960 } 961 } 962 963 if (global()) { 964 Register success_counter = w0; 965 Register output_size = x10; 966 // Restart matching if the regular expression is flagged as global. 967 968 // Increment success counter. 969 __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter)); 970 __ Add(success_counter, success_counter, 1); 971 __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter)); 972 973 // Capture results have been stored, so the number of remaining global 974 // output registers is reduced by the number of stored captures. 975 __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize)); 976 __ Sub(output_size, output_size, num_saved_registers_); 977 // Check whether we have enough room for another set of capture results. 978 __ Cmp(output_size, num_saved_registers_); 979 __ B(lt, &return_w0); 980 981 // The output pointer is already set to the next field in the output 982 // array. 
983 // Update output size on the frame before we restart matching. 984 __ Str(output_size, MemOperand(frame_pointer(), kOutputSize)); 985 986 if (global_with_zero_length_check()) { 987 // Special case for zero-length matches. 988 __ Cmp(current_input_offset(), first_capture_start); 989 // Not a zero-length match, restart. 990 __ B(ne, &load_char_start_regexp); 991 // Offset from the end is zero if we already reached the end. 992 __ Cbz(current_input_offset(), &return_w0); 993 // Advance current position after a zero-length match. 994 __ Add(current_input_offset(), 995 current_input_offset(), 996 Operand((mode_ == UC16) ? 2 : 1)); 997 } 998 999 __ B(&load_char_start_regexp); 1000 } else { 1001 __ Mov(w0, SUCCESS); 1002 } 1003 } 1004 1005 if (exit_label_.is_linked()) { 1006 // Exit and return w0 1007 __ Bind(&exit_label_); 1008 if (global()) { 1009 __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter)); 1010 } 1011 } 1012 1013 __ Bind(&return_w0); 1014 1015 // Set stack pointer back to first register to retain 1016 DCHECK(csp.Is(__ StackPointer())); 1017 __ Mov(csp, fp); 1018 __ AssertStackConsistency(); 1019 1020 // Restore registers. 1021 __ PopCPURegList(registers_to_retain); 1022 1023 __ Ret(); 1024 1025 Label exit_with_exception; 1026 // Registers x0 to x7 are used to store the first captures, they need to be 1027 // retained over calls to C++ code. 1028 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7); 1029 DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters); 1030 1031 if (check_preempt_label_.is_linked()) { 1032 __ Bind(&check_preempt_label_); 1033 SaveLinkRegister(); 1034 // The cached registers need to be retained. 1035 __ PushCPURegList(cached_registers); 1036 CallCheckStackGuardState(x10); 1037 // Returning from the regexp code restores the stack (csp <- fp) 1038 // so we don't need to drop the link register from it before exiting. 1039 __ Cbnz(w0, &return_w0); 1040 // Reset the cached registers. 
1041 __ PopCPURegList(cached_registers); 1042 RestoreLinkRegister(); 1043 __ Ret(); 1044 } 1045 1046 if (stack_overflow_label_.is_linked()) { 1047 __ Bind(&stack_overflow_label_); 1048 SaveLinkRegister(); 1049 // The cached registers need to be retained. 1050 __ PushCPURegList(cached_registers); 1051 // Call GrowStack(backtrack_stackpointer(), &stack_base) 1052 __ Mov(x2, ExternalReference::isolate_address(isolate())); 1053 __ Add(x1, frame_pointer(), kStackBase); 1054 __ Mov(x0, backtrack_stackpointer()); 1055 ExternalReference grow_stack = 1056 ExternalReference::re_grow_stack(isolate()); 1057 __ CallCFunction(grow_stack, 3); 1058 // If return NULL, we have failed to grow the stack, and 1059 // must exit with a stack-overflow exception. 1060 // Returning from the regexp code restores the stack (csp <- fp) 1061 // so we don't need to drop the link register from it before exiting. 1062 __ Cbz(w0, &exit_with_exception); 1063 // Otherwise use return value as new stack pointer. 1064 __ Mov(backtrack_stackpointer(), x0); 1065 // Reset the cached registers. 
__ PopCPURegList(cached_registers);
    RestoreLinkRegister();
    __ Ret();
  }

  // Exception exit: load EXCEPTION into w0 and leave through the common
  // return path.
  if (exit_with_exception.is_linked()) {
    __ Bind(&exit_with_exception);
    __ Mov(w0, EXCEPTION);
    __ B(&return_w0);
  }

  // Turn the assembled instructions into a Code object flagged as
  // Code::REGEXP and emit a RegExpCodeCreateEvent through PROFILE.
  CodeDesc code_desc;
  masm_->GetCode(&code_desc);
  Handle<Code> code = isolate()->factory()->NewCode(
      code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject());
  PROFILE(masm_->isolate(), RegExpCodeCreateEvent(*code, *source));
  return Handle<HeapObject>::cast(code);
}


// Jumps to |to|. With the 'al' condition BranchOrBacktrack emits a plain
// branch, or a backtrack when |to| is NULL.
void RegExpMacroAssemblerARM64::GoTo(Label* to) {
  BranchOrBacktrack(al, to);
}

// Branches to |if_ge| (or backtracks if it is NULL) when regexp register
// |reg| is greater than or equal to |comparand| (signed comparison).
// GetRegister may use w10 as a scratch register to materialize the value.
void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand,
                                             Label* if_ge) {
  Register to_compare = GetRegister(reg, w10);
  CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge);
}


// Branches to |if_lt| (or backtracks if it is NULL) when regexp register
// |reg| is less than |comparand| (signed comparison).
// GetRegister may use w10 as a scratch register to materialize the value.
void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand,
                                             Label* if_lt) {
  Register to_compare = GetRegister(reg, w10);
  CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt);
}


// Branches to |if_eq| (or backtracks if it is NULL) when regexp register
// |reg| holds the same value as the current input offset.
void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) {
  Register to_compare = GetRegister(reg, w10);
  __ Cmp(to_compare, current_input_offset());
  BranchOrBacktrack(eq, if_eq);
}

// Reports which irregexp implementation this assembler provides.
RegExpMacroAssembler::IrregexpImplementation
    RegExpMacroAssemblerARM64::Implementation() {
  return kARM64Implementation;
}


// Loads |characters| characters found |cp_offset| characters away from the
// current position into the current-character register.
//
// When |check_bounds| is set, a position check is emitted first and control
// goes to |on_end_of_input| (or backtracks if it is NULL) when the access
// would fall outside the input:
//  - for a non-negative |cp_offset| the last character that will be read,
//    i.e. cp_offset + characters - 1, is checked;
//  - for a negative |cp_offset| only cp_offset itself is checked.
void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset,
                                                     Label* on_end_of_input,
                                                     bool check_bounds,
                                                     int characters) {
  // TODO(pielan): Make sure long strings are caught before this, and not
  // just asserted in debug mode.
  // Be sane! (And ensure that an int32_t can be used to index the string)
  DCHECK(cp_offset < (1<<30));
  if (check_bounds) {
    if (cp_offset >= 0) {
      CheckPosition(cp_offset + characters - 1, on_end_of_input);
    } else {
      CheckPosition(cp_offset, on_end_of_input);
    }
  }
  LoadCurrentCharacterUnchecked(cp_offset, characters);
}


// Pops the top 32-bit entry of the backtrack stack into the current input
// offset.
void RegExpMacroAssemblerARM64::PopCurrentPosition() {
  Pop(current_input_offset());
}


// Pops the top 32-bit entry of the backtrack stack into regexp register
// |register_index|. w10 is used as the intermediate register.
void RegExpMacroAssemblerARM64::PopRegister(int register_index) {
  Pop(w10);
  StoreRegister(register_index, w10);
}


// Pushes a backtrack target for |label| on the backtrack stack and then
// checks the backtrack stack limit.
//
// The pushed value is a 32-bit code offset rather than an absolute address:
//  - for an already bound label it is known at code generation time
//    (label position + Code::kHeaderSize - kHeapObjectTag);
//  - for an unbound label it is computed at run time as the label address
//    minus code_pointer().
// Clobbers x10.
void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
  if (label->is_bound()) {
    int target = label->pos();
    __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag);
  } else {
    __ Adr(x10, label, MacroAssembler::kAdrFar);
    __ Sub(x10, x10, code_pointer());
    if (masm_->emit_debug_code()) {
      __ Cmp(x10, kWRegMask);
      // The code offset has to fit in a W register.
      __ Check(ls, kOffsetOutOfRange);
    }
  }
  Push(w10);
  CheckStackLimit();
}


// Pushes the current input offset on the backtrack stack. No stack limit
// check is emitted here.
void RegExpMacroAssemblerARM64::PushCurrentPosition() {
  Push(current_input_offset());
}


// Pushes regexp register |register_index| on the backtrack stack. A backtrack
// stack limit check is emitted only when |check_stack_limit| is non-zero.
// GetRegister may use w10 as a scratch register to materialize the value.
void RegExpMacroAssemblerARM64::PushRegister(int register_index,
                                             StackCheckFlag check_stack_limit) {
  Register to_push = GetRegister(register_index, w10);
  Push(to_push);
  if (check_stack_limit) CheckStackLimit();
}


// Sets the current input offset to the value of regexp register |reg|.
// How the value is fetched depends on where the register lives:
//  - STACKED:    loaded from its slot in the frame;
//  - CACHED_LSW: copied from the low 32 bits of the caching X register;
//  - CACHED_MSW: taken from the high 32 bits of the caching X register by a
//                logical shift right of kWRegSizeInBits.
void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
  Register cached_register;
  RegisterState register_state = GetRegisterState(reg);
  switch (register_state) {
    case STACKED:
      __ Ldr(current_input_offset(), register_location(reg));
      break;
    case CACHED_LSW:
      cached_register = GetCachedRegister(reg);
      __ Mov(current_input_offset(), cached_register.W());
      break;
    case CACHED_MSW:
      cached_register = GetCachedRegister(reg);
      __ Lsr(current_input_offset().X(), cached_register, kWRegSizeInBits);
      break;
    default:
      UNREACHABLE();
      break;
  }
}


// Restores the backtrack stack pointer from regexp register |reg|.
// The register holds a 32-bit offset (as written by
// WriteStackPointerToRegister); it is sign-extended and added to the stack
// base loaded from the frame. Clobbers x11, and possibly w10 via GetRegister.
void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) {
  Register read_from = GetRegister(reg, w10);
  __ Ldr(x11, MemOperand(frame_pointer(), kStackBase));
  __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW));
}


// Ensures the current position is at most |by| characters before the end of
// the input. The current input offset is compared against
// -by * char_size(): if it is already greater than or equal to that value
// nothing changes, otherwise it is set to that value and the character
// preceding the new position is reloaded.
void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) {
  Label after_position;
  __ Cmp(current_input_offset(), -by * char_size());
  __ B(ge, &after_position);
  __ Mov(current_input_offset(), -by * char_size());
  // On RegExp code entry (where this operation is used), the character before
  // the current position is expected to be already loaded.
  // We have advanced the position, so it's safe to read backwards.
  LoadCurrentCharacterUnchecked(-1, 1);
  __ Bind(&after_position);
}


// Stores the constant |to| into regexp register |register_index|.
// Only non-capture registers may be written this way (see the DCHECK).
void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) {
  DCHECK(register_index >= num_saved_registers_);  // Reserved for positions!
  // A zero value is stored straight from wzr; anything else is first
  // materialized in w10.
  Register set_to = wzr;
  if (to != 0) {
    set_to = w10;
    __ Mov(set_to, to);
  }
  StoreRegister(register_index, set_to);
}


// Emits a branch to the success label. Returns the value of global().
bool RegExpMacroAssemblerARM64::Succeed() {
  __ B(&success_label_);
  return global();
}


// Stores the current input offset, adjusted by |cp_offset| characters, into
// regexp register |reg|. When |cp_offset| is non-zero the adjusted value is
// computed in w10; otherwise the current input offset register is stored
// directly.
void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg,
                                                               int cp_offset) {
  Register position = current_input_offset();
  if (cp_offset != 0) {
    position = w10;
    __ Add(position, current_input_offset(), cp_offset * char_size());
  }
  StoreRegister(reg, position);
}


// Resets regexp registers |reg_from| to |reg_to| (both inclusive) to the
// "no position" value.
//
// Single 32-bit registers are written with string_start_minus_one(); pairs of
// adjacent registers are written in one go with twice_non_position_value().
// The work is done in four steps: an odd leading cached register, pairs of
// cached registers, one odd leftover register, and finally the remaining
// (even number of) registers that live on the stack. Clobbers x10 and x11
// when the stack part is emitted as a loop.
void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
  DCHECK(reg_from <= reg_to);
  int num_registers = reg_to - reg_from + 1;

  // If the first capture register is cached in a hardware register but not
  // aligned on a 64-bit one, we need to clear the first one specifically.
  if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
    StoreRegister(reg_from, string_start_minus_one());
    num_registers--;
    reg_from++;
  }

  // Clear cached registers in pairs as far as possible.
  while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) {
    DCHECK(GetRegisterState(reg_from) == CACHED_LSW);
    __ Mov(GetCachedRegister(reg_from), twice_non_position_value());
    reg_from += 2;
    num_registers -= 2;
  }

  // An odd count at this point leaves one register that cannot be paired;
  // clear it on its own so that only whole pairs remain.
  if ((num_registers % 2) == 1) {
    StoreRegister(reg_from, string_start_minus_one());
    num_registers--;
    reg_from++;
  }

  if (num_registers > 0) {
    // If there are some remaining registers, they are stored on the stack.
    DCHECK(reg_from >= kNumCachedRegisters);

    // Move down the indexes of the registers on stack to get the correct offset
    // in memory.
    reg_from -= kNumCachedRegisters;
    reg_to -= kNumCachedRegisters;
    // We should not unroll the loop for less than 2 registers.
    STATIC_ASSERT(kNumRegistersToUnroll > 2);
    // We position the base pointer to (reg_from + 1).
    // Stacked registers sit at decreasing addresses (see register_location),
    // so a 64-bit store at the slot of (reg_from + 1) covers both reg_from
    // and reg_from + 1.
    int base_offset = kFirstRegisterOnStack -
        kWRegSize - (kWRegSize * reg_from);
    if (num_registers > kNumRegistersToUnroll) {
      // Too many registers to unroll: emit a run-time loop that clears two
      // registers per iteration, with x11 counting the registers left.
      Register base = x10;
      __ Add(base, frame_pointer(), base_offset);

      Label loop;
      __ Mov(x11, num_registers);
      __ Bind(&loop);
      __ Str(twice_non_position_value(),
             MemOperand(base, -kPointerSize, PostIndex));
      __ Sub(x11, x11, 2);
      __ Cbnz(x11, &loop);
    } else {
      // Few registers: emit one 64-bit store per register pair.
      for (int i = reg_from; i <= reg_to; i += 2) {
        __ Str(twice_non_position_value(),
               MemOperand(frame_pointer(), base_offset));
        base_offset -= kWRegSize * 2;
      }
    }
  }
}


// Saves the backtrack stack pointer into regexp register |reg| as a 32-bit
// offset from the stack base stored in the frame. Clobbers x10.
void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
  __ Ldr(x10, MemOperand(frame_pointer(), kStackBase));
  __ Sub(x10, backtrack_stackpointer(), x10);
  if (masm_->emit_debug_code()) {
    // The 64-bit difference must equal its own sign-extended low word.
    __ Cmp(x10, Operand(w10, SXTW));
    // The stack offset needs to fit in a W register.
    __ Check(eq, kOffsetOutOfRange);
  }
  StoreRegister(reg, w10);
}


// Helper function for reading a value out of a stack frame.
// Returns a reference to the T located |frame_offset| bytes from |re_frame|.
template <typename T>
static T& frame_entry(Address re_frame, int frame_offset) {
  return *reinterpret_cast<T*>(re_frame + frame_offset);
}


// Returns a pointer to the T located |frame_offset| bytes from |re_frame|,
// without dereferencing it.
template <typename T>
static T* frame_entry_address(Address re_frame, int frame_offset) {
  return reinterpret_cast<T*>(re_frame + frame_offset);
}


// Forwards to NativeRegExpMacroAssembler::CheckStackGuardState, filling in
// the values that live in the regexp frame |re_frame|: the isolate, whether
// the frame's direct-call slot equals 1, and the address of the slot holding
// the input string. The remaining arguments are passed through unchanged.
// NOTE(review): presumably reached from generated code through
// ExternalReference::re_check_stack_guard_state (see
// CallCheckStackGuardState) - confirm.
int RegExpMacroAssemblerARM64::CheckStackGuardState(
    Address* return_address, Code* re_code, Address re_frame, int start_index,
    const byte** input_start, const byte** input_end) {
  return NativeRegExpMacroAssembler::CheckStackGuardState(
      frame_entry<Isolate*>(re_frame, kIsolate), start_index,
      frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
      frame_entry_address<String*>(re_frame, kInput), input_start, input_end);
}


// Branches to |on_outside_input| (or backtracks if it is NULL) when the
// position |cp_offset| characters from the current one is not inside the
// input.
//  - cp_offset >= 0: outside when the current input offset is greater than
//    or equal to -cp_offset * char_size().
//  - cp_offset < 0: outside when the current input offset plus
//    cp_offset * char_size() is less than or equal to
//    string_start_minus_one(). This path clobbers w12.
void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
                                              Label* on_outside_input) {
  if (cp_offset >= 0) {
    CompareAndBranchOrBacktrack(current_input_offset(),
                                -cp_offset * char_size(), ge, on_outside_input);
  } else {
    __ Add(w12, current_input_offset(), Operand(cp_offset * char_size()));
    __ Cmp(w12, string_start_minus_one());
    BranchOrBacktrack(le, on_outside_input);
  }
}


// Returns whether multi-character (possibly unaligned) loads may be emitted;
// this is the case whenever slow_safe() is false.
bool RegExpMacroAssemblerARM64::CanReadUnaligned() {
  // TODO(pielan): See whether or not we should disable unaligned accesses.
  return !slow_safe();
}


// Private methods:

// Emits a call to the C++ stack guard check through a DirectCEntryStub,
// using |scratch| to hold the address of the callee.
//
// Arguments are set up in x0-x5, in the parameter order of
// CheckStackGuardState: x0 = address of the return address slot (csp),
// x1 = this code object, x2 = regexp frame pointer, w3 = start offset,
// x4 / x5 = addresses of stack copies of input_start / input_end.
// After the call, input_start, input_end and the code pointer are reloaded.
void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) {
  // Allocate space on the stack to store the return address. The
  // CheckStackGuardState C++ function will override it if the code
  // moved. Allocate extra space for 2 arguments passed by pointers.
  // AAPCS64 requires the stack to be 16 byte aligned.
  int alignment = masm_->ActivationFrameAlignment();
  DCHECK_EQ(alignment % 16, 0);
  // Round the three required X-register slots up to a whole number of
  // alignment units.
  int align_mask = (alignment / kXRegSize) - 1;
  int xreg_to_claim = (3 + align_mask) & ~align_mask;

  DCHECK(csp.Is(__ StackPointer()));
  __ Claim(xreg_to_claim);

  // CheckStackGuardState needs the end and start addresses of the input string.
  __ Poke(input_end(), 2 * kPointerSize);
  __ Add(x5, csp, 2 * kPointerSize);
  __ Poke(input_start(), kPointerSize);
  __ Add(x4, csp, kPointerSize);

  __ Mov(w3, start_offset());
  // RegExp code frame pointer.
  __ Mov(x2, frame_pointer());
  // Code* of self.
  __ Mov(x1, Operand(masm_->CodeObject()));

  // We need to pass a pointer to the return address as first argument.
  // The DirectCEntry stub will place the return address on the stack before
  // calling so the stack pointer will point to it.
  __ Mov(x0, csp);

  ExternalReference check_stack_guard_state =
      ExternalReference::re_check_stack_guard_state(isolate());
  __ Mov(scratch, check_stack_guard_state);
  DirectCEntryStub stub(isolate());
  stub.GenerateCall(masm_, scratch);

  // The input string may have been moved in memory, we need to reload it.
  __ Peek(input_start(), kPointerSize);
  __ Peek(input_end(), 2 * kPointerSize);

  DCHECK(csp.Is(__ StackPointer()));
  __ Drop(xreg_to_claim);

  // Reload the Code pointer.
  __ Mov(code_pointer(), Operand(masm_->CodeObject()));
}

// Emits a branch to |to| taken when |condition| holds. A NULL |to| means
// "backtrack": for the unconditional case the backtrack sequence is emitted
// inline, for a conditional branch the shared backtrack label is targeted.
void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
                                                  Label* to) {
  if (condition == al) {  // Unconditional.
    if (to == NULL) {
      Backtrack();
      return;
    }
    __ B(to);
    return;
  }
  if (to == NULL) {
    to = &backtrack_label_;
  }
  __ B(condition, to);
}

// Compares |reg| with |immediate| and branches to |to| (or backtracks if it
// is NULL) when |condition| holds. Equality tests against zero are emitted as
// a single compare-and-branch (Cbz / Cbnz); every other case is a Cmp
// followed by BranchOrBacktrack.
void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
                                                            int immediate,
                                                            Condition condition,
                                                            Label* to) {
  if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
    if (to == NULL) {
      to = &backtrack_label_;
    }
    if (condition == eq) {
      __ Cbz(reg, to);
    } else {
      __ Cbnz(reg, to);
    }
  } else {
    __ Cmp(reg, immediate);
    BranchOrBacktrack(condition, to);
  }
}


// Loads the stack limit and calls the check_preempt_label_ handler when csp
// is lower than or equal to it (unsigned comparison). Clobbers x10.
void RegExpMacroAssemblerARM64::CheckPreemption() {
  // Check for preemption.
  ExternalReference stack_limit =
      ExternalReference::address_of_stack_limit(isolate());
  __ Mov(x10, stack_limit);
  __ Ldr(x10, MemOperand(x10));
  DCHECK(csp.Is(__ StackPointer()));
  __ Cmp(csp, x10);
  CallIf(&check_preempt_label_, ls);
}


// Loads the regexp (backtrack) stack limit and calls the
// stack_overflow_label_ handler when the backtrack stack pointer is lower
// than or equal to it (unsigned comparison). Clobbers x10.
void RegExpMacroAssemblerARM64::CheckStackLimit() {
  ExternalReference stack_limit =
      ExternalReference::address_of_regexp_stack_limit(isolate());
  __ Mov(x10, stack_limit);
  __ Ldr(x10, MemOperand(x10));
  __ Cmp(backtrack_stackpointer(), x10);
  CallIf(&stack_overflow_label_, ls);
}


// Pushes the 32-bit register |source| on the backtrack stack: the backtrack
// stack pointer is decremented by kWRegSize before the store (pre-index).
void RegExpMacroAssemblerARM64::Push(Register source) {
  DCHECK(source.Is32Bits());
  DCHECK(!source.is(backtrack_stackpointer()));
  __ Str(source,
         MemOperand(backtrack_stackpointer(),
                    -static_cast<int>(kWRegSize),
                    PreIndex));
}


// Pops a 32-bit value from the backtrack stack into |target|: the backtrack
// stack pointer is incremented by kWRegSize after the load (post-index).
void RegExpMacroAssemblerARM64::Pop(Register target) {
  DCHECK(target.Is32Bits());
  DCHECK(!target.is(backtrack_stackpointer()));
  __ Ldr(target,
         MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex));
}


// Returns the 64-bit hardware register caching regexp register
// |register_index|. Two consecutive regexp registers share one X register,
// hence the division by two.
Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
  DCHECK(register_index < kNumCachedRegisters);
  return Register::Create(register_index / 2, kXRegSizeInBits);
}


// Returns a W register holding the value of regexp register
// |register_index|, emitting code to fetch it where needed:
//  - STACKED:    loaded from the frame into |maybe_result|;
//  - CACHED_LSW: the W view of the caching register is returned directly and
//                no code is emitted;
//  - CACHED_MSW: the high half of the caching register is shifted down into
//                |maybe_result|.
// Also grows num_registers_ so that it covers |register_index|.
Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
                                                Register maybe_result) {
  DCHECK(maybe_result.Is32Bits());
  DCHECK(register_index >= 0);
  if (num_registers_ <= register_index) {
    num_registers_ = register_index + 1;
  }
  Register result;
  RegisterState register_state = GetRegisterState(register_index);
  switch (register_state) {
    case STACKED:
      __ Ldr(maybe_result, register_location(register_index));
      result = maybe_result;
      break;
    case CACHED_LSW:
      result = GetCachedRegister(register_index).W();
      break;
    case CACHED_MSW:
      __ Lsr(maybe_result.X(), GetCachedRegister(register_index),
             kWRegSizeInBits);
      result = maybe_result;
      break;
    default:
      UNREACHABLE();
      break;
  }
  DCHECK(result.Is32Bits());
  return result;
}


// Stores the 32-bit register |source| into regexp register |register_index|:
//  - STACKED:    written to its slot in the frame;
//  - CACHED_LSW: inserted into the low 32 bits of the caching X register
//                (skipped when |source| already is that register's W view);
//  - CACHED_MSW: inserted into the high 32 bits of the caching X register.
// Also grows num_registers_ so that it covers |register_index|.
void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
                                              Register source) {
  DCHECK(source.Is32Bits());
  DCHECK(register_index >= 0);
  if (num_registers_ <= register_index) {
    num_registers_ = register_index + 1;
  }

  Register cached_register;
  RegisterState register_state = GetRegisterState(register_index);
  switch (register_state) {
    case STACKED:
      __ Str(source, register_location(register_index));
      break;
    case CACHED_LSW:
      cached_register = GetCachedRegister(register_index);
      if (!source.Is(cached_register.W())) {
        __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
      }
      break;
    case CACHED_MSW:
      cached_register = GetCachedRegister(register_index);
      __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
      break;
    default:
      UNREACHABLE();
      break;
  }
}


// Emits a branch-with-link to |to| that is executed only when |condition|
// holds. For a condition other than 'al' the call is skipped by branching
// over it on the negated condition.
void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) {
  Label skip_call;
  if (condition != al) __ B(&skip_call, NegateCondition(condition));
  __ Bl(to);
  __ Bind(&skip_call);
}


// Pops the link register saved by SaveLinkRegister and converts it back from
// an offset into an address by adding the code object.
void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
  DCHECK(csp.Is(__ StackPointer()));
  __ Pop(lr, xzr);
  __ Add(lr, lr, Operand(masm_->CodeObject()));
}


// Pushes the link register on the C stack as an offset relative to the code
// object rather than as an absolute address.
// NOTE(review): presumably so the saved value stays meaningful if the code
// object moves while it is on the stack - confirm. xzr is pushed alongside
// lr, making this a two-register push (presumably to keep csp 16-byte
// aligned - confirm).
void RegExpMacroAssemblerARM64::SaveLinkRegister() {
  DCHECK(csp.Is(__ StackPointer()));
  __ Sub(lr, lr, Operand(masm_->CodeObject()));
  __ Push(xzr, lr);
}


// Returns the frame-pointer-relative memory operand of the stacked regexp
// register |register_index|. Stacked registers start at kFirstRegisterOnStack
// and occupy decreasing addresses, kWRegSize bytes each.
// Also grows num_registers_ so that it covers |register_index|.
MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
  DCHECK(register_index < (1<<30));
  DCHECK(register_index >= kNumCachedRegisters);
  if (num_registers_ <= register_index) {
    num_registers_ = register_index + 1;
  }
  register_index -= kNumCachedRegisters;
  int offset = kFirstRegisterOnStack - register_index * kWRegSize;
  return MemOperand(frame_pointer(), offset);
}

// Returns a memory operand for the stacked capture register pair starting at
// the (even) index |register_index|, computed from kFirstCaptureOnStack.
// When the offset does not pass is_int7, the address is computed into
// |scratch| by an emitted Add and the operand is based on |scratch| instead.
MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index,
                                                       Register scratch) {
  DCHECK(register_index < (1<<30));
  DCHECK(register_index < num_saved_registers_);
  DCHECK(register_index >= kNumCachedRegisters);
  DCHECK_EQ(register_index % 2, 0);
  register_index -= kNumCachedRegisters;
  int offset = kFirstCaptureOnStack - register_index * kWRegSize;
  // capture_location is used with Stp instructions to load/store 2 registers.
  // The immediate field in the encoding is limited to 7 bits (signed).
  if (is_int7(offset)) {
    return MemOperand(frame_pointer(), offset);
  } else {
    __ Add(scratch, frame_pointer(), offset);
    return MemOperand(scratch);
  }
}

// Loads |characters| characters found |cp_offset| characters from the current
// position into the current-character register, without any bounds check.
//
// The address is input_end() plus the sign-extended 32-bit offset. The width
// of the load depends on the string encoding and on |characters|:
//  - LATIN1: 4 -> 32-bit load, 2 -> 16-bit load, 1 -> 8-bit load;
//  - UC16:   2 -> 32-bit load, 1 -> 16-bit load.
// Clobbers x10 when |cp_offset| is non-zero.
void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
                                                              int characters) {
  Register offset = current_input_offset();

  // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
  // and the operating system running on the target allow it.
  // If unaligned load/stores are not supported then this function must only
  // be used to load a single character at a time.

  // ARMv8 supports unaligned accesses but V8 or the kernel can decide to
  // disable it.
  // TODO(pielan): See whether or not we should disable unaligned accesses.
  if (!CanReadUnaligned()) {
    DCHECK(characters == 1);
  }

  if (cp_offset != 0) {
    if (masm_->emit_debug_code()) {
      // Debug code computes the offset in 64 bits so that the result can be
      // checked against its own sign-extended low word; w10 still ends up
      // holding the 32-bit offset used below.
      __ Mov(x10, cp_offset * char_size());
      __ Add(x10, x10, Operand(current_input_offset(), SXTW));
      __ Cmp(x10, Operand(w10, SXTW));
      // The offset needs to fit in a W register.
      __ Check(eq, kOffsetOutOfRange);
    } else {
      __ Add(w10, current_input_offset(), cp_offset * char_size());
    }
    offset = w10;
  }

  if (mode_ == LATIN1) {
    if (characters == 4) {
      __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
    } else if (characters == 2) {
      __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
    } else {
      DCHECK(characters == 1);
      __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
    }
  } else {
    DCHECK(mode_ == UC16);
    if (characters == 2) {
      __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
    } else {
      DCHECK(characters == 1);
      __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
    }
  }
}

#endif  // V8_INTERPRETED_REGEXP

}  // namespace internal
}  // namespace v8

#endif  // V8_TARGET_ARCH_ARM64