1 // Copyright 2013 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "src/v8.h" 6 7 #if V8_TARGET_ARCH_ARM64 8 9 #include "src/code-stubs.h" 10 #include "src/cpu-profiler.h" 11 #include "src/log.h" 12 #include "src/macro-assembler.h" 13 #include "src/regexp-macro-assembler.h" 14 #include "src/regexp-stack.h" 15 #include "src/unicode.h" 16 17 #include "src/arm64/regexp-macro-assembler-arm64.h" 18 19 namespace v8 { 20 namespace internal { 21 22 #ifndef V8_INTERPRETED_REGEXP 23 /* 24 * This assembler uses the following register assignment convention: 25 * - w19 : Used to temporarely store a value before a call to C code. 26 * See CheckNotBackReferenceIgnoreCase. 27 * - x20 : Pointer to the current code object (Code*), 28 * it includes the heap object tag. 29 * - w21 : Current position in input, as negative offset from 30 * the end of the string. Please notice that this is 31 * the byte offset, not the character offset! 32 * - w22 : Currently loaded character. Must be loaded using 33 * LoadCurrentCharacter before using any of the dispatch methods. 34 * - x23 : Points to tip of backtrack stack. 35 * - w24 : Position of the first character minus one: non_position_value. 36 * Used to initialize capture registers. 37 * - x25 : Address at the end of the input string: input_end. 38 * Points to byte after last character in input. 39 * - x26 : Address at the start of the input string: input_start. 40 * - w27 : Where to start in the input string. 41 * - x28 : Output array pointer. 42 * - x29/fp : Frame pointer. Used to access arguments, local variables and 43 * RegExp registers. 44 * - x16/x17 : IP registers, used by assembler. Very volatile. 45 * - csp : Points to tip of C stack. 46 * 47 * - x0-x7 : Used as a cache to store 32 bit capture registers. These 48 * registers need to be retained every time a call to C code 49 * is done. 50 * 51 * The remaining registers are free for computations. 52 * Each call to a public method should retain this convention. 53 * 54 * The stack will have the following structure: 55 * 56 * Location Name Description 57 * (as referred to in 58 * the code) 59 * 60 * - fp[104] isolate Address of the current isolate. 61 * - fp[96] return_address Secondary link/return address 62 * used by an exit frame if this is a 63 * native call. 64 * ^^^ csp when called ^^^ 65 * - fp[88] lr Return from the RegExp code. 66 * - fp[80] r29 Old frame pointer (CalleeSaved). 67 * - fp[0..72] r19-r28 Backup of CalleeSaved registers. 68 * - fp[-8] direct_call 1 => Direct call from JavaScript code. 69 * 0 => Call through the runtime system. 70 * - fp[-16] stack_base High end of the memory area to use as 71 * the backtracking stack. 72 * - fp[-24] output_size Output may fit multiple sets of matches. 73 * - fp[-32] input Handle containing the input string. 74 * - fp[-40] success_counter 75 * ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^ 76 * - fp[-44] register N Capture registers initialized with 77 * - fp[-48] register N + 1 non_position_value. 78 * ... The first kNumCachedRegisters (N) registers 79 * ... are cached in x0 to x7. 80 * ... Only positions must be stored in the first 81 * - ... num_saved_registers_ registers. 82 * - ... 83 * - register N + num_registers - 1 84 * ^^^^^^^^^ csp ^^^^^^^^^ 85 * 86 * The first num_saved_registers_ registers are initialized to point to 87 * "character -1" in the string (i.e., char_size() bytes before the first 88 * character of the string). The remaining registers start out as garbage. 89 * 90 * The data up to the return address must be placed there by the calling 91 * code and the remaining arguments are passed in registers, e.g. by calling the 92 * code entry as cast to a function with the signature: 93 * int (*match)(String* input, 94 * int start_offset, 95 * Address input_start, 96 * Address input_end, 97 * int* output, 98 * int output_size, 99 * Address stack_base, 100 * bool direct_call = false, 101 * Address secondary_return_address, // Only used by native call. 102 * Isolate* isolate) 103 * The call is performed by NativeRegExpMacroAssembler::Execute() 104 * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro 105 * in arm64/simulator-arm64.h. 106 * When calling as a non-direct call (i.e., from C++ code), the return address 107 * area is overwritten with the LR register by the RegExp code. When doing a 108 * direct call from generated code, the return address is placed there by 109 * the calling code, as in a normal exit frame. 110 */ 111 112 #define __ ACCESS_MASM(masm_) 113 114 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64( 115 Mode mode, 116 int registers_to_save, 117 Zone* zone) 118 : NativeRegExpMacroAssembler(zone), 119 masm_(new MacroAssembler(zone->isolate(), NULL, kRegExpCodeSize)), 120 mode_(mode), 121 num_registers_(registers_to_save), 122 num_saved_registers_(registers_to_save), 123 entry_label_(), 124 start_label_(), 125 success_label_(), 126 backtrack_label_(), 127 exit_label_() { 128 __ SetStackPointer(csp); 129 DCHECK_EQ(0, registers_to_save % 2); 130 // We can cache at most 16 W registers in x0-x7. 131 STATIC_ASSERT(kNumCachedRegisters <= 16); 132 STATIC_ASSERT((kNumCachedRegisters % 2) == 0); 133 __ B(&entry_label_); // We'll write the entry code later. 134 __ Bind(&start_label_); // And then continue from here. 135 } 136 137 138 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() { 139 delete masm_; 140 // Unuse labels in case we throw away the assembler without calling GetCode. 141 entry_label_.Unuse(); 142 start_label_.Unuse(); 143 success_label_.Unuse(); 144 backtrack_label_.Unuse(); 145 exit_label_.Unuse(); 146 check_preempt_label_.Unuse(); 147 stack_overflow_label_.Unuse(); 148 } 149 150 int RegExpMacroAssemblerARM64::stack_limit_slack() { 151 return RegExpStack::kStackLimitSlack; 152 } 153 154 155 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) { 156 if (by != 0) { 157 __ Add(current_input_offset(), 158 current_input_offset(), by * char_size()); 159 } 160 } 161 162 163 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) { 164 DCHECK((reg >= 0) && (reg < num_registers_)); 165 if (by != 0) { 166 Register to_advance; 167 RegisterState register_state = GetRegisterState(reg); 168 switch (register_state) { 169 case STACKED: 170 __ Ldr(w10, register_location(reg)); 171 __ Add(w10, w10, by); 172 __ Str(w10, register_location(reg)); 173 break; 174 case CACHED_LSW: 175 to_advance = GetCachedRegister(reg); 176 __ Add(to_advance, to_advance, by); 177 break; 178 case CACHED_MSW: 179 to_advance = GetCachedRegister(reg); 180 __ Add(to_advance, to_advance, 181 static_cast<int64_t>(by) << kWRegSizeInBits); 182 break; 183 default: 184 UNREACHABLE(); 185 break; 186 } 187 } 188 } 189 190 191 void RegExpMacroAssemblerARM64::Backtrack() { 192 CheckPreemption(); 193 Pop(w10); 194 __ Add(x10, code_pointer(), Operand(w10, UXTW)); 195 __ Br(x10); 196 } 197 198 199 void RegExpMacroAssemblerARM64::Bind(Label* label) { 200 __ Bind(label); 201 } 202 203 204 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) { 205 CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal); 206 } 207 208 209 void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit, 210 Label* on_greater) { 211 CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater); 212 } 213 214 215 void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) { 216 Label not_at_start; 217 // Did we start the match at the start of the input string? 218 CompareAndBranchOrBacktrack(start_offset(), 0, ne, ¬_at_start); 219 // If we did, are we still at the start of the input string? 220 __ Add(x10, input_end(), Operand(current_input_offset(), SXTW)); 221 __ Cmp(x10, input_start()); 222 BranchOrBacktrack(eq, on_at_start); 223 __ Bind(¬_at_start); 224 } 225 226 227 void RegExpMacroAssemblerARM64::CheckNotAtStart(Label* on_not_at_start) { 228 // Did we start the match at the start of the input string? 229 CompareAndBranchOrBacktrack(start_offset(), 0, ne, on_not_at_start); 230 // If we did, are we still at the start of the input string? 231 __ Add(x10, input_end(), Operand(current_input_offset(), SXTW)); 232 __ Cmp(x10, input_start()); 233 BranchOrBacktrack(ne, on_not_at_start); 234 } 235 236 237 void RegExpMacroAssemblerARM64::CheckCharacterLT(uc16 limit, Label* on_less) { 238 CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less); 239 } 240 241 242 void RegExpMacroAssemblerARM64::CheckCharacters(Vector<const uc16> str, 243 int cp_offset, 244 Label* on_failure, 245 bool check_end_of_string) { 246 // This method is only ever called from the cctests. 247 248 if (check_end_of_string) { 249 // Is last character of required match inside string. 250 CheckPosition(cp_offset + str.length() - 1, on_failure); 251 } 252 253 Register characters_address = x11; 254 255 __ Add(characters_address, 256 input_end(), 257 Operand(current_input_offset(), SXTW)); 258 if (cp_offset != 0) { 259 __ Add(characters_address, characters_address, cp_offset * char_size()); 260 } 261 262 for (int i = 0; i < str.length(); i++) { 263 if (mode_ == LATIN1) { 264 __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex)); 265 DCHECK(str[i] <= String::kMaxOneByteCharCode); 266 } else { 267 __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex)); 268 } 269 CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure); 270 } 271 } 272 273 274 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) { 275 __ Ldr(w10, MemOperand(backtrack_stackpointer())); 276 __ Cmp(current_input_offset(), w10); 277 __ Cset(x11, eq); 278 __ Add(backtrack_stackpointer(), 279 backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2)); 280 BranchOrBacktrack(eq, on_equal); 281 } 282 283 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase( 284 int start_reg, 285 Label* on_no_match) { 286 Label fallthrough; 287 288 Register capture_start_offset = w10; 289 // Save the capture length in a callee-saved register so it will 290 // be preserved if we call a C helper. 291 Register capture_length = w19; 292 DCHECK(kCalleeSaved.IncludesAliasOf(capture_length)); 293 294 // Find length of back-referenced capture. 295 DCHECK((start_reg % 2) == 0); 296 if (start_reg < kNumCachedRegisters) { 297 __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg)); 298 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits); 299 } else { 300 __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10)); 301 } 302 __ Sub(capture_length, w11, capture_start_offset); // Length to check. 303 // Succeed on empty capture (including no capture). 304 __ Cbz(capture_length, &fallthrough); 305 306 // Check that there are enough characters left in the input. 307 __ Cmn(capture_length, current_input_offset()); 308 BranchOrBacktrack(gt, on_no_match); 309 310 if (mode_ == LATIN1) { 311 Label success; 312 Label fail; 313 Label loop_check; 314 315 Register capture_start_address = x12; 316 Register capture_end_addresss = x13; 317 Register current_position_address = x14; 318 319 __ Add(capture_start_address, 320 input_end(), 321 Operand(capture_start_offset, SXTW)); 322 __ Add(capture_end_addresss, 323 capture_start_address, 324 Operand(capture_length, SXTW)); 325 __ Add(current_position_address, 326 input_end(), 327 Operand(current_input_offset(), SXTW)); 328 329 Label loop; 330 __ Bind(&loop); 331 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex)); 332 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex)); 333 __ Cmp(w10, w11); 334 __ B(eq, &loop_check); 335 336 // Mismatch, try case-insensitive match (converting letters to lower-case). 337 __ Orr(w10, w10, 0x20); // Convert capture character to lower-case. 338 __ Orr(w11, w11, 0x20); // Also convert input character. 339 __ Cmp(w11, w10); 340 __ B(ne, &fail); 341 __ Sub(w10, w10, 'a'); 342 __ Cmp(w10, 'z' - 'a'); // Is w10 a lowercase letter? 343 __ B(ls, &loop_check); // In range 'a'-'z'. 344 // Latin-1: Check for values in range [224,254] but not 247. 345 __ Sub(w10, w10, 224 - 'a'); 346 __ Cmp(w10, 254 - 224); 347 __ Ccmp(w10, 247 - 224, ZFlag, ls); // Check for 247. 348 __ B(eq, &fail); // Weren't Latin-1 letters. 349 350 __ Bind(&loop_check); 351 __ Cmp(capture_start_address, capture_end_addresss); 352 __ B(lt, &loop); 353 __ B(&success); 354 355 __ Bind(&fail); 356 BranchOrBacktrack(al, on_no_match); 357 358 __ Bind(&success); 359 // Compute new value of character position after the matched part. 360 __ Sub(current_input_offset().X(), current_position_address, input_end()); 361 if (masm_->emit_debug_code()) { 362 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW)); 363 __ Ccmp(current_input_offset(), 0, NoFlag, eq); 364 // The current input offset should be <= 0, and fit in a W register. 365 __ Check(le, kOffsetOutOfRange); 366 } 367 } else { 368 DCHECK(mode_ == UC16); 369 int argument_count = 4; 370 371 // The cached registers need to be retained. 372 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7); 373 DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters); 374 __ PushCPURegList(cached_registers); 375 376 // Put arguments into arguments registers. 377 // Parameters are 378 // x0: Address byte_offset1 - Address captured substring's start. 379 // x1: Address byte_offset2 - Address of current character position. 380 // w2: size_t byte_length - length of capture in bytes(!) 381 // x3: Isolate* isolate 382 383 // Address of start of capture. 384 __ Add(x0, input_end(), Operand(capture_start_offset, SXTW)); 385 // Length of capture. 386 __ Mov(w2, capture_length); 387 // Address of current input position. 388 __ Add(x1, input_end(), Operand(current_input_offset(), SXTW)); 389 // Isolate. 390 __ Mov(x3, ExternalReference::isolate_address(isolate())); 391 392 { 393 AllowExternalCallThatCantCauseGC scope(masm_); 394 ExternalReference function = 395 ExternalReference::re_case_insensitive_compare_uc16(isolate()); 396 __ CallCFunction(function, argument_count); 397 } 398 399 // Check if function returned non-zero for success or zero for failure. 400 // x0 is one of the registers used as a cache so it must be tested before 401 // the cache is restored. 402 __ Cmp(x0, 0); 403 __ PopCPURegList(cached_registers); 404 BranchOrBacktrack(eq, on_no_match); 405 406 // On success, increment position by length of capture. 407 __ Add(current_input_offset(), current_input_offset(), capture_length); 408 } 409 410 __ Bind(&fallthrough); 411 } 412 413 void RegExpMacroAssemblerARM64::CheckNotBackReference( 414 int start_reg, 415 Label* on_no_match) { 416 Label fallthrough; 417 418 Register capture_start_address = x12; 419 Register capture_end_address = x13; 420 Register current_position_address = x14; 421 Register capture_length = w15; 422 423 // Find length of back-referenced capture. 424 DCHECK((start_reg % 2) == 0); 425 if (start_reg < kNumCachedRegisters) { 426 __ Mov(x10, GetCachedRegister(start_reg)); 427 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits); 428 } else { 429 __ Ldp(w11, w10, capture_location(start_reg, x10)); 430 } 431 __ Sub(capture_length, w11, w10); // Length to check. 432 // Succeed on empty capture (including no capture). 433 __ Cbz(capture_length, &fallthrough); 434 435 // Check that there are enough characters left in the input. 436 __ Cmn(capture_length, current_input_offset()); 437 BranchOrBacktrack(gt, on_no_match); 438 439 // Compute pointers to match string and capture string 440 __ Add(capture_start_address, input_end(), Operand(w10, SXTW)); 441 __ Add(capture_end_address, 442 capture_start_address, 443 Operand(capture_length, SXTW)); 444 __ Add(current_position_address, 445 input_end(), 446 Operand(current_input_offset(), SXTW)); 447 448 Label loop; 449 __ Bind(&loop); 450 if (mode_ == LATIN1) { 451 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex)); 452 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex)); 453 } else { 454 DCHECK(mode_ == UC16); 455 __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex)); 456 __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex)); 457 } 458 __ Cmp(w10, w11); 459 BranchOrBacktrack(ne, on_no_match); 460 __ Cmp(capture_start_address, capture_end_address); 461 __ B(lt, &loop); 462 463 // Move current character position to position after match. 464 __ Sub(current_input_offset().X(), current_position_address, input_end()); 465 if (masm_->emit_debug_code()) { 466 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW)); 467 __ Ccmp(current_input_offset(), 0, NoFlag, eq); 468 // The current input offset should be <= 0, and fit in a W register. 469 __ Check(le, kOffsetOutOfRange); 470 } 471 __ Bind(&fallthrough); 472 } 473 474 475 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c, 476 Label* on_not_equal) { 477 CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal); 478 } 479 480 481 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c, 482 uint32_t mask, 483 Label* on_equal) { 484 __ And(w10, current_character(), mask); 485 CompareAndBranchOrBacktrack(w10, c, eq, on_equal); 486 } 487 488 489 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c, 490 unsigned mask, 491 Label* on_not_equal) { 492 __ And(w10, current_character(), mask); 493 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal); 494 } 495 496 497 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd( 498 uc16 c, 499 uc16 minus, 500 uc16 mask, 501 Label* on_not_equal) { 502 DCHECK(minus < String::kMaxUtf16CodeUnit); 503 __ Sub(w10, current_character(), minus); 504 __ And(w10, w10, mask); 505 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal); 506 } 507 508 509 void RegExpMacroAssemblerARM64::CheckCharacterInRange( 510 uc16 from, 511 uc16 to, 512 Label* on_in_range) { 513 __ Sub(w10, current_character(), from); 514 // Unsigned lower-or-same condition. 515 CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range); 516 } 517 518 519 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange( 520 uc16 from, 521 uc16 to, 522 Label* on_not_in_range) { 523 __ Sub(w10, current_character(), from); 524 // Unsigned higher condition. 525 CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range); 526 } 527 528 529 void RegExpMacroAssemblerARM64::CheckBitInTable( 530 Handle<ByteArray> table, 531 Label* on_bit_set) { 532 __ Mov(x11, Operand(table)); 533 if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) { 534 __ And(w10, current_character(), kTableMask); 535 __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag); 536 } else { 537 __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag); 538 } 539 __ Ldrb(w11, MemOperand(x11, w10, UXTW)); 540 CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set); 541 } 542 543 544 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type, 545 Label* on_no_match) { 546 // Range checks (c in min..max) are generally implemented by an unsigned 547 // (c - min) <= (max - min) check 548 switch (type) { 549 case 's': 550 // Match space-characters 551 if (mode_ == LATIN1) { 552 // One byte space characters are '\t'..'\r', ' ' and \u00a0. 553 Label success; 554 // Check for ' ' or 0x00a0. 555 __ Cmp(current_character(), ' '); 556 __ Ccmp(current_character(), 0x00a0, ZFlag, ne); 557 __ B(eq, &success); 558 // Check range 0x09..0x0d. 559 __ Sub(w10, current_character(), '\t'); 560 CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match); 561 __ Bind(&success); 562 return true; 563 } 564 return false; 565 case 'S': 566 // The emitted code for generic character classes is good enough. 567 return false; 568 case 'd': 569 // Match ASCII digits ('0'..'9'). 570 __ Sub(w10, current_character(), '0'); 571 CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match); 572 return true; 573 case 'D': 574 // Match ASCII non-digits. 575 __ Sub(w10, current_character(), '0'); 576 CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match); 577 return true; 578 case '.': { 579 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 580 // Here we emit the conditional branch only once at the end to make branch 581 // prediction more efficient, even though we could branch out of here 582 // as soon as a character matches. 583 __ Cmp(current_character(), 0x0a); 584 __ Ccmp(current_character(), 0x0d, ZFlag, ne); 585 if (mode_ == UC16) { 586 __ Sub(w10, current_character(), 0x2028); 587 // If the Z flag was set we clear the flags to force a branch. 588 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne); 589 // ls -> !((C==1) && (Z==0)) 590 BranchOrBacktrack(ls, on_no_match); 591 } else { 592 BranchOrBacktrack(eq, on_no_match); 593 } 594 return true; 595 } 596 case 'n': { 597 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 598 // We have to check all 4 newline characters before emitting 599 // the conditional branch. 600 __ Cmp(current_character(), 0x0a); 601 __ Ccmp(current_character(), 0x0d, ZFlag, ne); 602 if (mode_ == UC16) { 603 __ Sub(w10, current_character(), 0x2028); 604 // If the Z flag was set we clear the flags to force a fall-through. 605 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne); 606 // hi -> (C==1) && (Z==0) 607 BranchOrBacktrack(hi, on_no_match); 608 } else { 609 BranchOrBacktrack(ne, on_no_match); 610 } 611 return true; 612 } 613 case 'w': { 614 if (mode_ != LATIN1) { 615 // Table is 256 entries, so all Latin1 characters can be tested. 616 CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match); 617 } 618 ExternalReference map = ExternalReference::re_word_character_map(); 619 __ Mov(x10, map); 620 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW)); 621 CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match); 622 return true; 623 } 624 case 'W': { 625 Label done; 626 if (mode_ != LATIN1) { 627 // Table is 256 entries, so all Latin1 characters can be tested. 628 __ Cmp(current_character(), 'z'); 629 __ B(hi, &done); 630 } 631 ExternalReference map = ExternalReference::re_word_character_map(); 632 __ Mov(x10, map); 633 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW)); 634 CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match); 635 __ Bind(&done); 636 return true; 637 } 638 case '*': 639 // Match any character. 640 return true; 641 // No custom implementation (yet): s(UC16), S(UC16). 642 default: 643 return false; 644 } 645 } 646 647 648 void RegExpMacroAssemblerARM64::Fail() { 649 __ Mov(w0, FAILURE); 650 __ B(&exit_label_); 651 } 652 653 654 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) { 655 Label return_w0; 656 // Finalize code - write the entry point code now we know how many 657 // registers we need. 658 659 // Entry code: 660 __ Bind(&entry_label_); 661 662 // Arguments on entry: 663 // x0: String* input 664 // x1: int start_offset 665 // x2: byte* input_start 666 // x3: byte* input_end 667 // x4: int* output array 668 // x5: int output array size 669 // x6: Address stack_base 670 // x7: int direct_call 671 672 // The stack pointer should be csp on entry. 673 // csp[8]: address of the current isolate 674 // csp[0]: secondary link/return address used by native call 675 676 // Tell the system that we have a stack frame. Because the type is MANUAL, no 677 // code is generated. 678 FrameScope scope(masm_, StackFrame::MANUAL); 679 680 // Push registers on the stack, only push the argument registers that we need. 681 CPURegList argument_registers(x0, x5, x6, x7); 682 683 CPURegList registers_to_retain = kCalleeSaved; 684 DCHECK(kCalleeSaved.Count() == 11); 685 registers_to_retain.Combine(lr); 686 687 DCHECK(csp.Is(__ StackPointer())); 688 __ PushCPURegList(registers_to_retain); 689 __ PushCPURegList(argument_registers); 690 691 // Set frame pointer in place. 692 __ Add(frame_pointer(), csp, argument_registers.Count() * kPointerSize); 693 694 // Initialize callee-saved registers. 695 __ Mov(start_offset(), w1); 696 __ Mov(input_start(), x2); 697 __ Mov(input_end(), x3); 698 __ Mov(output_array(), x4); 699 700 // Set the number of registers we will need to allocate, that is: 701 // - success_counter (X register) 702 // - (num_registers_ - kNumCachedRegisters) (W registers) 703 int num_wreg_to_allocate = num_registers_ - kNumCachedRegisters; 704 // Do not allocate registers on the stack if they can all be cached. 705 if (num_wreg_to_allocate < 0) { num_wreg_to_allocate = 0; } 706 // Make room for the success_counter. 707 num_wreg_to_allocate += 2; 708 709 // Make sure the stack alignment will be respected. 710 int alignment = masm_->ActivationFrameAlignment(); 711 DCHECK_EQ(alignment % 16, 0); 712 int align_mask = (alignment / kWRegSize) - 1; 713 num_wreg_to_allocate = (num_wreg_to_allocate + align_mask) & ~align_mask; 714 715 // Check if we have space on the stack. 716 Label stack_limit_hit; 717 Label stack_ok; 718 719 ExternalReference stack_limit = 720 ExternalReference::address_of_stack_limit(isolate()); 721 __ Mov(x10, stack_limit); 722 __ Ldr(x10, MemOperand(x10)); 723 __ Subs(x10, csp, x10); 724 725 // Handle it if the stack pointer is already below the stack limit. 726 __ B(ls, &stack_limit_hit); 727 728 // Check if there is room for the variable number of registers above 729 // the stack limit. 730 __ Cmp(x10, num_wreg_to_allocate * kWRegSize); 731 __ B(hs, &stack_ok); 732 733 // Exit with OutOfMemory exception. There is not enough space on the stack 734 // for our working registers. 735 __ Mov(w0, EXCEPTION); 736 __ B(&return_w0); 737 738 __ Bind(&stack_limit_hit); 739 CallCheckStackGuardState(x10); 740 // If returned value is non-zero, we exit with the returned value as result. 741 __ Cbnz(w0, &return_w0); 742 743 __ Bind(&stack_ok); 744 745 // Allocate space on stack. 746 __ Claim(num_wreg_to_allocate, kWRegSize); 747 748 // Initialize success_counter with 0. 749 __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter)); 750 751 // Find negative length (offset of start relative to end). 752 __ Sub(x10, input_start(), input_end()); 753 if (masm_->emit_debug_code()) { 754 // Check that the input string length is < 2^30. 755 __ Neg(x11, x10); 756 __ Cmp(x11, (1<<30) - 1); 757 __ Check(ls, kInputStringTooLong); 758 } 759 __ Mov(current_input_offset(), w10); 760 761 // The non-position value is used as a clearing value for the 762 // capture registers, it corresponds to the position of the first character 763 // minus one. 764 __ Sub(non_position_value(), current_input_offset(), char_size()); 765 __ Sub(non_position_value(), non_position_value(), 766 Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0)); 767 // We can store this value twice in an X register for initializing 768 // on-stack registers later. 769 __ Orr(twice_non_position_value(), 770 non_position_value().X(), 771 Operand(non_position_value().X(), LSL, kWRegSizeInBits)); 772 773 // Initialize code pointer register. 774 __ Mov(code_pointer(), Operand(masm_->CodeObject())); 775 776 Label load_char_start_regexp, start_regexp; 777 // Load newline if index is at start, previous character otherwise. 778 __ Cbnz(start_offset(), &load_char_start_regexp); 779 __ Mov(current_character(), '\n'); 780 __ B(&start_regexp); 781 782 // Global regexp restarts matching here. 783 __ Bind(&load_char_start_regexp); 784 // Load previous char as initial value of current character register. 785 LoadCurrentCharacterUnchecked(-1, 1); 786 __ Bind(&start_regexp); 787 // Initialize on-stack registers. 788 if (num_saved_registers_ > 0) { 789 ClearRegisters(0, num_saved_registers_ - 1); 790 } 791 792 // Initialize backtrack stack pointer. 793 __ Ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackBase)); 794 795 // Execute 796 __ B(&start_label_); 797 798 if (backtrack_label_.is_linked()) { 799 __ Bind(&backtrack_label_); 800 Backtrack(); 801 } 802 803 if (success_label_.is_linked()) { 804 Register first_capture_start = w15; 805 806 // Save captures when successful. 807 __ Bind(&success_label_); 808 809 if (num_saved_registers_ > 0) { 810 // V8 expects the output to be an int32_t array. 811 Register capture_start = w12; 812 Register capture_end = w13; 813 Register input_length = w14; 814 815 // Copy captures to output. 816 817 // Get string length. 818 __ Sub(x10, input_end(), input_start()); 819 if (masm_->emit_debug_code()) { 820 // Check that the input string length is < 2^30. 821 __ Cmp(x10, (1<<30) - 1); 822 __ Check(ls, kInputStringTooLong); 823 } 824 // input_start has a start_offset offset on entry. We need to include 825 // it when computing the length of the whole string. 826 if (mode_ == UC16) { 827 __ Add(input_length, start_offset(), Operand(w10, LSR, 1)); 828 } else { 829 __ Add(input_length, start_offset(), w10); 830 } 831 832 // Copy the results to the output array from the cached registers first. 833 for (int i = 0; 834 (i < num_saved_registers_) && (i < kNumCachedRegisters); 835 i += 2) { 836 __ Mov(capture_start.X(), GetCachedRegister(i)); 837 __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits); 838 if ((i == 0) && global_with_zero_length_check()) { 839 // Keep capture start for the zero-length check later. 840 __ Mov(first_capture_start, capture_start); 841 } 842 // Offsets need to be relative to the start of the string. 843 if (mode_ == UC16) { 844 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1)); 845 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 846 } else { 847 __ Add(capture_start, input_length, capture_start); 848 __ Add(capture_end, input_length, capture_end); 849 } 850 // The output pointer advances for a possible global match. 851 __ Stp(capture_start, 852 capture_end, 853 MemOperand(output_array(), kPointerSize, PostIndex)); 854 } 855 856 // Only carry on if there are more than kNumCachedRegisters capture 857 // registers. 858 int num_registers_left_on_stack = 859 num_saved_registers_ - kNumCachedRegisters; 860 if (num_registers_left_on_stack > 0) { 861 Register base = x10; 862 // There are always an even number of capture registers. A couple of 863 // registers determine one match with two offsets. 864 DCHECK_EQ(0, num_registers_left_on_stack % 2); 865 __ Add(base, frame_pointer(), kFirstCaptureOnStack); 866 867 // We can unroll the loop here, we should not unroll for less than 2 868 // registers. 869 STATIC_ASSERT(kNumRegistersToUnroll > 2); 870 if (num_registers_left_on_stack <= kNumRegistersToUnroll) { 871 for (int i = 0; i < num_registers_left_on_stack / 2; i++) { 872 __ Ldp(capture_end, 873 capture_start, 874 MemOperand(base, -kPointerSize, PostIndex)); 875 if ((i == 0) && global_with_zero_length_check()) { 876 // Keep capture start for the zero-length check later. 877 __ Mov(first_capture_start, capture_start); 878 } 879 // Offsets need to be relative to the start of the string. 880 if (mode_ == UC16) { 881 __ Add(capture_start, 882 input_length, 883 Operand(capture_start, ASR, 1)); 884 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 885 } else { 886 __ Add(capture_start, input_length, capture_start); 887 __ Add(capture_end, input_length, capture_end); 888 } 889 // The output pointer advances for a possible global match. 890 __ Stp(capture_start, 891 capture_end, 892 MemOperand(output_array(), kPointerSize, PostIndex)); 893 } 894 } else { 895 Label loop, start; 896 __ Mov(x11, num_registers_left_on_stack); 897 898 __ Ldp(capture_end, 899 capture_start, 900 MemOperand(base, -kPointerSize, PostIndex)); 901 if (global_with_zero_length_check()) { 902 __ Mov(first_capture_start, capture_start); 903 } 904 __ B(&start); 905 906 __ Bind(&loop); 907 __ Ldp(capture_end, 908 capture_start, 909 MemOperand(base, -kPointerSize, PostIndex)); 910 __ Bind(&start); 911 if (mode_ == UC16) { 912 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1)); 913 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1)); 914 } else { 915 __ Add(capture_start, input_length, capture_start); 916 __ Add(capture_end, input_length, capture_end); 917 } 918 // The output pointer advances for a possible global match. 919 __ Stp(capture_start, 920 capture_end, 921 MemOperand(output_array(), kPointerSize, PostIndex)); 922 __ Sub(x11, x11, 2); 923 __ Cbnz(x11, &loop); 924 } 925 } 926 } 927 928 if (global()) { 929 Register success_counter = w0; 930 Register output_size = x10; 931 // Restart matching if the regular expression is flagged as global. 932 933 // Increment success counter. 934 __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter)); 935 __ Add(success_counter, success_counter, 1); 936 __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter)); 937 938 // Capture results have been stored, so the number of remaining global 939 // output registers is reduced by the number of stored captures. 940 __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize)); 941 __ Sub(output_size, output_size, num_saved_registers_); 942 // Check whether we have enough room for another set of capture results. 943 __ Cmp(output_size, num_saved_registers_); 944 __ B(lt, &return_w0); 945 946 // The output pointer is already set to the next field in the output 947 // array. 948 // Update output size on the frame before we restart matching. 949 __ Str(output_size, MemOperand(frame_pointer(), kOutputSize)); 950 951 if (global_with_zero_length_check()) { 952 // Special case for zero-length matches. 953 __ Cmp(current_input_offset(), first_capture_start); 954 // Not a zero-length match, restart. 955 __ B(ne, &load_char_start_regexp); 956 // Offset from the end is zero if we already reached the end. 957 __ Cbz(current_input_offset(), &return_w0); 958 // Advance current position after a zero-length match. 959 __ Add(current_input_offset(), 960 current_input_offset(), 961 Operand((mode_ == UC16) ? 2 : 1)); 962 } 963 964 __ B(&load_char_start_regexp); 965 } else { 966 __ Mov(w0, SUCCESS); 967 } 968 } 969 970 if (exit_label_.is_linked()) { 971 // Exit and return w0 972 __ Bind(&exit_label_); 973 if (global()) { 974 __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter)); 975 } 976 } 977 978 __ Bind(&return_w0); 979 980 // Set stack pointer back to first register to retain 981 DCHECK(csp.Is(__ StackPointer())); 982 __ Mov(csp, fp); 983 __ AssertStackConsistency(); 984 985 // Restore registers. 986 __ PopCPURegList(registers_to_retain); 987 988 __ Ret(); 989 990 Label exit_with_exception; 991 // Registers x0 to x7 are used to store the first captures, they need to be 992 // retained over calls to C++ code. 993 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7); 994 DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters); 995 996 if (check_preempt_label_.is_linked()) { 997 __ Bind(&check_preempt_label_); 998 SaveLinkRegister(); 999 // The cached registers need to be retained. 1000 __ PushCPURegList(cached_registers); 1001 CallCheckStackGuardState(x10); 1002 // Returning from the regexp code restores the stack (csp <- fp) 1003 // so we don't need to drop the link register from it before exiting. 1004 __ Cbnz(w0, &return_w0); 1005 // Reset the cached registers. 1006 __ PopCPURegList(cached_registers); 1007 RestoreLinkRegister(); 1008 __ Ret(); 1009 } 1010 1011 if (stack_overflow_label_.is_linked()) { 1012 __ Bind(&stack_overflow_label_); 1013 SaveLinkRegister(); 1014 // The cached registers need to be retained. 1015 __ PushCPURegList(cached_registers); 1016 // Call GrowStack(backtrack_stackpointer(), &stack_base) 1017 __ Mov(x2, ExternalReference::isolate_address(isolate())); 1018 __ Add(x1, frame_pointer(), kStackBase); 1019 __ Mov(x0, backtrack_stackpointer()); 1020 ExternalReference grow_stack = 1021 ExternalReference::re_grow_stack(isolate()); 1022 __ CallCFunction(grow_stack, 3); 1023 // If return NULL, we have failed to grow the stack, and 1024 // must exit with a stack-overflow exception. 1025 // Returning from the regexp code restores the stack (csp <- fp) 1026 // so we don't need to drop the link register from it before exiting. 1027 __ Cbz(w0, &exit_with_exception); 1028 // Otherwise use return value as new stack pointer. 1029 __ Mov(backtrack_stackpointer(), x0); 1030 // Reset the cached registers. 1031 __ PopCPURegList(cached_registers); 1032 RestoreLinkRegister(); 1033 __ Ret(); 1034 } 1035 1036 if (exit_with_exception.is_linked()) { 1037 __ Bind(&exit_with_exception); 1038 __ Mov(w0, EXCEPTION); 1039 __ B(&return_w0); 1040 } 1041 1042 CodeDesc code_desc; 1043 masm_->GetCode(&code_desc); 1044 Handle<Code> code = isolate()->factory()->NewCode( 1045 code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject()); 1046 PROFILE(masm_->isolate(), RegExpCodeCreateEvent(*code, *source)); 1047 return Handle<HeapObject>::cast(code); 1048 } 1049 1050 1051 void RegExpMacroAssemblerARM64::GoTo(Label* to) { 1052 BranchOrBacktrack(al, to); 1053 } 1054 1055 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand, 1056 Label* if_ge) { 1057 Register to_compare = GetRegister(reg, w10); 1058 CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge); 1059 } 1060 1061 1062 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand, 1063 Label* if_lt) { 1064 Register to_compare = GetRegister(reg, w10); 1065 CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt); 1066 } 1067 1068 1069 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) { 1070 Register to_compare = GetRegister(reg, w10); 1071 __ Cmp(to_compare, current_input_offset()); 1072 BranchOrBacktrack(eq, if_eq); 1073 } 1074 1075 RegExpMacroAssembler::IrregexpImplementation 1076 RegExpMacroAssemblerARM64::Implementation() { 1077 return kARM64Implementation; 1078 } 1079 1080 1081 void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset, 1082 Label* on_end_of_input, 1083 bool check_bounds, 1084 int characters) { 1085 // TODO(pielan): Make sure long strings are caught before this, and not 1086 // just asserted in debug mode. 1087 DCHECK(cp_offset >= -1); // ^ and \b can look behind one character. 1088 // Be sane! (And ensure that an int32_t can be used to index the string) 1089 DCHECK(cp_offset < (1<<30)); 1090 if (check_bounds) { 1091 CheckPosition(cp_offset + characters - 1, on_end_of_input); 1092 } 1093 LoadCurrentCharacterUnchecked(cp_offset, characters); 1094 } 1095 1096 1097 void RegExpMacroAssemblerARM64::PopCurrentPosition() { 1098 Pop(current_input_offset()); 1099 } 1100 1101 1102 void RegExpMacroAssemblerARM64::PopRegister(int register_index) { 1103 Pop(w10); 1104 StoreRegister(register_index, w10); 1105 } 1106 1107 1108 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) { 1109 if (label->is_bound()) { 1110 int target = label->pos(); 1111 __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag); 1112 } else { 1113 __ Adr(x10, label, MacroAssembler::kAdrFar); 1114 __ Sub(x10, x10, code_pointer()); 1115 if (masm_->emit_debug_code()) { 1116 __ Cmp(x10, kWRegMask); 1117 // The code offset has to fit in a W register. 1118 __ Check(ls, kOffsetOutOfRange); 1119 } 1120 } 1121 Push(w10); 1122 CheckStackLimit(); 1123 } 1124 1125 1126 void RegExpMacroAssemblerARM64::PushCurrentPosition() { 1127 Push(current_input_offset()); 1128 } 1129 1130 1131 void RegExpMacroAssemblerARM64::PushRegister(int register_index, 1132 StackCheckFlag check_stack_limit) { 1133 Register to_push = GetRegister(register_index, w10); 1134 Push(to_push); 1135 if (check_stack_limit) CheckStackLimit(); 1136 } 1137 1138 1139 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) { 1140 Register cached_register; 1141 RegisterState register_state = GetRegisterState(reg); 1142 switch (register_state) { 1143 case STACKED: 1144 __ Ldr(current_input_offset(), register_location(reg)); 1145 break; 1146 case CACHED_LSW: 1147 cached_register = GetCachedRegister(reg); 1148 __ Mov(current_input_offset(), cached_register.W()); 1149 break; 1150 case CACHED_MSW: 1151 cached_register = GetCachedRegister(reg); 1152 __ Lsr(current_input_offset().X(), cached_register, kWRegSizeInBits); 1153 break; 1154 default: 1155 UNREACHABLE(); 1156 break; 1157 } 1158 } 1159 1160 1161 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) { 1162 Register read_from = GetRegister(reg, w10); 1163 __ Ldr(x11, MemOperand(frame_pointer(), kStackBase)); 1164 __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW)); 1165 } 1166 1167 1168 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) { 1169 Label after_position; 1170 __ Cmp(current_input_offset(), -by * char_size()); 1171 __ B(ge, &after_position); 1172 __ Mov(current_input_offset(), -by * char_size()); 1173 // On RegExp code entry (where this operation is used), the character before 1174 // the current position is expected to be already loaded. 1175 // We have advanced the position, so it's safe to read backwards. 1176 LoadCurrentCharacterUnchecked(-1, 1); 1177 __ Bind(&after_position); 1178 } 1179 1180 1181 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) { 1182 DCHECK(register_index >= num_saved_registers_); // Reserved for positions! 1183 Register set_to = wzr; 1184 if (to != 0) { 1185 set_to = w10; 1186 __ Mov(set_to, to); 1187 } 1188 StoreRegister(register_index, set_to); 1189 } 1190 1191 1192 bool RegExpMacroAssemblerARM64::Succeed() { 1193 __ B(&success_label_); 1194 return global(); 1195 } 1196 1197 1198 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg, 1199 int cp_offset) { 1200 Register position = current_input_offset(); 1201 if (cp_offset != 0) { 1202 position = w10; 1203 __ Add(position, current_input_offset(), cp_offset * char_size()); 1204 } 1205 StoreRegister(reg, position); 1206 } 1207 1208 1209 void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) { 1210 DCHECK(reg_from <= reg_to); 1211 int num_registers = reg_to - reg_from + 1; 1212 1213 // If the first capture register is cached in a hardware register but not 1214 // aligned on a 64-bit one, we need to clear the first one specifically. 1215 if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) { 1216 StoreRegister(reg_from, non_position_value()); 1217 num_registers--; 1218 reg_from++; 1219 } 1220 1221 // Clear cached registers in pairs as far as possible. 1222 while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) { 1223 DCHECK(GetRegisterState(reg_from) == CACHED_LSW); 1224 __ Mov(GetCachedRegister(reg_from), twice_non_position_value()); 1225 reg_from += 2; 1226 num_registers -= 2; 1227 } 1228 1229 if ((num_registers % 2) == 1) { 1230 StoreRegister(reg_from, non_position_value()); 1231 num_registers--; 1232 reg_from++; 1233 } 1234 1235 if (num_registers > 0) { 1236 // If there are some remaining registers, they are stored on the stack. 1237 DCHECK(reg_from >= kNumCachedRegisters); 1238 1239 // Move down the indexes of the registers on stack to get the correct offset 1240 // in memory. 1241 reg_from -= kNumCachedRegisters; 1242 reg_to -= kNumCachedRegisters; 1243 // We should not unroll the loop for less than 2 registers. 1244 STATIC_ASSERT(kNumRegistersToUnroll > 2); 1245 // We position the base pointer to (reg_from + 1). 1246 int base_offset = kFirstRegisterOnStack - 1247 kWRegSize - (kWRegSize * reg_from); 1248 if (num_registers > kNumRegistersToUnroll) { 1249 Register base = x10; 1250 __ Add(base, frame_pointer(), base_offset); 1251 1252 Label loop; 1253 __ Mov(x11, num_registers); 1254 __ Bind(&loop); 1255 __ Str(twice_non_position_value(), 1256 MemOperand(base, -kPointerSize, PostIndex)); 1257 __ Sub(x11, x11, 2); 1258 __ Cbnz(x11, &loop); 1259 } else { 1260 for (int i = reg_from; i <= reg_to; i += 2) { 1261 __ Str(twice_non_position_value(), 1262 MemOperand(frame_pointer(), base_offset)); 1263 base_offset -= kWRegSize * 2; 1264 } 1265 } 1266 } 1267 } 1268 1269 1270 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) { 1271 __ Ldr(x10, MemOperand(frame_pointer(), kStackBase)); 1272 __ Sub(x10, backtrack_stackpointer(), x10); 1273 if (masm_->emit_debug_code()) { 1274 __ Cmp(x10, Operand(w10, SXTW)); 1275 // The stack offset needs to fit in a W register. 1276 __ Check(eq, kOffsetOutOfRange); 1277 } 1278 StoreRegister(reg, w10); 1279 } 1280 1281 1282 // Helper function for reading a value out of a stack frame. 1283 template <typename T> 1284 static T& frame_entry(Address re_frame, int frame_offset) { 1285 return *reinterpret_cast<T*>(re_frame + frame_offset); 1286 } 1287 1288 1289 int RegExpMacroAssemblerARM64::CheckStackGuardState(Address* return_address, 1290 Code* re_code, 1291 Address re_frame, 1292 int start_offset, 1293 const byte** input_start, 1294 const byte** input_end) { 1295 Isolate* isolate = frame_entry<Isolate*>(re_frame, kIsolate); 1296 StackLimitCheck check(isolate); 1297 if (check.JsHasOverflowed()) { 1298 isolate->StackOverflow(); 1299 return EXCEPTION; 1300 } 1301 1302 // If not real stack overflow the stack guard was used to interrupt 1303 // execution for another purpose. 1304 1305 // If this is a direct call from JavaScript retry the RegExp forcing the call 1306 // through the runtime system. Currently the direct call cannot handle a GC. 1307 if (frame_entry<int>(re_frame, kDirectCall) == 1) { 1308 return RETRY; 1309 } 1310 1311 // Prepare for possible GC. 1312 HandleScope handles(isolate); 1313 Handle<Code> code_handle(re_code); 1314 1315 Handle<String> subject(frame_entry<String*>(re_frame, kInput)); 1316 1317 // Current string. 1318 bool is_one_byte = subject->IsOneByteRepresentationUnderneath(); 1319 1320 DCHECK(re_code->instruction_start() <= *return_address); 1321 DCHECK(*return_address <= 1322 re_code->instruction_start() + re_code->instruction_size()); 1323 1324 Object* result = isolate->stack_guard()->HandleInterrupts(); 1325 1326 if (*code_handle != re_code) { // Return address no longer valid 1327 int delta = code_handle->address() - re_code->address(); 1328 // Overwrite the return address on the stack. 1329 *return_address += delta; 1330 } 1331 1332 if (result->IsException()) { 1333 return EXCEPTION; 1334 } 1335 1336 Handle<String> subject_tmp = subject; 1337 int slice_offset = 0; 1338 1339 // Extract the underlying string and the slice offset. 1340 if (StringShape(*subject_tmp).IsCons()) { 1341 subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first()); 1342 } else if (StringShape(*subject_tmp).IsSliced()) { 1343 SlicedString* slice = SlicedString::cast(*subject_tmp); 1344 subject_tmp = Handle<String>(slice->parent()); 1345 slice_offset = slice->offset(); 1346 } 1347 1348 // String might have changed. 1349 if (subject_tmp->IsOneByteRepresentation() != is_one_byte) { 1350 // If we changed between an Latin1 and an UC16 string, the specialized 1351 // code cannot be used, and we need to restart regexp matching from 1352 // scratch (including, potentially, compiling a new version of the code). 1353 return RETRY; 1354 } 1355 1356 // Otherwise, the content of the string might have moved. It must still 1357 // be a sequential or external string with the same content. 1358 // Update the start and end pointers in the stack frame to the current 1359 // location (whether it has actually moved or not). 1360 DCHECK(StringShape(*subject_tmp).IsSequential() || 1361 StringShape(*subject_tmp).IsExternal()); 1362 1363 // The original start address of the characters to match. 1364 const byte* start_address = *input_start; 1365 1366 // Find the current start address of the same character at the current string 1367 // position. 1368 const byte* new_address = StringCharacterPosition(*subject_tmp, 1369 start_offset + slice_offset); 1370 1371 if (start_address != new_address) { 1372 // If there is a difference, update the object pointer and start and end 1373 // addresses in the RegExp stack frame to match the new value. 1374 const byte* end_address = *input_end; 1375 int byte_length = static_cast<int>(end_address - start_address); 1376 frame_entry<const String*>(re_frame, kInput) = *subject; 1377 *input_start = new_address; 1378 *input_end = new_address + byte_length; 1379 } else if (frame_entry<const String*>(re_frame, kInput) != *subject) { 1380 // Subject string might have been a ConsString that underwent 1381 // short-circuiting during GC. That will not change start_address but 1382 // will change pointer inside the subject handle. 1383 frame_entry<const String*>(re_frame, kInput) = *subject; 1384 } 1385 1386 return 0; 1387 } 1388 1389 1390 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset, 1391 Label* on_outside_input) { 1392 CompareAndBranchOrBacktrack(current_input_offset(), 1393 -cp_offset * char_size(), 1394 ge, 1395 on_outside_input); 1396 } 1397 1398 1399 bool RegExpMacroAssemblerARM64::CanReadUnaligned() { 1400 // TODO(pielan): See whether or not we should disable unaligned accesses. 1401 return !slow_safe(); 1402 } 1403 1404 1405 // Private methods: 1406 1407 void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) { 1408 // Allocate space on the stack to store the return address. The 1409 // CheckStackGuardState C++ function will override it if the code 1410 // moved. Allocate extra space for 2 arguments passed by pointers. 1411 // AAPCS64 requires the stack to be 16 byte aligned. 1412 int alignment = masm_->ActivationFrameAlignment(); 1413 DCHECK_EQ(alignment % 16, 0); 1414 int align_mask = (alignment / kXRegSize) - 1; 1415 int xreg_to_claim = (3 + align_mask) & ~align_mask; 1416 1417 DCHECK(csp.Is(__ StackPointer())); 1418 __ Claim(xreg_to_claim); 1419 1420 // CheckStackGuardState needs the end and start addresses of the input string. 1421 __ Poke(input_end(), 2 * kPointerSize); 1422 __ Add(x5, csp, 2 * kPointerSize); 1423 __ Poke(input_start(), kPointerSize); 1424 __ Add(x4, csp, kPointerSize); 1425 1426 __ Mov(w3, start_offset()); 1427 // RegExp code frame pointer. 1428 __ Mov(x2, frame_pointer()); 1429 // Code* of self. 1430 __ Mov(x1, Operand(masm_->CodeObject())); 1431 1432 // We need to pass a pointer to the return address as first argument. 1433 // The DirectCEntry stub will place the return address on the stack before 1434 // calling so the stack pointer will point to it. 1435 __ Mov(x0, csp); 1436 1437 ExternalReference check_stack_guard_state = 1438 ExternalReference::re_check_stack_guard_state(isolate()); 1439 __ Mov(scratch, check_stack_guard_state); 1440 DirectCEntryStub stub(isolate()); 1441 stub.GenerateCall(masm_, scratch); 1442 1443 // The input string may have been moved in memory, we need to reload it. 1444 __ Peek(input_start(), kPointerSize); 1445 __ Peek(input_end(), 2 * kPointerSize); 1446 1447 DCHECK(csp.Is(__ StackPointer())); 1448 __ Drop(xreg_to_claim); 1449 1450 // Reload the Code pointer. 1451 __ Mov(code_pointer(), Operand(masm_->CodeObject())); 1452 } 1453 1454 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition, 1455 Label* to) { 1456 if (condition == al) { // Unconditional. 1457 if (to == NULL) { 1458 Backtrack(); 1459 return; 1460 } 1461 __ B(to); 1462 return; 1463 } 1464 if (to == NULL) { 1465 to = &backtrack_label_; 1466 } 1467 __ B(condition, to); 1468 } 1469 1470 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg, 1471 int immediate, 1472 Condition condition, 1473 Label* to) { 1474 if ((immediate == 0) && ((condition == eq) || (condition == ne))) { 1475 if (to == NULL) { 1476 to = &backtrack_label_; 1477 } 1478 if (condition == eq) { 1479 __ Cbz(reg, to); 1480 } else { 1481 __ Cbnz(reg, to); 1482 } 1483 } else { 1484 __ Cmp(reg, immediate); 1485 BranchOrBacktrack(condition, to); 1486 } 1487 } 1488 1489 1490 void RegExpMacroAssemblerARM64::CheckPreemption() { 1491 // Check for preemption. 1492 ExternalReference stack_limit = 1493 ExternalReference::address_of_stack_limit(isolate()); 1494 __ Mov(x10, stack_limit); 1495 __ Ldr(x10, MemOperand(x10)); 1496 DCHECK(csp.Is(__ StackPointer())); 1497 __ Cmp(csp, x10); 1498 CallIf(&check_preempt_label_, ls); 1499 } 1500 1501 1502 void RegExpMacroAssemblerARM64::CheckStackLimit() { 1503 ExternalReference stack_limit = 1504 ExternalReference::address_of_regexp_stack_limit(isolate()); 1505 __ Mov(x10, stack_limit); 1506 __ Ldr(x10, MemOperand(x10)); 1507 __ Cmp(backtrack_stackpointer(), x10); 1508 CallIf(&stack_overflow_label_, ls); 1509 } 1510 1511 1512 void RegExpMacroAssemblerARM64::Push(Register source) { 1513 DCHECK(source.Is32Bits()); 1514 DCHECK(!source.is(backtrack_stackpointer())); 1515 __ Str(source, 1516 MemOperand(backtrack_stackpointer(), 1517 -static_cast<int>(kWRegSize), 1518 PreIndex)); 1519 } 1520 1521 1522 void RegExpMacroAssemblerARM64::Pop(Register target) { 1523 DCHECK(target.Is32Bits()); 1524 DCHECK(!target.is(backtrack_stackpointer())); 1525 __ Ldr(target, 1526 MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex)); 1527 } 1528 1529 1530 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) { 1531 DCHECK(register_index < kNumCachedRegisters); 1532 return Register::Create(register_index / 2, kXRegSizeInBits); 1533 } 1534 1535 1536 Register RegExpMacroAssemblerARM64::GetRegister(int register_index, 1537 Register maybe_result) { 1538 DCHECK(maybe_result.Is32Bits()); 1539 DCHECK(register_index >= 0); 1540 if (num_registers_ <= register_index) { 1541 num_registers_ = register_index + 1; 1542 } 1543 Register result; 1544 RegisterState register_state = GetRegisterState(register_index); 1545 switch (register_state) { 1546 case STACKED: 1547 __ Ldr(maybe_result, register_location(register_index)); 1548 result = maybe_result; 1549 break; 1550 case CACHED_LSW: 1551 result = GetCachedRegister(register_index).W(); 1552 break; 1553 case CACHED_MSW: 1554 __ Lsr(maybe_result.X(), GetCachedRegister(register_index), 1555 kWRegSizeInBits); 1556 result = maybe_result; 1557 break; 1558 default: 1559 UNREACHABLE(); 1560 break; 1561 } 1562 DCHECK(result.Is32Bits()); 1563 return result; 1564 } 1565 1566 1567 void RegExpMacroAssemblerARM64::StoreRegister(int register_index, 1568 Register source) { 1569 DCHECK(source.Is32Bits()); 1570 DCHECK(register_index >= 0); 1571 if (num_registers_ <= register_index) { 1572 num_registers_ = register_index + 1; 1573 } 1574 1575 Register cached_register; 1576 RegisterState register_state = GetRegisterState(register_index); 1577 switch (register_state) { 1578 case STACKED: 1579 __ Str(source, register_location(register_index)); 1580 break; 1581 case CACHED_LSW: 1582 cached_register = GetCachedRegister(register_index); 1583 if (!source.Is(cached_register.W())) { 1584 __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits); 1585 } 1586 break; 1587 case CACHED_MSW: 1588 cached_register = GetCachedRegister(register_index); 1589 __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits); 1590 break; 1591 default: 1592 UNREACHABLE(); 1593 break; 1594 } 1595 } 1596 1597 1598 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) { 1599 Label skip_call; 1600 if (condition != al) __ B(&skip_call, NegateCondition(condition)); 1601 __ Bl(to); 1602 __ Bind(&skip_call); 1603 } 1604 1605 1606 void RegExpMacroAssemblerARM64::RestoreLinkRegister() { 1607 DCHECK(csp.Is(__ StackPointer())); 1608 __ Pop(lr, xzr); 1609 __ Add(lr, lr, Operand(masm_->CodeObject())); 1610 } 1611 1612 1613 void RegExpMacroAssemblerARM64::SaveLinkRegister() { 1614 DCHECK(csp.Is(__ StackPointer())); 1615 __ Sub(lr, lr, Operand(masm_->CodeObject())); 1616 __ Push(xzr, lr); 1617 } 1618 1619 1620 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) { 1621 DCHECK(register_index < (1<<30)); 1622 DCHECK(register_index >= kNumCachedRegisters); 1623 if (num_registers_ <= register_index) { 1624 num_registers_ = register_index + 1; 1625 } 1626 register_index -= kNumCachedRegisters; 1627 int offset = kFirstRegisterOnStack - register_index * kWRegSize; 1628 return MemOperand(frame_pointer(), offset); 1629 } 1630 1631 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index, 1632 Register scratch) { 1633 DCHECK(register_index < (1<<30)); 1634 DCHECK(register_index < num_saved_registers_); 1635 DCHECK(register_index >= kNumCachedRegisters); 1636 DCHECK_EQ(register_index % 2, 0); 1637 register_index -= kNumCachedRegisters; 1638 int offset = kFirstCaptureOnStack - register_index * kWRegSize; 1639 // capture_location is used with Stp instructions to load/store 2 registers. 1640 // The immediate field in the encoding is limited to 7 bits (signed). 1641 if (is_int7(offset)) { 1642 return MemOperand(frame_pointer(), offset); 1643 } else { 1644 __ Add(scratch, frame_pointer(), offset); 1645 return MemOperand(scratch); 1646 } 1647 } 1648 1649 void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset, 1650 int characters) { 1651 Register offset = current_input_offset(); 1652 1653 // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU 1654 // and the operating system running on the target allow it. 1655 // If unaligned load/stores are not supported then this function must only 1656 // be used to load a single character at a time. 1657 1658 // ARMv8 supports unaligned accesses but V8 or the kernel can decide to 1659 // disable it. 1660 // TODO(pielan): See whether or not we should disable unaligned accesses. 1661 if (!CanReadUnaligned()) { 1662 DCHECK(characters == 1); 1663 } 1664 1665 if (cp_offset != 0) { 1666 if (masm_->emit_debug_code()) { 1667 __ Mov(x10, cp_offset * char_size()); 1668 __ Add(x10, x10, Operand(current_input_offset(), SXTW)); 1669 __ Cmp(x10, Operand(w10, SXTW)); 1670 // The offset needs to fit in a W register. 1671 __ Check(eq, kOffsetOutOfRange); 1672 } else { 1673 __ Add(w10, current_input_offset(), cp_offset * char_size()); 1674 } 1675 offset = w10; 1676 } 1677 1678 if (mode_ == LATIN1) { 1679 if (characters == 4) { 1680 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW)); 1681 } else if (characters == 2) { 1682 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW)); 1683 } else { 1684 DCHECK(characters == 1); 1685 __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW)); 1686 } 1687 } else { 1688 DCHECK(mode_ == UC16); 1689 if (characters == 2) { 1690 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW)); 1691 } else { 1692 DCHECK(characters == 1); 1693 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW)); 1694 } 1695 } 1696 } 1697 1698 #endif // V8_INTERPRETED_REGEXP 1699 1700 }} // namespace v8::internal 1701 1702 #endif // V8_TARGET_ARCH_ARM64 1703