1 // Copyright 2014 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "src/runtime/runtime-utils.h" 6 7 #include "src/arguments.h" 8 #include "src/conversions-inl.h" 9 #include "src/isolate-inl.h" 10 #include "src/messages.h" 11 #include "src/regexp/jsregexp-inl.h" 12 #include "src/regexp/jsregexp.h" 13 #include "src/regexp/regexp-utils.h" 14 #include "src/string-builder.h" 15 #include "src/string-search.h" 16 17 namespace v8 { 18 namespace internal { 19 20 class CompiledReplacement { 21 public: 22 explicit CompiledReplacement(Zone* zone) 23 : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {} 24 25 // Return whether the replacement is simple. 26 bool Compile(Handle<String> replacement, int capture_count, 27 int subject_length); 28 29 // Use Apply only if Compile returned false. 30 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to, 31 int32_t* match); 32 33 // Number of distinct parts of the replacement pattern. 34 int parts() { return parts_.length(); } 35 36 Zone* zone() const { return zone_; } 37 38 private: 39 enum PartType { 40 SUBJECT_PREFIX = 1, 41 SUBJECT_SUFFIX, 42 SUBJECT_CAPTURE, 43 REPLACEMENT_SUBSTRING, 44 REPLACEMENT_STRING, 45 NUMBER_OF_PART_TYPES 46 }; 47 48 struct ReplacementPart { 49 static inline ReplacementPart SubjectMatch() { 50 return ReplacementPart(SUBJECT_CAPTURE, 0); 51 } 52 static inline ReplacementPart SubjectCapture(int capture_index) { 53 return ReplacementPart(SUBJECT_CAPTURE, capture_index); 54 } 55 static inline ReplacementPart SubjectPrefix() { 56 return ReplacementPart(SUBJECT_PREFIX, 0); 57 } 58 static inline ReplacementPart SubjectSuffix(int subject_length) { 59 return ReplacementPart(SUBJECT_SUFFIX, subject_length); 60 } 61 static inline ReplacementPart ReplacementString() { 62 return ReplacementPart(REPLACEMENT_STRING, 0); 63 } 64 static inline ReplacementPart ReplacementSubString(int from, int to) { 65 DCHECK(from >= 0); 66 DCHECK(to > from); 67 return ReplacementPart(-from, to); 68 } 69 70 // If tag <= 0 then it is the negation of a start index of a substring of 71 // the replacement pattern, otherwise it's a value from PartType. 72 ReplacementPart(int tag, int data) : tag(tag), data(data) { 73 // Must be non-positive or a PartType value. 74 DCHECK(tag < NUMBER_OF_PART_TYPES); 75 } 76 // Either a value of PartType or a non-positive number that is 77 // the negation of an index into the replacement string. 78 int tag; 79 // The data value's interpretation depends on the value of tag: 80 // tag == SUBJECT_PREFIX || 81 // tag == SUBJECT_SUFFIX: data is unused. 82 // tag == SUBJECT_CAPTURE: data is the number of the capture. 83 // tag == REPLACEMENT_SUBSTRING || 84 // tag == REPLACEMENT_STRING: data is index into array of substrings 85 // of the replacement string. 86 // tag <= 0: Temporary representation of the substring of the replacement 87 // string ranging over -tag .. data. 88 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the 89 // substring objects. 90 int data; 91 }; 92 93 template <typename Char> 94 bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts, 95 Vector<Char> characters, int capture_count, 96 int subject_length, Zone* zone) { 97 int length = characters.length(); 98 int last = 0; 99 for (int i = 0; i < length; i++) { 100 Char c = characters[i]; 101 if (c == '$') { 102 int next_index = i + 1; 103 if (next_index == length) { // No next character! 104 break; 105 } 106 Char c2 = characters[next_index]; 107 switch (c2) { 108 case '$': 109 if (i > last) { 110 // There is a substring before. Include the first "$". 111 parts->Add( 112 ReplacementPart::ReplacementSubString(last, next_index), 113 zone); 114 last = next_index + 1; // Continue after the second "$". 115 } else { 116 // Let the next substring start with the second "$". 117 last = next_index; 118 } 119 i = next_index; 120 break; 121 case '`': 122 if (i > last) { 123 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); 124 } 125 parts->Add(ReplacementPart::SubjectPrefix(), zone); 126 i = next_index; 127 last = i + 1; 128 break; 129 case '\'': 130 if (i > last) { 131 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); 132 } 133 parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone); 134 i = next_index; 135 last = i + 1; 136 break; 137 case '&': 138 if (i > last) { 139 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); 140 } 141 parts->Add(ReplacementPart::SubjectMatch(), zone); 142 i = next_index; 143 last = i + 1; 144 break; 145 case '0': 146 case '1': 147 case '2': 148 case '3': 149 case '4': 150 case '5': 151 case '6': 152 case '7': 153 case '8': 154 case '9': { 155 int capture_ref = c2 - '0'; 156 if (capture_ref > capture_count) { 157 i = next_index; 158 continue; 159 } 160 int second_digit_index = next_index + 1; 161 if (second_digit_index < length) { 162 // Peek ahead to see if we have two digits. 163 Char c3 = characters[second_digit_index]; 164 if ('0' <= c3 && c3 <= '9') { // Double digits. 165 int double_digit_ref = capture_ref * 10 + c3 - '0'; 166 if (double_digit_ref <= capture_count) { 167 next_index = second_digit_index; 168 capture_ref = double_digit_ref; 169 } 170 } 171 } 172 if (capture_ref > 0) { 173 if (i > last) { 174 parts->Add(ReplacementPart::ReplacementSubString(last, i), 175 zone); 176 } 177 DCHECK(capture_ref <= capture_count); 178 parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone); 179 last = next_index + 1; 180 } 181 i = next_index; 182 break; 183 } 184 default: 185 i = next_index; 186 break; 187 } 188 } 189 } 190 if (length > last) { 191 if (last == 0) { 192 // Replacement is simple. Do not use Apply to do the replacement. 193 return true; 194 } else { 195 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone); 196 } 197 } 198 return false; 199 } 200 201 ZoneList<ReplacementPart> parts_; 202 ZoneList<Handle<String> > replacement_substrings_; 203 Zone* zone_; 204 }; 205 206 207 bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count, 208 int subject_length) { 209 { 210 DisallowHeapAllocation no_gc; 211 String::FlatContent content = replacement->GetFlatContent(); 212 DCHECK(content.IsFlat()); 213 bool simple = false; 214 if (content.IsOneByte()) { 215 simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(), 216 capture_count, subject_length, zone()); 217 } else { 218 DCHECK(content.IsTwoByte()); 219 simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(), 220 capture_count, subject_length, zone()); 221 } 222 if (simple) return true; 223 } 224 225 Isolate* isolate = replacement->GetIsolate(); 226 // Find substrings of replacement string and create them as String objects. 227 int substring_index = 0; 228 for (int i = 0, n = parts_.length(); i < n; i++) { 229 int tag = parts_[i].tag; 230 if (tag <= 0) { // A replacement string slice. 231 int from = -tag; 232 int to = parts_[i].data; 233 replacement_substrings_.Add( 234 isolate->factory()->NewSubString(replacement, from, to), zone()); 235 parts_[i].tag = REPLACEMENT_SUBSTRING; 236 parts_[i].data = substring_index; 237 substring_index++; 238 } else if (tag == REPLACEMENT_STRING) { 239 replacement_substrings_.Add(replacement, zone()); 240 parts_[i].data = substring_index; 241 substring_index++; 242 } 243 } 244 return false; 245 } 246 247 248 void CompiledReplacement::Apply(ReplacementStringBuilder* builder, 249 int match_from, int match_to, int32_t* match) { 250 DCHECK_LT(0, parts_.length()); 251 for (int i = 0, n = parts_.length(); i < n; i++) { 252 ReplacementPart part = parts_[i]; 253 switch (part.tag) { 254 case SUBJECT_PREFIX: 255 if (match_from > 0) builder->AddSubjectSlice(0, match_from); 256 break; 257 case SUBJECT_SUFFIX: { 258 int subject_length = part.data; 259 if (match_to < subject_length) { 260 builder->AddSubjectSlice(match_to, subject_length); 261 } 262 break; 263 } 264 case SUBJECT_CAPTURE: { 265 int capture = part.data; 266 int from = match[capture * 2]; 267 int to = match[capture * 2 + 1]; 268 if (from >= 0 && to > from) { 269 builder->AddSubjectSlice(from, to); 270 } 271 break; 272 } 273 case REPLACEMENT_SUBSTRING: 274 case REPLACEMENT_STRING: 275 builder->AddString(replacement_substrings_[part.data]); 276 break; 277 default: 278 UNREACHABLE(); 279 } 280 } 281 } 282 283 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern, 284 List<int>* indices, unsigned int limit) { 285 DCHECK(limit > 0); 286 // Collect indices of pattern in subject using memchr. 287 // Stop after finding at most limit values. 288 const uint8_t* subject_start = subject.start(); 289 const uint8_t* subject_end = subject_start + subject.length(); 290 const uint8_t* pos = subject_start; 291 while (limit > 0) { 292 pos = reinterpret_cast<const uint8_t*>( 293 memchr(pos, pattern, subject_end - pos)); 294 if (pos == NULL) return; 295 indices->Add(static_cast<int>(pos - subject_start)); 296 pos++; 297 limit--; 298 } 299 } 300 301 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern, 302 List<int>* indices, unsigned int limit) { 303 DCHECK(limit > 0); 304 const uc16* subject_start = subject.start(); 305 const uc16* subject_end = subject_start + subject.length(); 306 for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) { 307 if (*pos == pattern) { 308 indices->Add(static_cast<int>(pos - subject_start)); 309 limit--; 310 } 311 } 312 } 313 314 template <typename SubjectChar, typename PatternChar> 315 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject, 316 Vector<const PatternChar> pattern, List<int>* indices, 317 unsigned int limit) { 318 DCHECK(limit > 0); 319 // Collect indices of pattern in subject. 320 // Stop after finding at most limit values. 321 int pattern_length = pattern.length(); 322 int index = 0; 323 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); 324 while (limit > 0) { 325 index = search.Search(subject, index); 326 if (index < 0) return; 327 indices->Add(index); 328 index += pattern_length; 329 limit--; 330 } 331 } 332 333 void FindStringIndicesDispatch(Isolate* isolate, String* subject, 334 String* pattern, List<int>* indices, 335 unsigned int limit) { 336 { 337 DisallowHeapAllocation no_gc; 338 String::FlatContent subject_content = subject->GetFlatContent(); 339 String::FlatContent pattern_content = pattern->GetFlatContent(); 340 DCHECK(subject_content.IsFlat()); 341 DCHECK(pattern_content.IsFlat()); 342 if (subject_content.IsOneByte()) { 343 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector(); 344 if (pattern_content.IsOneByte()) { 345 Vector<const uint8_t> pattern_vector = 346 pattern_content.ToOneByteVector(); 347 if (pattern_vector.length() == 1) { 348 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices, 349 limit); 350 } else { 351 FindStringIndices(isolate, subject_vector, pattern_vector, indices, 352 limit); 353 } 354 } else { 355 FindStringIndices(isolate, subject_vector, 356 pattern_content.ToUC16Vector(), indices, limit); 357 } 358 } else { 359 Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); 360 if (pattern_content.IsOneByte()) { 361 Vector<const uint8_t> pattern_vector = 362 pattern_content.ToOneByteVector(); 363 if (pattern_vector.length() == 1) { 364 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, 365 limit); 366 } else { 367 FindStringIndices(isolate, subject_vector, pattern_vector, indices, 368 limit); 369 } 370 } else { 371 Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector(); 372 if (pattern_vector.length() == 1) { 373 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, 374 limit); 375 } else { 376 FindStringIndices(isolate, subject_vector, pattern_vector, indices, 377 limit); 378 } 379 } 380 } 381 } 382 } 383 384 namespace { 385 List<int>* GetRewoundRegexpIndicesList(Isolate* isolate) { 386 List<int>* list = isolate->regexp_indices(); 387 list->Rewind(0); 388 return list; 389 } 390 391 void TruncateRegexpIndicesList(Isolate* isolate) { 392 // Same size as smallest zone segment, preserving behavior from the 393 // runtime zone. 394 static const int kMaxRegexpIndicesListCapacity = 8 * KB; 395 if (isolate->regexp_indices()->capacity() > kMaxRegexpIndicesListCapacity) { 396 isolate->regexp_indices()->Clear(); // Throw away backing storage 397 } 398 } 399 } // namespace 400 401 template <typename ResultSeqString> 402 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString( 403 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp, 404 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) { 405 DCHECK(subject->IsFlat()); 406 DCHECK(replacement->IsFlat()); 407 408 List<int>* indices = GetRewoundRegexpIndicesList(isolate); 409 410 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); 411 String* pattern = 412 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); 413 int subject_len = subject->length(); 414 int pattern_len = pattern->length(); 415 int replacement_len = replacement->length(); 416 417 FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xffffffff); 418 419 int matches = indices->length(); 420 if (matches == 0) return *subject; 421 422 // Detect integer overflow. 423 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) - 424 static_cast<int64_t>(pattern_len)) * 425 static_cast<int64_t>(matches) + 426 static_cast<int64_t>(subject_len); 427 int result_len; 428 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) { 429 STATIC_ASSERT(String::kMaxLength < kMaxInt); 430 result_len = kMaxInt; // Provoke exception. 431 } else { 432 result_len = static_cast<int>(result_len_64); 433 } 434 if (result_len == 0) { 435 return isolate->heap()->empty_string(); 436 } 437 438 int subject_pos = 0; 439 int result_pos = 0; 440 441 MaybeHandle<SeqString> maybe_res; 442 if (ResultSeqString::kHasOneByteEncoding) { 443 maybe_res = isolate->factory()->NewRawOneByteString(result_len); 444 } else { 445 maybe_res = isolate->factory()->NewRawTwoByteString(result_len); 446 } 447 Handle<SeqString> untyped_res; 448 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res); 449 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res); 450 451 for (int i = 0; i < matches; i++) { 452 // Copy non-matched subject content. 453 if (subject_pos < indices->at(i)) { 454 String::WriteToFlat(*subject, result->GetChars() + result_pos, 455 subject_pos, indices->at(i)); 456 result_pos += indices->at(i) - subject_pos; 457 } 458 459 // Replace match. 460 if (replacement_len > 0) { 461 String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0, 462 replacement_len); 463 result_pos += replacement_len; 464 } 465 466 subject_pos = indices->at(i) + pattern_len; 467 } 468 // Add remaining subject content at the end. 469 if (subject_pos < subject_len) { 470 String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos, 471 subject_len); 472 } 473 474 int32_t match_indices[] = {indices->at(matches - 1), 475 indices->at(matches - 1) + pattern_len}; 476 RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices); 477 478 TruncateRegexpIndicesList(isolate); 479 480 return *result; 481 } 482 483 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString( 484 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, 485 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) { 486 DCHECK(subject->IsFlat()); 487 DCHECK(replacement->IsFlat()); 488 489 int capture_count = regexp->CaptureCount(); 490 int subject_length = subject->length(); 491 492 // CompiledReplacement uses zone allocation. 493 Zone zone(isolate->allocator(), ZONE_NAME); 494 CompiledReplacement compiled_replacement(&zone); 495 bool simple_replace = 496 compiled_replacement.Compile(replacement, capture_count, subject_length); 497 498 // Shortcut for simple non-regexp global replacements 499 if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) { 500 if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) { 501 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( 502 isolate, subject, regexp, replacement, last_match_info); 503 } else { 504 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( 505 isolate, subject, regexp, replacement, last_match_info); 506 } 507 } 508 509 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate); 510 if (global_cache.HasException()) return isolate->heap()->exception(); 511 512 int32_t* current_match = global_cache.FetchNext(); 513 if (current_match == NULL) { 514 if (global_cache.HasException()) return isolate->heap()->exception(); 515 return *subject; 516 } 517 518 // Guessing the number of parts that the final result string is built 519 // from. Global regexps can match any number of times, so we guess 520 // conservatively. 521 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1; 522 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts); 523 524 // Number of parts added by compiled replacement plus preceeding 525 // string and possibly suffix after last match. It is possible for 526 // all components to use two elements when encoded as two smis. 527 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2); 528 529 int prev = 0; 530 531 do { 532 builder.EnsureCapacity(parts_added_per_loop); 533 534 int start = current_match[0]; 535 int end = current_match[1]; 536 537 if (prev < start) { 538 builder.AddSubjectSlice(prev, start); 539 } 540 541 if (simple_replace) { 542 builder.AddString(replacement); 543 } else { 544 compiled_replacement.Apply(&builder, start, end, current_match); 545 } 546 prev = end; 547 548 current_match = global_cache.FetchNext(); 549 } while (current_match != NULL); 550 551 if (global_cache.HasException()) return isolate->heap()->exception(); 552 553 if (prev < subject_length) { 554 builder.EnsureCapacity(2); 555 builder.AddSubjectSlice(prev, subject_length); 556 } 557 558 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count, 559 global_cache.LastSuccessfulMatch()); 560 561 RETURN_RESULT_OR_FAILURE(isolate, builder.ToString()); 562 } 563 564 template <typename ResultSeqString> 565 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString( 566 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, 567 Handle<RegExpMatchInfo> last_match_info) { 568 DCHECK(subject->IsFlat()); 569 570 // Shortcut for simple non-regexp global replacements 571 if (regexp->TypeTag() == JSRegExp::ATOM) { 572 Handle<String> empty_string = isolate->factory()->empty_string(); 573 if (subject->IsOneByteRepresentation()) { 574 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( 575 isolate, subject, regexp, empty_string, last_match_info); 576 } else { 577 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( 578 isolate, subject, regexp, empty_string, last_match_info); 579 } 580 } 581 582 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate); 583 if (global_cache.HasException()) return isolate->heap()->exception(); 584 585 int32_t* current_match = global_cache.FetchNext(); 586 if (current_match == NULL) { 587 if (global_cache.HasException()) return isolate->heap()->exception(); 588 return *subject; 589 } 590 591 int start = current_match[0]; 592 int end = current_match[1]; 593 int capture_count = regexp->CaptureCount(); 594 int subject_length = subject->length(); 595 596 int new_length = subject_length - (end - start); 597 if (new_length == 0) return isolate->heap()->empty_string(); 598 599 Handle<ResultSeqString> answer; 600 if (ResultSeqString::kHasOneByteEncoding) { 601 answer = Handle<ResultSeqString>::cast( 602 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked()); 603 } else { 604 answer = Handle<ResultSeqString>::cast( 605 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked()); 606 } 607 608 int prev = 0; 609 int position = 0; 610 611 do { 612 start = current_match[0]; 613 end = current_match[1]; 614 if (prev < start) { 615 // Add substring subject[prev;start] to answer string. 616 String::WriteToFlat(*subject, answer->GetChars() + position, prev, start); 617 position += start - prev; 618 } 619 prev = end; 620 621 current_match = global_cache.FetchNext(); 622 } while (current_match != NULL); 623 624 if (global_cache.HasException()) return isolate->heap()->exception(); 625 626 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count, 627 global_cache.LastSuccessfulMatch()); 628 629 if (prev < subject_length) { 630 // Add substring subject[prev;length] to answer string. 631 String::WriteToFlat(*subject, answer->GetChars() + position, prev, 632 subject_length); 633 position += subject_length - prev; 634 } 635 636 if (position == 0) return isolate->heap()->empty_string(); 637 638 // Shorten string and fill 639 int string_size = ResultSeqString::SizeFor(position); 640 int allocated_string_size = ResultSeqString::SizeFor(new_length); 641 int delta = allocated_string_size - string_size; 642 643 answer->set_length(position); 644 if (delta == 0) return *answer; 645 646 Address end_of_string = answer->address() + string_size; 647 Heap* heap = isolate->heap(); 648 649 // The trimming is performed on a newly allocated object, which is on a 650 // fresly allocated page or on an already swept page. Hence, the sweeper 651 // thread can not get confused with the filler creation. No synchronization 652 // needed. 653 // TODO(hpayer): We should shrink the large object page if the size 654 // of the object changed significantly. 655 if (!heap->lo_space()->Contains(*answer)) { 656 heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo); 657 } 658 heap->AdjustLiveBytes(*answer, -delta); 659 return *answer; 660 } 661 662 namespace { 663 664 Object* StringReplaceGlobalRegExpWithStringHelper( 665 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, 666 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) { 667 CHECK(regexp->GetFlags() & JSRegExp::kGlobal); 668 669 subject = String::Flatten(subject); 670 671 if (replacement->length() == 0) { 672 if (subject->HasOnlyOneByteChars()) { 673 return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>( 674 isolate, subject, regexp, last_match_info); 675 } else { 676 return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>( 677 isolate, subject, regexp, last_match_info); 678 } 679 } 680 681 replacement = String::Flatten(replacement); 682 683 return StringReplaceGlobalRegExpWithString(isolate, subject, regexp, 684 replacement, last_match_info); 685 } 686 687 } // namespace 688 689 RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) { 690 HandleScope scope(isolate); 691 DCHECK_EQ(4, args.length()); 692 693 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); 694 CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2); 695 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); 696 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3); 697 698 return StringReplaceGlobalRegExpWithStringHelper( 699 isolate, regexp, subject, replacement, last_match_info); 700 } 701 702 RUNTIME_FUNCTION(Runtime_StringSplit) { 703 HandleScope handle_scope(isolate); 704 DCHECK_EQ(3, args.length()); 705 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); 706 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1); 707 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); 708 CHECK(limit > 0); 709 710 int subject_length = subject->length(); 711 int pattern_length = pattern->length(); 712 CHECK(pattern_length > 0); 713 714 if (limit == 0xffffffffu) { 715 FixedArray* last_match_cache_unused; 716 Handle<Object> cached_answer( 717 RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern, 718 &last_match_cache_unused, 719 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS), 720 isolate); 721 if (*cached_answer != Smi::kZero) { 722 // The cache FixedArray is a COW-array and can therefore be reused. 723 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements( 724 Handle<FixedArray>::cast(cached_answer)); 725 return *result; 726 } 727 } 728 729 // The limit can be very large (0xffffffffu), but since the pattern 730 // isn't empty, we can never create more parts than ~half the length 731 // of the subject. 732 733 subject = String::Flatten(subject); 734 pattern = String::Flatten(pattern); 735 736 List<int>* indices = GetRewoundRegexpIndicesList(isolate); 737 738 FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit); 739 740 if (static_cast<uint32_t>(indices->length()) < limit) { 741 indices->Add(subject_length); 742 } 743 744 // The list indices now contains the end of each part to create. 745 746 // Create JSArray of substrings separated by separator. 747 int part_count = indices->length(); 748 749 Handle<JSArray> result = 750 isolate->factory()->NewJSArray(FAST_ELEMENTS, part_count, part_count, 751 INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE); 752 753 DCHECK(result->HasFastObjectElements()); 754 755 Handle<FixedArray> elements(FixedArray::cast(result->elements())); 756 757 if (part_count == 1 && indices->at(0) == subject_length) { 758 elements->set(0, *subject); 759 } else { 760 int part_start = 0; 761 FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, { 762 int part_end = indices->at(i); 763 Handle<String> substring = 764 isolate->factory()->NewProperSubString(subject, part_start, part_end); 765 elements->set(i, *substring); 766 part_start = part_end + pattern_length; 767 }); 768 } 769 770 if (limit == 0xffffffffu) { 771 if (result->HasFastObjectElements()) { 772 RegExpResultsCache::Enter(isolate, subject, pattern, elements, 773 isolate->factory()->empty_fixed_array(), 774 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS); 775 } 776 } 777 778 TruncateRegexpIndicesList(isolate); 779 780 return *result; 781 } 782 783 // ES##sec-regexpcreate 784 // RegExpCreate ( P, F ) 785 RUNTIME_FUNCTION(Runtime_RegExpCreate) { 786 HandleScope scope(isolate); 787 DCHECK_EQ(1, args.length()); 788 CONVERT_ARG_HANDLE_CHECKED(Object, source_object, 0); 789 790 Handle<String> source; 791 if (source_object->IsUndefined(isolate)) { 792 source = isolate->factory()->empty_string(); 793 } else { 794 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 795 isolate, source, Object::ToString(isolate, source_object)); 796 } 797 798 Handle<Map> map(isolate->regexp_function()->initial_map()); 799 Handle<JSRegExp> regexp = 800 Handle<JSRegExp>::cast(isolate->factory()->NewJSObjectFromMap(map)); 801 802 JSRegExp::Flags flags = JSRegExp::kNone; 803 804 RETURN_FAILURE_ON_EXCEPTION(isolate, 805 JSRegExp::Initialize(regexp, source, flags)); 806 807 return *regexp; 808 } 809 810 RUNTIME_FUNCTION(Runtime_RegExpExec) { 811 HandleScope scope(isolate); 812 DCHECK_EQ(4, args.length()); 813 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); 814 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); 815 CONVERT_INT32_ARG_CHECKED(index, 2); 816 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3); 817 // Due to the way the JS calls are constructed this must be less than the 818 // length of a string, i.e. it is always a Smi. We check anyway for security. 819 CHECK(index >= 0); 820 CHECK(index <= subject->length()); 821 isolate->counters()->regexp_entry_runtime()->Increment(); 822 RETURN_RESULT_OR_FAILURE( 823 isolate, RegExpImpl::Exec(regexp, subject, index, last_match_info)); 824 } 825 826 RUNTIME_FUNCTION(Runtime_RegExpInternalReplace) { 827 HandleScope scope(isolate); 828 DCHECK_EQ(3, args.length()); 829 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); 830 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); 831 CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2); 832 833 Handle<RegExpMatchInfo> internal_match_info = 834 isolate->regexp_internal_match_info(); 835 836 return StringReplaceGlobalRegExpWithStringHelper( 837 isolate, regexp, subject, replacement, internal_match_info); 838 } 839 840 namespace { 841 842 class MatchInfoBackedMatch : public String::Match { 843 public: 844 MatchInfoBackedMatch(Isolate* isolate, Handle<String> subject, 845 Handle<RegExpMatchInfo> match_info) 846 : isolate_(isolate), match_info_(match_info) { 847 subject_ = String::Flatten(subject); 848 } 849 850 Handle<String> GetMatch() override { 851 return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr); 852 } 853 854 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override { 855 Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter( 856 isolate_, match_info_, i, capture_exists); 857 return (*capture_exists) ? Object::ToString(isolate_, capture_obj) 858 : isolate_->factory()->empty_string(); 859 } 860 861 Handle<String> GetPrefix() override { 862 const int match_start = match_info_->Capture(0); 863 return isolate_->factory()->NewSubString(subject_, 0, match_start); 864 } 865 866 Handle<String> GetSuffix() override { 867 const int match_end = match_info_->Capture(1); 868 return isolate_->factory()->NewSubString(subject_, match_end, 869 subject_->length()); 870 } 871 872 int CaptureCount() override { 873 return match_info_->NumberOfCaptureRegisters() / 2; 874 } 875 876 virtual ~MatchInfoBackedMatch() {} 877 878 private: 879 Isolate* isolate_; 880 Handle<String> subject_; 881 Handle<RegExpMatchInfo> match_info_; 882 }; 883 884 class VectorBackedMatch : public String::Match { 885 public: 886 VectorBackedMatch(Isolate* isolate, Handle<String> subject, 887 Handle<String> match, int match_position, 888 ZoneVector<Handle<Object>>* captures) 889 : isolate_(isolate), 890 match_(match), 891 match_position_(match_position), 892 captures_(captures) { 893 subject_ = String::Flatten(subject); 894 } 895 896 Handle<String> GetMatch() override { return match_; } 897 898 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override { 899 Handle<Object> capture_obj = captures_->at(i); 900 if (capture_obj->IsUndefined(isolate_)) { 901 *capture_exists = false; 902 return isolate_->factory()->empty_string(); 903 } 904 *capture_exists = true; 905 return Object::ToString(isolate_, capture_obj); 906 } 907 908 Handle<String> GetPrefix() override { 909 return isolate_->factory()->NewSubString(subject_, 0, match_position_); 910 } 911 912 Handle<String> GetSuffix() override { 913 const int match_end_position = match_position_ + match_->length(); 914 return isolate_->factory()->NewSubString(subject_, match_end_position, 915 subject_->length()); 916 } 917 918 int CaptureCount() override { return static_cast<int>(captures_->size()); } 919 920 virtual ~VectorBackedMatch() {} 921 922 private: 923 Isolate* isolate_; 924 Handle<String> subject_; 925 Handle<String> match_; 926 const int match_position_; 927 ZoneVector<Handle<Object>>* captures_; 928 }; 929 930 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain 931 // separate last match info. See comment on that function. 932 template <bool has_capture> 933 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject, 934 Handle<JSRegExp> regexp, 935 Handle<RegExpMatchInfo> last_match_array, 936 Handle<JSArray> result_array) { 937 DCHECK(subject->IsFlat()); 938 DCHECK_NE(has_capture, regexp->CaptureCount() == 0); 939 940 int capture_count = regexp->CaptureCount(); 941 int subject_length = subject->length(); 942 943 static const int kMinLengthToCache = 0x1000; 944 945 if (subject_length > kMinLengthToCache) { 946 FixedArray* last_match_cache; 947 Object* cached_answer = RegExpResultsCache::Lookup( 948 isolate->heap(), *subject, regexp->data(), &last_match_cache, 949 RegExpResultsCache::REGEXP_MULTIPLE_INDICES); 950 if (cached_answer->IsFixedArray()) { 951 int capture_registers = (capture_count + 1) * 2; 952 int32_t* last_match = NewArray<int32_t>(capture_registers); 953 for (int i = 0; i < capture_registers; i++) { 954 last_match[i] = Smi::cast(last_match_cache->get(i))->value(); 955 } 956 Handle<FixedArray> cached_fixed_array = 957 Handle<FixedArray>(FixedArray::cast(cached_answer)); 958 // The cache FixedArray is a COW-array and we need to return a copy. 959 Handle<FixedArray> copied_fixed_array = 960 isolate->factory()->CopyFixedArrayWithMap( 961 cached_fixed_array, isolate->factory()->fixed_array_map()); 962 JSArray::SetContent(result_array, copied_fixed_array); 963 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count, 964 last_match); 965 DeleteArray(last_match); 966 return *result_array; 967 } 968 } 969 970 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate); 971 if (global_cache.HasException()) return isolate->heap()->exception(); 972 973 // Ensured in Runtime_RegExpExecMultiple. 974 DCHECK(result_array->HasFastObjectElements()); 975 Handle<FixedArray> result_elements( 976 FixedArray::cast(result_array->elements())); 977 if (result_elements->length() < 16) { 978 result_elements = isolate->factory()->NewFixedArrayWithHoles(16); 979 } 980 981 FixedArrayBuilder builder(result_elements); 982 983 // Position to search from. 984 int match_start = -1; 985 int match_end = 0; 986 bool first = true; 987 988 // Two smis before and after the match, for very long strings. 989 static const int kMaxBuilderEntriesPerRegExpMatch = 5; 990 991 while (true) { 992 int32_t* current_match = global_cache.FetchNext(); 993 if (current_match == NULL) break; 994 match_start = current_match[0]; 995 builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); 996 if (match_end < match_start) { 997 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, 998 match_start); 999 } 1000 match_end = current_match[1]; 1001 { 1002 // Avoid accumulating new handles inside loop. 1003 HandleScope temp_scope(isolate); 1004 Handle<String> match; 1005 if (!first) { 1006 match = isolate->factory()->NewProperSubString(subject, match_start, 1007 match_end); 1008 } else { 1009 match = 1010 isolate->factory()->NewSubString(subject, match_start, match_end); 1011 first = false; 1012 } 1013 1014 if (has_capture) { 1015 // Arguments array to replace function is match, captures, index and 1016 // subject, i.e., 3 + capture count in total. 1017 Handle<FixedArray> elements = 1018 isolate->factory()->NewFixedArray(3 + capture_count); 1019 1020 elements->set(0, *match); 1021 for (int i = 1; i <= capture_count; i++) { 1022 int start = current_match[i * 2]; 1023 if (start >= 0) { 1024 int end = current_match[i * 2 + 1]; 1025 DCHECK(start <= end); 1026 Handle<String> substring = 1027 isolate->factory()->NewSubString(subject, start, end); 1028 elements->set(i, *substring); 1029 } else { 1030 DCHECK(current_match[i * 2 + 1] < 0); 1031 elements->set(i, isolate->heap()->undefined_value()); 1032 } 1033 } 1034 elements->set(capture_count + 1, Smi::FromInt(match_start)); 1035 elements->set(capture_count + 2, *subject); 1036 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements)); 1037 } else { 1038 builder.Add(*match); 1039 } 1040 } 1041 } 1042 1043 if (global_cache.HasException()) return isolate->heap()->exception(); 1044 1045 if (match_start >= 0) { 1046 // Finished matching, with at least one match. 1047 if (match_end < subject_length) { 1048 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, 1049 subject_length); 1050 } 1051 1052 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count, 1053 global_cache.LastSuccessfulMatch()); 1054 1055 if (subject_length > kMinLengthToCache) { 1056 // Store the last successful match into the array for caching. 1057 // TODO(yangguo): do not expose last match to JS and simplify caching. 1058 int capture_registers = (capture_count + 1) * 2; 1059 Handle<FixedArray> last_match_cache = 1060 isolate->factory()->NewFixedArray(capture_registers); 1061 int32_t* last_match = global_cache.LastSuccessfulMatch(); 1062 for (int i = 0; i < capture_registers; i++) { 1063 last_match_cache->set(i, Smi::FromInt(last_match[i])); 1064 } 1065 Handle<FixedArray> result_fixed_array = builder.array(); 1066 result_fixed_array->Shrink(builder.length()); 1067 // Cache the result and copy the FixedArray into a COW array. 1068 Handle<FixedArray> copied_fixed_array = 1069 isolate->factory()->CopyFixedArrayWithMap( 1070 result_fixed_array, isolate->factory()->fixed_array_map()); 1071 RegExpResultsCache::Enter( 1072 isolate, subject, handle(regexp->data(), isolate), copied_fixed_array, 1073 last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES); 1074 } 1075 return *builder.ToJSArray(result_array); 1076 } else { 1077 return isolate->heap()->null_value(); // No matches at all. 1078 } 1079 } 1080 1081 MUST_USE_RESULT MaybeHandle<String> StringReplaceNonGlobalRegExpWithFunction( 1082 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, 1083 Handle<Object> replace_obj) { 1084 Factory* factory = isolate->factory(); 1085 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info(); 1086 1087 const int flags = regexp->GetFlags(); 1088 1089 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); 1090 DCHECK_EQ(flags & JSRegExp::kGlobal, 0); 1091 1092 // TODO(jgruber): This should be an easy port to CSA with massive payback. 1093 1094 const bool sticky = (flags & JSRegExp::kSticky) != 0; 1095 uint32_t last_index = 0; 1096 if (sticky) { 1097 Handle<Object> last_index_obj(regexp->LastIndex(), isolate); 1098 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj, 1099 Object::ToLength(isolate, last_index_obj), 1100 String); 1101 last_index = PositiveNumberToUint32(*last_index_obj); 1102 1103 if (static_cast<int>(last_index) > subject->length()) last_index = 0; 1104 } 1105 1106 Handle<Object> match_indices_obj; 1107 ASSIGN_RETURN_ON_EXCEPTION( 1108 isolate, match_indices_obj, 1109 RegExpImpl::Exec(regexp, subject, last_index, last_match_info), String); 1110 1111 if (match_indices_obj->IsNull(isolate)) { 1112 if (sticky) regexp->SetLastIndex(0); 1113 return subject; 1114 } 1115 1116 Handle<RegExpMatchInfo> match_indices = 1117 Handle<RegExpMatchInfo>::cast(match_indices_obj); 1118 1119 const int index = match_indices->Capture(0); 1120 const int end_of_match = match_indices->Capture(1); 1121 1122 if (sticky) regexp->SetLastIndex(end_of_match); 1123 1124 IncrementalStringBuilder builder(isolate); 1125 builder.AppendString(factory->NewSubString(subject, 0, index)); 1126 1127 // Compute the parameter list consisting of the match, captures, index, 1128 // and subject for the replace function invocation. 1129 // The number of captures plus one for the match. 1130 const int m = match_indices->NumberOfCaptureRegisters() / 2; 1131 1132 const int argc = m + 2; 1133 ScopedVector<Handle<Object>> argv(argc); 1134 1135 for (int j = 0; j < m; j++) { 1136 bool ok; 1137 Handle<String> capture = 1138 RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok); 1139 if (ok) { 1140 argv[j] = capture; 1141 } else { 1142 argv[j] = factory->undefined_value(); 1143 } 1144 } 1145 1146 argv[argc - 2] = handle(Smi::FromInt(index), isolate); 1147 argv[argc - 1] = subject; 1148 1149 Handle<Object> replacement_obj; 1150 ASSIGN_RETURN_ON_EXCEPTION( 1151 isolate, replacement_obj, 1152 Execution::Call(isolate, replace_obj, factory->undefined_value(), argc, 1153 argv.start()), 1154 String); 1155 1156 Handle<String> replacement; 1157 ASSIGN_RETURN_ON_EXCEPTION( 1158 isolate, replacement, Object::ToString(isolate, replacement_obj), String); 1159 1160 builder.AppendString(replacement); 1161 builder.AppendString( 1162 factory->NewSubString(subject, end_of_match, subject->length())); 1163 1164 return builder.Finish(); 1165 } 1166 1167 // Legacy implementation of RegExp.prototype[Symbol.replace] which 1168 // doesn't properly call the underlying exec method. 1169 MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate, 1170 Handle<JSRegExp> regexp, 1171 Handle<String> string, 1172 Handle<Object> replace_obj) { 1173 Factory* factory = isolate->factory(); 1174 1175 const int flags = regexp->GetFlags(); 1176 const bool global = (flags & JSRegExp::kGlobal) != 0; 1177 const bool sticky = (flags & JSRegExp::kSticky) != 0; 1178 1179 // Functional fast-paths are dispatched directly by replace builtin. 1180 DCHECK(!replace_obj->IsCallable()); 1181 1182 Handle<String> replace; 1183 ASSIGN_RETURN_ON_EXCEPTION(isolate, replace, 1184 Object::ToString(isolate, replace_obj), String); 1185 replace = String::Flatten(replace); 1186 1187 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info(); 1188 1189 if (!global) { 1190 // Non-global regexp search, string replace. 1191 1192 uint32_t last_index = 0; 1193 if (sticky) { 1194 Handle<Object> last_index_obj(regexp->LastIndex(), isolate); 1195 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj, 1196 Object::ToLength(isolate, last_index_obj), 1197 String); 1198 last_index = PositiveNumberToUint32(*last_index_obj); 1199 1200 if (static_cast<int>(last_index) > string->length()) last_index = 0; 1201 } 1202 1203 Handle<Object> match_indices_obj; 1204 ASSIGN_RETURN_ON_EXCEPTION( 1205 isolate, match_indices_obj, 1206 RegExpImpl::Exec(regexp, string, last_index, last_match_info), String); 1207 1208 if (match_indices_obj->IsNull(isolate)) { 1209 if (sticky) regexp->SetLastIndex(0); 1210 return string; 1211 } 1212 1213 auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj); 1214 1215 const int start_index = match_indices->Capture(0); 1216 const int end_index = match_indices->Capture(1); 1217 1218 if (sticky) regexp->SetLastIndex(end_index); 1219 1220 IncrementalStringBuilder builder(isolate); 1221 builder.AppendString(factory->NewSubString(string, 0, start_index)); 1222 1223 if (replace->length() > 0) { 1224 MatchInfoBackedMatch m(isolate, string, match_indices); 1225 Handle<String> replacement; 1226 ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement, 1227 String::GetSubstitution(isolate, &m, replace), 1228 String); 1229 builder.AppendString(replacement); 1230 } 1231 1232 builder.AppendString( 1233 factory->NewSubString(string, end_index, string->length())); 1234 return builder.Finish(); 1235 } else { 1236 // Global regexp search, string replace. 1237 DCHECK(global); 1238 RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0), 1239 String); 1240 1241 if (replace->length() == 0) { 1242 if (string->HasOnlyOneByteChars()) { 1243 Object* result = 1244 StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>( 1245 isolate, string, regexp, last_match_info); 1246 return handle(String::cast(result), isolate); 1247 } else { 1248 Object* result = 1249 StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>( 1250 isolate, string, regexp, last_match_info); 1251 return handle(String::cast(result), isolate); 1252 } 1253 } 1254 1255 Object* result = StringReplaceGlobalRegExpWithString( 1256 isolate, string, regexp, replace, last_match_info); 1257 if (result->IsString()) { 1258 return handle(String::cast(result), isolate); 1259 } else { 1260 return MaybeHandle<String>(); 1261 } 1262 } 1263 1264 UNREACHABLE(); 1265 return MaybeHandle<String>(); 1266 } 1267 1268 } // namespace 1269 1270 // This is only called for StringReplaceGlobalRegExpWithFunction. 1271 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) { 1272 HandleScope handles(isolate); 1273 DCHECK_EQ(4, args.length()); 1274 1275 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); 1276 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); 1277 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2); 1278 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); 1279 CHECK(result_array->HasFastObjectElements()); 1280 1281 subject = String::Flatten(subject); 1282 CHECK(regexp->GetFlags() & JSRegExp::kGlobal); 1283 1284 if (regexp->CaptureCount() == 0) { 1285 return SearchRegExpMultiple<false>(isolate, subject, regexp, 1286 last_match_info, result_array); 1287 } else { 1288 return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info, 1289 result_array); 1290 } 1291 } 1292 1293 RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) { 1294 HandleScope scope(isolate); 1295 DCHECK_EQ(3, args.length()); 1296 1297 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); 1298 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); 1299 CONVERT_ARG_HANDLE_CHECKED(JSObject, replace, 2); 1300 1301 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); 1302 1303 RETURN_RESULT_OR_FAILURE(isolate, StringReplaceNonGlobalRegExpWithFunction( 1304 isolate, subject, regexp, replace)); 1305 } 1306 1307 namespace { 1308 1309 // ES##sec-speciesconstructor 1310 // SpeciesConstructor ( O, defaultConstructor ) 1311 MUST_USE_RESULT MaybeHandle<Object> SpeciesConstructor( 1312 Isolate* isolate, Handle<JSReceiver> recv, 1313 Handle<JSFunction> default_ctor) { 1314 Handle<Object> ctor_obj; 1315 ASSIGN_RETURN_ON_EXCEPTION( 1316 isolate, ctor_obj, 1317 JSObject::GetProperty(recv, isolate->factory()->constructor_string()), 1318 Object); 1319 1320 if (ctor_obj->IsUndefined(isolate)) return default_ctor; 1321 1322 if (!ctor_obj->IsJSReceiver()) { 1323 THROW_NEW_ERROR(isolate, 1324 NewTypeError(MessageTemplate::kConstructorNotReceiver), 1325 Object); 1326 } 1327 1328 Handle<JSReceiver> ctor = Handle<JSReceiver>::cast(ctor_obj); 1329 1330 Handle<Object> species; 1331 ASSIGN_RETURN_ON_EXCEPTION( 1332 isolate, species, 1333 JSObject::GetProperty(ctor, isolate->factory()->species_symbol()), 1334 Object); 1335 1336 if (species->IsNullOrUndefined(isolate)) { 1337 return default_ctor; 1338 } 1339 1340 if (species->IsConstructor()) return species; 1341 1342 THROW_NEW_ERROR( 1343 isolate, NewTypeError(MessageTemplate::kSpeciesNotConstructor), Object); 1344 } 1345 1346 MUST_USE_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate, 1347 Handle<Object> object, 1348 uint32_t* out) { 1349 if (object->IsUndefined(isolate)) { 1350 *out = kMaxUInt32; 1351 return object; 1352 } 1353 1354 Handle<Object> number; 1355 ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(object), Object); 1356 *out = NumberToUint32(*number); 1357 return object; 1358 } 1359 1360 Handle<JSArray> NewJSArrayWithElements(Isolate* isolate, 1361 Handle<FixedArray> elems, 1362 int num_elems) { 1363 elems->Shrink(num_elems); 1364 return isolate->factory()->NewJSArrayWithElements(elems); 1365 } 1366 1367 } // namespace 1368 1369 // Slow path for: 1370 // ES#sec-regexp.prototype-@@replace 1371 // RegExp.prototype [ @@split ] ( string, limit ) 1372 RUNTIME_FUNCTION(Runtime_RegExpSplit) { 1373 HandleScope scope(isolate); 1374 DCHECK_EQ(3, args.length()); 1375 1376 DCHECK(args[1]->IsString()); 1377 1378 CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0); 1379 CONVERT_ARG_HANDLE_CHECKED(String, string, 1); 1380 CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2); 1381 1382 Factory* factory = isolate->factory(); 1383 1384 Handle<JSFunction> regexp_fun = isolate->regexp_function(); 1385 Handle<Object> ctor; 1386 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1387 isolate, ctor, SpeciesConstructor(isolate, recv, regexp_fun)); 1388 1389 Handle<Object> flags_obj; 1390 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1391 isolate, flags_obj, JSObject::GetProperty(recv, factory->flags_string())); 1392 1393 Handle<String> flags; 1394 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags, 1395 Object::ToString(isolate, flags_obj)); 1396 1397 Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u'); 1398 const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0); 1399 1400 Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y'); 1401 const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0); 1402 1403 Handle<String> new_flags = flags; 1404 if (!sticky) { 1405 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags, 1406 factory->NewConsString(flags, y_str)); 1407 } 1408 1409 Handle<JSReceiver> splitter; 1410 { 1411 const int argc = 2; 1412 1413 ScopedVector<Handle<Object>> argv(argc); 1414 argv[0] = recv; 1415 argv[1] = new_flags; 1416 1417 Handle<JSFunction> ctor_fun = Handle<JSFunction>::cast(ctor); 1418 Handle<Object> splitter_obj; 1419 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1420 isolate, splitter_obj, Execution::New(ctor_fun, argc, argv.start())); 1421 1422 splitter = Handle<JSReceiver>::cast(splitter_obj); 1423 } 1424 1425 uint32_t limit; 1426 RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit)); 1427 1428 const uint32_t length = string->length(); 1429 1430 if (limit == 0) return *factory->NewJSArray(0); 1431 1432 if (length == 0) { 1433 Handle<Object> result; 1434 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1435 isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string, 1436 factory->undefined_value())); 1437 1438 if (!result->IsNull(isolate)) return *factory->NewJSArray(0); 1439 1440 Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1); 1441 elems->set(0, *string); 1442 return *factory->NewJSArrayWithElements(elems); 1443 } 1444 1445 static const int kInitialArraySize = 8; 1446 Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize); 1447 int num_elems = 0; 1448 1449 uint32_t string_index = 0; 1450 uint32_t prev_string_index = 0; 1451 while (string_index < length) { 1452 RETURN_FAILURE_ON_EXCEPTION( 1453 isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index)); 1454 1455 Handle<Object> result; 1456 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1457 isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string, 1458 factory->undefined_value())); 1459 1460 if (result->IsNull(isolate)) { 1461 string_index = RegExpUtils::AdvanceStringIndex(isolate, string, 1462 string_index, unicode); 1463 continue; 1464 } 1465 1466 Handle<Object> last_index_obj; 1467 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1468 isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter)); 1469 1470 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1471 isolate, last_index_obj, Object::ToLength(isolate, last_index_obj)); 1472 1473 const uint32_t end = 1474 std::min(PositiveNumberToUint32(*last_index_obj), length); 1475 if (end == prev_string_index) { 1476 string_index = RegExpUtils::AdvanceStringIndex(isolate, string, 1477 string_index, unicode); 1478 continue; 1479 } 1480 1481 { 1482 Handle<String> substr = 1483 factory->NewSubString(string, prev_string_index, string_index); 1484 elems = FixedArray::SetAndGrow(elems, num_elems++, substr); 1485 if (static_cast<uint32_t>(num_elems) == limit) { 1486 return *NewJSArrayWithElements(isolate, elems, num_elems); 1487 } 1488 } 1489 1490 prev_string_index = end; 1491 1492 Handle<Object> num_captures_obj; 1493 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1494 isolate, num_captures_obj, 1495 Object::GetProperty(result, isolate->factory()->length_string())); 1496 1497 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1498 isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj)); 1499 const int num_captures = PositiveNumberToUint32(*num_captures_obj); 1500 1501 for (int i = 1; i < num_captures; i++) { 1502 Handle<Object> capture; 1503 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1504 isolate, capture, Object::GetElement(isolate, result, i)); 1505 elems = FixedArray::SetAndGrow(elems, num_elems++, capture); 1506 if (static_cast<uint32_t>(num_elems) == limit) { 1507 return *NewJSArrayWithElements(isolate, elems, num_elems); 1508 } 1509 } 1510 1511 string_index = prev_string_index; 1512 } 1513 1514 { 1515 Handle<String> substr = 1516 factory->NewSubString(string, prev_string_index, length); 1517 elems = FixedArray::SetAndGrow(elems, num_elems++, substr); 1518 } 1519 1520 return *NewJSArrayWithElements(isolate, elems, num_elems); 1521 } 1522 1523 // Slow path for: 1524 // ES#sec-regexp.prototype-@@replace 1525 // RegExp.prototype [ @@replace ] ( string, replaceValue ) 1526 RUNTIME_FUNCTION(Runtime_RegExpReplace) { 1527 HandleScope scope(isolate); 1528 DCHECK_EQ(3, args.length()); 1529 1530 CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0); 1531 CONVERT_ARG_HANDLE_CHECKED(String, string, 1); 1532 Handle<Object> replace_obj = args.at(2); 1533 1534 Factory* factory = isolate->factory(); 1535 1536 string = String::Flatten(string); 1537 1538 // Fast-path for unmodified JSRegExps. 1539 if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) { 1540 RETURN_RESULT_OR_FAILURE( 1541 isolate, RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, 1542 replace_obj)); 1543 } 1544 1545 const uint32_t length = string->length(); 1546 const bool functional_replace = replace_obj->IsCallable(); 1547 1548 Handle<String> replace; 1549 if (!functional_replace) { 1550 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace, 1551 Object::ToString(isolate, replace_obj)); 1552 } 1553 1554 Handle<Object> global_obj; 1555 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1556 isolate, global_obj, 1557 JSReceiver::GetProperty(recv, factory->global_string())); 1558 const bool global = global_obj->BooleanValue(); 1559 1560 bool unicode = false; 1561 if (global) { 1562 Handle<Object> unicode_obj; 1563 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1564 isolate, unicode_obj, 1565 JSReceiver::GetProperty(recv, factory->unicode_string())); 1566 unicode = unicode_obj->BooleanValue(); 1567 1568 RETURN_FAILURE_ON_EXCEPTION(isolate, 1569 RegExpUtils::SetLastIndex(isolate, recv, 0)); 1570 } 1571 1572 Zone zone(isolate->allocator(), ZONE_NAME); 1573 ZoneVector<Handle<Object>> results(&zone); 1574 1575 while (true) { 1576 Handle<Object> result; 1577 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1578 isolate, result, RegExpUtils::RegExpExec(isolate, recv, string, 1579 factory->undefined_value())); 1580 1581 if (result->IsNull(isolate)) break; 1582 1583 results.push_back(result); 1584 if (!global) break; 1585 1586 Handle<Object> match_obj; 1587 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj, 1588 Object::GetElement(isolate, result, 0)); 1589 1590 Handle<String> match; 1591 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match, 1592 Object::ToString(isolate, match_obj)); 1593 1594 if (match->length() == 0) { 1595 RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex( 1596 isolate, recv, string, unicode)); 1597 } 1598 } 1599 1600 // TODO(jgruber): Look into ReplacementStringBuilder instead. 1601 IncrementalStringBuilder builder(isolate); 1602 uint32_t next_source_position = 0; 1603 1604 for (const auto& result : results) { 1605 Handle<Object> captures_length_obj; 1606 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1607 isolate, captures_length_obj, 1608 Object::GetProperty(result, factory->length_string())); 1609 1610 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1611 isolate, captures_length_obj, 1612 Object::ToLength(isolate, captures_length_obj)); 1613 const int captures_length = PositiveNumberToUint32(*captures_length_obj); 1614 1615 Handle<Object> match_obj; 1616 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj, 1617 Object::GetElement(isolate, result, 0)); 1618 1619 Handle<String> match; 1620 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match, 1621 Object::ToString(isolate, match_obj)); 1622 1623 const int match_length = match->length(); 1624 1625 Handle<Object> position_obj; 1626 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1627 isolate, position_obj, 1628 Object::GetProperty(result, factory->index_string())); 1629 1630 // TODO(jgruber): Extract and correct error handling. Since we can go up to 1631 // 2^53 - 1 (at least for ToLength), we might actually need uint64_t here? 1632 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1633 isolate, position_obj, Object::ToInteger(isolate, position_obj)); 1634 const uint32_t position = 1635 std::min(PositiveNumberToUint32(*position_obj), length); 1636 1637 ZoneVector<Handle<Object>> captures(&zone); 1638 for (int n = 0; n < captures_length; n++) { 1639 Handle<Object> capture; 1640 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1641 isolate, capture, Object::GetElement(isolate, result, n)); 1642 1643 if (!capture->IsUndefined(isolate)) { 1644 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture, 1645 Object::ToString(isolate, capture)); 1646 } 1647 captures.push_back(capture); 1648 } 1649 1650 Handle<String> replacement; 1651 if (functional_replace) { 1652 const int argc = captures_length + 2; 1653 ScopedVector<Handle<Object>> argv(argc); 1654 1655 for (int j = 0; j < captures_length; j++) { 1656 argv[j] = captures[j]; 1657 } 1658 1659 argv[captures_length] = handle(Smi::FromInt(position), isolate); 1660 argv[captures_length + 1] = string; 1661 1662 Handle<Object> replacement_obj; 1663 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1664 isolate, replacement_obj, 1665 Execution::Call(isolate, replace_obj, factory->undefined_value(), 1666 argc, argv.start())); 1667 1668 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1669 isolate, replacement, Object::ToString(isolate, replacement_obj)); 1670 } else { 1671 VectorBackedMatch m(isolate, string, match, position, &captures); 1672 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1673 isolate, replacement, String::GetSubstitution(isolate, &m, replace)); 1674 } 1675 1676 if (position >= next_source_position) { 1677 builder.AppendString( 1678 factory->NewSubString(string, next_source_position, position)); 1679 builder.AppendString(replacement); 1680 1681 next_source_position = position + match_length; 1682 } 1683 } 1684 1685 if (next_source_position < length) { 1686 builder.AppendString( 1687 factory->NewSubString(string, next_source_position, length)); 1688 } 1689 1690 RETURN_RESULT_OR_FAILURE(isolate, builder.Finish()); 1691 } 1692 1693 RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) { 1694 SealHandleScope shs(isolate); 1695 DCHECK_EQ(4, args.length()); 1696 Object* exception = isolate->pending_exception(); 1697 isolate->clear_pending_exception(); 1698 return isolate->ReThrow(exception); 1699 } 1700 1701 RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) { 1702 HandleScope scope(isolate); 1703 DCHECK_EQ(3, args.length()); 1704 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); 1705 CONVERT_ARG_HANDLE_CHECKED(String, source, 1); 1706 CONVERT_ARG_HANDLE_CHECKED(String, flags, 2); 1707 1708 RETURN_FAILURE_ON_EXCEPTION(isolate, 1709 JSRegExp::Initialize(regexp, source, flags)); 1710 1711 return *regexp; 1712 } 1713 1714 RUNTIME_FUNCTION(Runtime_IsRegExp) { 1715 SealHandleScope shs(isolate); 1716 DCHECK_EQ(1, args.length()); 1717 CONVERT_ARG_CHECKED(Object, obj, 0); 1718 return isolate->heap()->ToBoolean(obj->IsJSRegExp()); 1719 } 1720 1721 } // namespace internal 1722 } // namespace v8 1723