1 // Copyright 2014 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "src/runtime/runtime-utils.h" 6 7 #include "src/arguments.h" 8 #include "src/conversions-inl.h" 9 #include "src/isolate-inl.h" 10 #include "src/messages.h" 11 #include "src/regexp/jsregexp-inl.h" 12 #include "src/regexp/jsregexp.h" 13 #include "src/string-builder.h" 14 #include "src/string-search.h" 15 16 namespace v8 { 17 namespace internal { 18 19 class CompiledReplacement { 20 public: 21 explicit CompiledReplacement(Zone* zone) 22 : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {} 23 24 // Return whether the replacement is simple. 25 bool Compile(Handle<String> replacement, int capture_count, 26 int subject_length); 27 28 // Use Apply only if Compile returned false. 29 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to, 30 int32_t* match); 31 32 // Number of distinct parts of the replacement pattern. 33 int parts() { return parts_.length(); } 34 35 Zone* zone() const { return zone_; } 36 37 private: 38 enum PartType { 39 SUBJECT_PREFIX = 1, 40 SUBJECT_SUFFIX, 41 SUBJECT_CAPTURE, 42 REPLACEMENT_SUBSTRING, 43 REPLACEMENT_STRING, 44 NUMBER_OF_PART_TYPES 45 }; 46 47 struct ReplacementPart { 48 static inline ReplacementPart SubjectMatch() { 49 return ReplacementPart(SUBJECT_CAPTURE, 0); 50 } 51 static inline ReplacementPart SubjectCapture(int capture_index) { 52 return ReplacementPart(SUBJECT_CAPTURE, capture_index); 53 } 54 static inline ReplacementPart SubjectPrefix() { 55 return ReplacementPart(SUBJECT_PREFIX, 0); 56 } 57 static inline ReplacementPart SubjectSuffix(int subject_length) { 58 return ReplacementPart(SUBJECT_SUFFIX, subject_length); 59 } 60 static inline ReplacementPart ReplacementString() { 61 return ReplacementPart(REPLACEMENT_STRING, 0); 62 } 63 static inline ReplacementPart ReplacementSubString(int from, int to) { 64 DCHECK(from >= 0); 65 DCHECK(to > from); 66 return ReplacementPart(-from, to); 67 } 68 69 // If tag <= 0 then it is the negation of a start index of a substring of 70 // the replacement pattern, otherwise it's a value from PartType. 71 ReplacementPart(int tag, int data) : tag(tag), data(data) { 72 // Must be non-positive or a PartType value. 73 DCHECK(tag < NUMBER_OF_PART_TYPES); 74 } 75 // Either a value of PartType or a non-positive number that is 76 // the negation of an index into the replacement string. 77 int tag; 78 // The data value's interpretation depends on the value of tag: 79 // tag == SUBJECT_PREFIX || 80 // tag == SUBJECT_SUFFIX: data is unused. 81 // tag == SUBJECT_CAPTURE: data is the number of the capture. 82 // tag == REPLACEMENT_SUBSTRING || 83 // tag == REPLACEMENT_STRING: data is index into array of substrings 84 // of the replacement string. 85 // tag <= 0: Temporary representation of the substring of the replacement 86 // string ranging over -tag .. data. 87 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the 88 // substring objects. 89 int data; 90 }; 91 92 template <typename Char> 93 bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts, 94 Vector<Char> characters, int capture_count, 95 int subject_length, Zone* zone) { 96 int length = characters.length(); 97 int last = 0; 98 for (int i = 0; i < length; i++) { 99 Char c = characters[i]; 100 if (c == '$') { 101 int next_index = i + 1; 102 if (next_index == length) { // No next character! 103 break; 104 } 105 Char c2 = characters[next_index]; 106 switch (c2) { 107 case '$': 108 if (i > last) { 109 // There is a substring before. Include the first "$". 110 parts->Add( 111 ReplacementPart::ReplacementSubString(last, next_index), 112 zone); 113 last = next_index + 1; // Continue after the second "$". 114 } else { 115 // Let the next substring start with the second "$". 116 last = next_index; 117 } 118 i = next_index; 119 break; 120 case '`': 121 if (i > last) { 122 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); 123 } 124 parts->Add(ReplacementPart::SubjectPrefix(), zone); 125 i = next_index; 126 last = i + 1; 127 break; 128 case '\'': 129 if (i > last) { 130 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); 131 } 132 parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone); 133 i = next_index; 134 last = i + 1; 135 break; 136 case '&': 137 if (i > last) { 138 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); 139 } 140 parts->Add(ReplacementPart::SubjectMatch(), zone); 141 i = next_index; 142 last = i + 1; 143 break; 144 case '0': 145 case '1': 146 case '2': 147 case '3': 148 case '4': 149 case '5': 150 case '6': 151 case '7': 152 case '8': 153 case '9': { 154 int capture_ref = c2 - '0'; 155 if (capture_ref > capture_count) { 156 i = next_index; 157 continue; 158 } 159 int second_digit_index = next_index + 1; 160 if (second_digit_index < length) { 161 // Peek ahead to see if we have two digits. 162 Char c3 = characters[second_digit_index]; 163 if ('0' <= c3 && c3 <= '9') { // Double digits. 164 int double_digit_ref = capture_ref * 10 + c3 - '0'; 165 if (double_digit_ref <= capture_count) { 166 next_index = second_digit_index; 167 capture_ref = double_digit_ref; 168 } 169 } 170 } 171 if (capture_ref > 0) { 172 if (i > last) { 173 parts->Add(ReplacementPart::ReplacementSubString(last, i), 174 zone); 175 } 176 DCHECK(capture_ref <= capture_count); 177 parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone); 178 last = next_index + 1; 179 } 180 i = next_index; 181 break; 182 } 183 default: 184 i = next_index; 185 break; 186 } 187 } 188 } 189 if (length > last) { 190 if (last == 0) { 191 // Replacement is simple. Do not use Apply to do the replacement. 192 return true; 193 } else { 194 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone); 195 } 196 } 197 return false; 198 } 199 200 ZoneList<ReplacementPart> parts_; 201 ZoneList<Handle<String> > replacement_substrings_; 202 Zone* zone_; 203 }; 204 205 206 bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count, 207 int subject_length) { 208 { 209 DisallowHeapAllocation no_gc; 210 String::FlatContent content = replacement->GetFlatContent(); 211 DCHECK(content.IsFlat()); 212 bool simple = false; 213 if (content.IsOneByte()) { 214 simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(), 215 capture_count, subject_length, zone()); 216 } else { 217 DCHECK(content.IsTwoByte()); 218 simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(), 219 capture_count, subject_length, zone()); 220 } 221 if (simple) return true; 222 } 223 224 Isolate* isolate = replacement->GetIsolate(); 225 // Find substrings of replacement string and create them as String objects. 226 int substring_index = 0; 227 for (int i = 0, n = parts_.length(); i < n; i++) { 228 int tag = parts_[i].tag; 229 if (tag <= 0) { // A replacement string slice. 230 int from = -tag; 231 int to = parts_[i].data; 232 replacement_substrings_.Add( 233 isolate->factory()->NewSubString(replacement, from, to), zone()); 234 parts_[i].tag = REPLACEMENT_SUBSTRING; 235 parts_[i].data = substring_index; 236 substring_index++; 237 } else if (tag == REPLACEMENT_STRING) { 238 replacement_substrings_.Add(replacement, zone()); 239 parts_[i].data = substring_index; 240 substring_index++; 241 } 242 } 243 return false; 244 } 245 246 247 void CompiledReplacement::Apply(ReplacementStringBuilder* builder, 248 int match_from, int match_to, int32_t* match) { 249 DCHECK_LT(0, parts_.length()); 250 for (int i = 0, n = parts_.length(); i < n; i++) { 251 ReplacementPart part = parts_[i]; 252 switch (part.tag) { 253 case SUBJECT_PREFIX: 254 if (match_from > 0) builder->AddSubjectSlice(0, match_from); 255 break; 256 case SUBJECT_SUFFIX: { 257 int subject_length = part.data; 258 if (match_to < subject_length) { 259 builder->AddSubjectSlice(match_to, subject_length); 260 } 261 break; 262 } 263 case SUBJECT_CAPTURE: { 264 int capture = part.data; 265 int from = match[capture * 2]; 266 int to = match[capture * 2 + 1]; 267 if (from >= 0 && to > from) { 268 builder->AddSubjectSlice(from, to); 269 } 270 break; 271 } 272 case REPLACEMENT_SUBSTRING: 273 case REPLACEMENT_STRING: 274 builder->AddString(replacement_substrings_[part.data]); 275 break; 276 default: 277 UNREACHABLE(); 278 } 279 } 280 } 281 282 283 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern, 284 ZoneList<int>* indices, unsigned int limit, 285 Zone* zone) { 286 DCHECK(limit > 0); 287 // Collect indices of pattern in subject using memchr. 288 // Stop after finding at most limit values. 289 const uint8_t* subject_start = subject.start(); 290 const uint8_t* subject_end = subject_start + subject.length(); 291 const uint8_t* pos = subject_start; 292 while (limit > 0) { 293 pos = reinterpret_cast<const uint8_t*>( 294 memchr(pos, pattern, subject_end - pos)); 295 if (pos == NULL) return; 296 indices->Add(static_cast<int>(pos - subject_start), zone); 297 pos++; 298 limit--; 299 } 300 } 301 302 303 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern, 304 ZoneList<int>* indices, unsigned int limit, 305 Zone* zone) { 306 DCHECK(limit > 0); 307 const uc16* subject_start = subject.start(); 308 const uc16* subject_end = subject_start + subject.length(); 309 for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) { 310 if (*pos == pattern) { 311 indices->Add(static_cast<int>(pos - subject_start), zone); 312 limit--; 313 } 314 } 315 } 316 317 318 template <typename SubjectChar, typename PatternChar> 319 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject, 320 Vector<const PatternChar> pattern, 321 ZoneList<int>* indices, unsigned int limit, Zone* zone) { 322 DCHECK(limit > 0); 323 // Collect indices of pattern in subject. 324 // Stop after finding at most limit values. 325 int pattern_length = pattern.length(); 326 int index = 0; 327 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); 328 while (limit > 0) { 329 index = search.Search(subject, index); 330 if (index < 0) return; 331 indices->Add(index, zone); 332 index += pattern_length; 333 limit--; 334 } 335 } 336 337 338 void FindStringIndicesDispatch(Isolate* isolate, String* subject, 339 String* pattern, ZoneList<int>* indices, 340 unsigned int limit, Zone* zone) { 341 { 342 DisallowHeapAllocation no_gc; 343 String::FlatContent subject_content = subject->GetFlatContent(); 344 String::FlatContent pattern_content = pattern->GetFlatContent(); 345 DCHECK(subject_content.IsFlat()); 346 DCHECK(pattern_content.IsFlat()); 347 if (subject_content.IsOneByte()) { 348 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector(); 349 if (pattern_content.IsOneByte()) { 350 Vector<const uint8_t> pattern_vector = 351 pattern_content.ToOneByteVector(); 352 if (pattern_vector.length() == 1) { 353 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices, 354 limit, zone); 355 } else { 356 FindStringIndices(isolate, subject_vector, pattern_vector, indices, 357 limit, zone); 358 } 359 } else { 360 FindStringIndices(isolate, subject_vector, 361 pattern_content.ToUC16Vector(), indices, limit, zone); 362 } 363 } else { 364 Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); 365 if (pattern_content.IsOneByte()) { 366 Vector<const uint8_t> pattern_vector = 367 pattern_content.ToOneByteVector(); 368 if (pattern_vector.length() == 1) { 369 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, 370 limit, zone); 371 } else { 372 FindStringIndices(isolate, subject_vector, pattern_vector, indices, 373 limit, zone); 374 } 375 } else { 376 Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector(); 377 if (pattern_vector.length() == 1) { 378 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, 379 limit, zone); 380 } else { 381 FindStringIndices(isolate, subject_vector, pattern_vector, indices, 382 limit, zone); 383 } 384 } 385 } 386 } 387 } 388 389 390 template <typename ResultSeqString> 391 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString( 392 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp, 393 Handle<String> replacement, Handle<JSArray> last_match_info) { 394 DCHECK(subject->IsFlat()); 395 DCHECK(replacement->IsFlat()); 396 397 ZoneScope zone_scope(isolate->runtime_zone()); 398 ZoneList<int> indices(8, zone_scope.zone()); 399 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); 400 String* pattern = 401 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); 402 int subject_len = subject->length(); 403 int pattern_len = pattern->length(); 404 int replacement_len = replacement->length(); 405 406 FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff, 407 zone_scope.zone()); 408 409 int matches = indices.length(); 410 if (matches == 0) return *subject; 411 412 // Detect integer overflow. 413 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) - 414 static_cast<int64_t>(pattern_len)) * 415 static_cast<int64_t>(matches) + 416 static_cast<int64_t>(subject_len); 417 int result_len; 418 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) { 419 STATIC_ASSERT(String::kMaxLength < kMaxInt); 420 result_len = kMaxInt; // Provoke exception. 421 } else { 422 result_len = static_cast<int>(result_len_64); 423 } 424 425 int subject_pos = 0; 426 int result_pos = 0; 427 428 MaybeHandle<SeqString> maybe_res; 429 if (ResultSeqString::kHasOneByteEncoding) { 430 maybe_res = isolate->factory()->NewRawOneByteString(result_len); 431 } else { 432 maybe_res = isolate->factory()->NewRawTwoByteString(result_len); 433 } 434 Handle<SeqString> untyped_res; 435 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res); 436 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res); 437 438 for (int i = 0; i < matches; i++) { 439 // Copy non-matched subject content. 440 if (subject_pos < indices.at(i)) { 441 String::WriteToFlat(*subject, result->GetChars() + result_pos, 442 subject_pos, indices.at(i)); 443 result_pos += indices.at(i) - subject_pos; 444 } 445 446 // Replace match. 447 if (replacement_len > 0) { 448 String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0, 449 replacement_len); 450 result_pos += replacement_len; 451 } 452 453 subject_pos = indices.at(i) + pattern_len; 454 } 455 // Add remaining subject content at the end. 456 if (subject_pos < subject_len) { 457 String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos, 458 subject_len); 459 } 460 461 int32_t match_indices[] = {indices.at(matches - 1), 462 indices.at(matches - 1) + pattern_len}; 463 RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices); 464 465 return *result; 466 } 467 468 469 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString( 470 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, 471 Handle<String> replacement, Handle<JSArray> last_match_info) { 472 DCHECK(subject->IsFlat()); 473 DCHECK(replacement->IsFlat()); 474 475 int capture_count = regexp->CaptureCount(); 476 int subject_length = subject->length(); 477 478 // CompiledReplacement uses zone allocation. 479 ZoneScope zone_scope(isolate->runtime_zone()); 480 CompiledReplacement compiled_replacement(zone_scope.zone()); 481 bool simple_replace = 482 compiled_replacement.Compile(replacement, capture_count, subject_length); 483 484 // Shortcut for simple non-regexp global replacements 485 if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) { 486 if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) { 487 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( 488 isolate, subject, regexp, replacement, last_match_info); 489 } else { 490 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( 491 isolate, subject, regexp, replacement, last_match_info); 492 } 493 } 494 495 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); 496 if (global_cache.HasException()) return isolate->heap()->exception(); 497 498 int32_t* current_match = global_cache.FetchNext(); 499 if (current_match == NULL) { 500 if (global_cache.HasException()) return isolate->heap()->exception(); 501 return *subject; 502 } 503 504 // Guessing the number of parts that the final result string is built 505 // from. Global regexps can match any number of times, so we guess 506 // conservatively. 507 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1; 508 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts); 509 510 // Number of parts added by compiled replacement plus preceeding 511 // string and possibly suffix after last match. It is possible for 512 // all components to use two elements when encoded as two smis. 513 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2); 514 515 int prev = 0; 516 517 do { 518 builder.EnsureCapacity(parts_added_per_loop); 519 520 int start = current_match[0]; 521 int end = current_match[1]; 522 523 if (prev < start) { 524 builder.AddSubjectSlice(prev, start); 525 } 526 527 if (simple_replace) { 528 builder.AddString(replacement); 529 } else { 530 compiled_replacement.Apply(&builder, start, end, current_match); 531 } 532 prev = end; 533 534 current_match = global_cache.FetchNext(); 535 } while (current_match != NULL); 536 537 if (global_cache.HasException()) return isolate->heap()->exception(); 538 539 if (prev < subject_length) { 540 builder.EnsureCapacity(2); 541 builder.AddSubjectSlice(prev, subject_length); 542 } 543 544 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count, 545 global_cache.LastSuccessfulMatch()); 546 547 Handle<String> result; 548 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, builder.ToString()); 549 return *result; 550 } 551 552 553 template <typename ResultSeqString> 554 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString( 555 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, 556 Handle<JSArray> last_match_info) { 557 DCHECK(subject->IsFlat()); 558 559 // Shortcut for simple non-regexp global replacements 560 if (regexp->TypeTag() == JSRegExp::ATOM) { 561 Handle<String> empty_string = isolate->factory()->empty_string(); 562 if (subject->IsOneByteRepresentation()) { 563 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( 564 isolate, subject, regexp, empty_string, last_match_info); 565 } else { 566 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( 567 isolate, subject, regexp, empty_string, last_match_info); 568 } 569 } 570 571 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); 572 if (global_cache.HasException()) return isolate->heap()->exception(); 573 574 int32_t* current_match = global_cache.FetchNext(); 575 if (current_match == NULL) { 576 if (global_cache.HasException()) return isolate->heap()->exception(); 577 return *subject; 578 } 579 580 int start = current_match[0]; 581 int end = current_match[1]; 582 int capture_count = regexp->CaptureCount(); 583 int subject_length = subject->length(); 584 585 int new_length = subject_length - (end - start); 586 if (new_length == 0) return isolate->heap()->empty_string(); 587 588 Handle<ResultSeqString> answer; 589 if (ResultSeqString::kHasOneByteEncoding) { 590 answer = Handle<ResultSeqString>::cast( 591 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked()); 592 } else { 593 answer = Handle<ResultSeqString>::cast( 594 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked()); 595 } 596 597 int prev = 0; 598 int position = 0; 599 600 do { 601 start = current_match[0]; 602 end = current_match[1]; 603 if (prev < start) { 604 // Add substring subject[prev;start] to answer string. 605 String::WriteToFlat(*subject, answer->GetChars() + position, prev, start); 606 position += start - prev; 607 } 608 prev = end; 609 610 current_match = global_cache.FetchNext(); 611 } while (current_match != NULL); 612 613 if (global_cache.HasException()) return isolate->heap()->exception(); 614 615 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count, 616 global_cache.LastSuccessfulMatch()); 617 618 if (prev < subject_length) { 619 // Add substring subject[prev;length] to answer string. 620 String::WriteToFlat(*subject, answer->GetChars() + position, prev, 621 subject_length); 622 position += subject_length - prev; 623 } 624 625 if (position == 0) return isolate->heap()->empty_string(); 626 627 // Shorten string and fill 628 int string_size = ResultSeqString::SizeFor(position); 629 int allocated_string_size = ResultSeqString::SizeFor(new_length); 630 int delta = allocated_string_size - string_size; 631 632 answer->set_length(position); 633 if (delta == 0) return *answer; 634 635 Address end_of_string = answer->address() + string_size; 636 Heap* heap = isolate->heap(); 637 638 // The trimming is performed on a newly allocated object, which is on a 639 // fresly allocated page or on an already swept page. Hence, the sweeper 640 // thread can not get confused with the filler creation. No synchronization 641 // needed. 642 // TODO(hpayer): We should shrink the large object page if the size 643 // of the object changed significantly. 644 if (!heap->lo_space()->Contains(*answer)) { 645 heap->CreateFillerObjectAt(end_of_string, delta); 646 } 647 heap->AdjustLiveBytes(*answer, -delta, Heap::CONCURRENT_TO_SWEEPER); 648 return *answer; 649 } 650 651 652 RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) { 653 HandleScope scope(isolate); 654 DCHECK(args.length() == 4); 655 656 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); 657 CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2); 658 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); 659 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3); 660 661 RUNTIME_ASSERT(regexp->GetFlags() & JSRegExp::kGlobal); 662 RUNTIME_ASSERT(last_match_info->HasFastObjectElements()); 663 664 subject = String::Flatten(subject); 665 666 if (replacement->length() == 0) { 667 if (subject->HasOnlyOneByteChars()) { 668 return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>( 669 isolate, subject, regexp, last_match_info); 670 } else { 671 return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>( 672 isolate, subject, regexp, last_match_info); 673 } 674 } 675 676 replacement = String::Flatten(replacement); 677 678 return StringReplaceGlobalRegExpWithString(isolate, subject, regexp, 679 replacement, last_match_info); 680 } 681 682 683 RUNTIME_FUNCTION(Runtime_StringSplit) { 684 HandleScope handle_scope(isolate); 685 DCHECK(args.length() == 3); 686 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); 687 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1); 688 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); 689 RUNTIME_ASSERT(limit > 0); 690 691 int subject_length = subject->length(); 692 int pattern_length = pattern->length(); 693 RUNTIME_ASSERT(pattern_length > 0); 694 695 if (limit == 0xffffffffu) { 696 FixedArray* last_match_cache_unused; 697 Handle<Object> cached_answer( 698 RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern, 699 &last_match_cache_unused, 700 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS), 701 isolate); 702 if (*cached_answer != Smi::FromInt(0)) { 703 // The cache FixedArray is a COW-array and can therefore be reused. 704 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements( 705 Handle<FixedArray>::cast(cached_answer)); 706 return *result; 707 } 708 } 709 710 // The limit can be very large (0xffffffffu), but since the pattern 711 // isn't empty, we can never create more parts than ~half the length 712 // of the subject. 713 714 subject = String::Flatten(subject); 715 pattern = String::Flatten(pattern); 716 717 static const int kMaxInitialListCapacity = 16; 718 719 ZoneScope zone_scope(isolate->runtime_zone()); 720 721 // Find (up to limit) indices of separator and end-of-string in subject 722 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); 723 ZoneList<int> indices(initial_capacity, zone_scope.zone()); 724 725 FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit, 726 zone_scope.zone()); 727 728 if (static_cast<uint32_t>(indices.length()) < limit) { 729 indices.Add(subject_length, zone_scope.zone()); 730 } 731 732 // The list indices now contains the end of each part to create. 733 734 // Create JSArray of substrings separated by separator. 735 int part_count = indices.length(); 736 737 Handle<JSArray> result = isolate->factory()->NewJSArray(part_count); 738 JSObject::EnsureCanContainHeapObjectElements(result); 739 result->set_length(Smi::FromInt(part_count)); 740 741 DCHECK(result->HasFastObjectElements()); 742 743 Handle<FixedArray> elements(FixedArray::cast(result->elements())); 744 745 if (part_count == 1 && indices.at(0) == subject_length) { 746 elements->set(0, *subject); 747 } else { 748 int part_start = 0; 749 for (int i = 0; i < part_count; i++) { 750 HandleScope local_loop_handle(isolate); 751 int part_end = indices.at(i); 752 Handle<String> substring = 753 isolate->factory()->NewProperSubString(subject, part_start, part_end); 754 elements->set(i, *substring); 755 part_start = part_end + pattern_length; 756 } 757 } 758 759 if (limit == 0xffffffffu) { 760 if (result->HasFastObjectElements()) { 761 RegExpResultsCache::Enter(isolate, subject, pattern, elements, 762 isolate->factory()->empty_fixed_array(), 763 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS); 764 } 765 } 766 767 return *result; 768 } 769 770 771 RUNTIME_FUNCTION(Runtime_RegExpExec) { 772 HandleScope scope(isolate); 773 DCHECK(args.length() == 4); 774 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); 775 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); 776 CONVERT_INT32_ARG_CHECKED(index, 2); 777 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3); 778 // Due to the way the JS calls are constructed this must be less than the 779 // length of a string, i.e. it is always a Smi. We check anyway for security. 780 RUNTIME_ASSERT(index >= 0); 781 RUNTIME_ASSERT(index <= subject->length()); 782 isolate->counters()->regexp_entry_runtime()->Increment(); 783 Handle<Object> result; 784 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 785 isolate, result, 786 RegExpImpl::Exec(regexp, subject, index, last_match_info)); 787 return *result; 788 } 789 790 791 RUNTIME_FUNCTION(Runtime_RegExpFlags) { 792 SealHandleScope shs(isolate); 793 DCHECK(args.length() == 1); 794 CONVERT_ARG_CHECKED(JSRegExp, regexp, 0); 795 return regexp->flags(); 796 } 797 798 799 RUNTIME_FUNCTION(Runtime_RegExpSource) { 800 SealHandleScope shs(isolate); 801 DCHECK(args.length() == 1); 802 CONVERT_ARG_CHECKED(JSRegExp, regexp, 0); 803 return regexp->source(); 804 } 805 806 807 RUNTIME_FUNCTION(Runtime_RegExpConstructResult) { 808 HandleScope handle_scope(isolate); 809 DCHECK(args.length() == 3); 810 CONVERT_SMI_ARG_CHECKED(size, 0); 811 RUNTIME_ASSERT(size >= 0 && size <= FixedArray::kMaxLength); 812 CONVERT_ARG_HANDLE_CHECKED(Object, index, 1); 813 CONVERT_ARG_HANDLE_CHECKED(Object, input, 2); 814 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size); 815 Handle<Map> regexp_map(isolate->native_context()->regexp_result_map()); 816 Handle<JSObject> object = 817 isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED); 818 Handle<JSArray> array = Handle<JSArray>::cast(object); 819 array->set_elements(*elements); 820 array->set_length(Smi::FromInt(size)); 821 // Write in-object properties after the length of the array. 822 array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index); 823 array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input); 824 return *array; 825 } 826 827 828 RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) { 829 HandleScope scope(isolate); 830 DCHECK(args.length() == 3); 831 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); 832 CONVERT_ARG_HANDLE_CHECKED(String, source, 1); 833 CONVERT_ARG_HANDLE_CHECKED(String, flags, 2); 834 835 RETURN_FAILURE_ON_EXCEPTION(isolate, 836 JSRegExp::Initialize(regexp, source, flags)); 837 838 return *regexp; 839 } 840 841 842 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain 843 // separate last match info. See comment on that function. 844 template <bool has_capture> 845 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject, 846 Handle<JSRegExp> regexp, 847 Handle<JSArray> last_match_array, 848 Handle<JSArray> result_array) { 849 DCHECK(subject->IsFlat()); 850 DCHECK_NE(has_capture, regexp->CaptureCount() == 0); 851 852 int capture_count = regexp->CaptureCount(); 853 int subject_length = subject->length(); 854 855 static const int kMinLengthToCache = 0x1000; 856 857 if (subject_length > kMinLengthToCache) { 858 FixedArray* last_match_cache; 859 Object* cached_answer = RegExpResultsCache::Lookup( 860 isolate->heap(), *subject, regexp->data(), &last_match_cache, 861 RegExpResultsCache::REGEXP_MULTIPLE_INDICES); 862 if (cached_answer->IsFixedArray()) { 863 int capture_registers = (capture_count + 1) * 2; 864 int32_t* last_match = NewArray<int32_t>(capture_registers); 865 for (int i = 0; i < capture_registers; i++) { 866 last_match[i] = Smi::cast(last_match_cache->get(i))->value(); 867 } 868 Handle<FixedArray> cached_fixed_array = 869 Handle<FixedArray>(FixedArray::cast(cached_answer)); 870 // The cache FixedArray is a COW-array and can therefore be reused. 871 JSArray::SetContent(result_array, cached_fixed_array); 872 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count, 873 last_match); 874 DeleteArray(last_match); 875 return *result_array; 876 } 877 } 878 879 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); 880 if (global_cache.HasException()) return isolate->heap()->exception(); 881 882 // Ensured in Runtime_RegExpExecMultiple. 883 DCHECK(result_array->HasFastObjectElements()); 884 Handle<FixedArray> result_elements( 885 FixedArray::cast(result_array->elements())); 886 if (result_elements->length() < 16) { 887 result_elements = isolate->factory()->NewFixedArrayWithHoles(16); 888 } 889 890 FixedArrayBuilder builder(result_elements); 891 892 // Position to search from. 893 int match_start = -1; 894 int match_end = 0; 895 bool first = true; 896 897 // Two smis before and after the match, for very long strings. 898 static const int kMaxBuilderEntriesPerRegExpMatch = 5; 899 900 while (true) { 901 int32_t* current_match = global_cache.FetchNext(); 902 if (current_match == NULL) break; 903 match_start = current_match[0]; 904 builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); 905 if (match_end < match_start) { 906 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, 907 match_start); 908 } 909 match_end = current_match[1]; 910 { 911 // Avoid accumulating new handles inside loop. 912 HandleScope temp_scope(isolate); 913 Handle<String> match; 914 if (!first) { 915 match = isolate->factory()->NewProperSubString(subject, match_start, 916 match_end); 917 } else { 918 match = 919 isolate->factory()->NewSubString(subject, match_start, match_end); 920 first = false; 921 } 922 923 if (has_capture) { 924 // Arguments array to replace function is match, captures, index and 925 // subject, i.e., 3 + capture count in total. 926 Handle<FixedArray> elements = 927 isolate->factory()->NewFixedArray(3 + capture_count); 928 929 elements->set(0, *match); 930 for (int i = 1; i <= capture_count; i++) { 931 int start = current_match[i * 2]; 932 if (start >= 0) { 933 int end = current_match[i * 2 + 1]; 934 DCHECK(start <= end); 935 Handle<String> substring = 936 isolate->factory()->NewSubString(subject, start, end); 937 elements->set(i, *substring); 938 } else { 939 DCHECK(current_match[i * 2 + 1] < 0); 940 elements->set(i, isolate->heap()->undefined_value()); 941 } 942 } 943 elements->set(capture_count + 1, Smi::FromInt(match_start)); 944 elements->set(capture_count + 2, *subject); 945 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements)); 946 } else { 947 builder.Add(*match); 948 } 949 } 950 } 951 952 if (global_cache.HasException()) return isolate->heap()->exception(); 953 954 if (match_start >= 0) { 955 // Finished matching, with at least one match. 956 if (match_end < subject_length) { 957 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, 958 subject_length); 959 } 960 961 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count, 962 global_cache.LastSuccessfulMatch()); 963 964 if (subject_length > kMinLengthToCache) { 965 // Store the last successful match into the array for caching. 966 // TODO(yangguo): do not expose last match to JS and simplify caching. 967 int capture_registers = (capture_count + 1) * 2; 968 Handle<FixedArray> last_match_cache = 969 isolate->factory()->NewFixedArray(capture_registers); 970 int32_t* last_match = global_cache.LastSuccessfulMatch(); 971 for (int i = 0; i < capture_registers; i++) { 972 last_match_cache->set(i, Smi::FromInt(last_match[i])); 973 } 974 Handle<FixedArray> result_fixed_array = builder.array(); 975 result_fixed_array->Shrink(builder.length()); 976 // Cache the result and turn the FixedArray into a COW array. 977 RegExpResultsCache::Enter( 978 isolate, subject, handle(regexp->data(), isolate), result_fixed_array, 979 last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES); 980 } 981 return *builder.ToJSArray(result_array); 982 } else { 983 return isolate->heap()->null_value(); // No matches at all. 984 } 985 } 986 987 988 // This is only called for StringReplaceGlobalRegExpWithFunction. This sets 989 // lastMatchInfoOverride to maintain the last match info, so we don't need to 990 // set any other last match array info. 991 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) { 992 HandleScope handles(isolate); 993 DCHECK(args.length() == 4); 994 995 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); 996 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); 997 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2); 998 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); 999 RUNTIME_ASSERT(last_match_info->HasFastObjectElements()); 1000 RUNTIME_ASSERT(result_array->HasFastObjectElements()); 1001 1002 subject = String::Flatten(subject); 1003 RUNTIME_ASSERT(regexp->GetFlags() & JSRegExp::kGlobal); 1004 1005 if (regexp->CaptureCount() == 0) { 1006 return SearchRegExpMultiple<false>(isolate, subject, regexp, 1007 last_match_info, result_array); 1008 } else { 1009 return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info, 1010 result_array); 1011 } 1012 } 1013 1014 1015 RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) { 1016 SealHandleScope shs(isolate); 1017 DCHECK(args.length() == 4); 1018 Object* exception = isolate->pending_exception(); 1019 isolate->clear_pending_exception(); 1020 return isolate->ReThrow(exception); 1021 } 1022 1023 1024 RUNTIME_FUNCTION(Runtime_IsRegExp) { 1025 SealHandleScope shs(isolate); 1026 DCHECK(args.length() == 1); 1027 CONVERT_ARG_CHECKED(Object, obj, 0); 1028 return isolate->heap()->ToBoolean(obj->IsJSRegExp()); 1029 } 1030 } // namespace internal 1031 } // namespace v8 1032