Home | History | Annotate | Download | only in runtime
      1 // Copyright 2014 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "src/runtime/runtime-utils.h"
      6 
      7 #include "src/arguments.h"
      8 #include "src/conversions-inl.h"
      9 #include "src/isolate-inl.h"
     10 #include "src/messages.h"
     11 #include "src/regexp/jsregexp-inl.h"
     12 #include "src/regexp/jsregexp.h"
     13 #include "src/string-builder.h"
     14 #include "src/string-search.h"
     15 
     16 namespace v8 {
     17 namespace internal {
     18 
     19 class CompiledReplacement {
     20  public:
     21   explicit CompiledReplacement(Zone* zone)
     22       : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
     23 
     24   // Return whether the replacement is simple.
     25   bool Compile(Handle<String> replacement, int capture_count,
     26                int subject_length);
     27 
     28   // Use Apply only if Compile returned false.
     29   void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
     30              int32_t* match);
     31 
     32   // Number of distinct parts of the replacement pattern.
     33   int parts() { return parts_.length(); }
     34 
     35   Zone* zone() const { return zone_; }
     36 
     37  private:
     38   enum PartType {
     39     SUBJECT_PREFIX = 1,
     40     SUBJECT_SUFFIX,
     41     SUBJECT_CAPTURE,
     42     REPLACEMENT_SUBSTRING,
     43     REPLACEMENT_STRING,
     44     NUMBER_OF_PART_TYPES
     45   };
     46 
     47   struct ReplacementPart {
     48     static inline ReplacementPart SubjectMatch() {
     49       return ReplacementPart(SUBJECT_CAPTURE, 0);
     50     }
     51     static inline ReplacementPart SubjectCapture(int capture_index) {
     52       return ReplacementPart(SUBJECT_CAPTURE, capture_index);
     53     }
     54     static inline ReplacementPart SubjectPrefix() {
     55       return ReplacementPart(SUBJECT_PREFIX, 0);
     56     }
     57     static inline ReplacementPart SubjectSuffix(int subject_length) {
     58       return ReplacementPart(SUBJECT_SUFFIX, subject_length);
     59     }
     60     static inline ReplacementPart ReplacementString() {
     61       return ReplacementPart(REPLACEMENT_STRING, 0);
     62     }
     63     static inline ReplacementPart ReplacementSubString(int from, int to) {
     64       DCHECK(from >= 0);
     65       DCHECK(to > from);
     66       return ReplacementPart(-from, to);
     67     }
     68 
     69     // If tag <= 0 then it is the negation of a start index of a substring of
     70     // the replacement pattern, otherwise it's a value from PartType.
     71     ReplacementPart(int tag, int data) : tag(tag), data(data) {
     72       // Must be non-positive or a PartType value.
     73       DCHECK(tag < NUMBER_OF_PART_TYPES);
     74     }
     75     // Either a value of PartType or a non-positive number that is
     76     // the negation of an index into the replacement string.
     77     int tag;
     78     // The data value's interpretation depends on the value of tag:
     79     // tag == SUBJECT_PREFIX ||
     80     // tag == SUBJECT_SUFFIX:  data is unused.
     81     // tag == SUBJECT_CAPTURE: data is the number of the capture.
     82     // tag == REPLACEMENT_SUBSTRING ||
     83     // tag == REPLACEMENT_STRING:    data is index into array of substrings
     84     //                               of the replacement string.
     85     // tag <= 0: Temporary representation of the substring of the replacement
     86     //           string ranging over -tag .. data.
     87     //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
     88     //           substring objects.
     89     int data;
     90   };
     91 
     92   template <typename Char>
     93   bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
     94                                Vector<Char> characters, int capture_count,
     95                                int subject_length, Zone* zone) {
     96     int length = characters.length();
     97     int last = 0;
     98     for (int i = 0; i < length; i++) {
     99       Char c = characters[i];
    100       if (c == '$') {
    101         int next_index = i + 1;
    102         if (next_index == length) {  // No next character!
    103           break;
    104         }
    105         Char c2 = characters[next_index];
    106         switch (c2) {
    107           case '$':
    108             if (i > last) {
    109               // There is a substring before. Include the first "$".
    110               parts->Add(
    111                   ReplacementPart::ReplacementSubString(last, next_index),
    112                   zone);
    113               last = next_index + 1;  // Continue after the second "$".
    114             } else {
    115               // Let the next substring start with the second "$".
    116               last = next_index;
    117             }
    118             i = next_index;
    119             break;
    120           case '`':
    121             if (i > last) {
    122               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
    123             }
    124             parts->Add(ReplacementPart::SubjectPrefix(), zone);
    125             i = next_index;
    126             last = i + 1;
    127             break;
    128           case '\'':
    129             if (i > last) {
    130               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
    131             }
    132             parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
    133             i = next_index;
    134             last = i + 1;
    135             break;
    136           case '&':
    137             if (i > last) {
    138               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
    139             }
    140             parts->Add(ReplacementPart::SubjectMatch(), zone);
    141             i = next_index;
    142             last = i + 1;
    143             break;
    144           case '0':
    145           case '1':
    146           case '2':
    147           case '3':
    148           case '4':
    149           case '5':
    150           case '6':
    151           case '7':
    152           case '8':
    153           case '9': {
    154             int capture_ref = c2 - '0';
    155             if (capture_ref > capture_count) {
    156               i = next_index;
    157               continue;
    158             }
    159             int second_digit_index = next_index + 1;
    160             if (second_digit_index < length) {
    161               // Peek ahead to see if we have two digits.
    162               Char c3 = characters[second_digit_index];
    163               if ('0' <= c3 && c3 <= '9') {  // Double digits.
    164                 int double_digit_ref = capture_ref * 10 + c3 - '0';
    165                 if (double_digit_ref <= capture_count) {
    166                   next_index = second_digit_index;
    167                   capture_ref = double_digit_ref;
    168                 }
    169               }
    170             }
    171             if (capture_ref > 0) {
    172               if (i > last) {
    173                 parts->Add(ReplacementPart::ReplacementSubString(last, i),
    174                            zone);
    175               }
    176               DCHECK(capture_ref <= capture_count);
    177               parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
    178               last = next_index + 1;
    179             }
    180             i = next_index;
    181             break;
    182           }
    183           default:
    184             i = next_index;
    185             break;
    186         }
    187       }
    188     }
    189     if (length > last) {
    190       if (last == 0) {
    191         // Replacement is simple.  Do not use Apply to do the replacement.
    192         return true;
    193       } else {
    194         parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
    195       }
    196     }
    197     return false;
    198   }
    199 
    200   ZoneList<ReplacementPart> parts_;
    201   ZoneList<Handle<String> > replacement_substrings_;
    202   Zone* zone_;
    203 };
    204 
    205 
    206 bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
    207                                   int subject_length) {
    208   {
    209     DisallowHeapAllocation no_gc;
    210     String::FlatContent content = replacement->GetFlatContent();
    211     DCHECK(content.IsFlat());
    212     bool simple = false;
    213     if (content.IsOneByte()) {
    214       simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
    215                                        capture_count, subject_length, zone());
    216     } else {
    217       DCHECK(content.IsTwoByte());
    218       simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
    219                                        capture_count, subject_length, zone());
    220     }
    221     if (simple) return true;
    222   }
    223 
    224   Isolate* isolate = replacement->GetIsolate();
    225   // Find substrings of replacement string and create them as String objects.
    226   int substring_index = 0;
    227   for (int i = 0, n = parts_.length(); i < n; i++) {
    228     int tag = parts_[i].tag;
    229     if (tag <= 0) {  // A replacement string slice.
    230       int from = -tag;
    231       int to = parts_[i].data;
    232       replacement_substrings_.Add(
    233           isolate->factory()->NewSubString(replacement, from, to), zone());
    234       parts_[i].tag = REPLACEMENT_SUBSTRING;
    235       parts_[i].data = substring_index;
    236       substring_index++;
    237     } else if (tag == REPLACEMENT_STRING) {
    238       replacement_substrings_.Add(replacement, zone());
    239       parts_[i].data = substring_index;
    240       substring_index++;
    241     }
    242   }
    243   return false;
    244 }
    245 
    246 
    247 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
    248                                 int match_from, int match_to, int32_t* match) {
    249   DCHECK_LT(0, parts_.length());
    250   for (int i = 0, n = parts_.length(); i < n; i++) {
    251     ReplacementPart part = parts_[i];
    252     switch (part.tag) {
    253       case SUBJECT_PREFIX:
    254         if (match_from > 0) builder->AddSubjectSlice(0, match_from);
    255         break;
    256       case SUBJECT_SUFFIX: {
    257         int subject_length = part.data;
    258         if (match_to < subject_length) {
    259           builder->AddSubjectSlice(match_to, subject_length);
    260         }
    261         break;
    262       }
    263       case SUBJECT_CAPTURE: {
    264         int capture = part.data;
    265         int from = match[capture * 2];
    266         int to = match[capture * 2 + 1];
    267         if (from >= 0 && to > from) {
    268           builder->AddSubjectSlice(from, to);
    269         }
    270         break;
    271       }
    272       case REPLACEMENT_SUBSTRING:
    273       case REPLACEMENT_STRING:
    274         builder->AddString(replacement_substrings_[part.data]);
    275         break;
    276       default:
    277         UNREACHABLE();
    278     }
    279   }
    280 }
    281 
    282 
    283 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
    284                               ZoneList<int>* indices, unsigned int limit,
    285                               Zone* zone) {
    286   DCHECK(limit > 0);
    287   // Collect indices of pattern in subject using memchr.
    288   // Stop after finding at most limit values.
    289   const uint8_t* subject_start = subject.start();
    290   const uint8_t* subject_end = subject_start + subject.length();
    291   const uint8_t* pos = subject_start;
    292   while (limit > 0) {
    293     pos = reinterpret_cast<const uint8_t*>(
    294         memchr(pos, pattern, subject_end - pos));
    295     if (pos == NULL) return;
    296     indices->Add(static_cast<int>(pos - subject_start), zone);
    297     pos++;
    298     limit--;
    299   }
    300 }
    301 
    302 
    303 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
    304                               ZoneList<int>* indices, unsigned int limit,
    305                               Zone* zone) {
    306   DCHECK(limit > 0);
    307   const uc16* subject_start = subject.start();
    308   const uc16* subject_end = subject_start + subject.length();
    309   for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
    310     if (*pos == pattern) {
    311       indices->Add(static_cast<int>(pos - subject_start), zone);
    312       limit--;
    313     }
    314   }
    315 }
    316 
    317 
    318 template <typename SubjectChar, typename PatternChar>
    319 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
    320                        Vector<const PatternChar> pattern,
    321                        ZoneList<int>* indices, unsigned int limit, Zone* zone) {
    322   DCHECK(limit > 0);
    323   // Collect indices of pattern in subject.
    324   // Stop after finding at most limit values.
    325   int pattern_length = pattern.length();
    326   int index = 0;
    327   StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
    328   while (limit > 0) {
    329     index = search.Search(subject, index);
    330     if (index < 0) return;
    331     indices->Add(index, zone);
    332     index += pattern_length;
    333     limit--;
    334   }
    335 }
    336 
    337 
    338 void FindStringIndicesDispatch(Isolate* isolate, String* subject,
    339                                String* pattern, ZoneList<int>* indices,
    340                                unsigned int limit, Zone* zone) {
    341   {
    342     DisallowHeapAllocation no_gc;
    343     String::FlatContent subject_content = subject->GetFlatContent();
    344     String::FlatContent pattern_content = pattern->GetFlatContent();
    345     DCHECK(subject_content.IsFlat());
    346     DCHECK(pattern_content.IsFlat());
    347     if (subject_content.IsOneByte()) {
    348       Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
    349       if (pattern_content.IsOneByte()) {
    350         Vector<const uint8_t> pattern_vector =
    351             pattern_content.ToOneByteVector();
    352         if (pattern_vector.length() == 1) {
    353           FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
    354                                    limit, zone);
    355         } else {
    356           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
    357                             limit, zone);
    358         }
    359       } else {
    360         FindStringIndices(isolate, subject_vector,
    361                           pattern_content.ToUC16Vector(), indices, limit, zone);
    362       }
    363     } else {
    364       Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
    365       if (pattern_content.IsOneByte()) {
    366         Vector<const uint8_t> pattern_vector =
    367             pattern_content.ToOneByteVector();
    368         if (pattern_vector.length() == 1) {
    369           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
    370                                    limit, zone);
    371         } else {
    372           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
    373                             limit, zone);
    374         }
    375       } else {
    376         Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
    377         if (pattern_vector.length() == 1) {
    378           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
    379                                    limit, zone);
    380         } else {
    381           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
    382                             limit, zone);
    383         }
    384       }
    385     }
    386   }
    387 }
    388 
    389 
    390 template <typename ResultSeqString>
    391 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
    392     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
    393     Handle<String> replacement, Handle<JSArray> last_match_info) {
    394   DCHECK(subject->IsFlat());
    395   DCHECK(replacement->IsFlat());
    396 
    397   ZoneScope zone_scope(isolate->runtime_zone());
    398   ZoneList<int> indices(8, zone_scope.zone());
    399   DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
    400   String* pattern =
    401       String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
    402   int subject_len = subject->length();
    403   int pattern_len = pattern->length();
    404   int replacement_len = replacement->length();
    405 
    406   FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
    407                             zone_scope.zone());
    408 
    409   int matches = indices.length();
    410   if (matches == 0) return *subject;
    411 
    412   // Detect integer overflow.
    413   int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
    414                            static_cast<int64_t>(pattern_len)) *
    415                               static_cast<int64_t>(matches) +
    416                           static_cast<int64_t>(subject_len);
    417   int result_len;
    418   if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
    419     STATIC_ASSERT(String::kMaxLength < kMaxInt);
    420     result_len = kMaxInt;  // Provoke exception.
    421   } else {
    422     result_len = static_cast<int>(result_len_64);
    423   }
    424 
    425   int subject_pos = 0;
    426   int result_pos = 0;
    427 
    428   MaybeHandle<SeqString> maybe_res;
    429   if (ResultSeqString::kHasOneByteEncoding) {
    430     maybe_res = isolate->factory()->NewRawOneByteString(result_len);
    431   } else {
    432     maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
    433   }
    434   Handle<SeqString> untyped_res;
    435   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
    436   Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
    437 
    438   for (int i = 0; i < matches; i++) {
    439     // Copy non-matched subject content.
    440     if (subject_pos < indices.at(i)) {
    441       String::WriteToFlat(*subject, result->GetChars() + result_pos,
    442                           subject_pos, indices.at(i));
    443       result_pos += indices.at(i) - subject_pos;
    444     }
    445 
    446     // Replace match.
    447     if (replacement_len > 0) {
    448       String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
    449                           replacement_len);
    450       result_pos += replacement_len;
    451     }
    452 
    453     subject_pos = indices.at(i) + pattern_len;
    454   }
    455   // Add remaining subject content at the end.
    456   if (subject_pos < subject_len) {
    457     String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
    458                         subject_len);
    459   }
    460 
    461   int32_t match_indices[] = {indices.at(matches - 1),
    462                              indices.at(matches - 1) + pattern_len};
    463   RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
    464 
    465   return *result;
    466 }
    467 
    468 
    469 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
    470     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
    471     Handle<String> replacement, Handle<JSArray> last_match_info) {
    472   DCHECK(subject->IsFlat());
    473   DCHECK(replacement->IsFlat());
    474 
    475   int capture_count = regexp->CaptureCount();
    476   int subject_length = subject->length();
    477 
    478   // CompiledReplacement uses zone allocation.
    479   ZoneScope zone_scope(isolate->runtime_zone());
    480   CompiledReplacement compiled_replacement(zone_scope.zone());
    481   bool simple_replace =
    482       compiled_replacement.Compile(replacement, capture_count, subject_length);
    483 
    484   // Shortcut for simple non-regexp global replacements
    485   if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
    486     if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
    487       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
    488           isolate, subject, regexp, replacement, last_match_info);
    489     } else {
    490       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
    491           isolate, subject, regexp, replacement, last_match_info);
    492     }
    493   }
    494 
    495   RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
    496   if (global_cache.HasException()) return isolate->heap()->exception();
    497 
    498   int32_t* current_match = global_cache.FetchNext();
    499   if (current_match == NULL) {
    500     if (global_cache.HasException()) return isolate->heap()->exception();
    501     return *subject;
    502   }
    503 
    504   // Guessing the number of parts that the final result string is built
    505   // from. Global regexps can match any number of times, so we guess
    506   // conservatively.
    507   int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
    508   ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
    509 
    510   // Number of parts added by compiled replacement plus preceeding
    511   // string and possibly suffix after last match.  It is possible for
    512   // all components to use two elements when encoded as two smis.
    513   const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
    514 
    515   int prev = 0;
    516 
    517   do {
    518     builder.EnsureCapacity(parts_added_per_loop);
    519 
    520     int start = current_match[0];
    521     int end = current_match[1];
    522 
    523     if (prev < start) {
    524       builder.AddSubjectSlice(prev, start);
    525     }
    526 
    527     if (simple_replace) {
    528       builder.AddString(replacement);
    529     } else {
    530       compiled_replacement.Apply(&builder, start, end, current_match);
    531     }
    532     prev = end;
    533 
    534     current_match = global_cache.FetchNext();
    535   } while (current_match != NULL);
    536 
    537   if (global_cache.HasException()) return isolate->heap()->exception();
    538 
    539   if (prev < subject_length) {
    540     builder.EnsureCapacity(2);
    541     builder.AddSubjectSlice(prev, subject_length);
    542   }
    543 
    544   RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
    545                                global_cache.LastSuccessfulMatch());
    546 
    547   RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
    548 }
    549 
    550 
    551 template <typename ResultSeqString>
    552 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
    553     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
    554     Handle<JSArray> last_match_info) {
    555   DCHECK(subject->IsFlat());
    556 
    557   // Shortcut for simple non-regexp global replacements
    558   if (regexp->TypeTag() == JSRegExp::ATOM) {
    559     Handle<String> empty_string = isolate->factory()->empty_string();
    560     if (subject->IsOneByteRepresentation()) {
    561       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
    562           isolate, subject, regexp, empty_string, last_match_info);
    563     } else {
    564       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
    565           isolate, subject, regexp, empty_string, last_match_info);
    566     }
    567   }
    568 
    569   RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
    570   if (global_cache.HasException()) return isolate->heap()->exception();
    571 
    572   int32_t* current_match = global_cache.FetchNext();
    573   if (current_match == NULL) {
    574     if (global_cache.HasException()) return isolate->heap()->exception();
    575     return *subject;
    576   }
    577 
    578   int start = current_match[0];
    579   int end = current_match[1];
    580   int capture_count = regexp->CaptureCount();
    581   int subject_length = subject->length();
    582 
    583   int new_length = subject_length - (end - start);
    584   if (new_length == 0) return isolate->heap()->empty_string();
    585 
    586   Handle<ResultSeqString> answer;
    587   if (ResultSeqString::kHasOneByteEncoding) {
    588     answer = Handle<ResultSeqString>::cast(
    589         isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
    590   } else {
    591     answer = Handle<ResultSeqString>::cast(
    592         isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
    593   }
    594 
    595   int prev = 0;
    596   int position = 0;
    597 
    598   do {
    599     start = current_match[0];
    600     end = current_match[1];
    601     if (prev < start) {
    602       // Add substring subject[prev;start] to answer string.
    603       String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
    604       position += start - prev;
    605     }
    606     prev = end;
    607 
    608     current_match = global_cache.FetchNext();
    609   } while (current_match != NULL);
    610 
    611   if (global_cache.HasException()) return isolate->heap()->exception();
    612 
    613   RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
    614                                global_cache.LastSuccessfulMatch());
    615 
    616   if (prev < subject_length) {
    617     // Add substring subject[prev;length] to answer string.
    618     String::WriteToFlat(*subject, answer->GetChars() + position, prev,
    619                         subject_length);
    620     position += subject_length - prev;
    621   }
    622 
    623   if (position == 0) return isolate->heap()->empty_string();
    624 
    625   // Shorten string and fill
    626   int string_size = ResultSeqString::SizeFor(position);
    627   int allocated_string_size = ResultSeqString::SizeFor(new_length);
    628   int delta = allocated_string_size - string_size;
    629 
    630   answer->set_length(position);
    631   if (delta == 0) return *answer;
    632 
    633   Address end_of_string = answer->address() + string_size;
    634   Heap* heap = isolate->heap();
    635 
    636   // The trimming is performed on a newly allocated object, which is on a
    637   // fresly allocated page or on an already swept page. Hence, the sweeper
    638   // thread can not get confused with the filler creation. No synchronization
    639   // needed.
    640   // TODO(hpayer): We should shrink the large object page if the size
    641   // of the object changed significantly.
    642   if (!heap->lo_space()->Contains(*answer)) {
    643     heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
    644   }
    645   heap->AdjustLiveBytes(*answer, -delta, Heap::CONCURRENT_TO_SWEEPER);
    646   return *answer;
    647 }
    648 
    649 
    650 RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
    651   HandleScope scope(isolate);
    652   DCHECK(args.length() == 4);
    653 
    654   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
    655   CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
    656   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
    657   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
    658 
    659   CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
    660   CHECK(last_match_info->HasFastObjectElements());
    661 
    662   subject = String::Flatten(subject);
    663 
    664   if (replacement->length() == 0) {
    665     if (subject->HasOnlyOneByteChars()) {
    666       return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
    667           isolate, subject, regexp, last_match_info);
    668     } else {
    669       return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
    670           isolate, subject, regexp, last_match_info);
    671     }
    672   }
    673 
    674   replacement = String::Flatten(replacement);
    675 
    676   return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
    677                                              replacement, last_match_info);
    678 }
    679 
    680 
    681 RUNTIME_FUNCTION(Runtime_StringSplit) {
    682   HandleScope handle_scope(isolate);
    683   DCHECK(args.length() == 3);
    684   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
    685   CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
    686   CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
    687   CHECK(limit > 0);
    688 
    689   int subject_length = subject->length();
    690   int pattern_length = pattern->length();
    691   CHECK(pattern_length > 0);
    692 
    693   if (limit == 0xffffffffu) {
    694     FixedArray* last_match_cache_unused;
    695     Handle<Object> cached_answer(
    696         RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
    697                                    &last_match_cache_unused,
    698                                    RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
    699         isolate);
    700     if (*cached_answer != Smi::FromInt(0)) {
    701       // The cache FixedArray is a COW-array and can therefore be reused.
    702       Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
    703           Handle<FixedArray>::cast(cached_answer));
    704       return *result;
    705     }
    706   }
    707 
    708   // The limit can be very large (0xffffffffu), but since the pattern
    709   // isn't empty, we can never create more parts than ~half the length
    710   // of the subject.
    711 
    712   subject = String::Flatten(subject);
    713   pattern = String::Flatten(pattern);
    714 
    715   static const int kMaxInitialListCapacity = 16;
    716 
    717   ZoneScope zone_scope(isolate->runtime_zone());
    718 
    719   // Find (up to limit) indices of separator and end-of-string in subject
    720   int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
    721   ZoneList<int> indices(initial_capacity, zone_scope.zone());
    722 
    723   FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit,
    724                             zone_scope.zone());
    725 
    726   if (static_cast<uint32_t>(indices.length()) < limit) {
    727     indices.Add(subject_length, zone_scope.zone());
    728   }
    729 
    730   // The list indices now contains the end of each part to create.
    731 
    732   // Create JSArray of substrings separated by separator.
    733   int part_count = indices.length();
    734 
    735   Handle<JSArray> result =
    736       isolate->factory()->NewJSArray(FAST_ELEMENTS, part_count, part_count,
    737                                      INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
    738 
    739   DCHECK(result->HasFastObjectElements());
    740 
    741   Handle<FixedArray> elements(FixedArray::cast(result->elements()));
    742 
    743   if (part_count == 1 && indices.at(0) == subject_length) {
    744     elements->set(0, *subject);
    745   } else {
    746     int part_start = 0;
    747     FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
    748       int part_end = indices.at(i);
    749       Handle<String> substring =
    750           isolate->factory()->NewProperSubString(subject, part_start, part_end);
    751       elements->set(i, *substring);
    752       part_start = part_end + pattern_length;
    753     });
    754   }
    755 
    756   if (limit == 0xffffffffu) {
    757     if (result->HasFastObjectElements()) {
    758       RegExpResultsCache::Enter(isolate, subject, pattern, elements,
    759                                 isolate->factory()->empty_fixed_array(),
    760                                 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
    761     }
    762   }
    763 
    764   return *result;
    765 }
    766 
    767 
    768 RUNTIME_FUNCTION(Runtime_RegExpExec) {
    769   HandleScope scope(isolate);
    770   DCHECK(args.length() == 4);
    771   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
    772   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
    773   CONVERT_INT32_ARG_CHECKED(index, 2);
    774   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
    775   // Due to the way the JS calls are constructed this must be less than the
    776   // length of a string, i.e. it is always a Smi.  We check anyway for security.
    777   CHECK(index >= 0);
    778   CHECK(index <= subject->length());
    779   isolate->counters()->regexp_entry_runtime()->Increment();
    780   RETURN_RESULT_OR_FAILURE(
    781       isolate, RegExpImpl::Exec(regexp, subject, index, last_match_info));
    782 }
    783 
    784 
    785 RUNTIME_FUNCTION(Runtime_RegExpFlags) {
    786   SealHandleScope shs(isolate);
    787   DCHECK(args.length() == 1);
    788   CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
    789   return regexp->flags();
    790 }
    791 
    792 
    793 RUNTIME_FUNCTION(Runtime_RegExpSource) {
    794   SealHandleScope shs(isolate);
    795   DCHECK(args.length() == 1);
    796   CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
    797   return regexp->source();
    798 }
    799 
    800 
    801 RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
    802   HandleScope handle_scope(isolate);
    803   DCHECK(args.length() == 3);
    804   CONVERT_SMI_ARG_CHECKED(size, 0);
    805   CHECK(size >= 0 && size <= FixedArray::kMaxLength);
    806   CONVERT_ARG_HANDLE_CHECKED(Object, index, 1);
    807   CONVERT_ARG_HANDLE_CHECKED(Object, input, 2);
    808   Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);
    809   Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());
    810   Handle<JSObject> object =
    811       isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED);
    812   Handle<JSArray> array = Handle<JSArray>::cast(object);
    813   array->set_elements(*elements);
    814   array->set_length(Smi::FromInt(size));
    815   // Write in-object properties after the length of the array.
    816   array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index);
    817   array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
    818   return *array;
    819 }
    820 
    821 
    822 RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
    823   HandleScope scope(isolate);
    824   DCHECK(args.length() == 3);
    825   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
    826   CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
    827   CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
    828 
    829   RETURN_FAILURE_ON_EXCEPTION(isolate,
    830                               JSRegExp::Initialize(regexp, source, flags));
    831 
    832   return *regexp;
    833 }
    834 
    835 
    836 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
    837 // separate last match info.  See comment on that function.
    838 template <bool has_capture>
    839 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
    840                                     Handle<JSRegExp> regexp,
    841                                     Handle<JSArray> last_match_array,
    842                                     Handle<JSArray> result_array) {
    843   DCHECK(subject->IsFlat());
    844   DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
    845 
    846   int capture_count = regexp->CaptureCount();
    847   int subject_length = subject->length();
    848 
    849   static const int kMinLengthToCache = 0x1000;
    850 
    851   if (subject_length > kMinLengthToCache) {
    852     FixedArray* last_match_cache;
    853     Object* cached_answer = RegExpResultsCache::Lookup(
    854         isolate->heap(), *subject, regexp->data(), &last_match_cache,
    855         RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    856     if (cached_answer->IsFixedArray()) {
    857       int capture_registers = (capture_count + 1) * 2;
    858       int32_t* last_match = NewArray<int32_t>(capture_registers);
    859       for (int i = 0; i < capture_registers; i++) {
    860         last_match[i] = Smi::cast(last_match_cache->get(i))->value();
    861       }
    862       Handle<FixedArray> cached_fixed_array =
    863           Handle<FixedArray>(FixedArray::cast(cached_answer));
    864       // The cache FixedArray is a COW-array and can therefore be reused.
    865       JSArray::SetContent(result_array, cached_fixed_array);
    866       RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
    867                                    last_match);
    868       DeleteArray(last_match);
    869       return *result_array;
    870     }
    871   }
    872 
    873   RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
    874   if (global_cache.HasException()) return isolate->heap()->exception();
    875 
    876   // Ensured in Runtime_RegExpExecMultiple.
    877   DCHECK(result_array->HasFastObjectElements());
    878   Handle<FixedArray> result_elements(
    879       FixedArray::cast(result_array->elements()));
    880   if (result_elements->length() < 16) {
    881     result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
    882   }
    883 
    884   FixedArrayBuilder builder(result_elements);
    885 
    886   // Position to search from.
    887   int match_start = -1;
    888   int match_end = 0;
    889   bool first = true;
    890 
    891   // Two smis before and after the match, for very long strings.
    892   static const int kMaxBuilderEntriesPerRegExpMatch = 5;
    893 
    894   while (true) {
    895     int32_t* current_match = global_cache.FetchNext();
    896     if (current_match == NULL) break;
    897     match_start = current_match[0];
    898     builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
    899     if (match_end < match_start) {
    900       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
    901                                                 match_start);
    902     }
    903     match_end = current_match[1];
    904     {
    905       // Avoid accumulating new handles inside loop.
    906       HandleScope temp_scope(isolate);
    907       Handle<String> match;
    908       if (!first) {
    909         match = isolate->factory()->NewProperSubString(subject, match_start,
    910                                                        match_end);
    911       } else {
    912         match =
    913             isolate->factory()->NewSubString(subject, match_start, match_end);
    914         first = false;
    915       }
    916 
    917       if (has_capture) {
    918         // Arguments array to replace function is match, captures, index and
    919         // subject, i.e., 3 + capture count in total.
    920         Handle<FixedArray> elements =
    921             isolate->factory()->NewFixedArray(3 + capture_count);
    922 
    923         elements->set(0, *match);
    924         for (int i = 1; i <= capture_count; i++) {
    925           int start = current_match[i * 2];
    926           if (start >= 0) {
    927             int end = current_match[i * 2 + 1];
    928             DCHECK(start <= end);
    929             Handle<String> substring =
    930                 isolate->factory()->NewSubString(subject, start, end);
    931             elements->set(i, *substring);
    932           } else {
    933             DCHECK(current_match[i * 2 + 1] < 0);
    934             elements->set(i, isolate->heap()->undefined_value());
    935           }
    936         }
    937         elements->set(capture_count + 1, Smi::FromInt(match_start));
    938         elements->set(capture_count + 2, *subject);
    939         builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
    940       } else {
    941         builder.Add(*match);
    942       }
    943     }
    944   }
    945 
    946   if (global_cache.HasException()) return isolate->heap()->exception();
    947 
    948   if (match_start >= 0) {
    949     // Finished matching, with at least one match.
    950     if (match_end < subject_length) {
    951       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
    952                                                 subject_length);
    953     }
    954 
    955     RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
    956                                  global_cache.LastSuccessfulMatch());
    957 
    958     if (subject_length > kMinLengthToCache) {
    959       // Store the last successful match into the array for caching.
    960       // TODO(yangguo): do not expose last match to JS and simplify caching.
    961       int capture_registers = (capture_count + 1) * 2;
    962       Handle<FixedArray> last_match_cache =
    963           isolate->factory()->NewFixedArray(capture_registers);
    964       int32_t* last_match = global_cache.LastSuccessfulMatch();
    965       for (int i = 0; i < capture_registers; i++) {
    966         last_match_cache->set(i, Smi::FromInt(last_match[i]));
    967       }
    968       Handle<FixedArray> result_fixed_array = builder.array();
    969       result_fixed_array->Shrink(builder.length());
    970       // Cache the result and turn the FixedArray into a COW array.
    971       RegExpResultsCache::Enter(
    972           isolate, subject, handle(regexp->data(), isolate), result_fixed_array,
    973           last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    974     }
    975     return *builder.ToJSArray(result_array);
    976   } else {
    977     return isolate->heap()->null_value();  // No matches at all.
    978   }
    979 }
    980 
    981 
    982 // This is only called for StringReplaceGlobalRegExpWithFunction.  This sets
    983 // lastMatchInfoOverride to maintain the last match info, so we don't need to
    984 // set any other last match array info.
    985 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
    986   HandleScope handles(isolate);
    987   DCHECK(args.length() == 4);
    988 
    989   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
    990   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
    991   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
    992   CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
    993   CHECK(last_match_info->HasFastObjectElements());
    994   CHECK(result_array->HasFastObjectElements());
    995 
    996   subject = String::Flatten(subject);
    997   CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
    998 
    999   if (regexp->CaptureCount() == 0) {
   1000     return SearchRegExpMultiple<false>(isolate, subject, regexp,
   1001                                        last_match_info, result_array);
   1002   } else {
   1003     return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
   1004                                       result_array);
   1005   }
   1006 }
   1007 
   1008 
   1009 RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
   1010   SealHandleScope shs(isolate);
   1011   DCHECK(args.length() == 4);
   1012   Object* exception = isolate->pending_exception();
   1013   isolate->clear_pending_exception();
   1014   return isolate->ReThrow(exception);
   1015 }
   1016 
   1017 
   1018 RUNTIME_FUNCTION(Runtime_IsRegExp) {
   1019   SealHandleScope shs(isolate);
   1020   DCHECK(args.length() == 1);
   1021   CONVERT_ARG_CHECKED(Object, obj, 0);
   1022   return isolate->heap()->ToBoolean(obj->IsJSRegExp());
   1023 }
   1024 }  // namespace internal
   1025 }  // namespace v8
   1026