Home | History | Annotate | Download | only in runtime
      1 // Copyright 2014 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "src/runtime/runtime-utils.h"
      6 
      7 #include "src/arguments.h"
      8 #include "src/conversions-inl.h"
      9 #include "src/isolate-inl.h"
     10 #include "src/messages.h"
     11 #include "src/regexp/jsregexp-inl.h"
     12 #include "src/regexp/jsregexp.h"
     13 #include "src/string-builder.h"
     14 #include "src/string-search.h"
     15 
     16 namespace v8 {
     17 namespace internal {
     18 
     19 class CompiledReplacement {
     20  public:
     21   explicit CompiledReplacement(Zone* zone)
     22       : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
     23 
     24   // Return whether the replacement is simple.
     25   bool Compile(Handle<String> replacement, int capture_count,
     26                int subject_length);
     27 
     28   // Use Apply only if Compile returned false.
     29   void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
     30              int32_t* match);
     31 
     32   // Number of distinct parts of the replacement pattern.
     33   int parts() { return parts_.length(); }
     34 
     35   Zone* zone() const { return zone_; }
     36 
     37  private:
     38   enum PartType {
     39     SUBJECT_PREFIX = 1,
     40     SUBJECT_SUFFIX,
     41     SUBJECT_CAPTURE,
     42     REPLACEMENT_SUBSTRING,
     43     REPLACEMENT_STRING,
     44     NUMBER_OF_PART_TYPES
     45   };
     46 
     47   struct ReplacementPart {
     48     static inline ReplacementPart SubjectMatch() {
     49       return ReplacementPart(SUBJECT_CAPTURE, 0);
     50     }
     51     static inline ReplacementPart SubjectCapture(int capture_index) {
     52       return ReplacementPart(SUBJECT_CAPTURE, capture_index);
     53     }
     54     static inline ReplacementPart SubjectPrefix() {
     55       return ReplacementPart(SUBJECT_PREFIX, 0);
     56     }
     57     static inline ReplacementPart SubjectSuffix(int subject_length) {
     58       return ReplacementPart(SUBJECT_SUFFIX, subject_length);
     59     }
     60     static inline ReplacementPart ReplacementString() {
     61       return ReplacementPart(REPLACEMENT_STRING, 0);
     62     }
     63     static inline ReplacementPart ReplacementSubString(int from, int to) {
     64       DCHECK(from >= 0);
     65       DCHECK(to > from);
     66       return ReplacementPart(-from, to);
     67     }
     68 
     69     // If tag <= 0 then it is the negation of a start index of a substring of
     70     // the replacement pattern, otherwise it's a value from PartType.
     71     ReplacementPart(int tag, int data) : tag(tag), data(data) {
     72       // Must be non-positive or a PartType value.
     73       DCHECK(tag < NUMBER_OF_PART_TYPES);
     74     }
     75     // Either a value of PartType or a non-positive number that is
     76     // the negation of an index into the replacement string.
     77     int tag;
     78     // The data value's interpretation depends on the value of tag:
     79     // tag == SUBJECT_PREFIX ||
     80     // tag == SUBJECT_SUFFIX:  data is unused.
     81     // tag == SUBJECT_CAPTURE: data is the number of the capture.
     82     // tag == REPLACEMENT_SUBSTRING ||
     83     // tag == REPLACEMENT_STRING:    data is index into array of substrings
     84     //                               of the replacement string.
     85     // tag <= 0: Temporary representation of the substring of the replacement
     86     //           string ranging over -tag .. data.
     87     //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
     88     //           substring objects.
     89     int data;
     90   };
     91 
     92   template <typename Char>
     93   bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
     94                                Vector<Char> characters, int capture_count,
     95                                int subject_length, Zone* zone) {
     96     int length = characters.length();
     97     int last = 0;
     98     for (int i = 0; i < length; i++) {
     99       Char c = characters[i];
    100       if (c == '$') {
    101         int next_index = i + 1;
    102         if (next_index == length) {  // No next character!
    103           break;
    104         }
    105         Char c2 = characters[next_index];
    106         switch (c2) {
    107           case '$':
    108             if (i > last) {
    109               // There is a substring before. Include the first "$".
    110               parts->Add(
    111                   ReplacementPart::ReplacementSubString(last, next_index),
    112                   zone);
    113               last = next_index + 1;  // Continue after the second "$".
    114             } else {
    115               // Let the next substring start with the second "$".
    116               last = next_index;
    117             }
    118             i = next_index;
    119             break;
    120           case '`':
    121             if (i > last) {
    122               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
    123             }
    124             parts->Add(ReplacementPart::SubjectPrefix(), zone);
    125             i = next_index;
    126             last = i + 1;
    127             break;
    128           case '\'':
    129             if (i > last) {
    130               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
    131             }
    132             parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
    133             i = next_index;
    134             last = i + 1;
    135             break;
    136           case '&':
    137             if (i > last) {
    138               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
    139             }
    140             parts->Add(ReplacementPart::SubjectMatch(), zone);
    141             i = next_index;
    142             last = i + 1;
    143             break;
    144           case '0':
    145           case '1':
    146           case '2':
    147           case '3':
    148           case '4':
    149           case '5':
    150           case '6':
    151           case '7':
    152           case '8':
    153           case '9': {
    154             int capture_ref = c2 - '0';
    155             if (capture_ref > capture_count) {
    156               i = next_index;
    157               continue;
    158             }
    159             int second_digit_index = next_index + 1;
    160             if (second_digit_index < length) {
    161               // Peek ahead to see if we have two digits.
    162               Char c3 = characters[second_digit_index];
    163               if ('0' <= c3 && c3 <= '9') {  // Double digits.
    164                 int double_digit_ref = capture_ref * 10 + c3 - '0';
    165                 if (double_digit_ref <= capture_count) {
    166                   next_index = second_digit_index;
    167                   capture_ref = double_digit_ref;
    168                 }
    169               }
    170             }
    171             if (capture_ref > 0) {
    172               if (i > last) {
    173                 parts->Add(ReplacementPart::ReplacementSubString(last, i),
    174                            zone);
    175               }
    176               DCHECK(capture_ref <= capture_count);
    177               parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
    178               last = next_index + 1;
    179             }
    180             i = next_index;
    181             break;
    182           }
    183           default:
    184             i = next_index;
    185             break;
    186         }
    187       }
    188     }
    189     if (length > last) {
    190       if (last == 0) {
    191         // Replacement is simple.  Do not use Apply to do the replacement.
    192         return true;
    193       } else {
    194         parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
    195       }
    196     }
    197     return false;
    198   }
    199 
    200   ZoneList<ReplacementPart> parts_;
    201   ZoneList<Handle<String> > replacement_substrings_;
    202   Zone* zone_;
    203 };
    204 
    205 
    206 bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
    207                                   int subject_length) {
    208   {
    209     DisallowHeapAllocation no_gc;
    210     String::FlatContent content = replacement->GetFlatContent();
    211     DCHECK(content.IsFlat());
    212     bool simple = false;
    213     if (content.IsOneByte()) {
    214       simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
    215                                        capture_count, subject_length, zone());
    216     } else {
    217       DCHECK(content.IsTwoByte());
    218       simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
    219                                        capture_count, subject_length, zone());
    220     }
    221     if (simple) return true;
    222   }
    223 
    224   Isolate* isolate = replacement->GetIsolate();
    225   // Find substrings of replacement string and create them as String objects.
    226   int substring_index = 0;
    227   for (int i = 0, n = parts_.length(); i < n; i++) {
    228     int tag = parts_[i].tag;
    229     if (tag <= 0) {  // A replacement string slice.
    230       int from = -tag;
    231       int to = parts_[i].data;
    232       replacement_substrings_.Add(
    233           isolate->factory()->NewSubString(replacement, from, to), zone());
    234       parts_[i].tag = REPLACEMENT_SUBSTRING;
    235       parts_[i].data = substring_index;
    236       substring_index++;
    237     } else if (tag == REPLACEMENT_STRING) {
    238       replacement_substrings_.Add(replacement, zone());
    239       parts_[i].data = substring_index;
    240       substring_index++;
    241     }
    242   }
    243   return false;
    244 }
    245 
    246 
    247 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
    248                                 int match_from, int match_to, int32_t* match) {
    249   DCHECK_LT(0, parts_.length());
    250   for (int i = 0, n = parts_.length(); i < n; i++) {
    251     ReplacementPart part = parts_[i];
    252     switch (part.tag) {
    253       case SUBJECT_PREFIX:
    254         if (match_from > 0) builder->AddSubjectSlice(0, match_from);
    255         break;
    256       case SUBJECT_SUFFIX: {
    257         int subject_length = part.data;
    258         if (match_to < subject_length) {
    259           builder->AddSubjectSlice(match_to, subject_length);
    260         }
    261         break;
    262       }
    263       case SUBJECT_CAPTURE: {
    264         int capture = part.data;
    265         int from = match[capture * 2];
    266         int to = match[capture * 2 + 1];
    267         if (from >= 0 && to > from) {
    268           builder->AddSubjectSlice(from, to);
    269         }
    270         break;
    271       }
    272       case REPLACEMENT_SUBSTRING:
    273       case REPLACEMENT_STRING:
    274         builder->AddString(replacement_substrings_[part.data]);
    275         break;
    276       default:
    277         UNREACHABLE();
    278     }
    279   }
    280 }
    281 
    282 
    283 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
    284                               ZoneList<int>* indices, unsigned int limit,
    285                               Zone* zone) {
    286   DCHECK(limit > 0);
    287   // Collect indices of pattern in subject using memchr.
    288   // Stop after finding at most limit values.
    289   const uint8_t* subject_start = subject.start();
    290   const uint8_t* subject_end = subject_start + subject.length();
    291   const uint8_t* pos = subject_start;
    292   while (limit > 0) {
    293     pos = reinterpret_cast<const uint8_t*>(
    294         memchr(pos, pattern, subject_end - pos));
    295     if (pos == NULL) return;
    296     indices->Add(static_cast<int>(pos - subject_start), zone);
    297     pos++;
    298     limit--;
    299   }
    300 }
    301 
    302 
    303 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
    304                               ZoneList<int>* indices, unsigned int limit,
    305                               Zone* zone) {
    306   DCHECK(limit > 0);
    307   const uc16* subject_start = subject.start();
    308   const uc16* subject_end = subject_start + subject.length();
    309   for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
    310     if (*pos == pattern) {
    311       indices->Add(static_cast<int>(pos - subject_start), zone);
    312       limit--;
    313     }
    314   }
    315 }
    316 
    317 
    318 template <typename SubjectChar, typename PatternChar>
    319 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
    320                        Vector<const PatternChar> pattern,
    321                        ZoneList<int>* indices, unsigned int limit, Zone* zone) {
    322   DCHECK(limit > 0);
    323   // Collect indices of pattern in subject.
    324   // Stop after finding at most limit values.
    325   int pattern_length = pattern.length();
    326   int index = 0;
    327   StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
    328   while (limit > 0) {
    329     index = search.Search(subject, index);
    330     if (index < 0) return;
    331     indices->Add(index, zone);
    332     index += pattern_length;
    333     limit--;
    334   }
    335 }
    336 
    337 
    338 void FindStringIndicesDispatch(Isolate* isolate, String* subject,
    339                                String* pattern, ZoneList<int>* indices,
    340                                unsigned int limit, Zone* zone) {
    341   {
    342     DisallowHeapAllocation no_gc;
    343     String::FlatContent subject_content = subject->GetFlatContent();
    344     String::FlatContent pattern_content = pattern->GetFlatContent();
    345     DCHECK(subject_content.IsFlat());
    346     DCHECK(pattern_content.IsFlat());
    347     if (subject_content.IsOneByte()) {
    348       Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
    349       if (pattern_content.IsOneByte()) {
    350         Vector<const uint8_t> pattern_vector =
    351             pattern_content.ToOneByteVector();
    352         if (pattern_vector.length() == 1) {
    353           FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
    354                                    limit, zone);
    355         } else {
    356           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
    357                             limit, zone);
    358         }
    359       } else {
    360         FindStringIndices(isolate, subject_vector,
    361                           pattern_content.ToUC16Vector(), indices, limit, zone);
    362       }
    363     } else {
    364       Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
    365       if (pattern_content.IsOneByte()) {
    366         Vector<const uint8_t> pattern_vector =
    367             pattern_content.ToOneByteVector();
    368         if (pattern_vector.length() == 1) {
    369           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
    370                                    limit, zone);
    371         } else {
    372           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
    373                             limit, zone);
    374         }
    375       } else {
    376         Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
    377         if (pattern_vector.length() == 1) {
    378           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
    379                                    limit, zone);
    380         } else {
    381           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
    382                             limit, zone);
    383         }
    384       }
    385     }
    386   }
    387 }
    388 
    389 
    390 template <typename ResultSeqString>
    391 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
    392     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
    393     Handle<String> replacement, Handle<JSArray> last_match_info) {
    394   DCHECK(subject->IsFlat());
    395   DCHECK(replacement->IsFlat());
    396 
    397   ZoneScope zone_scope(isolate->runtime_zone());
    398   ZoneList<int> indices(8, zone_scope.zone());
    399   DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
    400   String* pattern =
    401       String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
    402   int subject_len = subject->length();
    403   int pattern_len = pattern->length();
    404   int replacement_len = replacement->length();
    405 
    406   FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
    407                             zone_scope.zone());
    408 
    409   int matches = indices.length();
    410   if (matches == 0) return *subject;
    411 
    412   // Detect integer overflow.
    413   int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
    414                            static_cast<int64_t>(pattern_len)) *
    415                               static_cast<int64_t>(matches) +
    416                           static_cast<int64_t>(subject_len);
    417   int result_len;
    418   if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
    419     STATIC_ASSERT(String::kMaxLength < kMaxInt);
    420     result_len = kMaxInt;  // Provoke exception.
    421   } else {
    422     result_len = static_cast<int>(result_len_64);
    423   }
    424 
    425   int subject_pos = 0;
    426   int result_pos = 0;
    427 
    428   MaybeHandle<SeqString> maybe_res;
    429   if (ResultSeqString::kHasOneByteEncoding) {
    430     maybe_res = isolate->factory()->NewRawOneByteString(result_len);
    431   } else {
    432     maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
    433   }
    434   Handle<SeqString> untyped_res;
    435   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
    436   Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
    437 
    438   for (int i = 0; i < matches; i++) {
    439     // Copy non-matched subject content.
    440     if (subject_pos < indices.at(i)) {
    441       String::WriteToFlat(*subject, result->GetChars() + result_pos,
    442                           subject_pos, indices.at(i));
    443       result_pos += indices.at(i) - subject_pos;
    444     }
    445 
    446     // Replace match.
    447     if (replacement_len > 0) {
    448       String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
    449                           replacement_len);
    450       result_pos += replacement_len;
    451     }
    452 
    453     subject_pos = indices.at(i) + pattern_len;
    454   }
    455   // Add remaining subject content at the end.
    456   if (subject_pos < subject_len) {
    457     String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
    458                         subject_len);
    459   }
    460 
    461   int32_t match_indices[] = {indices.at(matches - 1),
    462                              indices.at(matches - 1) + pattern_len};
    463   RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
    464 
    465   return *result;
    466 }
    467 
    468 
    469 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
    470     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
    471     Handle<String> replacement, Handle<JSArray> last_match_info) {
    472   DCHECK(subject->IsFlat());
    473   DCHECK(replacement->IsFlat());
    474 
    475   int capture_count = regexp->CaptureCount();
    476   int subject_length = subject->length();
    477 
    478   // CompiledReplacement uses zone allocation.
    479   ZoneScope zone_scope(isolate->runtime_zone());
    480   CompiledReplacement compiled_replacement(zone_scope.zone());
    481   bool simple_replace =
    482       compiled_replacement.Compile(replacement, capture_count, subject_length);
    483 
    484   // Shortcut for simple non-regexp global replacements
    485   if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
    486     if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
    487       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
    488           isolate, subject, regexp, replacement, last_match_info);
    489     } else {
    490       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
    491           isolate, subject, regexp, replacement, last_match_info);
    492     }
    493   }
    494 
    495   RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
    496   if (global_cache.HasException()) return isolate->heap()->exception();
    497 
    498   int32_t* current_match = global_cache.FetchNext();
    499   if (current_match == NULL) {
    500     if (global_cache.HasException()) return isolate->heap()->exception();
    501     return *subject;
    502   }
    503 
    504   // Guessing the number of parts that the final result string is built
    505   // from. Global regexps can match any number of times, so we guess
    506   // conservatively.
    507   int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
    508   ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
    509 
    510   // Number of parts added by compiled replacement plus preceeding
    511   // string and possibly suffix after last match.  It is possible for
    512   // all components to use two elements when encoded as two smis.
    513   const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
    514 
    515   int prev = 0;
    516 
    517   do {
    518     builder.EnsureCapacity(parts_added_per_loop);
    519 
    520     int start = current_match[0];
    521     int end = current_match[1];
    522 
    523     if (prev < start) {
    524       builder.AddSubjectSlice(prev, start);
    525     }
    526 
    527     if (simple_replace) {
    528       builder.AddString(replacement);
    529     } else {
    530       compiled_replacement.Apply(&builder, start, end, current_match);
    531     }
    532     prev = end;
    533 
    534     current_match = global_cache.FetchNext();
    535   } while (current_match != NULL);
    536 
    537   if (global_cache.HasException()) return isolate->heap()->exception();
    538 
    539   if (prev < subject_length) {
    540     builder.EnsureCapacity(2);
    541     builder.AddSubjectSlice(prev, subject_length);
    542   }
    543 
    544   RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
    545                                global_cache.LastSuccessfulMatch());
    546 
    547   Handle<String> result;
    548   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, builder.ToString());
    549   return *result;
    550 }
    551 
    552 
    553 template <typename ResultSeqString>
    554 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
    555     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
    556     Handle<JSArray> last_match_info) {
    557   DCHECK(subject->IsFlat());
    558 
    559   // Shortcut for simple non-regexp global replacements
    560   if (regexp->TypeTag() == JSRegExp::ATOM) {
    561     Handle<String> empty_string = isolate->factory()->empty_string();
    562     if (subject->IsOneByteRepresentation()) {
    563       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
    564           isolate, subject, regexp, empty_string, last_match_info);
    565     } else {
    566       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
    567           isolate, subject, regexp, empty_string, last_match_info);
    568     }
    569   }
    570 
    571   RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
    572   if (global_cache.HasException()) return isolate->heap()->exception();
    573 
    574   int32_t* current_match = global_cache.FetchNext();
    575   if (current_match == NULL) {
    576     if (global_cache.HasException()) return isolate->heap()->exception();
    577     return *subject;
    578   }
    579 
    580   int start = current_match[0];
    581   int end = current_match[1];
    582   int capture_count = regexp->CaptureCount();
    583   int subject_length = subject->length();
    584 
    585   int new_length = subject_length - (end - start);
    586   if (new_length == 0) return isolate->heap()->empty_string();
    587 
    588   Handle<ResultSeqString> answer;
    589   if (ResultSeqString::kHasOneByteEncoding) {
    590     answer = Handle<ResultSeqString>::cast(
    591         isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
    592   } else {
    593     answer = Handle<ResultSeqString>::cast(
    594         isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
    595   }
    596 
    597   int prev = 0;
    598   int position = 0;
    599 
    600   do {
    601     start = current_match[0];
    602     end = current_match[1];
    603     if (prev < start) {
    604       // Add substring subject[prev;start] to answer string.
    605       String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
    606       position += start - prev;
    607     }
    608     prev = end;
    609 
    610     current_match = global_cache.FetchNext();
    611   } while (current_match != NULL);
    612 
    613   if (global_cache.HasException()) return isolate->heap()->exception();
    614 
    615   RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
    616                                global_cache.LastSuccessfulMatch());
    617 
    618   if (prev < subject_length) {
    619     // Add substring subject[prev;length] to answer string.
    620     String::WriteToFlat(*subject, answer->GetChars() + position, prev,
    621                         subject_length);
    622     position += subject_length - prev;
    623   }
    624 
    625   if (position == 0) return isolate->heap()->empty_string();
    626 
    627   // Shorten string and fill
    628   int string_size = ResultSeqString::SizeFor(position);
    629   int allocated_string_size = ResultSeqString::SizeFor(new_length);
    630   int delta = allocated_string_size - string_size;
    631 
    632   answer->set_length(position);
    633   if (delta == 0) return *answer;
    634 
    635   Address end_of_string = answer->address() + string_size;
    636   Heap* heap = isolate->heap();
    637 
    638   // The trimming is performed on a newly allocated object, which is on a
    639   // fresly allocated page or on an already swept page. Hence, the sweeper
    640   // thread can not get confused with the filler creation. No synchronization
    641   // needed.
    642   // TODO(hpayer): We should shrink the large object page if the size
    643   // of the object changed significantly.
    644   if (!heap->lo_space()->Contains(*answer)) {
    645     heap->CreateFillerObjectAt(end_of_string, delta);
    646   }
    647   heap->AdjustLiveBytes(*answer, -delta, Heap::CONCURRENT_TO_SWEEPER);
    648   return *answer;
    649 }
    650 
    651 
    652 RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
    653   HandleScope scope(isolate);
    654   DCHECK(args.length() == 4);
    655 
    656   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
    657   CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
    658   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
    659   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
    660 
    661   RUNTIME_ASSERT(regexp->GetFlags() & JSRegExp::kGlobal);
    662   RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
    663 
    664   subject = String::Flatten(subject);
    665 
    666   if (replacement->length() == 0) {
    667     if (subject->HasOnlyOneByteChars()) {
    668       return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
    669           isolate, subject, regexp, last_match_info);
    670     } else {
    671       return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
    672           isolate, subject, regexp, last_match_info);
    673     }
    674   }
    675 
    676   replacement = String::Flatten(replacement);
    677 
    678   return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
    679                                              replacement, last_match_info);
    680 }
    681 
    682 
    683 RUNTIME_FUNCTION(Runtime_StringSplit) {
    684   HandleScope handle_scope(isolate);
    685   DCHECK(args.length() == 3);
    686   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
    687   CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
    688   CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
    689   RUNTIME_ASSERT(limit > 0);
    690 
    691   int subject_length = subject->length();
    692   int pattern_length = pattern->length();
    693   RUNTIME_ASSERT(pattern_length > 0);
    694 
    695   if (limit == 0xffffffffu) {
    696     FixedArray* last_match_cache_unused;
    697     Handle<Object> cached_answer(
    698         RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
    699                                    &last_match_cache_unused,
    700                                    RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
    701         isolate);
    702     if (*cached_answer != Smi::FromInt(0)) {
    703       // The cache FixedArray is a COW-array and can therefore be reused.
    704       Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
    705           Handle<FixedArray>::cast(cached_answer));
    706       return *result;
    707     }
    708   }
    709 
    710   // The limit can be very large (0xffffffffu), but since the pattern
    711   // isn't empty, we can never create more parts than ~half the length
    712   // of the subject.
    713 
    714   subject = String::Flatten(subject);
    715   pattern = String::Flatten(pattern);
    716 
    717   static const int kMaxInitialListCapacity = 16;
    718 
    719   ZoneScope zone_scope(isolate->runtime_zone());
    720 
    721   // Find (up to limit) indices of separator and end-of-string in subject
    722   int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
    723   ZoneList<int> indices(initial_capacity, zone_scope.zone());
    724 
    725   FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit,
    726                             zone_scope.zone());
    727 
    728   if (static_cast<uint32_t>(indices.length()) < limit) {
    729     indices.Add(subject_length, zone_scope.zone());
    730   }
    731 
    732   // The list indices now contains the end of each part to create.
    733 
    734   // Create JSArray of substrings separated by separator.
    735   int part_count = indices.length();
    736 
    737   Handle<JSArray> result = isolate->factory()->NewJSArray(part_count);
    738   JSObject::EnsureCanContainHeapObjectElements(result);
    739   result->set_length(Smi::FromInt(part_count));
    740 
    741   DCHECK(result->HasFastObjectElements());
    742 
    743   Handle<FixedArray> elements(FixedArray::cast(result->elements()));
    744 
    745   if (part_count == 1 && indices.at(0) == subject_length) {
    746     elements->set(0, *subject);
    747   } else {
    748     int part_start = 0;
    749     for (int i = 0; i < part_count; i++) {
    750       HandleScope local_loop_handle(isolate);
    751       int part_end = indices.at(i);
    752       Handle<String> substring =
    753           isolate->factory()->NewProperSubString(subject, part_start, part_end);
    754       elements->set(i, *substring);
    755       part_start = part_end + pattern_length;
    756     }
    757   }
    758 
    759   if (limit == 0xffffffffu) {
    760     if (result->HasFastObjectElements()) {
    761       RegExpResultsCache::Enter(isolate, subject, pattern, elements,
    762                                 isolate->factory()->empty_fixed_array(),
    763                                 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
    764     }
    765   }
    766 
    767   return *result;
    768 }
    769 
    770 
    771 RUNTIME_FUNCTION(Runtime_RegExpExec) {
    772   HandleScope scope(isolate);
    773   DCHECK(args.length() == 4);
    774   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
    775   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
    776   CONVERT_INT32_ARG_CHECKED(index, 2);
    777   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
    778   // Due to the way the JS calls are constructed this must be less than the
    779   // length of a string, i.e. it is always a Smi.  We check anyway for security.
    780   RUNTIME_ASSERT(index >= 0);
    781   RUNTIME_ASSERT(index <= subject->length());
    782   isolate->counters()->regexp_entry_runtime()->Increment();
    783   Handle<Object> result;
    784   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
    785       isolate, result,
    786       RegExpImpl::Exec(regexp, subject, index, last_match_info));
    787   return *result;
    788 }
    789 
    790 
    791 RUNTIME_FUNCTION(Runtime_RegExpFlags) {
    792   SealHandleScope shs(isolate);
    793   DCHECK(args.length() == 1);
    794   CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
    795   return regexp->flags();
    796 }
    797 
    798 
    799 RUNTIME_FUNCTION(Runtime_RegExpSource) {
    800   SealHandleScope shs(isolate);
    801   DCHECK(args.length() == 1);
    802   CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
    803   return regexp->source();
    804 }
    805 
    806 
    807 RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
    808   HandleScope handle_scope(isolate);
    809   DCHECK(args.length() == 3);
    810   CONVERT_SMI_ARG_CHECKED(size, 0);
    811   RUNTIME_ASSERT(size >= 0 && size <= FixedArray::kMaxLength);
    812   CONVERT_ARG_HANDLE_CHECKED(Object, index, 1);
    813   CONVERT_ARG_HANDLE_CHECKED(Object, input, 2);
    814   Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);
    815   Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());
    816   Handle<JSObject> object =
    817       isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED);
    818   Handle<JSArray> array = Handle<JSArray>::cast(object);
    819   array->set_elements(*elements);
    820   array->set_length(Smi::FromInt(size));
    821   // Write in-object properties after the length of the array.
    822   array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index);
    823   array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
    824   return *array;
    825 }
    826 
    827 
    828 RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
    829   HandleScope scope(isolate);
    830   DCHECK(args.length() == 3);
    831   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
    832   CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
    833   CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
    834 
    835   RETURN_FAILURE_ON_EXCEPTION(isolate,
    836                               JSRegExp::Initialize(regexp, source, flags));
    837 
    838   return *regexp;
    839 }
    840 
    841 
    842 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
    843 // separate last match info.  See comment on that function.
    844 template <bool has_capture>
    845 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
    846                                     Handle<JSRegExp> regexp,
    847                                     Handle<JSArray> last_match_array,
    848                                     Handle<JSArray> result_array) {
    849   DCHECK(subject->IsFlat());
    850   DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
    851 
    852   int capture_count = regexp->CaptureCount();
    853   int subject_length = subject->length();
    854 
    855   static const int kMinLengthToCache = 0x1000;
    856 
    857   if (subject_length > kMinLengthToCache) {
    858     FixedArray* last_match_cache;
    859     Object* cached_answer = RegExpResultsCache::Lookup(
    860         isolate->heap(), *subject, regexp->data(), &last_match_cache,
    861         RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    862     if (cached_answer->IsFixedArray()) {
    863       int capture_registers = (capture_count + 1) * 2;
    864       int32_t* last_match = NewArray<int32_t>(capture_registers);
    865       for (int i = 0; i < capture_registers; i++) {
    866         last_match[i] = Smi::cast(last_match_cache->get(i))->value();
    867       }
    868       Handle<FixedArray> cached_fixed_array =
    869           Handle<FixedArray>(FixedArray::cast(cached_answer));
    870       // The cache FixedArray is a COW-array and can therefore be reused.
    871       JSArray::SetContent(result_array, cached_fixed_array);
    872       RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
    873                                    last_match);
    874       DeleteArray(last_match);
    875       return *result_array;
    876     }
    877   }
    878 
    879   RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
    880   if (global_cache.HasException()) return isolate->heap()->exception();
    881 
    882   // Ensured in Runtime_RegExpExecMultiple.
    883   DCHECK(result_array->HasFastObjectElements());
    884   Handle<FixedArray> result_elements(
    885       FixedArray::cast(result_array->elements()));
    886   if (result_elements->length() < 16) {
    887     result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
    888   }
    889 
    890   FixedArrayBuilder builder(result_elements);
    891 
    892   // Position to search from.
    893   int match_start = -1;
    894   int match_end = 0;
    895   bool first = true;
    896 
    897   // Two smis before and after the match, for very long strings.
    898   static const int kMaxBuilderEntriesPerRegExpMatch = 5;
    899 
    900   while (true) {
    901     int32_t* current_match = global_cache.FetchNext();
    902     if (current_match == NULL) break;
    903     match_start = current_match[0];
    904     builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
    905     if (match_end < match_start) {
    906       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
    907                                                 match_start);
    908     }
    909     match_end = current_match[1];
    910     {
    911       // Avoid accumulating new handles inside loop.
    912       HandleScope temp_scope(isolate);
    913       Handle<String> match;
    914       if (!first) {
    915         match = isolate->factory()->NewProperSubString(subject, match_start,
    916                                                        match_end);
    917       } else {
    918         match =
    919             isolate->factory()->NewSubString(subject, match_start, match_end);
    920         first = false;
    921       }
    922 
    923       if (has_capture) {
    924         // Arguments array to replace function is match, captures, index and
    925         // subject, i.e., 3 + capture count in total.
    926         Handle<FixedArray> elements =
    927             isolate->factory()->NewFixedArray(3 + capture_count);
    928 
    929         elements->set(0, *match);
    930         for (int i = 1; i <= capture_count; i++) {
    931           int start = current_match[i * 2];
    932           if (start >= 0) {
    933             int end = current_match[i * 2 + 1];
    934             DCHECK(start <= end);
    935             Handle<String> substring =
    936                 isolate->factory()->NewSubString(subject, start, end);
    937             elements->set(i, *substring);
    938           } else {
    939             DCHECK(current_match[i * 2 + 1] < 0);
    940             elements->set(i, isolate->heap()->undefined_value());
    941           }
    942         }
    943         elements->set(capture_count + 1, Smi::FromInt(match_start));
    944         elements->set(capture_count + 2, *subject);
    945         builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
    946       } else {
    947         builder.Add(*match);
    948       }
    949     }
    950   }
    951 
    952   if (global_cache.HasException()) return isolate->heap()->exception();
    953 
    954   if (match_start >= 0) {
    955     // Finished matching, with at least one match.
    956     if (match_end < subject_length) {
    957       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
    958                                                 subject_length);
    959     }
    960 
    961     RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
    962                                  global_cache.LastSuccessfulMatch());
    963 
    964     if (subject_length > kMinLengthToCache) {
    965       // Store the last successful match into the array for caching.
    966       // TODO(yangguo): do not expose last match to JS and simplify caching.
    967       int capture_registers = (capture_count + 1) * 2;
    968       Handle<FixedArray> last_match_cache =
    969           isolate->factory()->NewFixedArray(capture_registers);
    970       int32_t* last_match = global_cache.LastSuccessfulMatch();
    971       for (int i = 0; i < capture_registers; i++) {
    972         last_match_cache->set(i, Smi::FromInt(last_match[i]));
    973       }
    974       Handle<FixedArray> result_fixed_array = builder.array();
    975       result_fixed_array->Shrink(builder.length());
    976       // Cache the result and turn the FixedArray into a COW array.
    977       RegExpResultsCache::Enter(
    978           isolate, subject, handle(regexp->data(), isolate), result_fixed_array,
    979           last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    980     }
    981     return *builder.ToJSArray(result_array);
    982   } else {
    983     return isolate->heap()->null_value();  // No matches at all.
    984   }
    985 }
    986 
    987 
    988 // This is only called for StringReplaceGlobalRegExpWithFunction.  This sets
    989 // lastMatchInfoOverride to maintain the last match info, so we don't need to
    990 // set any other last match array info.
    991 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
    992   HandleScope handles(isolate);
    993   DCHECK(args.length() == 4);
    994 
    995   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
    996   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
    997   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
    998   CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
    999   RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
   1000   RUNTIME_ASSERT(result_array->HasFastObjectElements());
   1001 
   1002   subject = String::Flatten(subject);
   1003   RUNTIME_ASSERT(regexp->GetFlags() & JSRegExp::kGlobal);
   1004 
   1005   if (regexp->CaptureCount() == 0) {
   1006     return SearchRegExpMultiple<false>(isolate, subject, regexp,
   1007                                        last_match_info, result_array);
   1008   } else {
   1009     return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
   1010                                       result_array);
   1011   }
   1012 }
   1013 
   1014 
   1015 RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
   1016   SealHandleScope shs(isolate);
   1017   DCHECK(args.length() == 4);
   1018   Object* exception = isolate->pending_exception();
   1019   isolate->clear_pending_exception();
   1020   return isolate->ReThrow(exception);
   1021 }
   1022 
   1023 
   1024 RUNTIME_FUNCTION(Runtime_IsRegExp) {
   1025   SealHandleScope shs(isolate);
   1026   DCHECK(args.length() == 1);
   1027   CONVERT_ARG_CHECKED(Object, obj, 0);
   1028   return isolate->heap()->ToBoolean(obj->IsJSRegExp());
   1029 }
   1030 }  // namespace internal
   1031 }  // namespace v8
   1032