Home | History | Annotate | Download | only in src
      1 // Copyright 2014 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef V8_STRING_BUILDER_H_
      6 #define V8_STRING_BUILDER_H_
      7 
      8 #include "src/assert-scope.h"
      9 #include "src/factory.h"
     10 #include "src/handles.h"
     11 #include "src/isolate.h"
     12 #include "src/objects.h"
     13 #include "src/utils.h"
     14 
     15 namespace v8 {
     16 namespace internal {
     17 
     18 const int kStringBuilderConcatHelperLengthBits = 11;
     19 const int kStringBuilderConcatHelperPositionBits = 19;
     20 
     21 typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits>
     22     StringBuilderSubstringLength;
     23 typedef BitField<int, kStringBuilderConcatHelperLengthBits,
     24                  kStringBuilderConcatHelperPositionBits>
     25     StringBuilderSubstringPosition;
     26 
     27 
     28 template <typename sinkchar>
     29 static inline void StringBuilderConcatHelper(String* special, sinkchar* sink,
     30                                              FixedArray* fixed_array,
     31                                              int array_length) {
     32   DisallowHeapAllocation no_gc;
     33   int position = 0;
     34   for (int i = 0; i < array_length; i++) {
     35     Object* element = fixed_array->get(i);
     36     if (element->IsSmi()) {
     37       // Smi encoding of position and length.
     38       int encoded_slice = Smi::cast(element)->value();
     39       int pos;
     40       int len;
     41       if (encoded_slice > 0) {
     42         // Position and length encoded in one smi.
     43         pos = StringBuilderSubstringPosition::decode(encoded_slice);
     44         len = StringBuilderSubstringLength::decode(encoded_slice);
     45       } else {
     46         // Position and length encoded in two smis.
     47         Object* obj = fixed_array->get(++i);
     48         DCHECK(obj->IsSmi());
     49         pos = Smi::cast(obj)->value();
     50         len = -encoded_slice;
     51       }
     52       String::WriteToFlat(special, sink + position, pos, pos + len);
     53       position += len;
     54     } else {
     55       String* string = String::cast(element);
     56       int element_length = string->length();
     57       String::WriteToFlat(string, sink + position, 0, element_length);
     58       position += element_length;
     59     }
     60   }
     61 }
     62 
     63 
     64 // Returns the result length of the concatenation.
     65 // On illegal argument, -1 is returned.
     66 static inline int StringBuilderConcatLength(int special_length,
     67                                             FixedArray* fixed_array,
     68                                             int array_length, bool* one_byte) {
     69   DisallowHeapAllocation no_gc;
     70   int position = 0;
     71   for (int i = 0; i < array_length; i++) {
     72     int increment = 0;
     73     Object* elt = fixed_array->get(i);
     74     if (elt->IsSmi()) {
     75       // Smi encoding of position and length.
     76       int smi_value = Smi::cast(elt)->value();
     77       int pos;
     78       int len;
     79       if (smi_value > 0) {
     80         // Position and length encoded in one smi.
     81         pos = StringBuilderSubstringPosition::decode(smi_value);
     82         len = StringBuilderSubstringLength::decode(smi_value);
     83       } else {
     84         // Position and length encoded in two smis.
     85         len = -smi_value;
     86         // Get the position and check that it is a positive smi.
     87         i++;
     88         if (i >= array_length) return -1;
     89         Object* next_smi = fixed_array->get(i);
     90         if (!next_smi->IsSmi()) return -1;
     91         pos = Smi::cast(next_smi)->value();
     92         if (pos < 0) return -1;
     93       }
     94       DCHECK(pos >= 0);
     95       DCHECK(len >= 0);
     96       if (pos > special_length || len > special_length - pos) return -1;
     97       increment = len;
     98     } else if (elt->IsString()) {
     99       String* element = String::cast(elt);
    100       int element_length = element->length();
    101       increment = element_length;
    102       if (*one_byte && !element->HasOnlyOneByteChars()) {
    103         *one_byte = false;
    104       }
    105     } else {
    106       return -1;
    107     }
    108     if (increment > String::kMaxLength - position) {
    109       return kMaxInt;  // Provoke throw on allocation.
    110     }
    111     position += increment;
    112   }
    113   return position;
    114 }
    115 
    116 
    117 class FixedArrayBuilder {
    118  public:
    119   explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity)
    120       : array_(isolate->factory()->NewFixedArrayWithHoles(initial_capacity)),
    121         length_(0),
    122         has_non_smi_elements_(false) {
    123     // Require a non-zero initial size. Ensures that doubling the size to
    124     // extend the array will work.
    125     DCHECK(initial_capacity > 0);
    126   }
    127 
    128   explicit FixedArrayBuilder(Handle<FixedArray> backing_store)
    129       : array_(backing_store), length_(0), has_non_smi_elements_(false) {
    130     // Require a non-zero initial size. Ensures that doubling the size to
    131     // extend the array will work.
    132     DCHECK(backing_store->length() > 0);
    133   }
    134 
    135   bool HasCapacity(int elements) {
    136     int length = array_->length();
    137     int required_length = length_ + elements;
    138     return (length >= required_length);
    139   }
    140 
    141   void EnsureCapacity(int elements) {
    142     int length = array_->length();
    143     int required_length = length_ + elements;
    144     if (length < required_length) {
    145       int new_length = length;
    146       do {
    147         new_length *= 2;
    148       } while (new_length < required_length);
    149       Handle<FixedArray> extended_array =
    150           array_->GetIsolate()->factory()->NewFixedArrayWithHoles(new_length);
    151       array_->CopyTo(0, *extended_array, 0, length_);
    152       array_ = extended_array;
    153     }
    154   }
    155 
    156   void Add(Object* value) {
    157     DCHECK(!value->IsSmi());
    158     DCHECK(length_ < capacity());
    159     array_->set(length_, value);
    160     length_++;
    161     has_non_smi_elements_ = true;
    162   }
    163 
    164   void Add(Smi* value) {
    165     DCHECK(value->IsSmi());
    166     DCHECK(length_ < capacity());
    167     array_->set(length_, value);
    168     length_++;
    169   }
    170 
    171   Handle<FixedArray> array() { return array_; }
    172 
    173   int length() { return length_; }
    174 
    175   int capacity() { return array_->length(); }
    176 
    177   Handle<JSArray> ToJSArray(Handle<JSArray> target_array) {
    178     JSArray::SetContent(target_array, array_);
    179     target_array->set_length(Smi::FromInt(length_));
    180     return target_array;
    181   }
    182 
    183  private:
    184   Handle<FixedArray> array_;
    185   int length_;
    186   bool has_non_smi_elements_;
    187 };
    188 
    189 
    190 class ReplacementStringBuilder {
    191  public:
    192   ReplacementStringBuilder(Heap* heap, Handle<String> subject,
    193                            int estimated_part_count)
    194       : heap_(heap),
    195         array_builder_(heap->isolate(), estimated_part_count),
    196         subject_(subject),
    197         character_count_(0),
    198         is_one_byte_(subject->IsOneByteRepresentation()) {
    199     // Require a non-zero initial size. Ensures that doubling the size to
    200     // extend the array will work.
    201     DCHECK(estimated_part_count > 0);
    202   }
    203 
    204   static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from,
    205                                      int to) {
    206     DCHECK(from >= 0);
    207     int length = to - from;
    208     DCHECK(length > 0);
    209     if (StringBuilderSubstringLength::is_valid(length) &&
    210         StringBuilderSubstringPosition::is_valid(from)) {
    211       int encoded_slice = StringBuilderSubstringLength::encode(length) |
    212                           StringBuilderSubstringPosition::encode(from);
    213       builder->Add(Smi::FromInt(encoded_slice));
    214     } else {
    215       // Otherwise encode as two smis.
    216       builder->Add(Smi::FromInt(-length));
    217       builder->Add(Smi::FromInt(from));
    218     }
    219   }
    220 
    221 
    222   void EnsureCapacity(int elements) { array_builder_.EnsureCapacity(elements); }
    223 
    224 
    225   void AddSubjectSlice(int from, int to) {
    226     AddSubjectSlice(&array_builder_, from, to);
    227     IncrementCharacterCount(to - from);
    228   }
    229 
    230 
    231   void AddString(Handle<String> string) {
    232     int length = string->length();
    233     DCHECK(length > 0);
    234     AddElement(*string);
    235     if (!string->IsOneByteRepresentation()) {
    236       is_one_byte_ = false;
    237     }
    238     IncrementCharacterCount(length);
    239   }
    240 
    241 
    242   MaybeHandle<String> ToString();
    243 
    244 
    245   void IncrementCharacterCount(int by) {
    246     if (character_count_ > String::kMaxLength - by) {
    247       STATIC_ASSERT(String::kMaxLength < kMaxInt);
    248       character_count_ = kMaxInt;
    249     } else {
    250       character_count_ += by;
    251     }
    252   }
    253 
    254  private:
    255   void AddElement(Object* element) {
    256     DCHECK(element->IsSmi() || element->IsString());
    257     DCHECK(array_builder_.capacity() > array_builder_.length());
    258     array_builder_.Add(element);
    259   }
    260 
    261   Heap* heap_;
    262   FixedArrayBuilder array_builder_;
    263   Handle<String> subject_;
    264   int character_count_;
    265   bool is_one_byte_;
    266 };
    267 
    268 
    269 class IncrementalStringBuilder {
    270  public:
    271   explicit IncrementalStringBuilder(Isolate* isolate);
    272 
    273   INLINE(String::Encoding CurrentEncoding()) { return encoding_; }
    274 
    275   template <typename SrcChar, typename DestChar>
    276   INLINE(void Append(SrcChar c));
    277 
    278   INLINE(void AppendCharacter(uint8_t c)) {
    279     if (encoding_ == String::ONE_BYTE_ENCODING) {
    280       Append<uint8_t, uint8_t>(c);
    281     } else {
    282       Append<uint8_t, uc16>(c);
    283     }
    284   }
    285 
    286   INLINE(void AppendCString(const char* s)) {
    287     const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
    288     if (encoding_ == String::ONE_BYTE_ENCODING) {
    289       while (*u != '\0') Append<uint8_t, uint8_t>(*(u++));
    290     } else {
    291       while (*u != '\0') Append<uint8_t, uc16>(*(u++));
    292     }
    293   }
    294 
    295   INLINE(void AppendCString(const uc16* s)) {
    296     if (encoding_ == String::ONE_BYTE_ENCODING) {
    297       while (*s != '\0') Append<uc16, uint8_t>(*(s++));
    298     } else {
    299       while (*s != '\0') Append<uc16, uc16>(*(s++));
    300     }
    301   }
    302 
    303   INLINE(bool CurrentPartCanFit(int length)) {
    304     return part_length_ - current_index_ > length;
    305   }
    306 
    307   void AppendString(Handle<String> string);
    308 
    309   MaybeHandle<String> Finish();
    310 
    311   INLINE(bool HasOverflowed()) const { return overflowed_; }
    312 
    313   INLINE(int Length()) const { return accumulator_->length() + current_index_; }
    314 
    315   // Change encoding to two-byte.
    316   void ChangeEncoding() {
    317     DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
    318     ShrinkCurrentPart();
    319     encoding_ = String::TWO_BYTE_ENCODING;
    320     Extend();
    321   }
    322 
    323   template <typename DestChar>
    324   class NoExtend {
    325    public:
    326     explicit NoExtend(Handle<String> string, int offset) {
    327       DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString());
    328       if (sizeof(DestChar) == 1) {
    329         start_ = reinterpret_cast<DestChar*>(
    330             Handle<SeqOneByteString>::cast(string)->GetChars() + offset);
    331       } else {
    332         start_ = reinterpret_cast<DestChar*>(
    333             Handle<SeqTwoByteString>::cast(string)->GetChars() + offset);
    334       }
    335       cursor_ = start_;
    336     }
    337 
    338     INLINE(void Append(DestChar c)) { *(cursor_++) = c; }
    339     INLINE(void AppendCString(const char* s)) {
    340       const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
    341       while (*u != '\0') Append(*(u++));
    342     }
    343 
    344     int written() { return static_cast<int>(cursor_ - start_); }
    345 
    346    private:
    347     DestChar* start_;
    348     DestChar* cursor_;
    349     DisallowHeapAllocation no_gc_;
    350   };
    351 
    352   template <typename DestChar>
    353   class NoExtendString : public NoExtend<DestChar> {
    354    public:
    355     NoExtendString(Handle<String> string, int required_length)
    356         : NoExtend<DestChar>(string, 0), string_(string) {
    357       DCHECK(string->length() >= required_length);
    358     }
    359 
    360     Handle<String> Finalize() {
    361       Handle<SeqString> string = Handle<SeqString>::cast(string_);
    362       int length = NoExtend<DestChar>::written();
    363       Handle<String> result = SeqString::Truncate(string, length);
    364       string_ = Handle<String>();
    365       return result;
    366     }
    367 
    368    private:
    369     Handle<String> string_;
    370   };
    371 
    372   template <typename DestChar>
    373   class NoExtendBuilder : public NoExtend<DestChar> {
    374    public:
    375     NoExtendBuilder(IncrementalStringBuilder* builder, int required_length)
    376         : NoExtend<DestChar>(builder->current_part(), builder->current_index_),
    377           builder_(builder) {
    378       DCHECK(builder->CurrentPartCanFit(required_length));
    379     }
    380 
    381     ~NoExtendBuilder() {
    382       builder_->current_index_ += NoExtend<DestChar>::written();
    383     }
    384 
    385    private:
    386     IncrementalStringBuilder* builder_;
    387   };
    388 
    389  private:
    390   Factory* factory() { return isolate_->factory(); }
    391 
    392   INLINE(Handle<String> accumulator()) { return accumulator_; }
    393 
    394   INLINE(void set_accumulator(Handle<String> string)) {
    395     *accumulator_.location() = *string;
    396   }
    397 
    398   INLINE(Handle<String> current_part()) { return current_part_; }
    399 
    400   INLINE(void set_current_part(Handle<String> string)) {
    401     *current_part_.location() = *string;
    402   }
    403 
    404   // Add the current part to the accumulator.
    405   void Accumulate(Handle<String> new_part);
    406 
    407   // Finish the current part and allocate a new part.
    408   void Extend();
    409 
    410   // Shrink current part to the right size.
    411   void ShrinkCurrentPart() {
    412     DCHECK(current_index_ < part_length_);
    413     set_current_part(SeqString::Truncate(
    414         Handle<SeqString>::cast(current_part()), current_index_));
    415   }
    416 
    417   static const int kInitialPartLength = 32;
    418   static const int kMaxPartLength = 16 * 1024;
    419   static const int kPartLengthGrowthFactor = 2;
    420 
    421   Isolate* isolate_;
    422   String::Encoding encoding_;
    423   bool overflowed_;
    424   int part_length_;
    425   int current_index_;
    426   Handle<String> accumulator_;
    427   Handle<String> current_part_;
    428 };
    429 
    430 
    431 template <typename SrcChar, typename DestChar>
    432 void IncrementalStringBuilder::Append(SrcChar c) {
    433   DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1);
    434   if (sizeof(DestChar) == 1) {
    435     DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
    436     SeqOneByteString::cast(*current_part_)
    437         ->SeqOneByteStringSet(current_index_++, c);
    438   } else {
    439     DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_);
    440     SeqTwoByteString::cast(*current_part_)
    441         ->SeqTwoByteStringSet(current_index_++, c);
    442   }
    443   if (current_index_ == part_length_) Extend();
    444 }
    445 }  // namespace internal
    446 }  // namespace v8
    447 
    448 #endif  // V8_STRING_BUILDER_H_
    449