1 // Copyright 2014 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_STRING_BUILDER_H_ 6 #define V8_STRING_BUILDER_H_ 7 8 #include "src/assert-scope.h" 9 #include "src/factory.h" 10 #include "src/handles.h" 11 #include "src/isolate.h" 12 #include "src/objects.h" 13 #include "src/utils.h" 14 15 namespace v8 { 16 namespace internal { 17 18 const int kStringBuilderConcatHelperLengthBits = 11; 19 const int kStringBuilderConcatHelperPositionBits = 19; 20 21 typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits> 22 StringBuilderSubstringLength; 23 typedef BitField<int, kStringBuilderConcatHelperLengthBits, 24 kStringBuilderConcatHelperPositionBits> 25 StringBuilderSubstringPosition; 26 27 28 template <typename sinkchar> 29 static inline void StringBuilderConcatHelper(String* special, sinkchar* sink, 30 FixedArray* fixed_array, 31 int array_length) { 32 DisallowHeapAllocation no_gc; 33 int position = 0; 34 for (int i = 0; i < array_length; i++) { 35 Object* element = fixed_array->get(i); 36 if (element->IsSmi()) { 37 // Smi encoding of position and length. 38 int encoded_slice = Smi::cast(element)->value(); 39 int pos; 40 int len; 41 if (encoded_slice > 0) { 42 // Position and length encoded in one smi. 43 pos = StringBuilderSubstringPosition::decode(encoded_slice); 44 len = StringBuilderSubstringLength::decode(encoded_slice); 45 } else { 46 // Position and length encoded in two smis. 47 Object* obj = fixed_array->get(++i); 48 DCHECK(obj->IsSmi()); 49 pos = Smi::cast(obj)->value(); 50 len = -encoded_slice; 51 } 52 String::WriteToFlat(special, sink + position, pos, pos + len); 53 position += len; 54 } else { 55 String* string = String::cast(element); 56 int element_length = string->length(); 57 String::WriteToFlat(string, sink + position, 0, element_length); 58 position += element_length; 59 } 60 } 61 } 62 63 64 // Returns the result length of the concatenation. 65 // On illegal argument, -1 is returned. 66 static inline int StringBuilderConcatLength(int special_length, 67 FixedArray* fixed_array, 68 int array_length, bool* one_byte) { 69 DisallowHeapAllocation no_gc; 70 int position = 0; 71 for (int i = 0; i < array_length; i++) { 72 int increment = 0; 73 Object* elt = fixed_array->get(i); 74 if (elt->IsSmi()) { 75 // Smi encoding of position and length. 76 int smi_value = Smi::cast(elt)->value(); 77 int pos; 78 int len; 79 if (smi_value > 0) { 80 // Position and length encoded in one smi. 81 pos = StringBuilderSubstringPosition::decode(smi_value); 82 len = StringBuilderSubstringLength::decode(smi_value); 83 } else { 84 // Position and length encoded in two smis. 85 len = -smi_value; 86 // Get the position and check that it is a positive smi. 87 i++; 88 if (i >= array_length) return -1; 89 Object* next_smi = fixed_array->get(i); 90 if (!next_smi->IsSmi()) return -1; 91 pos = Smi::cast(next_smi)->value(); 92 if (pos < 0) return -1; 93 } 94 DCHECK(pos >= 0); 95 DCHECK(len >= 0); 96 if (pos > special_length || len > special_length - pos) return -1; 97 increment = len; 98 } else if (elt->IsString()) { 99 String* element = String::cast(elt); 100 int element_length = element->length(); 101 increment = element_length; 102 if (*one_byte && !element->HasOnlyOneByteChars()) { 103 *one_byte = false; 104 } 105 } else { 106 return -1; 107 } 108 if (increment > String::kMaxLength - position) { 109 return kMaxInt; // Provoke throw on allocation. 110 } 111 position += increment; 112 } 113 return position; 114 } 115 116 117 class FixedArrayBuilder { 118 public: 119 explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity) 120 : array_(isolate->factory()->NewFixedArrayWithHoles(initial_capacity)), 121 length_(0), 122 has_non_smi_elements_(false) { 123 // Require a non-zero initial size. Ensures that doubling the size to 124 // extend the array will work. 125 DCHECK(initial_capacity > 0); 126 } 127 128 explicit FixedArrayBuilder(Handle<FixedArray> backing_store) 129 : array_(backing_store), length_(0), has_non_smi_elements_(false) { 130 // Require a non-zero initial size. Ensures that doubling the size to 131 // extend the array will work. 132 DCHECK(backing_store->length() > 0); 133 } 134 135 bool HasCapacity(int elements) { 136 int length = array_->length(); 137 int required_length = length_ + elements; 138 return (length >= required_length); 139 } 140 141 void EnsureCapacity(int elements) { 142 int length = array_->length(); 143 int required_length = length_ + elements; 144 if (length < required_length) { 145 int new_length = length; 146 do { 147 new_length *= 2; 148 } while (new_length < required_length); 149 Handle<FixedArray> extended_array = 150 array_->GetIsolate()->factory()->NewFixedArrayWithHoles(new_length); 151 array_->CopyTo(0, *extended_array, 0, length_); 152 array_ = extended_array; 153 } 154 } 155 156 void Add(Object* value) { 157 DCHECK(!value->IsSmi()); 158 DCHECK(length_ < capacity()); 159 array_->set(length_, value); 160 length_++; 161 has_non_smi_elements_ = true; 162 } 163 164 void Add(Smi* value) { 165 DCHECK(value->IsSmi()); 166 DCHECK(length_ < capacity()); 167 array_->set(length_, value); 168 length_++; 169 } 170 171 Handle<FixedArray> array() { return array_; } 172 173 int length() { return length_; } 174 175 int capacity() { return array_->length(); } 176 177 Handle<JSArray> ToJSArray(Handle<JSArray> target_array) { 178 JSArray::SetContent(target_array, array_); 179 target_array->set_length(Smi::FromInt(length_)); 180 return target_array; 181 } 182 183 184 private: 185 Handle<FixedArray> array_; 186 int length_; 187 bool has_non_smi_elements_; 188 }; 189 190 191 class ReplacementStringBuilder { 192 public: 193 ReplacementStringBuilder(Heap* heap, Handle<String> subject, 194 int estimated_part_count) 195 : heap_(heap), 196 array_builder_(heap->isolate(), estimated_part_count), 197 subject_(subject), 198 character_count_(0), 199 is_one_byte_(subject->IsOneByteRepresentation()) { 200 // Require a non-zero initial size. Ensures that doubling the size to 201 // extend the array will work. 202 DCHECK(estimated_part_count > 0); 203 } 204 205 static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from, 206 int to) { 207 DCHECK(from >= 0); 208 int length = to - from; 209 DCHECK(length > 0); 210 if (StringBuilderSubstringLength::is_valid(length) && 211 StringBuilderSubstringPosition::is_valid(from)) { 212 int encoded_slice = StringBuilderSubstringLength::encode(length) | 213 StringBuilderSubstringPosition::encode(from); 214 builder->Add(Smi::FromInt(encoded_slice)); 215 } else { 216 // Otherwise encode as two smis. 217 builder->Add(Smi::FromInt(-length)); 218 builder->Add(Smi::FromInt(from)); 219 } 220 } 221 222 223 void EnsureCapacity(int elements) { array_builder_.EnsureCapacity(elements); } 224 225 226 void AddSubjectSlice(int from, int to) { 227 AddSubjectSlice(&array_builder_, from, to); 228 IncrementCharacterCount(to - from); 229 } 230 231 232 void AddString(Handle<String> string) { 233 int length = string->length(); 234 DCHECK(length > 0); 235 AddElement(*string); 236 if (!string->IsOneByteRepresentation()) { 237 is_one_byte_ = false; 238 } 239 IncrementCharacterCount(length); 240 } 241 242 243 MaybeHandle<String> ToString(); 244 245 246 void IncrementCharacterCount(int by) { 247 if (character_count_ > String::kMaxLength - by) { 248 STATIC_ASSERT(String::kMaxLength < kMaxInt); 249 character_count_ = kMaxInt; 250 } else { 251 character_count_ += by; 252 } 253 } 254 255 private: 256 void AddElement(Object* element) { 257 DCHECK(element->IsSmi() || element->IsString()); 258 DCHECK(array_builder_.capacity() > array_builder_.length()); 259 array_builder_.Add(element); 260 } 261 262 Heap* heap_; 263 FixedArrayBuilder array_builder_; 264 Handle<String> subject_; 265 int character_count_; 266 bool is_one_byte_; 267 }; 268 269 270 class IncrementalStringBuilder { 271 public: 272 explicit IncrementalStringBuilder(Isolate* isolate); 273 274 INLINE(String::Encoding CurrentEncoding()) { return encoding_; } 275 276 template <typename SrcChar, typename DestChar> 277 INLINE(void Append(SrcChar c)); 278 279 INLINE(void AppendCharacter(uint8_t c)) { 280 if (encoding_ == String::ONE_BYTE_ENCODING) { 281 Append<uint8_t, uint8_t>(c); 282 } else { 283 Append<uint8_t, uc16>(c); 284 } 285 } 286 287 INLINE(void AppendCString(const char* s)) { 288 const uint8_t* u = reinterpret_cast<const uint8_t*>(s); 289 if (encoding_ == String::ONE_BYTE_ENCODING) { 290 while (*u != '\0') Append<uint8_t, uint8_t>(*(u++)); 291 } else { 292 while (*u != '\0') Append<uint8_t, uc16>(*(u++)); 293 } 294 } 295 296 INLINE(void AppendCString(const uc16* s)) { 297 if (encoding_ == String::ONE_BYTE_ENCODING) { 298 while (*s != '\0') Append<uc16, uint8_t>(*(s++)); 299 } else { 300 while (*s != '\0') Append<uc16, uc16>(*(s++)); 301 } 302 } 303 304 INLINE(bool CurrentPartCanFit(int length)) { 305 return part_length_ - current_index_ > length; 306 } 307 308 void AppendString(Handle<String> string); 309 310 MaybeHandle<String> Finish(); 311 312 INLINE(bool HasOverflowed()) const { return overflowed_; } 313 314 // Change encoding to two-byte. 315 void ChangeEncoding() { 316 DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_); 317 ShrinkCurrentPart(); 318 encoding_ = String::TWO_BYTE_ENCODING; 319 Extend(); 320 } 321 322 template <typename DestChar> 323 class NoExtend { 324 public: 325 explicit NoExtend(Handle<String> string, int offset) { 326 DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString()); 327 if (sizeof(DestChar) == 1) { 328 start_ = reinterpret_cast<DestChar*>( 329 Handle<SeqOneByteString>::cast(string)->GetChars() + offset); 330 } else { 331 start_ = reinterpret_cast<DestChar*>( 332 Handle<SeqTwoByteString>::cast(string)->GetChars() + offset); 333 } 334 cursor_ = start_; 335 } 336 337 INLINE(void Append(DestChar c)) { *(cursor_++) = c; } 338 INLINE(void AppendCString(const char* s)) { 339 const uint8_t* u = reinterpret_cast<const uint8_t*>(s); 340 while (*u != '\0') Append(*(u++)); 341 } 342 343 int written() { return static_cast<int>(cursor_ - start_); } 344 345 private: 346 DestChar* start_; 347 DestChar* cursor_; 348 DisallowHeapAllocation no_gc_; 349 }; 350 351 template <typename DestChar> 352 class NoExtendString : public NoExtend<DestChar> { 353 public: 354 NoExtendString(Handle<String> string, int required_length) 355 : NoExtend<DestChar>(string, 0), string_(string) { 356 DCHECK(string->length() >= required_length); 357 } 358 359 Handle<String> Finalize() { 360 Handle<SeqString> string = Handle<SeqString>::cast(string_); 361 int length = NoExtend<DestChar>::written(); 362 Handle<String> result = SeqString::Truncate(string, length); 363 string_ = Handle<String>(); 364 return result; 365 } 366 367 private: 368 Handle<String> string_; 369 }; 370 371 template <typename DestChar> 372 class NoExtendBuilder : public NoExtend<DestChar> { 373 public: 374 NoExtendBuilder(IncrementalStringBuilder* builder, int required_length) 375 : NoExtend<DestChar>(builder->current_part(), builder->current_index_), 376 builder_(builder) { 377 DCHECK(builder->CurrentPartCanFit(required_length)); 378 } 379 380 ~NoExtendBuilder() { 381 builder_->current_index_ += NoExtend<DestChar>::written(); 382 } 383 384 private: 385 IncrementalStringBuilder* builder_; 386 }; 387 388 private: 389 Factory* factory() { return isolate_->factory(); } 390 391 INLINE(Handle<String> accumulator()) { return accumulator_; } 392 393 INLINE(void set_accumulator(Handle<String> string)) { 394 *accumulator_.location() = *string; 395 } 396 397 INLINE(Handle<String> current_part()) { return current_part_; } 398 399 INLINE(void set_current_part(Handle<String> string)) { 400 *current_part_.location() = *string; 401 } 402 403 // Add the current part to the accumulator. 404 void Accumulate(Handle<String> new_part); 405 406 // Finish the current part and allocate a new part. 407 void Extend(); 408 409 // Shrink current part to the right size. 410 void ShrinkCurrentPart() { 411 DCHECK(current_index_ < part_length_); 412 set_current_part(SeqString::Truncate( 413 Handle<SeqString>::cast(current_part()), current_index_)); 414 } 415 416 static const int kInitialPartLength = 32; 417 static const int kMaxPartLength = 16 * 1024; 418 static const int kPartLengthGrowthFactor = 2; 419 420 Isolate* isolate_; 421 String::Encoding encoding_; 422 bool overflowed_; 423 int part_length_; 424 int current_index_; 425 Handle<String> accumulator_; 426 Handle<String> current_part_; 427 }; 428 429 430 template <typename SrcChar, typename DestChar> 431 void IncrementalStringBuilder::Append(SrcChar c) { 432 DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1); 433 if (sizeof(DestChar) == 1) { 434 DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_); 435 SeqOneByteString::cast(*current_part_) 436 ->SeqOneByteStringSet(current_index_++, c); 437 } else { 438 DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_); 439 SeqTwoByteString::cast(*current_part_) 440 ->SeqTwoByteStringSet(current_index_++, c); 441 } 442 if (current_index_ == part_length_) Extend(); 443 } 444 } // namespace internal 445 } // namespace v8 446 447 #endif // V8_STRING_BUILDER_H_ 448