1 // Copyright 2014 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_STRING_BUILDER_H_ 6 #define V8_STRING_BUILDER_H_ 7 8 #include "src/assert-scope.h" 9 #include "src/factory.h" 10 #include "src/handles.h" 11 #include "src/isolate.h" 12 #include "src/objects.h" 13 #include "src/utils.h" 14 15 namespace v8 { 16 namespace internal { 17 18 const int kStringBuilderConcatHelperLengthBits = 11; 19 const int kStringBuilderConcatHelperPositionBits = 19; 20 21 typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits> 22 StringBuilderSubstringLength; 23 typedef BitField<int, kStringBuilderConcatHelperLengthBits, 24 kStringBuilderConcatHelperPositionBits> 25 StringBuilderSubstringPosition; 26 27 28 template <typename sinkchar> 29 static inline void StringBuilderConcatHelper(String* special, sinkchar* sink, 30 FixedArray* fixed_array, 31 int array_length) { 32 DisallowHeapAllocation no_gc; 33 int position = 0; 34 for (int i = 0; i < array_length; i++) { 35 Object* element = fixed_array->get(i); 36 if (element->IsSmi()) { 37 // Smi encoding of position and length. 38 int encoded_slice = Smi::cast(element)->value(); 39 int pos; 40 int len; 41 if (encoded_slice > 0) { 42 // Position and length encoded in one smi. 43 pos = StringBuilderSubstringPosition::decode(encoded_slice); 44 len = StringBuilderSubstringLength::decode(encoded_slice); 45 } else { 46 // Position and length encoded in two smis. 47 Object* obj = fixed_array->get(++i); 48 DCHECK(obj->IsSmi()); 49 pos = Smi::cast(obj)->value(); 50 len = -encoded_slice; 51 } 52 String::WriteToFlat(special, sink + position, pos, pos + len); 53 position += len; 54 } else { 55 String* string = String::cast(element); 56 int element_length = string->length(); 57 String::WriteToFlat(string, sink + position, 0, element_length); 58 position += element_length; 59 } 60 } 61 } 62 63 64 // Returns the result length of the concatenation. 65 // On illegal argument, -1 is returned. 66 static inline int StringBuilderConcatLength(int special_length, 67 FixedArray* fixed_array, 68 int array_length, bool* one_byte) { 69 DisallowHeapAllocation no_gc; 70 int position = 0; 71 for (int i = 0; i < array_length; i++) { 72 int increment = 0; 73 Object* elt = fixed_array->get(i); 74 if (elt->IsSmi()) { 75 // Smi encoding of position and length. 76 int smi_value = Smi::cast(elt)->value(); 77 int pos; 78 int len; 79 if (smi_value > 0) { 80 // Position and length encoded in one smi. 81 pos = StringBuilderSubstringPosition::decode(smi_value); 82 len = StringBuilderSubstringLength::decode(smi_value); 83 } else { 84 // Position and length encoded in two smis. 85 len = -smi_value; 86 // Get the position and check that it is a positive smi. 87 i++; 88 if (i >= array_length) return -1; 89 Object* next_smi = fixed_array->get(i); 90 if (!next_smi->IsSmi()) return -1; 91 pos = Smi::cast(next_smi)->value(); 92 if (pos < 0) return -1; 93 } 94 DCHECK(pos >= 0); 95 DCHECK(len >= 0); 96 if (pos > special_length || len > special_length - pos) return -1; 97 increment = len; 98 } else if (elt->IsString()) { 99 String* element = String::cast(elt); 100 int element_length = element->length(); 101 increment = element_length; 102 if (*one_byte && !element->HasOnlyOneByteChars()) { 103 *one_byte = false; 104 } 105 } else { 106 return -1; 107 } 108 if (increment > String::kMaxLength - position) { 109 return kMaxInt; // Provoke throw on allocation. 110 } 111 position += increment; 112 } 113 return position; 114 } 115 116 117 class FixedArrayBuilder { 118 public: 119 explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity) 120 : array_(isolate->factory()->NewFixedArrayWithHoles(initial_capacity)), 121 length_(0), 122 has_non_smi_elements_(false) { 123 // Require a non-zero initial size. Ensures that doubling the size to 124 // extend the array will work. 125 DCHECK(initial_capacity > 0); 126 } 127 128 explicit FixedArrayBuilder(Handle<FixedArray> backing_store) 129 : array_(backing_store), length_(0), has_non_smi_elements_(false) { 130 // Require a non-zero initial size. Ensures that doubling the size to 131 // extend the array will work. 132 DCHECK(backing_store->length() > 0); 133 } 134 135 bool HasCapacity(int elements) { 136 int length = array_->length(); 137 int required_length = length_ + elements; 138 return (length >= required_length); 139 } 140 141 void EnsureCapacity(int elements) { 142 int length = array_->length(); 143 int required_length = length_ + elements; 144 if (length < required_length) { 145 int new_length = length; 146 do { 147 new_length *= 2; 148 } while (new_length < required_length); 149 Handle<FixedArray> extended_array = 150 array_->GetIsolate()->factory()->NewFixedArrayWithHoles(new_length); 151 array_->CopyTo(0, *extended_array, 0, length_); 152 array_ = extended_array; 153 } 154 } 155 156 void Add(Object* value) { 157 DCHECK(!value->IsSmi()); 158 DCHECK(length_ < capacity()); 159 array_->set(length_, value); 160 length_++; 161 has_non_smi_elements_ = true; 162 } 163 164 void Add(Smi* value) { 165 DCHECK(value->IsSmi()); 166 DCHECK(length_ < capacity()); 167 array_->set(length_, value); 168 length_++; 169 } 170 171 Handle<FixedArray> array() { return array_; } 172 173 int length() { return length_; } 174 175 int capacity() { return array_->length(); } 176 177 Handle<JSArray> ToJSArray(Handle<JSArray> target_array) { 178 JSArray::SetContent(target_array, array_); 179 target_array->set_length(Smi::FromInt(length_)); 180 return target_array; 181 } 182 183 private: 184 Handle<FixedArray> array_; 185 int length_; 186 bool has_non_smi_elements_; 187 }; 188 189 190 class ReplacementStringBuilder { 191 public: 192 ReplacementStringBuilder(Heap* heap, Handle<String> subject, 193 int estimated_part_count) 194 : heap_(heap), 195 array_builder_(heap->isolate(), estimated_part_count), 196 subject_(subject), 197 character_count_(0), 198 is_one_byte_(subject->IsOneByteRepresentation()) { 199 // Require a non-zero initial size. Ensures that doubling the size to 200 // extend the array will work. 201 DCHECK(estimated_part_count > 0); 202 } 203 204 static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from, 205 int to) { 206 DCHECK(from >= 0); 207 int length = to - from; 208 DCHECK(length > 0); 209 if (StringBuilderSubstringLength::is_valid(length) && 210 StringBuilderSubstringPosition::is_valid(from)) { 211 int encoded_slice = StringBuilderSubstringLength::encode(length) | 212 StringBuilderSubstringPosition::encode(from); 213 builder->Add(Smi::FromInt(encoded_slice)); 214 } else { 215 // Otherwise encode as two smis. 216 builder->Add(Smi::FromInt(-length)); 217 builder->Add(Smi::FromInt(from)); 218 } 219 } 220 221 222 void EnsureCapacity(int elements) { array_builder_.EnsureCapacity(elements); } 223 224 225 void AddSubjectSlice(int from, int to) { 226 AddSubjectSlice(&array_builder_, from, to); 227 IncrementCharacterCount(to - from); 228 } 229 230 231 void AddString(Handle<String> string) { 232 int length = string->length(); 233 DCHECK(length > 0); 234 AddElement(*string); 235 if (!string->IsOneByteRepresentation()) { 236 is_one_byte_ = false; 237 } 238 IncrementCharacterCount(length); 239 } 240 241 242 MaybeHandle<String> ToString(); 243 244 245 void IncrementCharacterCount(int by) { 246 if (character_count_ > String::kMaxLength - by) { 247 STATIC_ASSERT(String::kMaxLength < kMaxInt); 248 character_count_ = kMaxInt; 249 } else { 250 character_count_ += by; 251 } 252 } 253 254 private: 255 void AddElement(Object* element) { 256 DCHECK(element->IsSmi() || element->IsString()); 257 DCHECK(array_builder_.capacity() > array_builder_.length()); 258 array_builder_.Add(element); 259 } 260 261 Heap* heap_; 262 FixedArrayBuilder array_builder_; 263 Handle<String> subject_; 264 int character_count_; 265 bool is_one_byte_; 266 }; 267 268 269 class IncrementalStringBuilder { 270 public: 271 explicit IncrementalStringBuilder(Isolate* isolate); 272 273 INLINE(String::Encoding CurrentEncoding()) { return encoding_; } 274 275 template <typename SrcChar, typename DestChar> 276 INLINE(void Append(SrcChar c)); 277 278 INLINE(void AppendCharacter(uint8_t c)) { 279 if (encoding_ == String::ONE_BYTE_ENCODING) { 280 Append<uint8_t, uint8_t>(c); 281 } else { 282 Append<uint8_t, uc16>(c); 283 } 284 } 285 286 INLINE(void AppendCString(const char* s)) { 287 const uint8_t* u = reinterpret_cast<const uint8_t*>(s); 288 if (encoding_ == String::ONE_BYTE_ENCODING) { 289 while (*u != '\0') Append<uint8_t, uint8_t>(*(u++)); 290 } else { 291 while (*u != '\0') Append<uint8_t, uc16>(*(u++)); 292 } 293 } 294 295 INLINE(void AppendCString(const uc16* s)) { 296 if (encoding_ == String::ONE_BYTE_ENCODING) { 297 while (*s != '\0') Append<uc16, uint8_t>(*(s++)); 298 } else { 299 while (*s != '\0') Append<uc16, uc16>(*(s++)); 300 } 301 } 302 303 INLINE(bool CurrentPartCanFit(int length)) { 304 return part_length_ - current_index_ > length; 305 } 306 307 void AppendString(Handle<String> string); 308 309 MaybeHandle<String> Finish(); 310 311 INLINE(bool HasOverflowed()) const { return overflowed_; } 312 313 // Change encoding to two-byte. 314 void ChangeEncoding() { 315 DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_); 316 ShrinkCurrentPart(); 317 encoding_ = String::TWO_BYTE_ENCODING; 318 Extend(); 319 } 320 321 template <typename DestChar> 322 class NoExtend { 323 public: 324 explicit NoExtend(Handle<String> string, int offset) { 325 DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString()); 326 if (sizeof(DestChar) == 1) { 327 start_ = reinterpret_cast<DestChar*>( 328 Handle<SeqOneByteString>::cast(string)->GetChars() + offset); 329 } else { 330 start_ = reinterpret_cast<DestChar*>( 331 Handle<SeqTwoByteString>::cast(string)->GetChars() + offset); 332 } 333 cursor_ = start_; 334 } 335 336 INLINE(void Append(DestChar c)) { *(cursor_++) = c; } 337 INLINE(void AppendCString(const char* s)) { 338 const uint8_t* u = reinterpret_cast<const uint8_t*>(s); 339 while (*u != '\0') Append(*(u++)); 340 } 341 342 int written() { return static_cast<int>(cursor_ - start_); } 343 344 private: 345 DestChar* start_; 346 DestChar* cursor_; 347 DisallowHeapAllocation no_gc_; 348 }; 349 350 template <typename DestChar> 351 class NoExtendString : public NoExtend<DestChar> { 352 public: 353 NoExtendString(Handle<String> string, int required_length) 354 : NoExtend<DestChar>(string, 0), string_(string) { 355 DCHECK(string->length() >= required_length); 356 } 357 358 Handle<String> Finalize() { 359 Handle<SeqString> string = Handle<SeqString>::cast(string_); 360 int length = NoExtend<DestChar>::written(); 361 Handle<String> result = SeqString::Truncate(string, length); 362 string_ = Handle<String>(); 363 return result; 364 } 365 366 private: 367 Handle<String> string_; 368 }; 369 370 template <typename DestChar> 371 class NoExtendBuilder : public NoExtend<DestChar> { 372 public: 373 NoExtendBuilder(IncrementalStringBuilder* builder, int required_length) 374 : NoExtend<DestChar>(builder->current_part(), builder->current_index_), 375 builder_(builder) { 376 DCHECK(builder->CurrentPartCanFit(required_length)); 377 } 378 379 ~NoExtendBuilder() { 380 builder_->current_index_ += NoExtend<DestChar>::written(); 381 } 382 383 private: 384 IncrementalStringBuilder* builder_; 385 }; 386 387 private: 388 Factory* factory() { return isolate_->factory(); } 389 390 INLINE(Handle<String> accumulator()) { return accumulator_; } 391 392 INLINE(void set_accumulator(Handle<String> string)) { 393 *accumulator_.location() = *string; 394 } 395 396 INLINE(Handle<String> current_part()) { return current_part_; } 397 398 INLINE(void set_current_part(Handle<String> string)) { 399 *current_part_.location() = *string; 400 } 401 402 // Add the current part to the accumulator. 403 void Accumulate(Handle<String> new_part); 404 405 // Finish the current part and allocate a new part. 406 void Extend(); 407 408 // Shrink current part to the right size. 409 void ShrinkCurrentPart() { 410 DCHECK(current_index_ < part_length_); 411 set_current_part(SeqString::Truncate( 412 Handle<SeqString>::cast(current_part()), current_index_)); 413 } 414 415 static const int kInitialPartLength = 32; 416 static const int kMaxPartLength = 16 * 1024; 417 static const int kPartLengthGrowthFactor = 2; 418 419 Isolate* isolate_; 420 String::Encoding encoding_; 421 bool overflowed_; 422 int part_length_; 423 int current_index_; 424 Handle<String> accumulator_; 425 Handle<String> current_part_; 426 }; 427 428 429 template <typename SrcChar, typename DestChar> 430 void IncrementalStringBuilder::Append(SrcChar c) { 431 DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1); 432 if (sizeof(DestChar) == 1) { 433 DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_); 434 SeqOneByteString::cast(*current_part_) 435 ->SeqOneByteStringSet(current_index_++, c); 436 } else { 437 DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_); 438 SeqTwoByteString::cast(*current_part_) 439 ->SeqTwoByteStringSet(current_index_++, c); 440 } 441 if (current_index_ == part_length_) Extend(); 442 } 443 } // namespace internal 444 } // namespace v8 445 446 #endif // V8_STRING_BUILDER_H_ 447