// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_SERIALIZE_H_
#define V8_SERIALIZE_H_

#include "src/hashmap.h"

namespace v8 {
namespace internal {

// A TypeCode is used to distinguish different kinds of external reference.
// The type is stored in the upper bits of an encoded reference so that an
// encoding can be decomposed back into a type and an id.
enum TypeCode {
  UNCLASSIFIED,        // One-of-a-kind references.
  BUILTIN,
  RUNTIME_FUNCTION,
  IC_UTILITY,
  STATS_COUNTER,
  TOP_ADDRESS,
  C_BUILTIN,
  EXTENSION,
  ACCESSOR,
  RUNTIME_ENTRY,
  STUB_CACHE_TABLE,
  LAZY_DEOPTIMIZATION
};

const int kTypeCodeCount = LAZY_DEOPTIMIZATION + 1;
const int kFirstTypeCode = UNCLASSIFIED;

const int kReferenceIdBits = 16;
const int kReferenceIdMask = (1 << kReferenceIdBits) - 1;
const int kReferenceTypeShift = kReferenceIdBits;

const int kDeoptTableSerializeEntryCount = 64;

// ExternalReferenceTable is a helper class that defines the relationship
// between external references and their encodings. It is used to build
// hashmaps in ExternalReferenceEncoder and ExternalReferenceDecoder.
class ExternalReferenceTable {
 public:
  static ExternalReferenceTable* instance(Isolate* isolate);

  ~ExternalReferenceTable() { }

  int size() const { return refs_.length(); }

  Address address(int i) { return refs_[i].address; }

  uint32_t code(int i) { return refs_[i].code; }

  const char* name(int i) { return refs_[i].name; }

  int max_id(int code) { return max_id_[code]; }

 private:
  explicit ExternalReferenceTable(Isolate* isolate) : refs_(64) {
    PopulateTable(isolate);
  }

  struct ExternalReferenceEntry {
    Address address;
    uint32_t code;
    const char* name;
  };

  void PopulateTable(Isolate* isolate);

  // For a few types of references, we can get their address from their id.
  void AddFromId(TypeCode type,
                 uint16_t id,
                 const char* name,
                 Isolate* isolate);

  // For other types of references, the caller will figure out the address.
  void Add(Address address, TypeCode type, uint16_t id, const char* name);

  List<ExternalReferenceEntry> refs_;
  int max_id_[kTypeCodeCount];
};
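
// A minimal sketch (not part of the original header) of how an encoded
// external reference is assumed to be laid out, based on the constants above
// and on ExternalReferenceDecoder::Lookup below: the TypeCode occupies the
// bits above kReferenceTypeShift and the per-type id occupies the low
// kReferenceIdBits.  The helper name is hypothetical and exists only for
// illustration; e.g. (BUILTIN, 3) encodes to 0x10003.
inline uint32_t EncodeExternalReferenceKeyForIllustration(TypeCode type,
                                                          uint16_t id) {
  return (static_cast<uint32_t>(type) << kReferenceTypeShift) |
         (static_cast<uint32_t>(id) & kReferenceIdMask);
}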


class ExternalReferenceEncoder {
 public:
  explicit ExternalReferenceEncoder(Isolate* isolate);

  uint32_t Encode(Address key) const;

  const char* NameOfAddress(Address key) const;

 private:
  HashMap encodings_;
  static uint32_t Hash(Address key) {
    return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(key) >> 2);
  }

  int IndexOf(Address key) const;

  void Put(Address key, int index);

  Isolate* isolate_;
};


class ExternalReferenceDecoder {
 public:
  explicit ExternalReferenceDecoder(Isolate* isolate);
  ~ExternalReferenceDecoder();

  Address Decode(uint32_t key) const {
    if (key == 0) return NULL;
    return *Lookup(key);
  }

 private:
  Address** encodings_;

  Address* Lookup(uint32_t key) const {
    int type = key >> kReferenceTypeShift;
    ASSERT(kFirstTypeCode <= type && type < kTypeCodeCount);
    int id = key & kReferenceIdMask;
    return &encodings_[type][id];
  }

  void Put(uint32_t key, Address value) {
    *Lookup(key) = value;
  }

  Isolate* isolate_;
};


class SnapshotByteSource {
 public:
  SnapshotByteSource(const byte* array, int length)
      : data_(array), length_(length), position_(0) { }

  bool HasMore() { return position_ < length_; }

  int Get() {
    ASSERT(position_ < length_);
    return data_[position_++];
  }

  int32_t GetUnalignedInt() {
#if defined(V8_HOST_CAN_READ_UNALIGNED) && __BYTE_ORDER == __LITTLE_ENDIAN
    int32_t answer;
    ASSERT(position_ + sizeof(answer) <= length_ + 0u);
    answer = *reinterpret_cast<const int32_t*>(data_ + position_);
#else
    int32_t answer = data_[position_];
    answer |= data_[position_ + 1] << 8;
    answer |= data_[position_ + 2] << 16;
    answer |= data_[position_ + 3] << 24;
#endif
    return answer;
  }

  void Advance(int by) { position_ += by; }

  inline void CopyRaw(byte* to, int number_of_bytes);

  inline int GetInt();

  bool AtEOF();

  int position() { return position_; }

 private:
  const byte* data_;
  int length_;
  int position_;
};


// SerializerDeserializer is a common superclass of Serializer and
// Deserializer; it holds the constants and methods they share.
class SerializerDeserializer: public ObjectVisitor {
 public:
  static void Iterate(Isolate* isolate, ObjectVisitor* visitor);

  static int nop() { return kNop; }

 protected:
  // Where the pointed-to object can be found:
  enum Where {
    kNewObject = 0,               // Object is next in snapshot.
    // 1-6                           One per space.
    kRootArray = 0x9,             // Object is found in root array.
    kPartialSnapshotCache = 0xa,  // Object is in the cache.
    kExternalReference = 0xb,     // Pointer to an external reference.
    kSkip = 0xc,                  // Skip n bytes.
    kNop = 0xd,                   // Does nothing, used to pad.
    // 0xe-0xf                       Free.
    kBackref = 0x10,              // Object is described relative to end.
    // 0x11-0x16                     One per space.
    kBackrefWithSkip = 0x18,      // Object is described relative to end.
    // 0x19-0x1e                     One per space.
    // 0x20-0x3f                     Used by misc. tags below.
    kPointedToMask = 0x3f
  };

  // How to code the pointer to the object.
  enum HowToCode {
    kPlain = 0,                   // Straight pointer.
    // What this means depends on the architecture:
    kFromCode = 0x40,             // A pointer inlined in code.
    kHowToCodeMask = 0x40
  };

  // For kRootArrayConstants
  enum WithSkip {
    kNoSkipDistance = 0,
    kHasSkipDistance = 0x40,
    kWithSkipMask = 0x40
  };

  // Where to point within the object.
  enum WhereToPoint {
    kStartOfObject = 0,
    kInnerPointer = 0x80,  // First insn in code object or payload of cell.
    kWhereToPointMask = 0x80
  };

  // Misc.
  // Raw data to be copied from the snapshot.  This byte code does not advance
  // the current pointer, which is used for code objects, where we write the
  // entire code in one memcpy, then fix up stuff with kSkip and other byte
  // codes that overwrite data.
  static const int kRawData = 0x20;
  // Some common raw lengths: 0x21-0x3f.  These autoadvance the current pointer.
  // A tag emitted at strategic points in the snapshot to delineate sections.
  // If the deserializer does not find these at the expected moments then it
  // is an indication that the snapshot and the VM do not fit together.
  // Examine the build process for architecture, version or configuration
  // mismatches.
  static const int kSynchronize = 0x70;
  // Used for the source code of the natives, which is in the executable, but
  // is referred to from external strings in the snapshot.
  static const int kNativesStringResource = 0x71;
  static const int kRepeat = 0x72;
  static const int kConstantRepeat = 0x73;
  // 0x73-0x7f            Repeat last word (subtract 0x72 to get the count).
  static const int kMaxRepeats = 0x7f - 0x72;
  static int CodeForRepeats(int repeats) {
    ASSERT(repeats >= 1 && repeats <= kMaxRepeats);
    return 0x72 + repeats;
  }
  static int RepeatsForCode(int byte_code) {
    ASSERT(byte_code >= kConstantRepeat && byte_code <= 0x7f);
    return byte_code - 0x72;
  }
  static const int kRootArrayConstants = 0xa0;
  // 0xa0-0xbf            Things from the first 32 elements of the root array.
  static const int kRootArrayNumberOfConstantEncodings = 0x20;
  static int RootArrayConstantFromByteCode(int byte_code) {
    return byte_code & 0x1f;
  }

  static const int kNumberOfSpaces = LO_SPACE;
  static const int kAnyOldSpace = -1;

  // A bitmask for getting the space out of an instruction.
  static const int kSpaceMask = 7;
};
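
// A hedged worked example (not part of the original header) of how a
// serialization instruction byte is assumed to decompose, given the masks
// above (one value from each of Where, HowToCode and WhereToPoint OR-ed
// together):
//
//   byte 0x91:
//     0x91 & kPointedToMask    == 0x11  -> kBackref, space 0x11 & kSpaceMask == 1
//     0x91 & kHowToCodeMask    == 0x00  -> kPlain
//     0x91 & kWhereToPointMask == 0x80  -> kInnerPointer
//
// i.e. "write a plain pointer to the inside of an object that is found by a
// back-reference into space 1".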


int SnapshotByteSource::GetInt() {
  // This way of variable-length encoding integers does not suffer from branch
  // mispredictions.
  uint32_t answer = GetUnalignedInt();
  int bytes = answer & 3;
  Advance(bytes);
  uint32_t mask = 0xffffffffu;
  mask >>= 32 - (bytes << 3);
  answer &= mask;
  answer >>= 2;
  return answer;
}


void SnapshotByteSource::CopyRaw(byte* to, int number_of_bytes) {
  MemCopy(to, data_ + position_, number_of_bytes);
  position_ += number_of_bytes;
}
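
// A hedged sketch (not part of the original header) of the variable-length
// integer format that GetInt() above decodes: the value is shifted left by
// two and the low two bits hold the number of bytes used (1-3).  For example
// 0x123 becomes (0x123 << 2) | 2 = 0x48E and is stored as the bytes 0x8E,
// 0x04; GetInt() reads four bytes, keeps only the encoded length, and shifts
// the tag away.  The helper below is hypothetical and merely illustrates an
// encoder consistent with that decoder.
inline int PutVarIntForIllustration(uint32_t value, byte* out) {
  ASSERT(value < (1u << 22));  // At most 3 bytes minus the 2 tag bits.
  uint32_t tagged = value << 2;
  int bytes = 1;
  if (tagged > 0xff) bytes = 2;
  if (tagged > 0xffff) bytes = 3;
  tagged |= bytes;
  for (int i = 0; i < bytes; i++) {
    out[i] = static_cast<byte>(tagged >> (8 * i));  // Little-endian order.
  }
  return bytes;  // Number of bytes written.
}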


// A Deserializer reads a snapshot and reconstructs the Object graph it defines.
class Deserializer: public SerializerDeserializer {
 public:
  // Create a deserializer from a snapshot byte source.
  explicit Deserializer(SnapshotByteSource* source);

  virtual ~Deserializer();

  // Deserialize the snapshot into an empty heap.
  void Deserialize(Isolate* isolate);

  // Deserialize a single object and the objects reachable from it.
  void DeserializePartial(Isolate* isolate, Object** root);

  void set_reservation(int space_number, int reservation) {
    ASSERT(space_number >= 0);
    ASSERT(space_number <= LAST_SPACE);
    reservations_[space_number] = reservation;
  }

 private:
  virtual void VisitPointers(Object** start, Object** end);

  virtual void VisitRuntimeEntry(RelocInfo* rinfo) {
    UNREACHABLE();
  }

  // Allocation sites are present in the snapshot, and must be linked into
  // a list at deserialization time.
  void RelinkAllocationSite(AllocationSite* site);

  // Fills in some heap data in an area from start to end (non-inclusive).  The
  // space id is used for the write barrier.  The object_address is the address
  // of the object we are writing into, or NULL if we are not writing into an
  // object, i.e. if we are writing a series of tagged values that are not on
  // the heap.
  void ReadChunk(
      Object** start, Object** end, int space, Address object_address);
  void ReadObject(int space_number, Object** write_back);

  // This routine both allocates a new object, and also keeps
  // track of where objects have been allocated so that we can
  // fix back references when deserializing.
  Address Allocate(int space_index, int size) {
    Address address = high_water_[space_index];
    high_water_[space_index] = address + size;
    HeapProfiler* profiler = isolate_->heap_profiler();
    if (profiler->is_tracking_allocations()) {
      profiler->AllocationEvent(address, size);
    }
    return address;
  }

  // This returns the address of an object that has been described in the
  // snapshot as being offset bytes back in a particular space.
  HeapObject* GetAddressFromEnd(int space) {
    int offset = source_->GetInt();
    offset <<= kObjectAlignmentBits;
    return HeapObject::FromAddress(high_water_[space] - offset);
  }

  void FlushICacheForNewCodeObjects();

  // Cached current isolate.
  Isolate* isolate_;

  SnapshotByteSource* source_;
  // This is the address of the next object that will be allocated in each
  // space.  It is used to calculate the addresses of back-references.
  Address high_water_[LAST_SPACE + 1];

  int reservations_[LAST_SPACE + 1];
  static const intptr_t kUninitializedReservation = -1;

  ExternalReferenceDecoder* external_reference_decoder_;

  DISALLOW_COPY_AND_ASSIGN(Deserializer);
};


class SnapshotByteSink {
 public:
  virtual ~SnapshotByteSink() { }
  virtual void Put(int byte, const char* description) = 0;
  virtual void PutSection(int byte, const char* description) {
    Put(byte, description);
  }
  void PutInt(uintptr_t integer, const char* description);
  virtual int Position() = 0;
};
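
// A minimal sketch (not part of the original header) of a SnapshotByteSink
// implementation that buffers the snapshot in memory, in the spirit of the
// sinks used by snapshot-building tools; the class name is hypothetical.
class ExampleBufferedSnapshotByteSink : public SnapshotByteSink {
 public:
  explicit ExampleBufferedSnapshotByteSink(List<byte>* data) : data_(data) { }
  virtual ~ExampleBufferedSnapshotByteSink() { }
  // Append a single byte of snapshot data; the description is ignored here.
  virtual void Put(int byte, const char* description) { data_->Add(byte); }
  // The current length of the buffer doubles as the write position.
  virtual int Position() { return data_->length(); }
 private:
  List<byte>* data_;
};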


// Mapping objects to their location after deserialization.
// This is used during building, but not at runtime by V8.
class SerializationAddressMapper {
 public:
  SerializationAddressMapper()
      : no_allocation_(),
        serialization_map_(new HashMap(HashMap::PointersMatch)) { }

  ~SerializationAddressMapper() {
    delete serialization_map_;
  }

  bool IsMapped(HeapObject* obj) {
    return serialization_map_->Lookup(Key(obj), Hash(obj), false) != NULL;
  }

  int MappedTo(HeapObject* obj) {
    ASSERT(IsMapped(obj));
    return static_cast<int>(reinterpret_cast<intptr_t>(
        serialization_map_->Lookup(Key(obj), Hash(obj), false)->value));
  }

  void AddMapping(HeapObject* obj, int to) {
    ASSERT(!IsMapped(obj));
    HashMap::Entry* entry =
        serialization_map_->Lookup(Key(obj), Hash(obj), true);
    entry->value = Value(to);
  }

 private:
  static uint32_t Hash(HeapObject* obj) {
    return static_cast<int32_t>(reinterpret_cast<intptr_t>(obj->address()));
  }

  static void* Key(HeapObject* obj) {
    return reinterpret_cast<void*>(obj->address());
  }

  static void* Value(int v) {
    return reinterpret_cast<void*>(v);
  }

  DisallowHeapAllocation no_allocation_;
  HashMap* serialization_map_;
  DISALLOW_COPY_AND_ASSIGN(SerializationAddressMapper);
};
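
// A hedged usage sketch (the helper below is hypothetical, not part of V8):
// during snapshot building an object is mapped the first time it is emitted
// and looked up again for every later back-reference to it.
inline int MapOrLookupForIllustration(SerializationAddressMapper* mapper,
                                      HeapObject* obj,
                                      int current_offset) {
  if (mapper->IsMapped(obj)) return mapper->MappedTo(obj);
  mapper->AddMapping(obj, current_offset);
  return current_offset;
}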


class CodeAddressMap;

// There can be only one serializer per V8 process.
class Serializer : public SerializerDeserializer {
 public:
  Serializer(Isolate* isolate, SnapshotByteSink* sink);
  ~Serializer();
  void VisitPointers(Object** start, Object** end);
  // You can call this after serialization to find out how much space was used
  // in each space.
  int CurrentAllocationAddress(int space) const {
    ASSERT(space < kNumberOfSpaces);
    return fullness_[space];
  }

  Isolate* isolate() const { return isolate_; }

  SerializationAddressMapper* address_mapper() { return &address_mapper_; }
  void PutRoot(int index,
               HeapObject* object,
               HowToCode how,
               WhereToPoint where,
               int skip);

 protected:
  static const int kInvalidRootIndex = -1;

  int RootIndex(HeapObject* heap_object, HowToCode from);
  intptr_t root_index_wave_front() { return root_index_wave_front_; }
  void set_root_index_wave_front(intptr_t value) {
    ASSERT(value >= root_index_wave_front_);
    root_index_wave_front_ = value;
  }

  class ObjectSerializer : public ObjectVisitor {
   public:
    ObjectSerializer(Serializer* serializer,
                     Object* o,
                     SnapshotByteSink* sink,
                     HowToCode how_to_code,
                     WhereToPoint where_to_point)
        : serializer_(serializer),
          object_(HeapObject::cast(o)),
          sink_(sink),
          reference_representation_(how_to_code + where_to_point),
          bytes_processed_so_far_(0),
          code_object_(o->IsCode()),
          code_has_been_output_(false) { }
    void Serialize();
    void VisitPointers(Object** start, Object** end);
    void VisitEmbeddedPointer(RelocInfo* target);
    void VisitExternalReference(Address* p);
    void VisitExternalReference(RelocInfo* rinfo);
    void VisitCodeTarget(RelocInfo* target);
    void VisitCodeEntry(Address entry_address);
    void VisitCell(RelocInfo* rinfo);
    void VisitRuntimeEntry(RelocInfo* reloc);
    // Used for serializing the external strings that hold the natives source.
    void VisitExternalAsciiString(
        v8::String::ExternalAsciiStringResource** resource);
    // We can't serialize a heap with external two byte strings.
    void VisitExternalTwoByteString(
        v8::String::ExternalStringResource** resource) {
      UNREACHABLE();
    }

   private:
    enum ReturnSkip { kCanReturnSkipInsteadOfSkipping, kIgnoringReturn };
    // This function outputs or skips the raw data between the last pointer and
    // up to the current position.  It optionally can just return the number of
    // bytes to skip instead of performing a skip instruction, in case the skip
    // can be merged into the next instruction.
    int OutputRawData(Address up_to, ReturnSkip return_skip = kIgnoringReturn);

    Serializer* serializer_;
    HeapObject* object_;
    SnapshotByteSink* sink_;
    int reference_representation_;
    int bytes_processed_so_far_;
    bool code_object_;
    bool code_has_been_output_;
  };

  virtual void SerializeObject(Object* o,
                               HowToCode how_to_code,
                               WhereToPoint where_to_point,
                               int skip) = 0;
  void SerializeReferenceToPreviousObject(
      int space,
      int address,
      HowToCode how_to_code,
      WhereToPoint where_to_point,
      int skip);
  void InitializeAllocators();
  // This will return the space for an object.
  static int SpaceOfObject(HeapObject* object);
  int Allocate(int space, int size);
  int EncodeExternalReference(Address addr) {
    return external_reference_encoder_->Encode(addr);
  }

  int SpaceAreaSize(int space);

  // Some roots should not be serialized, because their actual value depends on
  // absolute addresses and they are reset after deserialization, anyway.
  bool ShouldBeSkipped(Object** current);

  Isolate* isolate_;
  // Keep track of the fullness of each space in order to generate
  // relative addresses for back references.
  int fullness_[LAST_SPACE + 1];
  SnapshotByteSink* sink_;
  ExternalReferenceEncoder* external_reference_encoder_;

  SerializationAddressMapper address_mapper_;
  intptr_t root_index_wave_front_;
  void Pad();

  friend class ObjectSerializer;
  friend class Deserializer;

  // We may not need the code address map for logging for every instance
  // of the serializer.  Initialize it on demand.
  void InitializeCodeAddressMap();

 private:
  CodeAddressMap* code_address_map_;
  DISALLOW_COPY_AND_ASSIGN(Serializer);
};


class PartialSerializer : public Serializer {
 public:
  PartialSerializer(Isolate* isolate,
                    Serializer* startup_snapshot_serializer,
                    SnapshotByteSink* sink)
      : Serializer(isolate, sink),
        startup_serializer_(startup_snapshot_serializer) {
    set_root_index_wave_front(Heap::kStrongRootListLength);
    InitializeCodeAddressMap();
  }

  // Serialize the objects reachable from a single object pointer.
  void Serialize(Object** o);
  virtual void SerializeObject(Object* o,
                               HowToCode how_to_code,
                               WhereToPoint where_to_point,
                               int skip);

 private:
  int PartialSnapshotCacheIndex(HeapObject* o);
  bool ShouldBeInThePartialSnapshotCache(HeapObject* o) {
    // Scripts should be referred to only through shared function infos.  We
    // can't allow them to be part of the partial snapshot because they contain
    // a unique ID, and deserializing several partial snapshots containing
    // scripts would cause duplicates.
    ASSERT(!o->IsScript());
    return o->IsName() || o->IsSharedFunctionInfo() ||
           o->IsHeapNumber() || o->IsCode() ||
           o->IsScopeInfo() ||
           o->map() ==
               startup_serializer_->isolate()->heap()->fixed_cow_array_map();
  }

  Serializer* startup_serializer_;
  DISALLOW_COPY_AND_ASSIGN(PartialSerializer);
};
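
// A hedged sketch (not part of the original header) of how the startup and
// partial serializers are typically driven by a snapshot-building tool.  The
// exact driver code is an assumption and the variable names are hypothetical,
// but the ordering follows the comments on StartupSerializer below:
//
//   StartupSerializer startup(isolate, &startup_sink);
//   startup.SerializeStrongReferences();
//   PartialSerializer partial(isolate, &startup, &context_sink);
//   partial.Serialize(&context_root);   // May add to the partial snapshot cache.
//   startup.SerializeWeakReferences();  // Finish the startup snapshot.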
587 ASSERT(!o->IsScript()); 588 return o->IsName() || o->IsSharedFunctionInfo() || 589 o->IsHeapNumber() || o->IsCode() || 590 o->IsScopeInfo() || 591 o->map() == 592 startup_serializer_->isolate()->heap()->fixed_cow_array_map(); 593 } 594 595 596 Serializer* startup_serializer_; 597 DISALLOW_COPY_AND_ASSIGN(PartialSerializer); 598 }; 599 600 601 class StartupSerializer : public Serializer { 602 public: 603 StartupSerializer(Isolate* isolate, SnapshotByteSink* sink) 604 : Serializer(isolate, sink) { 605 // Clear the cache of objects used by the partial snapshot. After the 606 // strong roots have been serialized we can create a partial snapshot 607 // which will repopulate the cache with objects needed by that partial 608 // snapshot. 609 isolate->set_serialize_partial_snapshot_cache_length(0); 610 InitializeCodeAddressMap(); 611 } 612 // Serialize the current state of the heap. The order is: 613 // 1) Strong references. 614 // 2) Partial snapshot cache. 615 // 3) Weak references (e.g. the string table). 616 virtual void SerializeStrongReferences(); 617 virtual void SerializeObject(Object* o, 618 HowToCode how_to_code, 619 WhereToPoint where_to_point, 620 int skip); 621 void SerializeWeakReferences(); 622 void Serialize() { 623 SerializeStrongReferences(); 624 SerializeWeakReferences(); 625 Pad(); 626 } 627 }; 628 629 630 } } // namespace v8::internal 631 632 #endif // V8_SERIALIZE_H_ 633