// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef V8_SERIALIZE_H_
#define V8_SERIALIZE_H_

#include "hashmap.h"

namespace v8 {
namespace internal {

// A TypeCode is used to distinguish different kinds of external reference.
// It is a single bit to make testing for types easy.
enum TypeCode {
  UNCLASSIFIED,  // One-of-a-kind references.
  BUILTIN,
  RUNTIME_FUNCTION,
  IC_UTILITY,
  DEBUG_ADDRESS,
  STATS_COUNTER,
  TOP_ADDRESS,
  C_BUILTIN,
  EXTENSION,
  ACCESSOR,
  RUNTIME_ENTRY,
  STUB_CACHE_TABLE,
  LAZY_DEOPTIMIZATION
};

const int kTypeCodeCount = LAZY_DEOPTIMIZATION + 1;
const int kFirstTypeCode = UNCLASSIFIED;

// An encoded external reference is a 32-bit value holding the TypeCode in
// the bits above kReferenceTypeShift and a per-type id in the low
// kReferenceIdBits bits (see ExternalReferenceDecoder::Lookup below).
const int kReferenceIdBits = 16;
const int kReferenceIdMask = (1 << kReferenceIdBits) - 1;
const int kReferenceTypeShift = kReferenceIdBits;
const int kDebugRegisterBits = 4;
const int kDebugIdShift = kDebugRegisterBits;

const int kDeoptTableSerializeEntryCount = 8;

// ExternalReferenceTable is a helper class that defines the relationship
// between external references and their encodings. It is used to build
// hashmaps in ExternalReferenceEncoder and ExternalReferenceDecoder.
class ExternalReferenceTable {
 public:
  static ExternalReferenceTable* instance(Isolate* isolate);

  ~ExternalReferenceTable() { }

  int size() const { return refs_.length(); }

  Address address(int i) { return refs_[i].address; }

  uint32_t code(int i) { return refs_[i].code; }

  const char* name(int i) { return refs_[i].name; }

  // Highest id registered for the given type code; used to size the
  // per-type decode tables.
  int max_id(int code) { return max_id_[code]; }

 private:
  explicit ExternalReferenceTable(Isolate* isolate) : refs_(64) {
    PopulateTable(isolate);
  }

  struct ExternalReferenceEntry {
    Address address;
    uint32_t code;
    const char* name;
  };

  void PopulateTable(Isolate* isolate);

  // For a few types of references, we can get their address from their id.
  void AddFromId(TypeCode type,
                 uint16_t id,
                 const char* name,
                 Isolate* isolate);

  // For other types of references, the caller will figure out the address.
  void Add(Address address, TypeCode type, uint16_t id, const char* name);

  List<ExternalReferenceEntry> refs_;
  int max_id_[kTypeCodeCount];
};


// Maps external reference addresses to their (type, id) encodings using a
// hashmap built from the ExternalReferenceTable. Used while writing a
// snapshot.
class ExternalReferenceEncoder {
 public:
  explicit ExternalReferenceEncoder(Isolate* isolate);

  uint32_t Encode(Address key) const;

  const char* NameOfAddress(Address key) const;

 private:
  HashMap encodings_;
  // Addresses are at least 4-byte aligned, so the low two bits carry no
  // information; shift them out before hashing.
  static uint32_t Hash(Address key) {
    return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(key) >> 2);
  }

  int IndexOf(Address key) const;

  static bool Match(void* key1, void* key2) { return key1 == key2; }

  void Put(Address key, int index);

  Isolate* isolate_;
};


// Maps encoded (type, id) keys back to external reference addresses. Used
// while reading a snapshot.
class ExternalReferenceDecoder {
 public:
  explicit ExternalReferenceDecoder(Isolate* isolate);
  ~ExternalReferenceDecoder();

  // Key 0 is reserved for the NULL reference.
  Address Decode(uint32_t key) const {
    if (key == 0) return NULL;
    return *Lookup(key);
  }

 private:
  // Two-level table: encodings_[type] is an array indexed by id.
  Address** encodings_;

  Address* Lookup(uint32_t key) const {
    int type = key >> kReferenceTypeShift;
    ASSERT(kFirstTypeCode <= type && type < kTypeCodeCount);
    int id = key & kReferenceIdMask;
    return &encodings_[type][id];
  }

  void Put(uint32_t key, Address value) {
    *Lookup(key) = value;
  }

  Isolate* isolate_;
};


// A source of snapshot bytes: sequential reads from an in-memory byte
// array with a cursor.
class SnapshotByteSource {
 public:
  SnapshotByteSource(const byte* array, int length)
      : data_(array), length_(length), position_(0) { }

  bool HasMore() { return position_ < length_; }

  int Get() {
    ASSERT(position_ < length_);
    return data_[position_++];
  }

  // Reads a little-endian 32-bit word at the current position WITHOUT
  // advancing the cursor; callers advance explicitly (see GetInt).
  int32_t GetUnalignedInt() {
#if defined(V8_HOST_CAN_READ_UNALIGNED) && __BYTE_ORDER == __LITTLE_ENDIAN
    int32_t answer;
    ASSERT(position_ + sizeof(answer) <= length_ + 0u);
    answer = *reinterpret_cast<const int32_t*>(data_ + position_);
#else
    int32_t answer = data_[position_];
    answer |= data_[position_ + 1] << 8;
    answer |= data_[position_ + 2] << 16;
    answer |= data_[position_ + 3] << 24;
#endif
    return answer;
  }

  void Advance(int by) { position_ += by; }

  inline void CopyRaw(byte* to, int number_of_bytes);

  inline int GetInt();

  bool AtEOF();

  int position() { return position_; }

 private:
  const byte* data_;
  int length_;
  int position_;
};


// The Serializer/Deserializer class is a common superclass for Serializer and
// Deserializer which is used to store common constants and methods used by
// both.
class SerializerDeserializer: public ObjectVisitor {
 public:
  static void Iterate(Isolate* isolate, ObjectVisitor* visitor);

  static int nop() { return kNop; }

 protected:
  // Where the pointed-to object can be found:
  enum Where {
    kNewObject = 0,  // Object is next in snapshot.
    // 1-6                One per space.
    kRootArray = 0x9,             // Object is found in root array.
    kPartialSnapshotCache = 0xa,  // Object is in the cache.
    kExternalReference = 0xb,     // Pointer to an external reference.
    kSkip = 0xc,                  // Skip n bytes.
    kNop = 0xd,                   // Does nothing, used to pad.
    // 0xe-0xf            Free.
    kBackref = 0x10,  // Object is described relative to end.
    // 0x11-0x16          One per space.
    kBackrefWithSkip = 0x18,  // Object is described relative to end.
    // 0x19-0x1e          One per space.
    // 0x20-0x3f          Used by misc. tags below.
    kPointedToMask = 0x3f
  };

  // How to code the pointer to the object.
  enum HowToCode {
    kPlain = 0,  // Straight pointer.
    // What this means depends on the architecture:
    kFromCode = 0x40,  // A pointer inlined in code.
    kHowToCodeMask = 0x40
  };

  // For kRootArrayConstants
  enum WithSkip {
    kNoSkipDistance = 0,
    kHasSkipDistance = 0x40,
    kWithSkipMask = 0x40
  };

  // Where to point within the object.
  enum WhereToPoint {
    kStartOfObject = 0,
    kInnerPointer = 0x80,  // First insn in code object or payload of cell.
    kWhereToPointMask = 0x80
  };

  // Misc.
  // Raw data to be copied from the snapshot. This byte code does not advance
  // the current pointer, which is used for code objects, where we write the
  // entire code in one memcpy, then fix up stuff with kSkip and other byte
  // codes that overwrite data.
  static const int kRawData = 0x20;
  // Some common raw lengths: 0x21-0x3f. These autoadvance the current pointer.
  // A tag emitted at strategic points in the snapshot to delineate sections.
  // If the deserializer does not find these at the expected moments then it
  // is an indication that the snapshot and the VM do not fit together.
  // Examine the build process for architecture, version or configuration
  // mismatches.
  static const int kSynchronize = 0x70;
  // Used for the source code of the natives, which is in the executable, but
  // is referred to from external strings in the snapshot.
  static const int kNativesStringResource = 0x71;
  static const int kRepeat = 0x72;
  static const int kConstantRepeat = 0x73;
  // 0x73-0x7f            Repeat last word (subtract 0x72 to get the count).
  static const int kMaxRepeats = 0x7f - 0x72;
  static int CodeForRepeats(int repeats) {
    ASSERT(repeats >= 1 && repeats <= kMaxRepeats);
    return 0x72 + repeats;
  }
  static int RepeatsForCode(int byte_code) {
    ASSERT(byte_code >= kConstantRepeat && byte_code <= 0x7f);
    return byte_code - 0x72;
  }
  static const int kRootArrayConstants = 0xa0;
  // 0xa0-0xbf            Things from the first 32 elements of the root array.
  static const int kRootArrayNumberOfConstantEncodings = 0x20;
  static int RootArrayConstantFromByteCode(int byte_code) {
    return byte_code & 0x1f;
  }

  static const int kNumberOfSpaces = LO_SPACE;
  static const int kAnyOldSpace = -1;

  // A bitmask for getting the space out of an instruction.
  static const int kSpaceMask = 7;
};


int SnapshotByteSource::GetInt() {
  // This way of variable-length encoding integers does not suffer from branch
  // mispredictions.
  uint32_t answer = GetUnalignedInt();
  // The low 2 bits of the first byte hold the total encoded length in bytes.
  // NOTE(review): a length of 0 would make the shift below a 32-bit shift of
  // a uint32_t (undefined behavior) and would never advance the cursor;
  // presumably the matching PutInt only emits lengths 1-3 -- confirm against
  // SnapshotByteSink::PutInt in the .cc file.
  int bytes = answer & 3;
  Advance(bytes);
  uint32_t mask = 0xffffffffu;
  mask >>= 32 - (bytes << 3);  // Keep only the bytes that belong to us.
  answer &= mask;
  answer >>= 2;  // Drop the length field to recover the payload.
  return answer;
}


void SnapshotByteSource::CopyRaw(byte* to, int number_of_bytes) {
  OS::MemCopy(to, data_ + position_, number_of_bytes);
  position_ += number_of_bytes;
}


// A Deserializer reads a snapshot and reconstructs the Object graph it defines.
class Deserializer: public SerializerDeserializer {
 public:
  // Create a deserializer from a snapshot byte source.
  explicit Deserializer(SnapshotByteSource* source);

  virtual ~Deserializer();

  // Deserialize the snapshot into an empty heap.
  void Deserialize(Isolate* isolate);

  // Deserialize a single object and the objects reachable from it.
  void DeserializePartial(Isolate* isolate, Object** root);

  // Records how much memory to reserve up front in the given space.
  void set_reservation(int space_number, int reservation) {
    ASSERT(space_number >= 0);
    ASSERT(space_number <= LAST_SPACE);
    reservations_[space_number] = reservation;
  }

 private:
  virtual void VisitPointers(Object** start, Object** end);

  // Runtime entries only occur in code objects, which are deserialized via
  // ReadObject/ReadChunk rather than through this visitor hook.
  virtual void VisitRuntimeEntry(RelocInfo* rinfo) {
    UNREACHABLE();
  }

  // Allocation sites are present in the snapshot, and must be linked into
  // a list at deserialization time.
  void RelinkAllocationSite(AllocationSite* site);

  // Fills in some heap data in an area from start to end (non-inclusive). The
  // space id is used for the write barrier. The object_address is the address
  // of the object we are writing into, or NULL if we are not writing into an
  // object, i.e. if we are writing a series of tagged values that are not on
  // the heap.
  void ReadChunk(
      Object** start, Object** end, int space, Address object_address);
  void ReadObject(int space_number, Object** write_back);

  // This routine both allocates a new object, and also keeps
  // track of where objects have been allocated so that we can
  // fix back references when deserializing.
  Address Allocate(int space_index, int size) {
    Address address = high_water_[space_index];
    high_water_[space_index] = address + size;
    HeapProfiler* profiler = isolate_->heap_profiler();
    if (profiler->is_tracking_allocations()) {
      profiler->AllocationEvent(address, size);
    }
    return address;
  }

  // This returns the address of an object that has been described in the
  // snapshot as being offset bytes back in a particular space.
  HeapObject* GetAddressFromEnd(int space) {
    int offset = source_->GetInt();
    offset <<= kObjectAlignmentBits;
    return HeapObject::FromAddress(high_water_[space] - offset);
  }

  void FlushICacheForNewCodeObjects();

  // Cached current isolate.
  Isolate* isolate_;

  SnapshotByteSource* source_;
  // This is the address of the next object that will be allocated in each
  // space. It is used to calculate the addresses of back-references.
  Address high_water_[LAST_SPACE + 1];

  int reservations_[LAST_SPACE + 1];
  static const intptr_t kUninitializedReservation = -1;

  ExternalReferenceDecoder* external_reference_decoder_;

  DISALLOW_COPY_AND_ASSIGN(Deserializer);
};


// Abstract sink that the serializer writes snapshot bytes into.
class SnapshotByteSink {
 public:
  virtual ~SnapshotByteSink() { }
  virtual void Put(int byte, const char* description) = 0;
  // PutSection may be overridden to treat bytes belonging to a logical
  // section differently; by default it just forwards to Put.
  virtual void PutSection(int byte, const char* description) {
    Put(byte, description);
  }
  // Writes an integer in the variable-length format that
  // SnapshotByteSource::GetInt reads.
  void PutInt(uintptr_t integer, const char* description);
  virtual int Position() = 0;
};


// Mapping objects to their location after deserialization.
// This is used during building, but not at runtime by V8.
class SerializationAddressMapper {
 public:
  SerializationAddressMapper()
      : no_allocation_(),
        serialization_map_(new HashMap(&SerializationMatchFun)) { }

  ~SerializationAddressMapper() {
    delete serialization_map_;
  }

  bool IsMapped(HeapObject* obj) {
    return serialization_map_->Lookup(Key(obj), Hash(obj), false) != NULL;
  }

  int MappedTo(HeapObject* obj) {
    ASSERT(IsMapped(obj));
    return static_cast<int>(reinterpret_cast<intptr_t>(
        serialization_map_->Lookup(Key(obj), Hash(obj), false)->value));
  }

  void AddMapping(HeapObject* obj, int to) {
    ASSERT(!IsMapped(obj));
    HashMap::Entry* entry =
        serialization_map_->Lookup(Key(obj), Hash(obj), true);
    entry->value = Value(to);
  }

 private:
  static bool SerializationMatchFun(void* key1, void* key2) {
    return key1 == key2;
  }

  // Keys are heap object addresses, which are stable here because
  // no_allocation_ forbids GC for the mapper's lifetime.
  static uint32_t Hash(HeapObject* obj) {
    return static_cast<int32_t>(reinterpret_cast<intptr_t>(obj->address()));
  }

  static void* Key(HeapObject* obj) {
    return reinterpret_cast<void*>(obj->address());
  }

  static void* Value(int v) {
    return reinterpret_cast<void*>(v);
  }

  DisallowHeapAllocation no_allocation_;
  HashMap* serialization_map_;
  DISALLOW_COPY_AND_ASSIGN(SerializationAddressMapper);
};


class CodeAddressMap;

// There can be only one serializer per V8 process.
class Serializer : public SerializerDeserializer {
 public:
  Serializer(Isolate* isolate, SnapshotByteSink* sink);
  ~Serializer();
  void VisitPointers(Object** start, Object** end);
  // You can call this after serialization to find out how much space was used
  // in each space.
  int CurrentAllocationAddress(int space) {
    ASSERT(space < kNumberOfSpaces);
    return fullness_[space];
  }

  Isolate* isolate() const { return isolate_; }
  static void Enable(Isolate* isolate);
  static void Disable();

  // Call this when you have made use of the fact that there is no serialization
  // going on.
  static void TooLateToEnableNow() { too_late_to_enable_now_ = true; }
  static bool enabled() { return serialization_enabled_; }
  SerializationAddressMapper* address_mapper() { return &address_mapper_; }
  void PutRoot(int index,
               HeapObject* object,
               HowToCode how,
               WhereToPoint where,
               int skip);

 protected:
  static const int kInvalidRootIndex = -1;

  int RootIndex(HeapObject* heap_object, HowToCode from);
  virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) = 0;
  intptr_t root_index_wave_front() { return root_index_wave_front_; }
  // The wave front may only move forward.
  void set_root_index_wave_front(intptr_t value) {
    ASSERT(value >= root_index_wave_front_);
    root_index_wave_front_ = value;
  }

  // Serializes one heap object and the raw data/pointers it contains by
  // visiting it with the ObjectVisitor hooks below.
  class ObjectSerializer : public ObjectVisitor {
   public:
    ObjectSerializer(Serializer* serializer,
                     Object* o,
                     SnapshotByteSink* sink,
                     HowToCode how_to_code,
                     WhereToPoint where_to_point)
        : serializer_(serializer),
          object_(HeapObject::cast(o)),
          sink_(sink),
          reference_representation_(how_to_code + where_to_point),
          bytes_processed_so_far_(0),
          code_object_(o->IsCode()),
          code_has_been_output_(false) { }
    void Serialize();
    void VisitPointers(Object** start, Object** end);
    void VisitEmbeddedPointer(RelocInfo* target);
    void VisitExternalReference(Address* p);
    void VisitExternalReference(RelocInfo* rinfo);
    void VisitCodeTarget(RelocInfo* target);
    void VisitCodeEntry(Address entry_address);
    void VisitCell(RelocInfo* rinfo);
    void VisitRuntimeEntry(RelocInfo* reloc);
    // Used for serializing the external strings that hold the natives source.
    void VisitExternalAsciiString(
        v8::String::ExternalAsciiStringResource** resource);
    // We can't serialize a heap with external two byte strings.
    void VisitExternalTwoByteString(
        v8::String::ExternalStringResource** resource) {
      UNREACHABLE();
    }

   private:
    enum ReturnSkip { kCanReturnSkipInsteadOfSkipping, kIgnoringReturn };
    // This function outputs or skips the raw data between the last pointer and
    // up to the current position.  It optionally can just return the number of
    // bytes to skip instead of performing a skip instruction, in case the skip
    // can be merged into the next instruction.
    int OutputRawData(Address up_to, ReturnSkip return_skip = kIgnoringReturn);

    Serializer* serializer_;
    HeapObject* object_;
    SnapshotByteSink* sink_;
    int reference_representation_;
    int bytes_processed_so_far_;
    bool code_object_;
    bool code_has_been_output_;
  };

  virtual void SerializeObject(Object* o,
                               HowToCode how_to_code,
                               WhereToPoint where_to_point,
                               int skip) = 0;
  void SerializeReferenceToPreviousObject(
      int space,
      int address,
      HowToCode how_to_code,
      WhereToPoint where_to_point,
      int skip);
  void InitializeAllocators();
  // This will return the space for an object.
  static int SpaceOfObject(HeapObject* object);
  int Allocate(int space, int size);
  int EncodeExternalReference(Address addr) {
    return external_reference_encoder_->Encode(addr);
  }

  int SpaceAreaSize(int space);

  // Some roots should not be serialized, because their actual value depends on
  // absolute addresses and they are reset after deserialization, anyway.
  bool ShouldBeSkipped(Object** current);

  Isolate* isolate_;
  // Keep track of the fullness of each space in order to generate
  // relative addresses for back references.
  int fullness_[LAST_SPACE + 1];
  SnapshotByteSink* sink_;
  int current_root_index_;
  ExternalReferenceEncoder* external_reference_encoder_;
  static bool serialization_enabled_;
  // Did we already make use of the fact that serialization was not enabled?
  static bool too_late_to_enable_now_;
  SerializationAddressMapper address_mapper_;
  intptr_t root_index_wave_front_;
  void Pad();

  friend class ObjectSerializer;
  friend class Deserializer;

 private:
  static CodeAddressMap* code_address_map_;
  DISALLOW_COPY_AND_ASSIGN(Serializer);
};


// Serializes the objects reachable from a single root (e.g. a context),
// deferring objects that belong in the startup snapshot to the partial
// snapshot cache maintained by the startup serializer.
class PartialSerializer : public Serializer {
 public:
  PartialSerializer(Isolate* isolate,
                    Serializer* startup_snapshot_serializer,
                    SnapshotByteSink* sink)
    : Serializer(isolate, sink),
      startup_serializer_(startup_snapshot_serializer) {
    set_root_index_wave_front(Heap::kStrongRootListLength);
  }

  // Serialize the objects reachable from a single object pointer.
  virtual void Serialize(Object** o);
  virtual void SerializeObject(Object* o,
                               HowToCode how_to_code,
                               WhereToPoint where_to_point,
                               int skip);

 protected:
  virtual int PartialSnapshotCacheIndex(HeapObject* o);
  virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) {
    // Scripts should be referred only through shared function infos.  We can't
    // allow them to be part of the partial snapshot because they contain a
    // unique ID, and deserializing several partial snapshots containing script
    // would cause dupes.
    ASSERT(!o->IsScript());
    return o->IsName() || o->IsSharedFunctionInfo() ||
           o->IsHeapNumber() || o->IsCode() ||
           o->IsScopeInfo() ||
           o->map() ==
               startup_serializer_->isolate()->heap()->fixed_cow_array_map();
  }

 private:
  Serializer* startup_serializer_;
  DISALLOW_COPY_AND_ASSIGN(PartialSerializer);
};


// Serializes the strong roots and the partial snapshot cache into the
// startup snapshot.
class StartupSerializer : public Serializer {
 public:
  StartupSerializer(Isolate* isolate, SnapshotByteSink* sink)
      : Serializer(isolate, sink) {
    // Clear the cache of objects used by the partial snapshot.  After the
    // strong roots have been serialized we can create a partial snapshot
    // which will repopulate the cache with objects needed by that partial
    // snapshot.
    isolate->set_serialize_partial_snapshot_cache_length(0);
  }
  // Serialize the current state of the heap.  The order is:
  // 1) Strong references.
  // 2) Partial snapshot cache.
  // 3) Weak references (e.g. the string table).
  virtual void SerializeStrongReferences();
  virtual void SerializeObject(Object* o,
                               HowToCode how_to_code,
                               WhereToPoint where_to_point,
                               int skip);
  void SerializeWeakReferences();
  void Serialize() {
    SerializeStrongReferences();
    SerializeWeakReferences();
    Pad();
  }

 private:
  // Nothing is cached for the startup snapshot itself.
  virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) {
    return false;
  }
};


} }  // namespace v8::internal

#endif  // V8_SERIALIZE_H_