1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 2 // Redistribution and use in source and binary forms, with or without 3 // modification, are permitted provided that the following conditions are 4 // met: 5 // 6 // * Redistributions of source code must retain the above copyright 7 // notice, this list of conditions and the following disclaimer. 8 // * Redistributions in binary form must reproduce the above 9 // copyright notice, this list of conditions and the following 10 // disclaimer in the documentation and/or other materials provided 11 // with the distribution. 12 // * Neither the name of Google Inc. nor the names of its 13 // contributors may be used to endorse or promote products derived 14 // from this software without specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28 #ifndef V8_SERIALIZE_H_ 29 #define V8_SERIALIZE_H_ 30 31 #include "hashmap.h" 32 33 namespace v8 { 34 namespace internal { 35 36 // A TypeCode is used to distinguish different kinds of external reference. 37 // It is a single bit to make testing for types easy. 38 enum TypeCode { 39 UNCLASSIFIED, // One-of-a-kind references. 40 BUILTIN, 41 RUNTIME_FUNCTION, 42 IC_UTILITY, 43 DEBUG_ADDRESS, 44 STATS_COUNTER, 45 TOP_ADDRESS, 46 C_BUILTIN, 47 EXTENSION, 48 ACCESSOR, 49 RUNTIME_ENTRY, 50 STUB_CACHE_TABLE 51 }; 52 53 const int kTypeCodeCount = STUB_CACHE_TABLE + 1; 54 const int kFirstTypeCode = UNCLASSIFIED; 55 56 const int kReferenceIdBits = 16; 57 const int kReferenceIdMask = (1 << kReferenceIdBits) - 1; 58 const int kReferenceTypeShift = kReferenceIdBits; 59 const int kDebugRegisterBits = 4; 60 const int kDebugIdShift = kDebugRegisterBits; 61 62 63 class ExternalReferenceEncoder { 64 public: 65 ExternalReferenceEncoder(); 66 67 uint32_t Encode(Address key) const; 68 69 const char* NameOfAddress(Address key) const; 70 71 private: 72 HashMap encodings_; 73 static uint32_t Hash(Address key) { 74 return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(key) >> 2); 75 } 76 77 int IndexOf(Address key) const; 78 79 static bool Match(void* key1, void* key2) { return key1 == key2; } 80 81 void Put(Address key, int index); 82 83 Isolate* isolate_; 84 }; 85 86 87 class ExternalReferenceDecoder { 88 public: 89 ExternalReferenceDecoder(); 90 ~ExternalReferenceDecoder(); 91 92 Address Decode(uint32_t key) const { 93 if (key == 0) return NULL; 94 return *Lookup(key); 95 } 96 97 private: 98 Address** encodings_; 99 100 Address* Lookup(uint32_t key) const { 101 int type = key >> kReferenceTypeShift; 102 ASSERT(kFirstTypeCode <= type && type < kTypeCodeCount); 103 int id = key & kReferenceIdMask; 104 return &encodings_[type][id]; 105 } 106 107 void Put(uint32_t key, Address value) { 108 *Lookup(key) = value; 109 } 110 111 Isolate* isolate_; 112 }; 113 114 115 class SnapshotByteSource { 116 public: 117 SnapshotByteSource(const byte* array, int length) 118 : data_(array), length_(length), position_(0) { } 119 120 bool HasMore() { return position_ < length_; } 121 122 int Get() { 123 ASSERT(position_ < length_); 124 return data_[position_++]; 125 } 126 127 inline void CopyRaw(byte* to, int number_of_bytes); 128 129 inline int GetInt(); 130 131 bool AtEOF() { 132 return position_ == length_; 133 } 134 135 int position() { return position_; } 136 137 private: 138 const byte* data_; 139 int length_; 140 int position_; 141 }; 142 143 144 // It is very common to have a reference to objects at certain offsets in the 145 // heap. These offsets have been determined experimentally. We code 146 // references to such objects in a single byte that encodes the way the pointer 147 // is written (only plain pointers allowed), the space number and the offset. 148 // This only works for objects in the first page of a space. Don't use this for 149 // things in newspace since it bypasses the write barrier. 150 151 RLYSTC const int k64 = (sizeof(uintptr_t) - 4) / 4; 152 153 #define COMMON_REFERENCE_PATTERNS(f) \ 154 f(kNumberOfSpaces, 2, (11 - k64)) \ 155 f((kNumberOfSpaces + 1), 2, 0) \ 156 f((kNumberOfSpaces + 2), 2, (142 - 16 * k64)) \ 157 f((kNumberOfSpaces + 3), 2, (74 - 15 * k64)) \ 158 f((kNumberOfSpaces + 4), 2, 5) \ 159 f((kNumberOfSpaces + 5), 1, 135) \ 160 f((kNumberOfSpaces + 6), 2, (228 - 39 * k64)) 161 162 #define COMMON_RAW_LENGTHS(f) \ 163 f(1, 1) \ 164 f(2, 2) \ 165 f(3, 3) \ 166 f(4, 4) \ 167 f(5, 5) \ 168 f(6, 6) \ 169 f(7, 7) \ 170 f(8, 8) \ 171 f(9, 12) \ 172 f(10, 16) \ 173 f(11, 20) \ 174 f(12, 24) \ 175 f(13, 28) \ 176 f(14, 32) \ 177 f(15, 36) 178 179 // The Serializer/Deserializer class is a common superclass for Serializer and 180 // Deserializer which is used to store common constants and methods used by 181 // both. 182 class SerializerDeserializer: public ObjectVisitor { 183 public: 184 RLYSTC void Iterate(ObjectVisitor* visitor); 185 RLYSTC void SetSnapshotCacheSize(int size); 186 187 protected: 188 // Where the pointed-to object can be found: 189 enum Where { 190 kNewObject = 0, // Object is next in snapshot. 191 // 1-8 One per space. 192 kRootArray = 0x9, // Object is found in root array. 193 kPartialSnapshotCache = 0xa, // Object is in the cache. 194 kExternalReference = 0xb, // Pointer to an external reference. 195 // 0xc-0xf Free. 196 kBackref = 0x10, // Object is described relative to end. 197 // 0x11-0x18 One per space. 198 // 0x19-0x1f Common backref offsets. 199 kFromStart = 0x20, // Object is described relative to start. 200 // 0x21-0x28 One per space. 201 // 0x29-0x2f Free. 202 // 0x30-0x3f Used by misc tags below. 203 kPointedToMask = 0x3f 204 }; 205 206 // How to code the pointer to the object. 207 enum HowToCode { 208 kPlain = 0, // Straight pointer. 209 // What this means depends on the architecture: 210 kFromCode = 0x40, // A pointer inlined in code. 211 kHowToCodeMask = 0x40 212 }; 213 214 // Where to point within the object. 215 enum WhereToPoint { 216 kStartOfObject = 0, 217 kFirstInstruction = 0x80, 218 kWhereToPointMask = 0x80 219 }; 220 221 // Misc. 222 // Raw data to be copied from the snapshot. 223 RLYSTC const int kRawData = 0x30; 224 // Some common raw lengths: 0x31-0x3f 225 // A tag emitted at strategic points in the snapshot to delineate sections. 226 // If the deserializer does not find these at the expected moments then it 227 // is an indication that the snapshot and the VM do not fit together. 228 // Examine the build process for architecture, version or configuration 229 // mismatches. 230 RLYSTC const int kSynchronize = 0x70; 231 // Used for the source code of the natives, which is in the executable, but 232 // is referred to from external strings in the snapshot. 233 RLYSTC const int kNativesStringResource = 0x71; 234 RLYSTC const int kNewPage = 0x72; 235 // 0x73-0x7f Free. 236 // 0xb0-0xbf Free. 237 // 0xf0-0xff Free. 238 239 240 RLYSTC const int kLargeData = LAST_SPACE; 241 RLYSTC const int kLargeCode = kLargeData + 1; 242 RLYSTC const int kLargeFixedArray = kLargeCode + 1; 243 RLYSTC const int kNumberOfSpaces = kLargeFixedArray + 1; 244 RLYSTC const int kAnyOldSpace = -1; 245 246 // A bitmask for getting the space out of an instruction. 247 RLYSTC const int kSpaceMask = 15; 248 249 RLYSTC inline bool SpaceIsLarge(int space) { return space >= kLargeData; } 250 RLYSTC inline bool SpaceIsPaged(int space) { 251 return space >= FIRST_PAGED_SPACE && space <= LAST_PAGED_SPACE; 252 } 253 }; 254 255 256 int SnapshotByteSource::GetInt() { 257 // A little unwind to catch the really small ints. 258 int snapshot_byte = Get(); 259 if ((snapshot_byte & 0x80) == 0) { 260 return snapshot_byte; 261 } 262 int accumulator = (snapshot_byte & 0x7f) << 7; 263 while (true) { 264 snapshot_byte = Get(); 265 if ((snapshot_byte & 0x80) == 0) { 266 return accumulator | snapshot_byte; 267 } 268 accumulator = (accumulator | (snapshot_byte & 0x7f)) << 7; 269 } 270 UNREACHABLE(); 271 return accumulator; 272 } 273 274 275 void SnapshotByteSource::CopyRaw(byte* to, int number_of_bytes) { 276 memcpy(to, data_ + position_, number_of_bytes); 277 position_ += number_of_bytes; 278 } 279 280 281 // A Deserializer reads a snapshot and reconstructs the Object graph it defines. 282 class Deserializer: public SerializerDeserializer { 283 public: 284 // Create a deserializer from a snapshot byte source. 285 explicit Deserializer(SnapshotByteSource* source); 286 287 virtual ~Deserializer(); 288 289 // Deserialize the snapshot into an empty heap. 290 void Deserialize(); 291 292 // Deserialize a single object and the objects reachable from it. 293 void DeserializePartial(Object** root); 294 295 #ifdef DEBUG 296 virtual void Synchronize(const char* tag); 297 #endif 298 299 private: 300 virtual void VisitPointers(Object** start, Object** end); 301 302 virtual void VisitExternalReferences(Address* start, Address* end) { 303 UNREACHABLE(); 304 } 305 306 virtual void VisitRuntimeEntry(RelocInfo* rinfo) { 307 UNREACHABLE(); 308 } 309 310 void ReadChunk(Object** start, Object** end, int space, Address address); 311 HeapObject* GetAddressFromStart(int space); 312 inline HeapObject* GetAddressFromEnd(int space); 313 Address Allocate(int space_number, Space* space, int size); 314 void ReadObject(int space_number, Space* space, Object** write_back); 315 316 // Cached current isolate. 317 Isolate* isolate_; 318 319 // Keep track of the pages in the paged spaces. 320 // (In large object space we are keeping track of individual objects 321 // rather than pages.) In new space we just need the address of the 322 // first object and the others will flow from that. 323 List<Address> pages_[SerializerDeserializer::kNumberOfSpaces]; 324 325 SnapshotByteSource* source_; 326 // This is the address of the next object that will be allocated in each 327 // space. It is used to calculate the addresses of back-references. 328 Address high_water_[LAST_SPACE + 1]; 329 // This is the address of the most recent object that was allocated. It 330 // is used to set the location of the new page when we encounter a 331 // START_NEW_PAGE_SERIALIZATION tag. 332 Address last_object_address_; 333 334 ExternalReferenceDecoder* external_reference_decoder_; 335 336 DISALLOW_COPY_AND_ASSIGN(Deserializer); 337 }; 338 339 340 class SnapshotByteSink { 341 public: 342 virtual ~SnapshotByteSink() { } 343 virtual void Put(int byte, const char* description) = 0; 344 virtual void PutSection(int byte, const char* description) { 345 Put(byte, description); 346 } 347 void PutInt(uintptr_t integer, const char* description); 348 virtual int Position() = 0; 349 }; 350 351 352 // Mapping objects to their location after deserialization. 353 // This is used during building, but not at runtime by V8. 354 class SerializationAddressMapper { 355 public: 356 SerializationAddressMapper() 357 : serialization_map_(new HashMap(&SerializationMatchFun)), 358 no_allocation_(new AssertNoAllocation()) { } 359 360 ~SerializationAddressMapper() { 361 delete serialization_map_; 362 delete no_allocation_; 363 } 364 365 bool IsMapped(HeapObject* obj) { 366 return serialization_map_->Lookup(Key(obj), Hash(obj), false) != NULL; 367 } 368 369 int MappedTo(HeapObject* obj) { 370 ASSERT(IsMapped(obj)); 371 return static_cast<int>(reinterpret_cast<intptr_t>( 372 serialization_map_->Lookup(Key(obj), Hash(obj), false)->value)); 373 } 374 375 void AddMapping(HeapObject* obj, int to) { 376 ASSERT(!IsMapped(obj)); 377 HashMap::Entry* entry = 378 serialization_map_->Lookup(Key(obj), Hash(obj), true); 379 entry->value = Value(to); 380 } 381 382 private: 383 RLYSTC bool SerializationMatchFun(void* key1, void* key2) { 384 return key1 == key2; 385 } 386 387 RLYSTC uint32_t Hash(HeapObject* obj) { 388 return static_cast<int32_t>(reinterpret_cast<intptr_t>(obj->address())); 389 } 390 391 RLYSTC void* Key(HeapObject* obj) { 392 return reinterpret_cast<void*>(obj->address()); 393 } 394 395 RLYSTC void* Value(int v) { 396 return reinterpret_cast<void*>(v); 397 } 398 399 HashMap* serialization_map_; 400 AssertNoAllocation* no_allocation_; 401 DISALLOW_COPY_AND_ASSIGN(SerializationAddressMapper); 402 }; 403 404 405 // There can be only one serializer per V8 process. 406 STATIC_CLASS Serializer : public SerializerDeserializer { 407 public: 408 explicit Serializer(SnapshotByteSink* sink); 409 ~Serializer(); 410 void VisitPointers(Object** start, Object** end); 411 // You can call this after serialization to find out how much space was used 412 // in each space. 413 int CurrentAllocationAddress(int space) { 414 if (SpaceIsLarge(space)) return large_object_total_; 415 return fullness_[space]; 416 } 417 418 RLYSTC void Enable() { 419 if (!serialization_enabled_) { 420 ASSERT(!too_late_to_enable_now_); 421 } 422 serialization_enabled_ = true; 423 } 424 425 RLYSTC void Disable() { serialization_enabled_ = false; } 426 // Call this when you have made use of the fact that there is no serialization 427 // going on. 428 RLYSTC void TooLateToEnableNow() { too_late_to_enable_now_ = true; } 429 RLYSTC bool enabled() { return serialization_enabled_; } 430 SerializationAddressMapper* address_mapper() { return &address_mapper_; } 431 #ifdef DEBUG 432 virtual void Synchronize(const char* tag); 433 #endif 434 435 protected: 436 RLYSTC const int kInvalidRootIndex = -1; 437 virtual int RootIndex(HeapObject* heap_object) = 0; 438 virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) = 0; 439 440 class ObjectSerializer : public ObjectVisitor { 441 public: 442 ObjectSerializer(Serializer* serializer, 443 Object* o, 444 SnapshotByteSink* sink, 445 HowToCode how_to_code, 446 WhereToPoint where_to_point) 447 : serializer_(serializer), 448 object_(HeapObject::cast(o)), 449 sink_(sink), 450 reference_representation_(how_to_code + where_to_point), 451 bytes_processed_so_far_(0) { } 452 void Serialize(); 453 void VisitPointers(Object** start, Object** end); 454 void VisitExternalReferences(Address* start, Address* end); 455 void VisitCodeTarget(RelocInfo* target); 456 void VisitCodeEntry(Address entry_address); 457 void VisitGlobalPropertyCell(RelocInfo* rinfo); 458 void VisitRuntimeEntry(RelocInfo* reloc); 459 // Used for seralizing the external strings that hold the natives source. 460 void VisitExternalAsciiString( 461 v8::String::ExternalAsciiStringResource** resource); 462 // We can't serialize a heap with external two byte strings. 463 void VisitExternalTwoByteString( 464 v8::String::ExternalStringResource** resource) { 465 UNREACHABLE(); 466 } 467 468 private: 469 void OutputRawData(Address up_to); 470 471 Serializer* serializer_; 472 HeapObject* object_; 473 SnapshotByteSink* sink_; 474 int reference_representation_; 475 int bytes_processed_so_far_; 476 }; 477 478 virtual void SerializeObject(Object* o, 479 HowToCode how_to_code, 480 WhereToPoint where_to_point) = 0; 481 void SerializeReferenceToPreviousObject( 482 int space, 483 int address, 484 HowToCode how_to_code, 485 WhereToPoint where_to_point); 486 void InitializeAllocators(); 487 // This will return the space for an object. If the object is in large 488 // object space it may return kLargeCode or kLargeFixedArray in order 489 // to indicate to the deserializer what kind of large object allocation 490 // to make. 491 RLYSTC int SpaceOfObject(HeapObject* object); 492 // This just returns the space of the object. It will return LO_SPACE 493 // for all large objects since you can't check the type of the object 494 // once the map has been used for the serialization address. 495 RLYSTC int SpaceOfAlreadySerializedObject(HeapObject* object); 496 int Allocate(int space, int size, bool* new_page_started); 497 int EncodeExternalReference(Address addr) { 498 return external_reference_encoder_->Encode(addr); 499 } 500 501 // Keep track of the fullness of each space in order to generate 502 // relative addresses for back references. Large objects are 503 // just numbered sequentially since relative addresses make no 504 // sense in large object space. 505 int fullness_[LAST_SPACE + 1]; 506 SnapshotByteSink* sink_; 507 int current_root_index_; 508 ExternalReferenceEncoder* external_reference_encoder_; 509 RLYSTC bool serialization_enabled_; 510 // Did we already make use of the fact that serialization was not enabled? 511 RLYSTC bool too_late_to_enable_now_; 512 int large_object_total_; 513 SerializationAddressMapper address_mapper_; 514 515 friend class ObjectSerializer; 516 friend class Deserializer; 517 518 DISALLOW_COPY_AND_ASSIGN(Serializer); 519 }; 520 521 522 class PartialSerializer : public Serializer { 523 public: 524 PartialSerializer(Serializer* startup_snapshot_serializer, 525 SnapshotByteSink* sink) 526 : Serializer(sink), 527 startup_serializer_(startup_snapshot_serializer) { 528 } 529 530 // Serialize the objects reachable from a single object pointer. 531 virtual void Serialize(Object** o); 532 virtual void SerializeObject(Object* o, 533 HowToCode how_to_code, 534 WhereToPoint where_to_point); 535 536 protected: 537 virtual int RootIndex(HeapObject* o); 538 virtual int PartialSnapshotCacheIndex(HeapObject* o); 539 virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) { 540 // Scripts should be referred only through shared function infos. We can't 541 // allow them to be part of the partial snapshot because they contain a 542 // unique ID, and deserializing several partial snapshots containing script 543 // would cause dupes. 544 ASSERT(!o->IsScript()); 545 return o->IsString() || o->IsSharedFunctionInfo() || 546 o->IsHeapNumber() || o->IsCode() || 547 o->map() == HEAP->fixed_cow_array_map(); 548 } 549 550 private: 551 Serializer* startup_serializer_; 552 DISALLOW_COPY_AND_ASSIGN(PartialSerializer); 553 }; 554 555 556 class StartupSerializer : public Serializer { 557 public: 558 explicit StartupSerializer(SnapshotByteSink* sink) : Serializer(sink) { 559 // Clear the cache of objects used by the partial snapshot. After the 560 // strong roots have been serialized we can create a partial snapshot 561 // which will repopulate the cache with objects neede by that partial 562 // snapshot. 563 Isolate::Current()->set_serialize_partial_snapshot_cache_length(0); 564 } 565 // Serialize the current state of the heap. The order is: 566 // 1) Strong references. 567 // 2) Partial snapshot cache. 568 // 3) Weak references (eg the symbol table). 569 virtual void SerializeStrongReferences(); 570 virtual void SerializeObject(Object* o, 571 HowToCode how_to_code, 572 WhereToPoint where_to_point); 573 void SerializeWeakReferences(); 574 void Serialize() { 575 SerializeStrongReferences(); 576 SerializeWeakReferences(); 577 } 578 579 private: 580 virtual int RootIndex(HeapObject* o) { return kInvalidRootIndex; } 581 virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) { 582 return false; 583 } 584 }; 585 586 587 } } // namespace v8::internal 588 589 #endif // V8_SERIALIZE_H_ 590