Home | History | Annotate | Download | only in src
      1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 #ifndef V8_SERIALIZE_H_
     29 #define V8_SERIALIZE_H_
     30 
     31 #include "hashmap.h"
     32 
     33 namespace v8 {
     34 namespace internal {
     35 
// A TypeCode is used to distinguish different kinds of external reference.
// It is stored in the bits above the low kReferenceIdBits bits of an encoded
// reference, so the type can be recovered with a single shift.
     38 enum TypeCode {
     39   UNCLASSIFIED,        // One-of-a-kind references.
     40   BUILTIN,
     41   RUNTIME_FUNCTION,
     42   IC_UTILITY,
     43   DEBUG_ADDRESS,
     44   STATS_COUNTER,
     45   TOP_ADDRESS,
     46   C_BUILTIN,
     47   EXTENSION,
     48   ACCESSOR,
     49   RUNTIME_ENTRY,
     50   STUB_CACHE_TABLE
     51 };
     52 
     53 const int kTypeCodeCount = STUB_CACHE_TABLE + 1;
     54 const int kFirstTypeCode = UNCLASSIFIED;
     55 
     56 const int kReferenceIdBits = 16;
     57 const int kReferenceIdMask = (1 << kReferenceIdBits) - 1;
     58 const int kReferenceTypeShift = kReferenceIdBits;
     59 const int kDebugRegisterBits = 4;
     60 const int kDebugIdShift = kDebugRegisterBits;
     61 
     62 
     63 class ExternalReferenceEncoder {
     64  public:
     65   ExternalReferenceEncoder();
     66 
     67   uint32_t Encode(Address key) const;
     68 
     69   const char* NameOfAddress(Address key) const;
     70 
     71  private:
     72   HashMap encodings_;
     73   static uint32_t Hash(Address key) {
     74     return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(key) >> 2);
     75   }
     76 
     77   int IndexOf(Address key) const;
     78 
     79   static bool Match(void* key1, void* key2) { return key1 == key2; }
     80 
     81   void Put(Address key, int index);
     82 
     83   Isolate* isolate_;
     84 };
     85 
     86 
     87 class ExternalReferenceDecoder {
     88  public:
     89   ExternalReferenceDecoder();
     90   ~ExternalReferenceDecoder();
     91 
     92   Address Decode(uint32_t key) const {
     93     if (key == 0) return NULL;
     94     return *Lookup(key);
     95   }
     96 
     97  private:
     98   Address** encodings_;
     99 
    100   Address* Lookup(uint32_t key) const {
    101     int type = key >> kReferenceTypeShift;
    102     ASSERT(kFirstTypeCode <= type && type < kTypeCodeCount);
    103     int id = key & kReferenceIdMask;
    104     return &encodings_[type][id];
    105   }
    106 
    107   void Put(uint32_t key, Address value) {
    108     *Lookup(key) = value;
    109   }
    110 
    111   Isolate* isolate_;
    112 };
    113 
    114 
    115 class SnapshotByteSource {
    116  public:
    117   SnapshotByteSource(const byte* array, int length)
    118     : data_(array), length_(length), position_(0) { }
    119 
    120   bool HasMore() { return position_ < length_; }
    121 
    122   int Get() {
    123     ASSERT(position_ < length_);
    124     return data_[position_++];
    125   }
    126 
    127   inline void CopyRaw(byte* to, int number_of_bytes);
    128 
    129   inline int GetInt();
    130 
    131   bool AtEOF() {
    132     return position_ == length_;
    133   }
    134 
    135   int position() { return position_; }
    136 
    137  private:
    138   const byte* data_;
    139   int length_;
    140   int position_;
    141 };
    142 
    143 
    144 // It is very common to have a reference to objects at certain offsets in the
    145 // heap.  These offsets have been determined experimentally.  We code
    146 // references to such objects in a single byte that encodes the way the pointer
    147 // is written (only plain pointers allowed), the space number and the offset.
    148 // This only works for objects in the first page of a space.  Don't use this for
    149 // things in newspace since it bypasses the write barrier.
    150 
    151 RLYSTC const int k64 = (sizeof(uintptr_t) - 4) / 4;
    152 
    153 #define COMMON_REFERENCE_PATTERNS(f)                               \
    154   f(kNumberOfSpaces, 2, (11 - k64))                                \
    155   f((kNumberOfSpaces + 1), 2, 0)                                   \
    156   f((kNumberOfSpaces + 2), 2, (142 - 16 * k64))                    \
    157   f((kNumberOfSpaces + 3), 2, (74 - 15 * k64))                     \
    158   f((kNumberOfSpaces + 4), 2, 5)                                   \
    159   f((kNumberOfSpaces + 5), 1, 135)                                 \
    160   f((kNumberOfSpaces + 6), 2, (228 - 39 * k64))
    161 
    162 #define COMMON_RAW_LENGTHS(f)        \
    163   f(1, 1)  \
    164   f(2, 2)  \
    165   f(3, 3)  \
    166   f(4, 4)  \
    167   f(5, 5)  \
    168   f(6, 6)  \
    169   f(7, 7)  \
    170   f(8, 8)  \
    171   f(9, 12)  \
    172   f(10, 16) \
    173   f(11, 20) \
    174   f(12, 24) \
    175   f(13, 28) \
    176   f(14, 32) \
    177   f(15, 36)
    178 
    179 // The Serializer/Deserializer class is a common superclass for Serializer and
    180 // Deserializer which is used to store common constants and methods used by
    181 // both.
    182 class SerializerDeserializer: public ObjectVisitor {
    183  public:
    184   RLYSTC void Iterate(ObjectVisitor* visitor);
    185   RLYSTC void SetSnapshotCacheSize(int size);
    186 
    187  protected:
    188   // Where the pointed-to object can be found:
    189   enum Where {
    190     kNewObject = 0,                 // Object is next in snapshot.
    191     // 1-8                             One per space.
    192     kRootArray = 0x9,               // Object is found in root array.
    193     kPartialSnapshotCache = 0xa,    // Object is in the cache.
    194     kExternalReference = 0xb,       // Pointer to an external reference.
    195     // 0xc-0xf                         Free.
    196     kBackref = 0x10,                 // Object is described relative to end.
    197     // 0x11-0x18                       One per space.
    198     // 0x19-0x1f                       Common backref offsets.
    199     kFromStart = 0x20,              // Object is described relative to start.
    200     // 0x21-0x28                       One per space.
    201     // 0x29-0x2f                       Free.
    202     // 0x30-0x3f                       Used by misc tags below.
    203     kPointedToMask = 0x3f
    204   };
    205 
    206   // How to code the pointer to the object.
    207   enum HowToCode {
    208     kPlain = 0,                          // Straight pointer.
    209     // What this means depends on the architecture:
    210     kFromCode = 0x40,                    // A pointer inlined in code.
    211     kHowToCodeMask = 0x40
    212   };
    213 
    214   // Where to point within the object.
    215   enum WhereToPoint {
    216     kStartOfObject = 0,
    217     kFirstInstruction = 0x80,
    218     kWhereToPointMask = 0x80
    219   };
    220 
    221   // Misc.
    222   // Raw data to be copied from the snapshot.
    223   RLYSTC const int kRawData = 0x30;
    224   // Some common raw lengths: 0x31-0x3f
    225   // A tag emitted at strategic points in the snapshot to delineate sections.
    226   // If the deserializer does not find these at the expected moments then it
    227   // is an indication that the snapshot and the VM do not fit together.
    228   // Examine the build process for architecture, version or configuration
    229   // mismatches.
    230   RLYSTC const int kSynchronize = 0x70;
    231   // Used for the source code of the natives, which is in the executable, but
    232   // is referred to from external strings in the snapshot.
    233   RLYSTC const int kNativesStringResource = 0x71;
    234   RLYSTC const int kNewPage = 0x72;
    235   // 0x73-0x7f                            Free.
    236   // 0xb0-0xbf                            Free.
    237   // 0xf0-0xff                            Free.
    238 
    239 
    240   RLYSTC const int kLargeData = LAST_SPACE;
    241   RLYSTC const int kLargeCode = kLargeData + 1;
    242   RLYSTC const int kLargeFixedArray = kLargeCode + 1;
    243   RLYSTC const int kNumberOfSpaces = kLargeFixedArray + 1;
    244   RLYSTC const int kAnyOldSpace = -1;
    245 
    246   // A bitmask for getting the space out of an instruction.
    247   RLYSTC const int kSpaceMask = 15;
    248 
    249   RLYSTC inline bool SpaceIsLarge(int space) { return space >= kLargeData; }
    250   RLYSTC inline bool SpaceIsPaged(int space) {
    251     return space >= FIRST_PAGED_SPACE && space <= LAST_PAGED_SPACE;
    252   }
    253 };
    254 
    255 
    256 int SnapshotByteSource::GetInt() {
    257   // A little unwind to catch the really small ints.
    258   int snapshot_byte = Get();
    259   if ((snapshot_byte & 0x80) == 0) {
    260     return snapshot_byte;
    261   }
    262   int accumulator = (snapshot_byte & 0x7f) << 7;
    263   while (true) {
    264     snapshot_byte = Get();
    265     if ((snapshot_byte & 0x80) == 0) {
    266       return accumulator | snapshot_byte;
    267     }
    268     accumulator = (accumulator | (snapshot_byte & 0x7f)) << 7;
    269   }
    270   UNREACHABLE();
    271   return accumulator;
    272 }
    273 
    274 
    275 void SnapshotByteSource::CopyRaw(byte* to, int number_of_bytes) {
    276   memcpy(to, data_ + position_, number_of_bytes);
    277   position_ += number_of_bytes;
    278 }
    279 
    280 
    281 // A Deserializer reads a snapshot and reconstructs the Object graph it defines.
    282 class Deserializer: public SerializerDeserializer {
    283  public:
    284   // Create a deserializer from a snapshot byte source.
    285   explicit Deserializer(SnapshotByteSource* source);
    286 
    287   virtual ~Deserializer();
    288 
    289   // Deserialize the snapshot into an empty heap.
    290   void Deserialize();
    291 
    292   // Deserialize a single object and the objects reachable from it.
    293   void DeserializePartial(Object** root);
    294 
    295 #ifdef DEBUG
    296   virtual void Synchronize(const char* tag);
    297 #endif
    298 
    299  private:
    300   virtual void VisitPointers(Object** start, Object** end);
    301 
    302   virtual void VisitExternalReferences(Address* start, Address* end) {
    303     UNREACHABLE();
    304   }
    305 
    306   virtual void VisitRuntimeEntry(RelocInfo* rinfo) {
    307     UNREACHABLE();
    308   }
    309 
    310   void ReadChunk(Object** start, Object** end, int space, Address address);
    311   HeapObject* GetAddressFromStart(int space);
    312   inline HeapObject* GetAddressFromEnd(int space);
    313   Address Allocate(int space_number, Space* space, int size);
    314   void ReadObject(int space_number, Space* space, Object** write_back);
    315 
    316   // Cached current isolate.
    317   Isolate* isolate_;
    318 
    319   // Keep track of the pages in the paged spaces.
    320   // (In large object space we are keeping track of individual objects
    321   // rather than pages.)  In new space we just need the address of the
    322   // first object and the others will flow from that.
    323   List<Address> pages_[SerializerDeserializer::kNumberOfSpaces];
    324 
    325   SnapshotByteSource* source_;
    326   // This is the address of the next object that will be allocated in each
    327   // space.  It is used to calculate the addresses of back-references.
    328   Address high_water_[LAST_SPACE + 1];
    329   // This is the address of the most recent object that was allocated.  It
    330   // is used to set the location of the new page when we encounter a
    331   // START_NEW_PAGE_SERIALIZATION tag.
    332   Address last_object_address_;
    333 
    334   ExternalReferenceDecoder* external_reference_decoder_;
    335 
    336   DISALLOW_COPY_AND_ASSIGN(Deserializer);
    337 };
    338 
    339 
    340 class SnapshotByteSink {
    341  public:
    342   virtual ~SnapshotByteSink() { }
    343   virtual void Put(int byte, const char* description) = 0;
    344   virtual void PutSection(int byte, const char* description) {
    345     Put(byte, description);
    346   }
    347   void PutInt(uintptr_t integer, const char* description);
    348   virtual int Position() = 0;
    349 };
    350 
    351 
    352 // Mapping objects to their location after deserialization.
    353 // This is used during building, but not at runtime by V8.
    354 class SerializationAddressMapper {
    355  public:
    356   SerializationAddressMapper()
    357       : serialization_map_(new HashMap(&SerializationMatchFun)),
    358         no_allocation_(new AssertNoAllocation()) { }
    359 
    360   ~SerializationAddressMapper() {
    361     delete serialization_map_;
    362     delete no_allocation_;
    363   }
    364 
    365   bool IsMapped(HeapObject* obj) {
    366     return serialization_map_->Lookup(Key(obj), Hash(obj), false) != NULL;
    367   }
    368 
    369   int MappedTo(HeapObject* obj) {
    370     ASSERT(IsMapped(obj));
    371     return static_cast<int>(reinterpret_cast<intptr_t>(
    372         serialization_map_->Lookup(Key(obj), Hash(obj), false)->value));
    373   }
    374 
    375   void AddMapping(HeapObject* obj, int to) {
    376     ASSERT(!IsMapped(obj));
    377     HashMap::Entry* entry =
    378         serialization_map_->Lookup(Key(obj), Hash(obj), true);
    379     entry->value = Value(to);
    380   }
    381 
    382  private:
    383   RLYSTC bool SerializationMatchFun(void* key1, void* key2) {
    384     return key1 == key2;
    385   }
    386 
    387   RLYSTC uint32_t Hash(HeapObject* obj) {
    388     return static_cast<int32_t>(reinterpret_cast<intptr_t>(obj->address()));
    389   }
    390 
    391   RLYSTC void* Key(HeapObject* obj) {
    392     return reinterpret_cast<void*>(obj->address());
    393   }
    394 
    395   RLYSTC void* Value(int v) {
    396     return reinterpret_cast<void*>(v);
    397   }
    398 
    399   HashMap* serialization_map_;
    400   AssertNoAllocation* no_allocation_;
    401   DISALLOW_COPY_AND_ASSIGN(SerializationAddressMapper);
    402 };
    403 
    404 
    405 // There can be only one serializer per V8 process.
    406 STATIC_CLASS Serializer : public SerializerDeserializer {
    407  public:
    408   explicit Serializer(SnapshotByteSink* sink);
    409   ~Serializer();
    410   void VisitPointers(Object** start, Object** end);
    411   // You can call this after serialization to find out how much space was used
    412   // in each space.
    413   int CurrentAllocationAddress(int space) {
    414     if (SpaceIsLarge(space)) return large_object_total_;
    415     return fullness_[space];
    416   }
    417 
    418   RLYSTC void Enable() {
    419     if (!serialization_enabled_) {
    420       ASSERT(!too_late_to_enable_now_);
    421     }
    422     serialization_enabled_ = true;
    423   }
    424 
    425   RLYSTC void Disable() { serialization_enabled_ = false; }
    426   // Call this when you have made use of the fact that there is no serialization
    427   // going on.
    428   RLYSTC void TooLateToEnableNow() { too_late_to_enable_now_ = true; }
    429   RLYSTC bool enabled() { return serialization_enabled_; }
    430   SerializationAddressMapper* address_mapper() { return &address_mapper_; }
    431 #ifdef DEBUG
    432   virtual void Synchronize(const char* tag);
    433 #endif
    434 
    435  protected:
    436   RLYSTC const int kInvalidRootIndex = -1;
    437   virtual int RootIndex(HeapObject* heap_object) = 0;
    438   virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) = 0;
    439 
    440   class ObjectSerializer : public ObjectVisitor {
    441    public:
    442     ObjectSerializer(Serializer* serializer,
    443                      Object* o,
    444                      SnapshotByteSink* sink,
    445                      HowToCode how_to_code,
    446                      WhereToPoint where_to_point)
    447       : serializer_(serializer),
    448         object_(HeapObject::cast(o)),
    449         sink_(sink),
    450         reference_representation_(how_to_code + where_to_point),
    451         bytes_processed_so_far_(0) { }
    452     void Serialize();
    453     void VisitPointers(Object** start, Object** end);
    454     void VisitExternalReferences(Address* start, Address* end);
    455     void VisitCodeTarget(RelocInfo* target);
    456     void VisitCodeEntry(Address entry_address);
    457     void VisitGlobalPropertyCell(RelocInfo* rinfo);
    458     void VisitRuntimeEntry(RelocInfo* reloc);
    459     // Used for seralizing the external strings that hold the natives source.
    460     void VisitExternalAsciiString(
    461         v8::String::ExternalAsciiStringResource** resource);
    462     // We can't serialize a heap with external two byte strings.
    463     void VisitExternalTwoByteString(
    464         v8::String::ExternalStringResource** resource) {
    465       UNREACHABLE();
    466     }
    467 
    468    private:
    469     void OutputRawData(Address up_to);
    470 
    471     Serializer* serializer_;
    472     HeapObject* object_;
    473     SnapshotByteSink* sink_;
    474     int reference_representation_;
    475     int bytes_processed_so_far_;
    476   };
    477 
    478   virtual void SerializeObject(Object* o,
    479                                HowToCode how_to_code,
    480                                WhereToPoint where_to_point) = 0;
    481   void SerializeReferenceToPreviousObject(
    482       int space,
    483       int address,
    484       HowToCode how_to_code,
    485       WhereToPoint where_to_point);
    486   void InitializeAllocators();
    487   // This will return the space for an object.  If the object is in large
    488   // object space it may return kLargeCode or kLargeFixedArray in order
    489   // to indicate to the deserializer what kind of large object allocation
    490   // to make.
    491   RLYSTC int SpaceOfObject(HeapObject* object);
    492   // This just returns the space of the object.  It will return LO_SPACE
    493   // for all large objects since you can't check the type of the object
    494   // once the map has been used for the serialization address.
    495   RLYSTC int SpaceOfAlreadySerializedObject(HeapObject* object);
    496   int Allocate(int space, int size, bool* new_page_started);
    497   int EncodeExternalReference(Address addr) {
    498     return external_reference_encoder_->Encode(addr);
    499   }
    500 
    501   // Keep track of the fullness of each space in order to generate
    502   // relative addresses for back references.  Large objects are
    503   // just numbered sequentially since relative addresses make no
    504   // sense in large object space.
    505   int fullness_[LAST_SPACE + 1];
    506   SnapshotByteSink* sink_;
    507   int current_root_index_;
    508   ExternalReferenceEncoder* external_reference_encoder_;
    509   RLYSTC bool serialization_enabled_;
    510   // Did we already make use of the fact that serialization was not enabled?
    511   RLYSTC bool too_late_to_enable_now_;
    512   int large_object_total_;
    513   SerializationAddressMapper address_mapper_;
    514 
    515   friend class ObjectSerializer;
    516   friend class Deserializer;
    517 
    518   DISALLOW_COPY_AND_ASSIGN(Serializer);
    519 };
    520 
    521 
    522 class PartialSerializer : public Serializer {
    523  public:
    524   PartialSerializer(Serializer* startup_snapshot_serializer,
    525                     SnapshotByteSink* sink)
    526     : Serializer(sink),
    527       startup_serializer_(startup_snapshot_serializer) {
    528   }
    529 
    530   // Serialize the objects reachable from a single object pointer.
    531   virtual void Serialize(Object** o);
    532   virtual void SerializeObject(Object* o,
    533                                HowToCode how_to_code,
    534                                WhereToPoint where_to_point);
    535 
    536  protected:
    537   virtual int RootIndex(HeapObject* o);
    538   virtual int PartialSnapshotCacheIndex(HeapObject* o);
    539   virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) {
    540     // Scripts should be referred only through shared function infos.  We can't
    541     // allow them to be part of the partial snapshot because they contain a
    542     // unique ID, and deserializing several partial snapshots containing script
    543     // would cause dupes.
    544     ASSERT(!o->IsScript());
    545     return o->IsString() || o->IsSharedFunctionInfo() ||
    546            o->IsHeapNumber() || o->IsCode() ||
    547            o->map() == HEAP->fixed_cow_array_map();
    548   }
    549 
    550  private:
    551   Serializer* startup_serializer_;
    552   DISALLOW_COPY_AND_ASSIGN(PartialSerializer);
    553 };
    554 
    555 
    556 class StartupSerializer : public Serializer {
    557  public:
    558   explicit StartupSerializer(SnapshotByteSink* sink) : Serializer(sink) {
    559     // Clear the cache of objects used by the partial snapshot.  After the
    560     // strong roots have been serialized we can create a partial snapshot
    561     // which will repopulate the cache with objects neede by that partial
    562     // snapshot.
    563     Isolate::Current()->set_serialize_partial_snapshot_cache_length(0);
    564   }
    565   // Serialize the current state of the heap.  The order is:
    566   // 1) Strong references.
    567   // 2) Partial snapshot cache.
    568   // 3) Weak references (eg the symbol table).
    569   virtual void SerializeStrongReferences();
    570   virtual void SerializeObject(Object* o,
    571                                HowToCode how_to_code,
    572                                WhereToPoint where_to_point);
    573   void SerializeWeakReferences();
    574   void Serialize() {
    575     SerializeStrongReferences();
    576     SerializeWeakReferences();
    577   }
    578 
    579  private:
    580   virtual int RootIndex(HeapObject* o) { return kInvalidRootIndex; }
    581   virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) {
    582     return false;
    583   }
    584 };
    585 
    586 
    587 } }  // namespace v8::internal
    588 
    589 #endif  // V8_SERIALIZE_H_
    590