Home | History | Annotate | Download | only in Core
      1 //
      2 //  MappedHash.h
      3 //
      4 
      5 #ifndef liblldb_MappedHash_h_
      6 #define liblldb_MappedHash_h_
      7 
      8 #include <assert.h>
      9 #include <stdint.h>
     10 
     11 #include <map>
     12 #include <vector>
     13 
     14 #include "lldb/Core/DataExtractor.h"
     15 #include "lldb/Core/Stream.h"
     16 
     17 class MappedHash
     18 {
     19 public:
     20 
     21     enum HashFunctionType
     22     {
     23         eHashFunctionDJB        = 0u    // Daniel J Bernstein hash function that is also used by the ELF GNU_HASH sections
     24     };
     25 
     26 
     27     static uint32_t
     28     HashStringUsingDJB (const char *s)
     29     {
     30         uint32_t h = 5381;
     31 
     32         for (unsigned char c = *s; c; c = *++s)
     33             h = ((h << 5) + h) + c;
     34 
     35         return h;
     36     }
     37 
     38     static uint32_t
     39     HashString (uint32_t hash_function, const char *s)
     40     {
     41         switch (hash_function)
     42         {
     43             case MappedHash::eHashFunctionDJB:
     44                 return HashStringUsingDJB (s);
     45 
     46             default:
     47                 break;
     48         }
     49         assert (!"Invalid hash function index");
     50         return 0;
     51     }
     52 
     53 
     54     static const uint32_t HASH_MAGIC = 0x48415348u;
     55     static const uint32_t HASH_CIGAM = 0x48534148u;
     56 
     57     template <typename T>
     58     struct Header
     59 	{
     60         typedef T HeaderData;
     61 
     62         uint32_t    magic;             // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection
     63         uint16_t    version;           // Version number
     64 		uint16_t    hash_function;     // The hash function enumeration that was used
     65 		uint32_t    bucket_count;      // The number of buckets in this hash table
     66 		uint32_t    hashes_count;      // The total number of unique hash values and hash data offsets in this table
     67         uint32_t    header_data_len;   // The size in bytes of the "header_data" template member below
     68         HeaderData  header_data;       //
     69 
     70 		Header () :
     71             magic (HASH_MAGIC),
     72             version (1),
     73             hash_function (eHashFunctionDJB),
     74             bucket_count (0),
     75             hashes_count (0),
     76             header_data_len (sizeof(T)),
     77             header_data ()
     78 		{
     79 		}
     80 
     81         virtual
     82         ~Header()
     83         {
     84         }
     85 
     86         size_t
     87         GetByteSize() const
     88         {
     89             return  sizeof(magic) +
     90                     sizeof(version) +
     91                     sizeof(hash_function) +
     92                     sizeof(bucket_count) +
     93                     sizeof(hashes_count) +
     94                     sizeof(header_data_len) +
     95                     header_data_len;
     96         }
     97 
     98         virtual size_t
     99         GetByteSize (const HeaderData &header_data) = 0;
    100 
    101         void
    102         SetHeaderDataByteSize (uint32_t header_data_byte_size)
    103         {
    104             header_data_len = header_data_byte_size;
    105         }
    106 
    107         void
    108         Dump (lldb_private::Stream &s)
    109         {
    110             s.Printf ("header.magic              = 0x%8.8x\n", magic);
    111             s.Printf ("header.version            = 0x%4.4x\n", version);
    112             s.Printf ("header.hash_function      = 0x%4.4x\n", hash_function);
    113             s.Printf ("header.bucket_count       = 0x%8.8x %u\n", bucket_count, bucket_count);
    114             s.Printf ("header.hashes_count       = 0x%8.8x %u\n", hashes_count, hashes_count);
    115             s.Printf ("header.header_data_len    = 0x%8.8x %u\n", header_data_len, header_data_len);
    116         }
    117 
    118         virtual lldb::offset_t
    119         Read (lldb_private::DataExtractor &data, lldb::offset_t offset)
    120         {
    121             if (data.ValidOffsetForDataOfSize (offset,
    122                                                sizeof (magic) +
    123                                                sizeof (version) +
    124                                                sizeof (hash_function) +
    125                                                sizeof (bucket_count) +
    126                                                sizeof (hashes_count) +
    127                                                sizeof (header_data_len)))
    128             {
    129                 magic = data.GetU32 (&offset);
    130                 if (magic != HASH_MAGIC)
    131                 {
    132                     if (magic == HASH_CIGAM)
    133                     {
    134                         switch (data.GetByteOrder())
    135                         {
    136                             case lldb::eByteOrderBig:
    137                                 data.SetByteOrder(lldb::eByteOrderLittle);
    138                                 break;
    139                             case lldb::eByteOrderLittle:
    140                                 data.SetByteOrder(lldb::eByteOrderBig);
    141                                 break;
    142                             default:
    143                                 return LLDB_INVALID_OFFSET;
    144                         }
    145                     }
    146                     else
    147                     {
    148                         // Magic bytes didn't match
    149                         version = 0;
    150                         return LLDB_INVALID_OFFSET;
    151                     }
    152                 }
    153 
    154                 version = data.GetU16 (&offset);
    155                 if (version != 1)
    156                 {
    157                     // Unsupported version
    158                     return LLDB_INVALID_OFFSET;
    159                 }
    160                 hash_function       = data.GetU16 (&offset);
    161                 if (hash_function == 4)
    162                     hash_function = 0; // Deal with pre-release version of this table...
    163                 bucket_count        = data.GetU32 (&offset);
    164                 hashes_count        = data.GetU32 (&offset);
    165                 header_data_len     = data.GetU32 (&offset);
    166                 return offset;
    167             }
    168             return LLDB_INVALID_OFFSET;
    169         }
    170 //
    171 //        // Returns a buffer that contains a serialized version of this table
    172 //        // that must be freed with free().
    173 //        virtual void *
    174 //        Write (int fd);
    175 	};
    176 
    177     template <typename __KeyType, class __HeaderDataType, class __ValueType>
    178     class ExportTable
    179     {
    180     public:
    181         typedef __HeaderDataType HeaderDataType;
    182         typedef Header<HeaderDataType> HeaderType;
    183         typedef __KeyType KeyType;
    184         typedef __ValueType ValueType;
    185 
    186         struct Entry
    187         {
    188             uint32_t    hash;
    189             KeyType     key;
    190             ValueType   value;
    191         };
    192 
    193         typedef std::vector<ValueType> ValueArrayType;
    194 
    195         typedef std::map<KeyType, ValueArrayType> HashData;
    196         // Map a name hash to one or more name infos
    197         typedef std::map<uint32_t, HashData> HashToHashData;
    198 
    199         virtual KeyType
    200         GetKeyForStringType (const char *cstr) const = 0;
    201 
    202         virtual size_t
    203         GetByteSize (const HashData &key_to_key_values) = 0;
    204 
    205         virtual bool
    206         WriteHashData (const HashData &hash_data,
    207                        lldb_private::Stream &ostrm) = 0;
    208 //
    209         void
    210         AddEntry (const char *cstr, const ValueType &value)
    211         {
    212             Entry entry;
    213             entry.hash = MappedHash::HashString (eHashFunctionDJB, cstr);
    214             entry.key = GetKeyForStringType (cstr);
    215             entry.value = value;
    216             m_entries.push_back (entry);
    217         }
    218 
    219         void
    220         Save (const HeaderDataType &header_data,
    221               lldb_private::Stream &ostrm)
    222         {
    223             if (m_entries.empty())
    224                 return;
    225 
    226             const uint32_t num_entries = m_entries.size();
    227             uint32_t i = 0;
    228 
    229             HeaderType header;
    230 
    231             header.magic = HASH_MAGIC;
    232             header.version = 1;
    233             header.hash_function = eHashFunctionDJB;
    234             header.bucket_count = 0;
    235             header.hashes_count = 0;
    236             header.prologue_length = header_data.GetByteSize();
    237 
    238             // We need to figure out the number of unique hashes first before we can
    239             // calculate the number of buckets we want to use.
    240             typedef std::vector<uint32_t> hash_coll;
    241             hash_coll unique_hashes;
    242             unique_hashes.resize (num_entries);
    243             for (i=0; i<num_entries; ++i)
    244                 unique_hashes[i] = m_entries[i].hash;
    245             std::sort (unique_hashes.begin(), unique_hashes.end());
    246             hash_coll::iterator pos = std::unique (unique_hashes.begin(), unique_hashes.end());
    247             const size_t num_unique_hashes = std::distance (unique_hashes.begin(), pos);
    248 
    249             if (num_unique_hashes > 1024)
    250                 header.bucket_count = num_unique_hashes/4;
    251             else if (num_unique_hashes > 16)
    252                 header.bucket_count = num_unique_hashes/2;
    253             else
    254                 header.bucket_count = num_unique_hashes;
    255             if (header.bucket_count == 0)
    256                 header.bucket_count = 1;
    257 
    258 
    259             std::vector<HashToHashData> hash_buckets;
    260             std::vector<uint32_t> hash_indexes (header.bucket_count, 0);
    261             std::vector<uint32_t> hash_values;
    262             std::vector<uint32_t> hash_offsets;
    263             hash_buckets.resize (header.bucket_count);
    264             uint32_t bucket_entry_empties = 0;
    265             //StreamString hash_file_data(Stream::eBinary, dwarf->GetObjectFile()->GetAddressByteSize(), dwarf->GetObjectFile()->GetByteSize());
    266 
    267             // Push all of the hashes into their buckets and create all bucket
    268             // entries all populated with data.
    269             for (i=0; i<num_entries; ++i)
    270             {
    271                 const uint32_t hash = m_entries[i].hash;
    272                 const uint32_t bucket_idx = hash % header.bucket_count;
    273                 const uint32_t strp_offset = m_entries[i].str_offset;
    274                 const uint32_t die_offset = m_entries[i].die_offset;
    275                 hash_buckets[bucket_idx][hash][strp_offset].push_back(die_offset);
    276             }
    277 
    278             // Now for each bucket we write the bucket value which is the
    279             // number of hashes and the hash index encoded into a single
    280             // 32 bit unsigned integer.
    281             for (i=0; i<header.bucket_count; ++i)
    282             {
    283                 HashToHashData &bucket_entry = hash_buckets[i];
    284 
    285                 if (bucket_entry.empty())
    286                 {
    287                     // Empty bucket
    288                     ++bucket_entry_empties;
    289                     hash_indexes[i] = UINT32_MAX;
    290                 }
    291                 else
    292                 {
    293                     const uint32_t hash_value_index = hash_values.size();
    294                     uint32_t hash_count = 0;
    295                     typename HashToHashData::const_iterator pos, end = bucket_entry.end();
    296                     for (pos = bucket_entry.begin(); pos != end; ++pos)
    297                     {
    298                         hash_values.push_back (pos->first);
    299                         hash_offsets.push_back (GetByteSize (pos->second));
    300                         ++hash_count;
    301                     }
    302 
    303                     hash_indexes[i] = hash_value_index;
    304                 }
    305             }
    306             header.hashes_count = hash_values.size();
    307 
    308             // Write the header out now that we have the hash_count
    309             header.Write (ostrm);
    310 
    311             // Now for each bucket we write the start index of the hashes
    312             // for the current bucket, or UINT32_MAX if the bucket is empty
    313             for (i=0; i<header.bucket_count; ++i)
    314             {
    315                 ostrm.PutHex32(hash_indexes[i]);
    316             }
    317 
    318             // Now we need to write out all of the hash values
    319             for (i=0; i<header.hashes_count; ++i)
    320             {
    321                 ostrm.PutHex32(hash_values[i]);
    322             }
    323 
    324             // Now we need to write out all of the hash data offsets,
    325             // there is an offset for each hash in the hashes array
    326             // that was written out above
    327             for (i=0; i<header.hashes_count; ++i)
    328             {
    329                 ostrm.PutHex32(hash_offsets[i]);
    330             }
    331 
    332             // Now we write the data for each hash and verify we got the offset
    333             // correct above...
    334             for (i=0; i<header.bucket_count; ++i)
    335             {
    336                 HashToHashData &bucket_entry = hash_buckets[i];
    337 
    338                 typename HashToHashData::const_iterator pos, end = bucket_entry.end();
    339                 for (pos = bucket_entry.begin(); pos != end; ++pos)
    340                 {
    341                     if (!bucket_entry.empty())
    342                     {
    343                         WriteHashData (pos->second);
    344                     }
    345                 }
    346             }
    347         }
    348     protected:
    349         typedef std::vector<Entry> collection;
    350         collection m_entries;
    351     };
    352     // A class for reading and using a saved hash table from a block of data
    353     // in memory
    354     template <typename __KeyType, class __HeaderType, class __HashData>
    355     class MemoryTable
    356     {
    357     public:
    358         typedef __HeaderType HeaderType;
    359         typedef __KeyType KeyType;
    360         typedef __HashData HashData;
    361 
    362         enum Result
    363         {
    364             eResultKeyMatch         = 0u, // The entry was found, key matched and "pair" was filled in successfully
    365             eResultKeyMismatch      = 1u, // Bucket hash data collision, but key didn't match
    366             eResultEndOfHashData    = 2u, // The chain of items for this hash data in this bucket is terminated, search no more
    367             eResultError            = 3u  // Error parsing the hash data, abort
    368         };
    369 
    370         struct Pair
    371         {
    372             KeyType key;
    373             HashData value;
    374         };
    375 
    376         MemoryTable (lldb_private::DataExtractor &data) :
    377             m_header (),
    378             m_hash_indexes (NULL),
    379             m_hash_values (NULL),
    380             m_hash_offsets (NULL)
    381         {
    382             lldb::offset_t offset = m_header.Read (data, 0);
    383             if (offset != LLDB_INVALID_OFFSET && IsValid ())
    384             {
    385                 m_hash_indexes = (uint32_t *)data.GetData (&offset, m_header.bucket_count * sizeof(uint32_t));
    386                 m_hash_values  = (uint32_t *)data.GetData (&offset, m_header.hashes_count * sizeof(uint32_t));
    387                 m_hash_offsets = (uint32_t *)data.GetData (&offset, m_header.hashes_count * sizeof(uint32_t));
    388             }
    389         }
    390 
    391         virtual
    392         ~MemoryTable ()
    393         {
    394         }
    395 
    396         bool
    397         IsValid () const
    398         {
    399             return m_header.version == 1 &&
    400                    m_header.hash_function == eHashFunctionDJB &&
    401                    m_header.bucket_count > 0 &&
    402                    m_header.hashes_count > 0;
    403         }
    404 
    405         uint32_t
    406         GetHashIndex (uint32_t bucket_idx) const
    407         {
    408             if (m_hash_indexes && bucket_idx < m_header.bucket_count)
    409                 return m_hash_indexes[bucket_idx];
    410             return UINT32_MAX;
    411         }
    412 
    413         uint32_t
    414         GetHashValue (uint32_t hash_idx) const
    415         {
    416             if (m_hash_values && hash_idx < m_header.hashes_count)
    417                 return m_hash_values[hash_idx];
    418             return UINT32_MAX;
    419         }
    420 
    421         uint32_t
    422         GetHashDataOffset (uint32_t hash_idx) const
    423         {
    424             if (m_hash_offsets && hash_idx < m_header.hashes_count)
    425                 return m_hash_offsets[hash_idx];
    426             return UINT32_MAX;
    427         }
    428 
    429         bool
    430         Find (const char *name, Pair &pair) const
    431         {
    432             if (IsValid ())
    433             {
    434                 const uint32_t bucket_count = m_header.bucket_count;
    435                 const uint32_t hash_count = m_header.hashes_count;
    436                 const uint32_t hash_value = MappedHash::HashString (m_header.hash_function, name);
    437                 const uint32_t bucket_idx = hash_value % bucket_count;
    438                 uint32_t hash_idx = GetHashIndex (bucket_idx);
    439                 if (hash_idx < hash_count)
    440                 {
    441                     for (; hash_idx < hash_count; ++hash_idx)
    442                     {
    443                         const uint32_t curr_hash_value = GetHashValue (hash_idx);
    444                         if (curr_hash_value == hash_value)
    445                         {
    446                             lldb::offset_t hash_data_offset = GetHashDataOffset (hash_idx);
    447                             while (hash_data_offset != UINT32_MAX)
    448                             {
    449                                 const lldb::offset_t prev_hash_data_offset = hash_data_offset;
    450                                 Result hash_result = GetHashDataForName (name, &hash_data_offset, pair);
    451                                 // Check the result of getting our hash data
    452                                 switch (hash_result)
    453                                 {
    454                                 case eResultKeyMatch:
    455                                     return true;
    456 
    457                                 case eResultKeyMismatch:
    458                                     if (prev_hash_data_offset == hash_data_offset)
    459                                         return false;
    460                                     break;
    461 
    462                                 case eResultEndOfHashData:
    463                                     // The last HashData for this key has been reached, stop searching
    464                                     return false;
    465                                 case eResultError:
    466                                     // Error parsing the hash data, abort
    467                                     return false;
    468                                 }
    469                             }
    470                         }
    471                         if ((curr_hash_value % bucket_count) != bucket_idx)
    472                             break;
    473                     }
    474                 }
    475             }
    476             return false;
    477         }
    478 
    479         // This method must be implemented in any subclasses.
    480         // The KeyType is user specified and must somehow result in a string
    481         // value. For example, the KeyType might be a string offset in a string
    482         // table and subclasses can store their string table as a member of the
    483         // subclass and return a valie "const char *" given a "key". The value
    484         // could also be a C string pointer, in which case just returning "key"
    485         // will suffice.
    486 
    487         virtual const char *
    488         GetStringForKeyType (KeyType key) const = 0;
    489 
    490         virtual bool
    491         ReadHashData (uint32_t hash_data_offset,
    492                       HashData &hash_data) const = 0;
    493 
    494         // This method must be implemented in any subclasses and it must try to
    495         // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr"
    496         // parameter. This offset should be updated as bytes are consumed and
    497         // a value "Result" enum should be returned. If the "name" matches the
    498         // full name for the "pair.key" (which must be filled in by this call),
    499         // then the HashData in the pair ("pair.value") should be extracted and
    500         // filled in and "eResultKeyMatch" should be returned. If "name" doesn't
    501         // match this string for the key, then "eResultKeyMismatch" should be
    502         // returned and all data for the current HashData must be consumed or
    503         // skipped and the "hash_data_offset_ptr" offset needs to be updated to
    504         // point to the next HashData. If the end of the HashData objects for
    505         // a given hash value have been reached, then "eResultEndOfHashData"
    506         // should be returned. If anything else goes wrong during parsing,
    507         // return "eResultError" and the corresponding "Find()" function will
    508         // be canceled and return false.
    509 
    510         virtual Result
    511         GetHashDataForName (const char *name,
    512                             lldb::offset_t* hash_data_offset_ptr,
    513                             Pair &pair) const = 0;
    514 
    515         const HeaderType &
    516         GetHeader()
    517         {
    518             return m_header;
    519         }
    520 
    521 
    522         void
    523         ForEach (std::function <bool(const HashData &hash_data)> const &callback) const
    524         {
    525             const size_t num_hash_offsets = m_header.hashes_count;
    526             for (size_t i=0; i<num_hash_offsets; ++i)
    527             {
    528                 uint32_t hash_data_offset = GetHashDataOffset (i);
    529                 if (hash_data_offset != UINT32_MAX)
    530                 {
    531                     HashData hash_data;
    532                     if (ReadHashData (hash_data_offset, hash_data))
    533                     {
    534                         // If the callback returns false, then we are done and should stop
    535                         if (callback(hash_data) == false)
    536                             return;
    537                     }
    538                 }
    539             }
    540         }
    541 
    542     protected:
    543         // Implementation agnostic information
    544         HeaderType m_header;
    545         uint32_t *m_hash_indexes;
    546         uint32_t *m_hash_values;
    547         uint32_t *m_hash_offsets;
    548     };
    549 
    550 };
    551 
    552 #endif // #ifndef liblldb_MappedHash_h_
    553