1 // 2 // MappedHash.h 3 // 4 5 #ifndef liblldb_MappedHash_h_ 6 #define liblldb_MappedHash_h_ 7 8 #include <assert.h> 9 #include <stdint.h> 10 11 #include <map> 12 #include <vector> 13 14 #include "lldb/Core/DataExtractor.h" 15 #include "lldb/Core/Stream.h" 16 17 class MappedHash 18 { 19 public: 20 21 enum HashFunctionType 22 { 23 eHashFunctionDJB = 0u // Daniel J Bernstein hash function that is also used by the ELF GNU_HASH sections 24 }; 25 26 27 static uint32_t 28 HashStringUsingDJB (const char *s) 29 { 30 uint32_t h = 5381; 31 32 for (unsigned char c = *s; c; c = *++s) 33 h = ((h << 5) + h) + c; 34 35 return h; 36 } 37 38 static uint32_t 39 HashString (uint32_t hash_function, const char *s) 40 { 41 switch (hash_function) 42 { 43 case MappedHash::eHashFunctionDJB: 44 return HashStringUsingDJB (s); 45 46 default: 47 break; 48 } 49 assert (!"Invalid hash function index"); 50 return 0; 51 } 52 53 54 static const uint32_t HASH_MAGIC = 0x48415348u; 55 static const uint32_t HASH_CIGAM = 0x48534148u; 56 57 template <typename T> 58 struct Header 59 { 60 typedef T HeaderData; 61 62 uint32_t magic; // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection 63 uint16_t version; // Version number 64 uint16_t hash_function; // The hash function enumeration that was used 65 uint32_t bucket_count; // The number of buckets in this hash table 66 uint32_t hashes_count; // The total number of unique hash values and hash data offsets in this table 67 uint32_t header_data_len; // The size in bytes of the "header_data" template member below 68 HeaderData header_data; // 69 70 Header () : 71 magic (HASH_MAGIC), 72 version (1), 73 hash_function (eHashFunctionDJB), 74 bucket_count (0), 75 hashes_count (0), 76 header_data_len (sizeof(T)), 77 header_data () 78 { 79 } 80 81 virtual 82 ~Header() 83 { 84 } 85 86 size_t 87 GetByteSize() const 88 { 89 return sizeof(magic) + 90 sizeof(version) + 91 sizeof(hash_function) + 92 sizeof(bucket_count) + 93 sizeof(hashes_count) + 94 sizeof(header_data_len) + 95 header_data_len; 96 } 97 98 virtual size_t 99 GetByteSize (const HeaderData &header_data) = 0; 100 101 void 102 SetHeaderDataByteSize (uint32_t header_data_byte_size) 103 { 104 header_data_len = header_data_byte_size; 105 } 106 107 void 108 Dump (lldb_private::Stream &s) 109 { 110 s.Printf ("header.magic = 0x%8.8x\n", magic); 111 s.Printf ("header.version = 0x%4.4x\n", version); 112 s.Printf ("header.hash_function = 0x%4.4x\n", hash_function); 113 s.Printf ("header.bucket_count = 0x%8.8x %u\n", bucket_count, bucket_count); 114 s.Printf ("header.hashes_count = 0x%8.8x %u\n", hashes_count, hashes_count); 115 s.Printf ("header.header_data_len = 0x%8.8x %u\n", header_data_len, header_data_len); 116 } 117 118 virtual lldb::offset_t 119 Read (lldb_private::DataExtractor &data, lldb::offset_t offset) 120 { 121 if (data.ValidOffsetForDataOfSize (offset, 122 sizeof (magic) + 123 sizeof (version) + 124 sizeof (hash_function) + 125 sizeof (bucket_count) + 126 sizeof (hashes_count) + 127 sizeof (header_data_len))) 128 { 129 magic = data.GetU32 (&offset); 130 if (magic != HASH_MAGIC) 131 { 132 if (magic == HASH_CIGAM) 133 { 134 switch (data.GetByteOrder()) 135 { 136 case lldb::eByteOrderBig: 137 data.SetByteOrder(lldb::eByteOrderLittle); 138 break; 139 case lldb::eByteOrderLittle: 140 data.SetByteOrder(lldb::eByteOrderBig); 141 break; 142 default: 143 return LLDB_INVALID_OFFSET; 144 } 145 } 146 else 147 { 148 // Magic bytes didn't match 149 version = 0; 150 return LLDB_INVALID_OFFSET; 151 } 152 } 153 154 version = data.GetU16 (&offset); 155 if (version != 1) 156 { 157 // Unsupported version 158 return LLDB_INVALID_OFFSET; 159 } 160 hash_function = data.GetU16 (&offset); 161 if (hash_function == 4) 162 hash_function = 0; // Deal with pre-release version of this table... 163 bucket_count = data.GetU32 (&offset); 164 hashes_count = data.GetU32 (&offset); 165 header_data_len = data.GetU32 (&offset); 166 return offset; 167 } 168 return LLDB_INVALID_OFFSET; 169 } 170 // 171 // // Returns a buffer that contains a serialized version of this table 172 // // that must be freed with free(). 173 // virtual void * 174 // Write (int fd); 175 }; 176 177 template <typename __KeyType, class __HeaderDataType, class __ValueType> 178 class ExportTable 179 { 180 public: 181 typedef __HeaderDataType HeaderDataType; 182 typedef Header<HeaderDataType> HeaderType; 183 typedef __KeyType KeyType; 184 typedef __ValueType ValueType; 185 186 struct Entry 187 { 188 uint32_t hash; 189 KeyType key; 190 ValueType value; 191 }; 192 193 typedef std::vector<ValueType> ValueArrayType; 194 195 typedef std::map<KeyType, ValueArrayType> HashData; 196 // Map a name hash to one or more name infos 197 typedef std::map<uint32_t, HashData> HashToHashData; 198 199 virtual KeyType 200 GetKeyForStringType (const char *cstr) const = 0; 201 202 virtual size_t 203 GetByteSize (const HashData &key_to_key_values) = 0; 204 205 virtual bool 206 WriteHashData (const HashData &hash_data, 207 lldb_private::Stream &ostrm) = 0; 208 // 209 void 210 AddEntry (const char *cstr, const ValueType &value) 211 { 212 Entry entry; 213 entry.hash = MappedHash::HashString (eHashFunctionDJB, cstr); 214 entry.key = GetKeyForStringType (cstr); 215 entry.value = value; 216 m_entries.push_back (entry); 217 } 218 219 void 220 Save (const HeaderDataType &header_data, 221 lldb_private::Stream &ostrm) 222 { 223 if (m_entries.empty()) 224 return; 225 226 const uint32_t num_entries = m_entries.size(); 227 uint32_t i = 0; 228 229 HeaderType header; 230 231 header.magic = HASH_MAGIC; 232 header.version = 1; 233 header.hash_function = eHashFunctionDJB; 234 header.bucket_count = 0; 235 header.hashes_count = 0; 236 header.prologue_length = header_data.GetByteSize(); 237 238 // We need to figure out the number of unique hashes first before we can 239 // calculate the number of buckets we want to use. 240 typedef std::vector<uint32_t> hash_coll; 241 hash_coll unique_hashes; 242 unique_hashes.resize (num_entries); 243 for (i=0; i<num_entries; ++i) 244 unique_hashes[i] = m_entries[i].hash; 245 std::sort (unique_hashes.begin(), unique_hashes.end()); 246 hash_coll::iterator pos = std::unique (unique_hashes.begin(), unique_hashes.end()); 247 const size_t num_unique_hashes = std::distance (unique_hashes.begin(), pos); 248 249 if (num_unique_hashes > 1024) 250 header.bucket_count = num_unique_hashes/4; 251 else if (num_unique_hashes > 16) 252 header.bucket_count = num_unique_hashes/2; 253 else 254 header.bucket_count = num_unique_hashes; 255 if (header.bucket_count == 0) 256 header.bucket_count = 1; 257 258 259 std::vector<HashToHashData> hash_buckets; 260 std::vector<uint32_t> hash_indexes (header.bucket_count, 0); 261 std::vector<uint32_t> hash_values; 262 std::vector<uint32_t> hash_offsets; 263 hash_buckets.resize (header.bucket_count); 264 uint32_t bucket_entry_empties = 0; 265 //StreamString hash_file_data(Stream::eBinary, dwarf->GetObjectFile()->GetAddressByteSize(), dwarf->GetObjectFile()->GetByteSize()); 266 267 // Push all of the hashes into their buckets and create all bucket 268 // entries all populated with data. 269 for (i=0; i<num_entries; ++i) 270 { 271 const uint32_t hash = m_entries[i].hash; 272 const uint32_t bucket_idx = hash % header.bucket_count; 273 const uint32_t strp_offset = m_entries[i].str_offset; 274 const uint32_t die_offset = m_entries[i].die_offset; 275 hash_buckets[bucket_idx][hash][strp_offset].push_back(die_offset); 276 } 277 278 // Now for each bucket we write the bucket value which is the 279 // number of hashes and the hash index encoded into a single 280 // 32 bit unsigned integer. 281 for (i=0; i<header.bucket_count; ++i) 282 { 283 HashToHashData &bucket_entry = hash_buckets[i]; 284 285 if (bucket_entry.empty()) 286 { 287 // Empty bucket 288 ++bucket_entry_empties; 289 hash_indexes[i] = UINT32_MAX; 290 } 291 else 292 { 293 const uint32_t hash_value_index = hash_values.size(); 294 uint32_t hash_count = 0; 295 typename HashToHashData::const_iterator pos, end = bucket_entry.end(); 296 for (pos = bucket_entry.begin(); pos != end; ++pos) 297 { 298 hash_values.push_back (pos->first); 299 hash_offsets.push_back (GetByteSize (pos->second)); 300 ++hash_count; 301 } 302 303 hash_indexes[i] = hash_value_index; 304 } 305 } 306 header.hashes_count = hash_values.size(); 307 308 // Write the header out now that we have the hash_count 309 header.Write (ostrm); 310 311 // Now for each bucket we write the start index of the hashes 312 // for the current bucket, or UINT32_MAX if the bucket is empty 313 for (i=0; i<header.bucket_count; ++i) 314 { 315 ostrm.PutHex32(hash_indexes[i]); 316 } 317 318 // Now we need to write out all of the hash values 319 for (i=0; i<header.hashes_count; ++i) 320 { 321 ostrm.PutHex32(hash_values[i]); 322 } 323 324 // Now we need to write out all of the hash data offsets, 325 // there is an offset for each hash in the hashes array 326 // that was written out above 327 for (i=0; i<header.hashes_count; ++i) 328 { 329 ostrm.PutHex32(hash_offsets[i]); 330 } 331 332 // Now we write the data for each hash and verify we got the offset 333 // correct above... 334 for (i=0; i<header.bucket_count; ++i) 335 { 336 HashToHashData &bucket_entry = hash_buckets[i]; 337 338 typename HashToHashData::const_iterator pos, end = bucket_entry.end(); 339 for (pos = bucket_entry.begin(); pos != end; ++pos) 340 { 341 if (!bucket_entry.empty()) 342 { 343 WriteHashData (pos->second); 344 } 345 } 346 } 347 } 348 protected: 349 typedef std::vector<Entry> collection; 350 collection m_entries; 351 }; 352 // A class for reading and using a saved hash table from a block of data 353 // in memory 354 template <typename __KeyType, class __HeaderType, class __HashData> 355 class MemoryTable 356 { 357 public: 358 typedef __HeaderType HeaderType; 359 typedef __KeyType KeyType; 360 typedef __HashData HashData; 361 362 enum Result 363 { 364 eResultKeyMatch = 0u, // The entry was found, key matched and "pair" was filled in successfully 365 eResultKeyMismatch = 1u, // Bucket hash data collision, but key didn't match 366 eResultEndOfHashData = 2u, // The chain of items for this hash data in this bucket is terminated, search no more 367 eResultError = 3u // Error parsing the hash data, abort 368 }; 369 370 struct Pair 371 { 372 KeyType key; 373 HashData value; 374 }; 375 376 MemoryTable (lldb_private::DataExtractor &data) : 377 m_header (), 378 m_hash_indexes (NULL), 379 m_hash_values (NULL), 380 m_hash_offsets (NULL) 381 { 382 lldb::offset_t offset = m_header.Read (data, 0); 383 if (offset != LLDB_INVALID_OFFSET && IsValid ()) 384 { 385 m_hash_indexes = (uint32_t *)data.GetData (&offset, m_header.bucket_count * sizeof(uint32_t)); 386 m_hash_values = (uint32_t *)data.GetData (&offset, m_header.hashes_count * sizeof(uint32_t)); 387 m_hash_offsets = (uint32_t *)data.GetData (&offset, m_header.hashes_count * sizeof(uint32_t)); 388 } 389 } 390 391 virtual 392 ~MemoryTable () 393 { 394 } 395 396 bool 397 IsValid () const 398 { 399 return m_header.version == 1 && 400 m_header.hash_function == eHashFunctionDJB && 401 m_header.bucket_count > 0 && 402 m_header.hashes_count > 0; 403 } 404 405 uint32_t 406 GetHashIndex (uint32_t bucket_idx) const 407 { 408 if (m_hash_indexes && bucket_idx < m_header.bucket_count) 409 return m_hash_indexes[bucket_idx]; 410 return UINT32_MAX; 411 } 412 413 uint32_t 414 GetHashValue (uint32_t hash_idx) const 415 { 416 if (m_hash_values && hash_idx < m_header.hashes_count) 417 return m_hash_values[hash_idx]; 418 return UINT32_MAX; 419 } 420 421 uint32_t 422 GetHashDataOffset (uint32_t hash_idx) const 423 { 424 if (m_hash_offsets && hash_idx < m_header.hashes_count) 425 return m_hash_offsets[hash_idx]; 426 return UINT32_MAX; 427 } 428 429 bool 430 Find (const char *name, Pair &pair) const 431 { 432 if (IsValid ()) 433 { 434 const uint32_t bucket_count = m_header.bucket_count; 435 const uint32_t hash_count = m_header.hashes_count; 436 const uint32_t hash_value = MappedHash::HashString (m_header.hash_function, name); 437 const uint32_t bucket_idx = hash_value % bucket_count; 438 uint32_t hash_idx = GetHashIndex (bucket_idx); 439 if (hash_idx < hash_count) 440 { 441 for (; hash_idx < hash_count; ++hash_idx) 442 { 443 const uint32_t curr_hash_value = GetHashValue (hash_idx); 444 if (curr_hash_value == hash_value) 445 { 446 lldb::offset_t hash_data_offset = GetHashDataOffset (hash_idx); 447 while (hash_data_offset != UINT32_MAX) 448 { 449 const lldb::offset_t prev_hash_data_offset = hash_data_offset; 450 Result hash_result = GetHashDataForName (name, &hash_data_offset, pair); 451 // Check the result of getting our hash data 452 switch (hash_result) 453 { 454 case eResultKeyMatch: 455 return true; 456 457 case eResultKeyMismatch: 458 if (prev_hash_data_offset == hash_data_offset) 459 return false; 460 break; 461 462 case eResultEndOfHashData: 463 // The last HashData for this key has been reached, stop searching 464 return false; 465 case eResultError: 466 // Error parsing the hash data, abort 467 return false; 468 } 469 } 470 } 471 if ((curr_hash_value % bucket_count) != bucket_idx) 472 break; 473 } 474 } 475 } 476 return false; 477 } 478 479 // This method must be implemented in any subclasses. 480 // The KeyType is user specified and must somehow result in a string 481 // value. For example, the KeyType might be a string offset in a string 482 // table and subclasses can store their string table as a member of the 483 // subclass and return a valie "const char *" given a "key". The value 484 // could also be a C string pointer, in which case just returning "key" 485 // will suffice. 486 487 virtual const char * 488 GetStringForKeyType (KeyType key) const = 0; 489 490 virtual bool 491 ReadHashData (uint32_t hash_data_offset, 492 HashData &hash_data) const = 0; 493 494 // This method must be implemented in any subclasses and it must try to 495 // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr" 496 // parameter. This offset should be updated as bytes are consumed and 497 // a value "Result" enum should be returned. If the "name" matches the 498 // full name for the "pair.key" (which must be filled in by this call), 499 // then the HashData in the pair ("pair.value") should be extracted and 500 // filled in and "eResultKeyMatch" should be returned. If "name" doesn't 501 // match this string for the key, then "eResultKeyMismatch" should be 502 // returned and all data for the current HashData must be consumed or 503 // skipped and the "hash_data_offset_ptr" offset needs to be updated to 504 // point to the next HashData. If the end of the HashData objects for 505 // a given hash value have been reached, then "eResultEndOfHashData" 506 // should be returned. If anything else goes wrong during parsing, 507 // return "eResultError" and the corresponding "Find()" function will 508 // be canceled and return false. 509 510 virtual Result 511 GetHashDataForName (const char *name, 512 lldb::offset_t* hash_data_offset_ptr, 513 Pair &pair) const = 0; 514 515 const HeaderType & 516 GetHeader() 517 { 518 return m_header; 519 } 520 521 522 void 523 ForEach (std::function <bool(const HashData &hash_data)> const &callback) const 524 { 525 const size_t num_hash_offsets = m_header.hashes_count; 526 for (size_t i=0; i<num_hash_offsets; ++i) 527 { 528 uint32_t hash_data_offset = GetHashDataOffset (i); 529 if (hash_data_offset != UINT32_MAX) 530 { 531 HashData hash_data; 532 if (ReadHashData (hash_data_offset, hash_data)) 533 { 534 // If the callback returns false, then we are done and should stop 535 if (callback(hash_data) == false) 536 return; 537 } 538 } 539 } 540 } 541 542 protected: 543 // Implementation agnostic information 544 HeaderType m_header; 545 uint32_t *m_hash_indexes; 546 uint32_t *m_hash_values; 547 uint32_t *m_hash_offsets; 548 }; 549 550 }; 551 552 #endif // #ifndef liblldb_MappedHash_h_ 553