Home | History | Annotate | Download | only in AsmPrinter
      1 //==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains support for writing dwarf accelerator tables.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
     15 #define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
     16 
     17 #include "DIE.h"
     18 #include "llvm/ADT/ArrayRef.h"
     19 #include "llvm/ADT/StringMap.h"
     20 #include "llvm/MC/MCSymbol.h"
     21 #include "llvm/Support/DataTypes.h"
     22 #include "llvm/Support/Debug.h"
     23 #include "llvm/Support/Dwarf.h"
     24 #include "llvm/Support/ErrorHandling.h"
     25 #include "llvm/Support/Format.h"
     26 #include "llvm/Support/FormattedStream.h"
     27 #include <map>
     28 #include <vector>
     29 
     30 // The dwarf accelerator tables are an indirect hash table optimized
     31 // for null lookup rather than access to known data. They are output into
     32 // an on-disk format that looks like this:
     33 //
     34 // .-------------.
     35 // |  HEADER     |
     36 // |-------------|
     37 // |  BUCKETS    |
     38 // |-------------|
     39 // |  HASHES     |
     40 // |-------------|
     41 // |  OFFSETS    |
     42 // |-------------|
     43 // |  DATA       |
     44 // `-------------'
     45 //
     46 // where the header contains a magic number, version, type of hash function,
     47 // the number of buckets, total number of hashes, and room for a special
     48 // struct of data and the length of that struct.
     49 //
     50 // The buckets contain an index (e.g. 6) into the hashes array. The hashes
     51 // section contains all of the 32-bit hash values in contiguous memory, and
     52 // the offsets contain the offset into the data area for the particular
     53 // hash.
     54 //
     55 // For a lookup example, we could hash a function name and take it modulo the
     56 // number of buckets giving us our bucket. From there we take the bucket value
     57 // as an index into the hashes table and look at each successive hash as long
     58 // as the hash value is still the same modulo result (bucket value) as earlier.
     59 // If we have a match we look at that same entry in the offsets table and
     60 // grab the offset in the data for our final match.
     61 
     62 namespace llvm {
     63 
     64 class AsmPrinter;
     65 class DIE;
     66 class DwarfUnits;
     67 
     68 class DwarfAccelTable {
     69 
     70   enum HashFunctionType {
     71     eHashFunctionDJB = 0u
     72   };
     73 
     74   static uint32_t HashDJB (StringRef Str) {
     75     uint32_t h = 5381;
     76     for (unsigned i = 0, e = Str.size(); i != e; ++i)
     77       h = ((h << 5) + h) + Str[i];
     78     return h;
     79   }
     80 
     81   // Helper function to compute the number of buckets needed based on
     82   // the number of unique hashes.
     83   void ComputeBucketCount (void);
     84 
     85   struct TableHeader {
     86     uint32_t   magic;           // 'HASH' magic value to allow endian detection
     87     uint16_t   version;         // Version number.
     88     uint16_t   hash_function;   // The hash function enumeration that was used.
     89     uint32_t   bucket_count;    // The number of buckets in this hash table.
     90     uint32_t   hashes_count;    // The total number of unique hash values
     91                                 // and hash data offsets in this table.
     92     uint32_t   header_data_len; // The bytes to skip to get to the hash
     93                                 // indexes (buckets) for correct alignment.
     94     // Also written to disk is the implementation specific header data.
     95 
     96     static const uint32_t MagicHash = 0x48415348;
     97 
     98     TableHeader (uint32_t data_len) :
     99       magic (MagicHash), version (1), hash_function (eHashFunctionDJB),
    100       bucket_count (0), hashes_count (0), header_data_len (data_len)
    101     {}
    102 
    103 #ifndef NDEBUG
    104     void print(raw_ostream &O) {
    105       O << "Magic: " << format("0x%x", magic) << "\n"
    106         << "Version: " << version << "\n"
    107         << "Hash Function: " << hash_function << "\n"
    108         << "Bucket Count: " << bucket_count << "\n"
    109         << "Header Data Length: " << header_data_len << "\n";
    110     }
    111     void dump() { print(dbgs()); }
    112 #endif
    113   };
    114 
    115 public:
    116   // The HeaderData describes the form of each set of data. In general this
    117   // is as a list of atoms (atom_count) where each atom contains a type
    118   // (AtomType type) of data, and an encoding form (form). In the case of
    119   // data that is referenced via DW_FORM_ref_* the die_offset_base is
    120   // used to describe the offset for all forms in the list of atoms.
    121   // This also serves as a public interface of sorts.
    122   // When written to disk this will have the form:
    123   //
    124   // uint32_t die_offset_base
    125   // uint32_t atom_count
    126   // atom_count Atoms
    127   enum AtomType {
    128     eAtomTypeNULL       = 0u,
    129     eAtomTypeDIEOffset  = 1u,   // DIE offset, check form for encoding
    130     eAtomTypeCUOffset   = 2u,   // DIE offset of the compiler unit header that
    131                                 // contains the item in question
    132     eAtomTypeTag        = 3u,   // DW_TAG_xxx value, should be encoded as
    133                                 // DW_FORM_data1 (if no tags exceed 255) or
    134                                 // DW_FORM_data2.
    135     eAtomTypeNameFlags  = 4u,   // Flags from enum NameFlags
    136     eAtomTypeTypeFlags  = 5u    // Flags from enum TypeFlags
    137   };
    138 
    139   enum TypeFlags {
    140     eTypeFlagClassMask = 0x0000000fu,
    141 
    142     // Always set for C++, only set for ObjC if this is the
    143     // @implementation for a class.
    144     eTypeFlagClassIsImplementation  = ( 1u << 1 )
    145   };
    146 
    147   // Make these public so that they can be used as a general interface to
    148   // the class.
    149   struct Atom {
    150     AtomType type; // enum AtomType
    151     uint16_t form; // DWARF DW_FORM_ defines
    152 
    153     Atom(AtomType type, uint16_t form) : type(type), form(form) {}
    154     static const char * AtomTypeString(enum AtomType);
    155 #ifndef NDEBUG
    156     void print(raw_ostream &O) {
    157       O << "Type: " << AtomTypeString(type) << "\n"
    158         << "Form: " << dwarf::FormEncodingString(form) << "\n";
    159     }
    160     void dump() {
    161       print(dbgs());
    162     }
    163 #endif
    164   };
    165 
    166  private:
    167   struct TableHeaderData {
    168     uint32_t die_offset_base;
    169     SmallVector<Atom, 1> Atoms;
    170 
    171     TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0)
    172       : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) { }
    173 
    174 #ifndef NDEBUG
    175     void print (raw_ostream &O) {
    176       O << "die_offset_base: " << die_offset_base << "\n";
    177       for (size_t i = 0; i < Atoms.size(); i++)
    178         Atoms[i].print(O);
    179     }
    180     void dump() {
    181       print(dbgs());
    182     }
    183 #endif
    184   };
    185 
    186   // The data itself consists of a str_offset, a count of the DIEs in the
    187   // hash and the offsets to the DIEs themselves.
    188   // On disk each data section is ended with a 0 KeyType as the end of the
    189   // hash chain.
    190   // On output this looks like:
    191   // uint32_t str_offset
    192   // uint32_t hash_data_count
    193   // HashData[hash_data_count]
    194 public:
    195   struct HashDataContents {
    196     DIE *Die; // Offsets
    197     char Flags; // Specific flags to output
    198 
    199     HashDataContents(DIE *D, char Flags) :
    200       Die(D),
    201       Flags(Flags) { }
    202     #ifndef NDEBUG
    203     void print(raw_ostream &O) const {
    204       O << "  Offset: " << Die->getOffset() << "\n";
    205       O << "  Tag: " << dwarf::TagString(Die->getTag()) << "\n";
    206       O << "  Flags: " << Flags << "\n";
    207     }
    208     #endif
    209   };
    210 private:
    211   struct HashData {
    212     StringRef Str;
    213     uint32_t HashValue;
    214     MCSymbol *Sym;
    215     ArrayRef<HashDataContents*> Data; // offsets
    216     HashData(StringRef S, ArrayRef<HashDataContents*> Data)
    217       : Str(S), Data(Data) {
    218       HashValue = DwarfAccelTable::HashDJB(S);
    219     }
    220     #ifndef NDEBUG
    221     void print(raw_ostream &O) {
    222       O << "Name: " << Str << "\n";
    223       O << "  Hash Value: " << format("0x%x", HashValue) << "\n";
    224       O << "  Symbol: " ;
    225       if (Sym) Sym->print(O);
    226       else O << "<none>";
    227       O << "\n";
    228       for (size_t i = 0; i < Data.size(); i++) {
    229         O << "  Offset: " << Data[i]->Die->getOffset() << "\n";
    230         O << "  Tag: " << dwarf::TagString(Data[i]->Die->getTag()) << "\n";
    231         O << "  Flags: " << Data[i]->Flags << "\n";
    232       }
    233     }
    234     void dump() {
    235       print(dbgs());
    236     }
    237     #endif
    238   };
    239 
    240   DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
    241   void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
    242 
    243   // Internal Functions
    244   void EmitHeader(AsmPrinter *);
    245   void EmitBuckets(AsmPrinter *);
    246   void EmitHashes(AsmPrinter *);
    247   void EmitOffsets(AsmPrinter *, MCSymbol *);
    248   void EmitData(AsmPrinter *, DwarfUnits *D);
    249 
    250   // Allocator for HashData and HashDataContents.
    251   BumpPtrAllocator Allocator;
    252 
    253   // Output Variables
    254   TableHeader Header;
    255   TableHeaderData HeaderData;
    256   std::vector<HashData*> Data;
    257 
    258   // String Data
    259   typedef std::vector<HashDataContents*> DataArray;
    260   typedef StringMap<DataArray, BumpPtrAllocator&> StringEntries;
    261   StringEntries Entries;
    262 
    263   // Buckets/Hashes/Offsets
    264   typedef std::vector<HashData*> HashList;
    265   typedef std::vector<HashList> BucketList;
    266   BucketList Buckets;
    267   HashList Hashes;
    268 
    269   // Public Implementation
    270  public:
    271   DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
    272   ~DwarfAccelTable();
    273   void AddName(StringRef, DIE*, char = 0);
    274   void FinalizeTable(AsmPrinter *, StringRef);
    275   void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *);
    276 #ifndef NDEBUG
    277   void print(raw_ostream &O);
    278   void dump() { print(dbgs()); }
    279 #endif
    280 };
    281 
    282 }
    283 #endif
    284