Home | History | Annotate | Download | only in AsmPrinter
      1 //===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains support for DWARF4 hashing of DIEs.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "ByteStreamer.h"
     15 #include "DIEHash.h"
     16 #include "DwarfDebug.h"
     17 #include "llvm/ADT/ArrayRef.h"
     18 #include "llvm/ADT/StringRef.h"
     19 #include "llvm/CodeGen/AsmPrinter.h"
     20 #include "llvm/CodeGen/DIE.h"
     21 #include "llvm/Support/Debug.h"
     22 #include "llvm/Support/Dwarf.h"
     23 #include "llvm/Support/Endian.h"
     24 #include "llvm/Support/MD5.h"
     25 #include "llvm/Support/raw_ostream.h"
     26 
     27 using namespace llvm;
     28 
     29 #define DEBUG_TYPE "dwarfdebug"
     30 
     31 /// \brief Grabs the string in whichever attribute is passed in and returns
     32 /// a reference to it.
     33 static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {
     34   // Iterate through all the attributes until we find the one we're
     35   // looking for, if we can't find it return an empty string.
     36   for (const auto &V : Die.values())
     37     if (V.getAttribute() == Attr)
     38       return V.getDIEString().getString();
     39 
     40   return StringRef("");
     41 }
     42 
     43 /// \brief Adds the string in \p Str to the hash. This also hashes
     44 /// a trailing NULL with the string.
     45 void DIEHash::addString(StringRef Str) {
     46   DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
     47   Hash.update(Str);
     48   Hash.update(makeArrayRef((uint8_t)'\0'));
     49 }
     50 
     51 // FIXME: The LEB128 routines are copied and only slightly modified out of
     52 // LEB128.h.
     53 
     54 /// \brief Adds the unsigned in \p Value to the hash encoded as a ULEB128.
     55 void DIEHash::addULEB128(uint64_t Value) {
     56   DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
     57   do {
     58     uint8_t Byte = Value & 0x7f;
     59     Value >>= 7;
     60     if (Value != 0)
     61       Byte |= 0x80; // Mark this byte to show that more bytes will follow.
     62     Hash.update(Byte);
     63   } while (Value != 0);
     64 }
     65 
     66 void DIEHash::addSLEB128(int64_t Value) {
     67   DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
     68   bool More;
     69   do {
     70     uint8_t Byte = Value & 0x7f;
     71     Value >>= 7;
     72     More = !((((Value == 0) && ((Byte & 0x40) == 0)) ||
     73               ((Value == -1) && ((Byte & 0x40) != 0))));
     74     if (More)
     75       Byte |= 0x80; // Mark this byte to show that more bytes will follow.
     76     Hash.update(Byte);
     77   } while (More);
     78 }
     79 
     80 /// \brief Including \p Parent adds the context of Parent to the hash..
     81 void DIEHash::addParentContext(const DIE &Parent) {
     82 
     83   DEBUG(dbgs() << "Adding parent context to hash...\n");
     84 
     85   // [7.27.2] For each surrounding type or namespace beginning with the
     86   // outermost such construct...
     87   SmallVector<const DIE *, 1> Parents;
     88   const DIE *Cur = &Parent;
     89   while (Cur->getParent()) {
     90     Parents.push_back(Cur);
     91     Cur = Cur->getParent();
     92   }
     93   assert(Cur->getTag() == dwarf::DW_TAG_compile_unit ||
     94          Cur->getTag() == dwarf::DW_TAG_type_unit);
     95 
     96   // Reverse iterate over our list to go from the outermost construct to the
     97   // innermost.
     98   for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(),
     99                                                       E = Parents.rend();
    100        I != E; ++I) {
    101     const DIE &Die = **I;
    102 
    103     // ... Append the letter "C" to the sequence...
    104     addULEB128('C');
    105 
    106     // ... Followed by the DWARF tag of the construct...
    107     addULEB128(Die.getTag());
    108 
    109     // ... Then the name, taken from the DW_AT_name attribute.
    110     StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name);
    111     DEBUG(dbgs() << "... adding context: " << Name << "\n");
    112     if (!Name.empty())
    113       addString(Name);
    114   }
    115 }
    116 
    117 // Collect all of the attributes for a particular DIE in single structure.
    118 void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) {
    119 #define COLLECT_ATTR(NAME)                                                     \
    120   case dwarf::NAME:                                                            \
    121     Attrs.NAME = V;                                                            \
    122     break
    123 
    124   for (const auto &V : Die.values()) {
    125     DEBUG(dbgs() << "Attribute: "
    126                  << dwarf::AttributeString(V.getAttribute())
    127                  << " added.\n");
    128     switch (V.getAttribute()) {
    129       COLLECT_ATTR(DW_AT_name);
    130       COLLECT_ATTR(DW_AT_accessibility);
    131       COLLECT_ATTR(DW_AT_address_class);
    132       COLLECT_ATTR(DW_AT_allocated);
    133       COLLECT_ATTR(DW_AT_artificial);
    134       COLLECT_ATTR(DW_AT_associated);
    135       COLLECT_ATTR(DW_AT_binary_scale);
    136       COLLECT_ATTR(DW_AT_bit_offset);
    137       COLLECT_ATTR(DW_AT_bit_size);
    138       COLLECT_ATTR(DW_AT_bit_stride);
    139       COLLECT_ATTR(DW_AT_byte_size);
    140       COLLECT_ATTR(DW_AT_byte_stride);
    141       COLLECT_ATTR(DW_AT_const_expr);
    142       COLLECT_ATTR(DW_AT_const_value);
    143       COLLECT_ATTR(DW_AT_containing_type);
    144       COLLECT_ATTR(DW_AT_count);
    145       COLLECT_ATTR(DW_AT_data_bit_offset);
    146       COLLECT_ATTR(DW_AT_data_location);
    147       COLLECT_ATTR(DW_AT_data_member_location);
    148       COLLECT_ATTR(DW_AT_decimal_scale);
    149       COLLECT_ATTR(DW_AT_decimal_sign);
    150       COLLECT_ATTR(DW_AT_default_value);
    151       COLLECT_ATTR(DW_AT_digit_count);
    152       COLLECT_ATTR(DW_AT_discr);
    153       COLLECT_ATTR(DW_AT_discr_list);
    154       COLLECT_ATTR(DW_AT_discr_value);
    155       COLLECT_ATTR(DW_AT_encoding);
    156       COLLECT_ATTR(DW_AT_enum_class);
    157       COLLECT_ATTR(DW_AT_endianity);
    158       COLLECT_ATTR(DW_AT_explicit);
    159       COLLECT_ATTR(DW_AT_is_optional);
    160       COLLECT_ATTR(DW_AT_location);
    161       COLLECT_ATTR(DW_AT_lower_bound);
    162       COLLECT_ATTR(DW_AT_mutable);
    163       COLLECT_ATTR(DW_AT_ordering);
    164       COLLECT_ATTR(DW_AT_picture_string);
    165       COLLECT_ATTR(DW_AT_prototyped);
    166       COLLECT_ATTR(DW_AT_small);
    167       COLLECT_ATTR(DW_AT_segment);
    168       COLLECT_ATTR(DW_AT_string_length);
    169       COLLECT_ATTR(DW_AT_threads_scaled);
    170       COLLECT_ATTR(DW_AT_upper_bound);
    171       COLLECT_ATTR(DW_AT_use_location);
    172       COLLECT_ATTR(DW_AT_use_UTF8);
    173       COLLECT_ATTR(DW_AT_variable_parameter);
    174       COLLECT_ATTR(DW_AT_virtuality);
    175       COLLECT_ATTR(DW_AT_visibility);
    176       COLLECT_ATTR(DW_AT_vtable_elem_location);
    177       COLLECT_ATTR(DW_AT_type);
    178     default:
    179       break;
    180     }
    181   }
    182 }
    183 
    184 void DIEHash::hashShallowTypeReference(dwarf::Attribute Attribute,
    185                                        const DIE &Entry, StringRef Name) {
    186   // append the letter 'N'
    187   addULEB128('N');
    188 
    189   // the DWARF attribute code (DW_AT_type or DW_AT_friend),
    190   addULEB128(Attribute);
    191 
    192   // the context of the tag,
    193   if (const DIE *Parent = Entry.getParent())
    194     addParentContext(*Parent);
    195 
    196   // the letter 'E',
    197   addULEB128('E');
    198 
    199   // and the name of the type.
    200   addString(Name);
    201 
    202   // Currently DW_TAG_friends are not used by Clang, but if they do become so,
    203   // here's the relevant spec text to implement:
    204   //
    205   // For DW_TAG_friend, if the referenced entry is the DW_TAG_subprogram,
    206   // the context is omitted and the name to be used is the ABI-specific name
    207   // of the subprogram (e.g., the mangled linker name).
    208 }
    209 
    210 void DIEHash::hashRepeatedTypeReference(dwarf::Attribute Attribute,
    211                                         unsigned DieNumber) {
    212   // a) If T is in the list of [previously hashed types], use the letter
    213   // 'R' as the marker
    214   addULEB128('R');
    215 
    216   addULEB128(Attribute);
    217 
    218   // and use the unsigned LEB128 encoding of [the index of T in the
    219   // list] as the attribute value;
    220   addULEB128(DieNumber);
    221 }
    222 
    223 void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
    224                            const DIE &Entry) {
    225   assert(Tag != dwarf::DW_TAG_friend && "No current LLVM clients emit friend "
    226                                         "tags. Add support here when there's "
    227                                         "a use case");
    228   // Step 5
    229   // If the tag in Step 3 is one of [the below tags]
    230   if ((Tag == dwarf::DW_TAG_pointer_type ||
    231        Tag == dwarf::DW_TAG_reference_type ||
    232        Tag == dwarf::DW_TAG_rvalue_reference_type ||
    233        Tag == dwarf::DW_TAG_ptr_to_member_type) &&
    234       // and the referenced type (via the [below attributes])
    235       // FIXME: This seems overly restrictive, and causes hash mismatches
    236       // there's a decl/def difference in the containing type of a
    237       // ptr_to_member_type, but it's what DWARF says, for some reason.
    238       Attribute == dwarf::DW_AT_type) {
    239     // ... has a DW_AT_name attribute,
    240     StringRef Name = getDIEStringAttr(Entry, dwarf::DW_AT_name);
    241     if (!Name.empty()) {
    242       hashShallowTypeReference(Attribute, Entry, Name);
    243       return;
    244     }
    245   }
    246 
    247   unsigned &DieNumber = Numbering[&Entry];
    248   if (DieNumber) {
    249     hashRepeatedTypeReference(Attribute, DieNumber);
    250     return;
    251   }
    252 
    253   // otherwise, b) use the letter 'T' as the marker, ...
    254   addULEB128('T');
    255 
    256   addULEB128(Attribute);
    257 
    258   // ... process the type T recursively by performing Steps 2 through 7, and
    259   // use the result as the attribute value.
    260   DieNumber = Numbering.size();
    261   computeHash(Entry);
    262 }
    263 
    264 // Hash all of the values in a block like set of values. This assumes that
    265 // all of the data is going to be added as integers.
    266 void DIEHash::hashBlockData(const DIE::const_value_range &Values) {
    267   for (const auto &V : Values)
    268     Hash.update((uint64_t)V.getDIEInteger().getValue());
    269 }
    270 
    271 // Hash the contents of a loclistptr class.
    272 void DIEHash::hashLocList(const DIELocList &LocList) {
    273   HashingByteStreamer Streamer(*this);
    274   DwarfDebug &DD = *AP->getDwarfDebug();
    275   const DebugLocStream &Locs = DD.getDebugLocs();
    276   for (const auto &Entry : Locs.getEntries(Locs.getList(LocList.getValue())))
    277     DD.emitDebugLocEntry(Streamer, Entry);
    278 }
    279 
    280 // Hash an individual attribute \param Attr based on the type of attribute and
    281 // the form.
    282 void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {
    283   dwarf::Attribute Attribute = Value.getAttribute();
    284 
    285   // Other attribute values use the letter 'A' as the marker, and the value
    286   // consists of the form code (encoded as an unsigned LEB128 value) followed by
    287   // the encoding of the value according to the form code. To ensure
    288   // reproducibility of the signature, the set of forms used in the signature
    289   // computation is limited to the following: DW_FORM_sdata, DW_FORM_flag,
    290   // DW_FORM_string, and DW_FORM_block.
    291 
    292   switch (Value.getType()) {
    293   case DIEValue::isNone:
    294     llvm_unreachable("Expected valid DIEValue");
    295 
    296     // 7.27 Step 3
    297     // ... An attribute that refers to another type entry T is processed as
    298     // follows:
    299   case DIEValue::isEntry:
    300     hashDIEEntry(Attribute, Tag, Value.getDIEEntry().getEntry());
    301     break;
    302   case DIEValue::isInteger: {
    303     addULEB128('A');
    304     addULEB128(Attribute);
    305     switch (Value.getForm()) {
    306     case dwarf::DW_FORM_data1:
    307     case dwarf::DW_FORM_data2:
    308     case dwarf::DW_FORM_data4:
    309     case dwarf::DW_FORM_data8:
    310     case dwarf::DW_FORM_udata:
    311     case dwarf::DW_FORM_sdata:
    312       addULEB128(dwarf::DW_FORM_sdata);
    313       addSLEB128((int64_t)Value.getDIEInteger().getValue());
    314       break;
    315     // DW_FORM_flag_present is just flag with a value of one. We still give it a
    316     // value so just use the value.
    317     case dwarf::DW_FORM_flag_present:
    318     case dwarf::DW_FORM_flag:
    319       addULEB128(dwarf::DW_FORM_flag);
    320       addULEB128((int64_t)Value.getDIEInteger().getValue());
    321       break;
    322     default:
    323       llvm_unreachable("Unknown integer form!");
    324     }
    325     break;
    326   }
    327   case DIEValue::isString:
    328     addULEB128('A');
    329     addULEB128(Attribute);
    330     addULEB128(dwarf::DW_FORM_string);
    331     addString(Value.getDIEString().getString());
    332     break;
    333   case DIEValue::isBlock:
    334   case DIEValue::isLoc:
    335   case DIEValue::isLocList:
    336     addULEB128('A');
    337     addULEB128(Attribute);
    338     addULEB128(dwarf::DW_FORM_block);
    339     if (Value.getType() == DIEValue::isBlock) {
    340       addULEB128(Value.getDIEBlock().ComputeSize(AP));
    341       hashBlockData(Value.getDIEBlock().values());
    342     } else if (Value.getType() == DIEValue::isLoc) {
    343       addULEB128(Value.getDIELoc().ComputeSize(AP));
    344       hashBlockData(Value.getDIELoc().values());
    345     } else {
    346       // We could add the block length, but that would take
    347       // a bit of work and not add a lot of uniqueness
    348       // to the hash in some way we could test.
    349       hashLocList(Value.getDIELocList());
    350     }
    351     break;
    352     // FIXME: It's uncertain whether or not we should handle this at the moment.
    353   case DIEValue::isExpr:
    354   case DIEValue::isLabel:
    355   case DIEValue::isDelta:
    356     llvm_unreachable("Add support for additional value types.");
    357   }
    358 }
    359 
    360 // Go through the attributes from \param Attrs in the order specified in 7.27.4
    361 // and hash them.
    362 void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) {
    363 #define ADD_ATTR(ATTR)                                                         \
    364   {                                                                            \
    365     if (ATTR)                                                                  \
    366       hashAttribute(ATTR, Tag);                                                \
    367   }
    368 
    369   ADD_ATTR(Attrs.DW_AT_name);
    370   ADD_ATTR(Attrs.DW_AT_accessibility);
    371   ADD_ATTR(Attrs.DW_AT_address_class);
    372   ADD_ATTR(Attrs.DW_AT_allocated);
    373   ADD_ATTR(Attrs.DW_AT_artificial);
    374   ADD_ATTR(Attrs.DW_AT_associated);
    375   ADD_ATTR(Attrs.DW_AT_binary_scale);
    376   ADD_ATTR(Attrs.DW_AT_bit_offset);
    377   ADD_ATTR(Attrs.DW_AT_bit_size);
    378   ADD_ATTR(Attrs.DW_AT_bit_stride);
    379   ADD_ATTR(Attrs.DW_AT_byte_size);
    380   ADD_ATTR(Attrs.DW_AT_byte_stride);
    381   ADD_ATTR(Attrs.DW_AT_const_expr);
    382   ADD_ATTR(Attrs.DW_AT_const_value);
    383   ADD_ATTR(Attrs.DW_AT_containing_type);
    384   ADD_ATTR(Attrs.DW_AT_count);
    385   ADD_ATTR(Attrs.DW_AT_data_bit_offset);
    386   ADD_ATTR(Attrs.DW_AT_data_location);
    387   ADD_ATTR(Attrs.DW_AT_data_member_location);
    388   ADD_ATTR(Attrs.DW_AT_decimal_scale);
    389   ADD_ATTR(Attrs.DW_AT_decimal_sign);
    390   ADD_ATTR(Attrs.DW_AT_default_value);
    391   ADD_ATTR(Attrs.DW_AT_digit_count);
    392   ADD_ATTR(Attrs.DW_AT_discr);
    393   ADD_ATTR(Attrs.DW_AT_discr_list);
    394   ADD_ATTR(Attrs.DW_AT_discr_value);
    395   ADD_ATTR(Attrs.DW_AT_encoding);
    396   ADD_ATTR(Attrs.DW_AT_enum_class);
    397   ADD_ATTR(Attrs.DW_AT_endianity);
    398   ADD_ATTR(Attrs.DW_AT_explicit);
    399   ADD_ATTR(Attrs.DW_AT_is_optional);
    400   ADD_ATTR(Attrs.DW_AT_location);
    401   ADD_ATTR(Attrs.DW_AT_lower_bound);
    402   ADD_ATTR(Attrs.DW_AT_mutable);
    403   ADD_ATTR(Attrs.DW_AT_ordering);
    404   ADD_ATTR(Attrs.DW_AT_picture_string);
    405   ADD_ATTR(Attrs.DW_AT_prototyped);
    406   ADD_ATTR(Attrs.DW_AT_small);
    407   ADD_ATTR(Attrs.DW_AT_segment);
    408   ADD_ATTR(Attrs.DW_AT_string_length);
    409   ADD_ATTR(Attrs.DW_AT_threads_scaled);
    410   ADD_ATTR(Attrs.DW_AT_upper_bound);
    411   ADD_ATTR(Attrs.DW_AT_use_location);
    412   ADD_ATTR(Attrs.DW_AT_use_UTF8);
    413   ADD_ATTR(Attrs.DW_AT_variable_parameter);
    414   ADD_ATTR(Attrs.DW_AT_virtuality);
    415   ADD_ATTR(Attrs.DW_AT_visibility);
    416   ADD_ATTR(Attrs.DW_AT_vtable_elem_location);
    417   ADD_ATTR(Attrs.DW_AT_type);
    418 
    419   // FIXME: Add the extended attributes.
    420 }
    421 
    422 // Add all of the attributes for \param Die to the hash.
    423 void DIEHash::addAttributes(const DIE &Die) {
    424   DIEAttrs Attrs = {};
    425   collectAttributes(Die, Attrs);
    426   hashAttributes(Attrs, Die.getTag());
    427 }
    428 
    429 void DIEHash::hashNestedType(const DIE &Die, StringRef Name) {
    430   // 7.27 Step 7
    431   // ... append the letter 'S',
    432   addULEB128('S');
    433 
    434   // the tag of C,
    435   addULEB128(Die.getTag());
    436 
    437   // and the name.
    438   addString(Name);
    439 }
    440 
    441 // Compute the hash of a DIE. This is based on the type signature computation
    442 // given in section 7.27 of the DWARF4 standard. It is the md5 hash of a
    443 // flattened description of the DIE.
    444 void DIEHash::computeHash(const DIE &Die) {
    445   // Append the letter 'D', followed by the DWARF tag of the DIE.
    446   addULEB128('D');
    447   addULEB128(Die.getTag());
    448 
    449   // Add each of the attributes of the DIE.
    450   addAttributes(Die);
    451 
    452   // Then hash each of the children of the DIE.
    453   for (auto &C : Die.children()) {
    454     // 7.27 Step 7
    455     // If C is a nested type entry or a member function entry, ...
    456     if (isType(C.getTag()) || C.getTag() == dwarf::DW_TAG_subprogram) {
    457       StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name);
    458       // ... and has a DW_AT_name attribute
    459       if (!Name.empty()) {
    460         hashNestedType(C, Name);
    461         continue;
    462       }
    463     }
    464     computeHash(C);
    465   }
    466 
    467   // Following the last (or if there are no children), append a zero byte.
    468   Hash.update(makeArrayRef((uint8_t)'\0'));
    469 }
    470 
    471 /// This is based on the type signature computation given in section 7.27 of the
    472 /// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
    473 /// with the inclusion of the full CU and all top level CU entities.
    474 // TODO: Initialize the type chain at 0 instead of 1 for CU signatures.
    475 uint64_t DIEHash::computeCUSignature(const DIE &Die) {
    476   Numbering.clear();
    477   Numbering[&Die] = 1;
    478 
    479   // Hash the DIE.
    480   computeHash(Die);
    481 
    482   // Now return the result.
    483   MD5::MD5Result Result;
    484   Hash.final(Result);
    485 
    486   // ... take the least significant 8 bytes and return those. Our MD5
    487   // implementation always returns its results in little endian, swap bytes
    488   // appropriately.
    489   return support::endian::read64le(Result + 8);
    490 }
    491 
    492 /// This is based on the type signature computation given in section 7.27 of the
    493 /// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
    494 /// with the inclusion of additional forms not specifically called out in the
    495 /// standard.
    496 uint64_t DIEHash::computeTypeSignature(const DIE &Die) {
    497   Numbering.clear();
    498   Numbering[&Die] = 1;
    499 
    500   if (const DIE *Parent = Die.getParent())
    501     addParentContext(*Parent);
    502 
    503   // Hash the DIE.
    504   computeHash(Die);
    505 
    506   // Now return the result.
    507   MD5::MD5Result Result;
    508   Hash.final(Result);
    509 
    510   // ... take the least significant 8 bytes and return those. Our MD5
    511   // implementation always returns its results in little endian, swap bytes
    512   // appropriately.
    513   return support::endian::read64le(Result + 8);
    514 }
    515