Home | History | Annotate | Download | only in ProfileData
      1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains support for reading profiling data for instrumentation
     11 // based PGO and coverage.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
     16 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
     17 
     18 #include "llvm/ADT/ArrayRef.h"
     19 #include "llvm/ADT/StringRef.h"
     20 #include "llvm/IR/ProfileSummary.h"
     21 #include "llvm/ProfileData/InstrProf.h"
     22 #include "llvm/Support/Endian.h"
     23 #include "llvm/Support/Error.h"
     24 #include "llvm/Support/LineIterator.h"
     25 #include "llvm/Support/MemoryBuffer.h"
     26 #include "llvm/Support/OnDiskHashTable.h"
     27 #include "llvm/Support/SwapByteOrder.h"
     28 #include <algorithm>
     29 #include <cassert>
     30 #include <cstddef>
     31 #include <cstdint>
     32 #include <iterator>
     33 #include <memory>
     34 #include <utility>
     35 #include <vector>
     36 
     37 namespace llvm {
     38 
     39 class InstrProfReader;
     40 
     41 /// A file format agnostic iterator over profiling data.
     42 class InstrProfIterator : public std::iterator<std::input_iterator_tag,
     43                                                NamedInstrProfRecord> {
     44   InstrProfReader *Reader = nullptr;
     45   value_type Record;
     46 
     47   void Increment();
     48 
     49 public:
     50   InstrProfIterator() = default;
     51   InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
     52 
     53   InstrProfIterator &operator++() { Increment(); return *this; }
     54   bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
     55   bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
     56   value_type &operator*() { return Record; }
     57   value_type *operator->() { return &Record; }
     58 };
     59 
     60 /// Base class and interface for reading profiling data of any known instrprof
     61 /// format. Provides an iterator over NamedInstrProfRecords.
     62 class InstrProfReader {
     63   instrprof_error LastError = instrprof_error::success;
     64 
     65 public:
     66   InstrProfReader() = default;
     67   virtual ~InstrProfReader() = default;
     68 
     69   /// Read the header.  Required before reading first record.
     70   virtual Error readHeader() = 0;
     71 
     72   /// Read a single record.
     73   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
     74 
     75   /// Iterator over profile data.
     76   InstrProfIterator begin() { return InstrProfIterator(this); }
     77   InstrProfIterator end() { return InstrProfIterator(); }
     78 
     79   virtual bool isIRLevelProfile() const = 0;
     80 
     81   /// Return the PGO symtab. There are three different readers:
     82   /// Raw, Text, and Indexed profile readers. The first two types
     83   /// of readers are used only by llvm-profdata tool, while the indexed
     84   /// profile reader is also used by llvm-cov tool and the compiler (
     85   /// backend or frontend). Since creating PGO symtab can create
     86   /// significant runtime and memory overhead (as it touches data
     87   /// for the whole program), InstrProfSymtab for the indexed profile
     88   /// reader should be created on demand and it is recommended to be
     89   /// only used for dumping purpose with llvm-proftool, not with the
     90   /// compiler.
     91   virtual InstrProfSymtab &getSymtab() = 0;
     92 
     93 protected:
     94   std::unique_ptr<InstrProfSymtab> Symtab;
     95 
     96   /// Set the current error and return same.
     97   Error error(instrprof_error Err) {
     98     LastError = Err;
     99     if (Err == instrprof_error::success)
    100       return Error::success();
    101     return make_error<InstrProfError>(Err);
    102   }
    103 
    104   Error error(Error E) { return error(InstrProfError::take(std::move(E))); }
    105 
    106   /// Clear the current error and return a successful one.
    107   Error success() { return error(instrprof_error::success); }
    108 
    109 public:
    110   /// Return true if the reader has finished reading the profile data.
    111   bool isEOF() { return LastError == instrprof_error::eof; }
    112 
    113   /// Return true if the reader encountered an error reading profiling data.
    114   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
    115 
    116   /// Get the current error.
    117   Error getError() {
    118     if (hasError())
    119       return make_error<InstrProfError>(LastError);
    120     return Error::success();
    121   }
    122 
    123   /// Factory method to create an appropriately typed reader for the given
    124   /// instrprof file.
    125   static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
    126 
    127   static Expected<std::unique_ptr<InstrProfReader>>
    128   create(std::unique_ptr<MemoryBuffer> Buffer);
    129 };
    130 
    131 /// Reader for the simple text based instrprof format.
    132 ///
    133 /// This format is a simple text format that's suitable for test data. Records
    134 /// are separated by one or more blank lines, and record fields are separated by
    135 /// new lines.
    136 ///
    137 /// Each record consists of a function name, a function hash, a number of
    138 /// counters, and then each counter value, in that order.
    139 class TextInstrProfReader : public InstrProfReader {
    140 private:
    141   /// The profile data file contents.
    142   std::unique_ptr<MemoryBuffer> DataBuffer;
    143   /// Iterator over the profile data.
    144   line_iterator Line;
    145   bool IsIRLevelProfile = false;
    146 
    147   Error readValueProfileData(InstrProfRecord &Record);
    148 
    149 public:
    150   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
    151       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
    152   TextInstrProfReader(const TextInstrProfReader &) = delete;
    153   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
    154 
    155   /// Return true if the given buffer is in text instrprof format.
    156   static bool hasFormat(const MemoryBuffer &Buffer);
    157 
    158   bool isIRLevelProfile() const override { return IsIRLevelProfile; }
    159 
    160   /// Read the header.
    161   Error readHeader() override;
    162 
    163   /// Read a single record.
    164   Error readNextRecord(NamedInstrProfRecord &Record) override;
    165 
    166   InstrProfSymtab &getSymtab() override {
    167     assert(Symtab.get());
    168     return *Symtab.get();
    169   }
    170 };
    171 
    172 /// Reader for the raw instrprof binary format from runtime.
    173 ///
    174 /// This format is a raw memory dump of the instrumentation-baed profiling data
    175 /// from the runtime.  It has no index.
    176 ///
    177 /// Templated on the unsigned type whose size matches pointers on the platform
    178 /// that wrote the profile.
    179 template <class IntPtrT>
    180 class RawInstrProfReader : public InstrProfReader {
    181 private:
    182   /// The profile data file contents.
    183   std::unique_ptr<MemoryBuffer> DataBuffer;
    184   bool ShouldSwapBytes;
    185   // The value of the version field of the raw profile data header. The lower 56
    186   // bits specifies the format version and the most significant 8 bits specify
    187   // the variant types of the profile.
    188   uint64_t Version;
    189   uint64_t CountersDelta;
    190   uint64_t NamesDelta;
    191   const RawInstrProf::ProfileData<IntPtrT> *Data;
    192   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
    193   const uint64_t *CountersStart;
    194   const char *NamesStart;
    195   uint64_t NamesSize;
    196   // After value profile is all read, this pointer points to
    197   // the header of next profile data (if exists)
    198   const uint8_t *ValueDataStart;
    199   uint32_t ValueKindLast;
    200   uint32_t CurValueDataSize;
    201 
    202   InstrProfRecord::ValueMapType FunctionPtrToNameMap;
    203 
    204 public:
    205   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
    206       : DataBuffer(std::move(DataBuffer)) {}
    207   RawInstrProfReader(const RawInstrProfReader &) = delete;
    208   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
    209 
    210   static bool hasFormat(const MemoryBuffer &DataBuffer);
    211   Error readHeader() override;
    212   Error readNextRecord(NamedInstrProfRecord &Record) override;
    213 
    214   bool isIRLevelProfile() const override {
    215     return (Version & VARIANT_MASK_IR_PROF) != 0;
    216   }
    217 
    218   InstrProfSymtab &getSymtab() override {
    219     assert(Symtab.get());
    220     return *Symtab.get();
    221   }
    222 
    223 private:
    224   Error createSymtab(InstrProfSymtab &Symtab);
    225   Error readNextHeader(const char *CurrentPos);
    226   Error readHeader(const RawInstrProf::Header &Header);
    227 
    228   template <class IntT> IntT swap(IntT Int) const {
    229     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
    230   }
    231 
    232   support::endianness getDataEndianness() const {
    233     support::endianness HostEndian = getHostEndianness();
    234     if (!ShouldSwapBytes)
    235       return HostEndian;
    236     if (HostEndian == support::little)
    237       return support::big;
    238     else
    239       return support::little;
    240   }
    241 
    242   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
    243     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
    244   }
    245 
    246   Error readName(NamedInstrProfRecord &Record);
    247   Error readFuncHash(NamedInstrProfRecord &Record);
    248   Error readRawCounts(InstrProfRecord &Record);
    249   Error readValueProfilingData(InstrProfRecord &Record);
    250   bool atEnd() const { return Data == DataEnd; }
    251 
    252   void advanceData() {
    253     Data++;
    254     ValueDataStart += CurValueDataSize;
    255   }
    256 
    257   const char *getNextHeaderPos() const {
    258       assert(atEnd());
    259       return (const char *)ValueDataStart;
    260   }
    261 
    262   const uint64_t *getCounter(IntPtrT CounterPtr) const {
    263     ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
    264     return CountersStart + Offset;
    265   }
    266 
    267   StringRef getName(uint64_t NameRef) const {
    268     return Symtab->getFuncName(swap(NameRef));
    269   }
    270 };
    271 
    272 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
    273 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
    274 
    275 namespace IndexedInstrProf {
    276 
    277 enum class HashT : uint32_t;
    278 
    279 } // end namespace IndexedInstrProf
    280 
    281 /// Trait for lookups into the on-disk hash table for the binary instrprof
    282 /// format.
    283 class InstrProfLookupTrait {
    284   std::vector<NamedInstrProfRecord> DataBuffer;
    285   IndexedInstrProf::HashT HashType;
    286   unsigned FormatVersion;
    287   // Endianness of the input value profile data.
    288   // It should be LE by default, but can be changed
    289   // for testing purpose.
    290   support::endianness ValueProfDataEndianness = support::little;
    291 
    292 public:
    293   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
    294       : HashType(HashType), FormatVersion(FormatVersion) {}
    295 
    296   using data_type = ArrayRef<NamedInstrProfRecord>;
    297 
    298   using internal_key_type = StringRef;
    299   using external_key_type = StringRef;
    300   using hash_value_type = uint64_t;
    301   using offset_type = uint64_t;
    302 
    303   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
    304   static StringRef GetInternalKey(StringRef K) { return K; }
    305   static StringRef GetExternalKey(StringRef K) { return K; }
    306 
    307   hash_value_type ComputeHash(StringRef K);
    308 
    309   static std::pair<offset_type, offset_type>
    310   ReadKeyDataLength(const unsigned char *&D) {
    311     using namespace support;
    312 
    313     offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
    314     offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
    315     return std::make_pair(KeyLen, DataLen);
    316   }
    317 
    318   StringRef ReadKey(const unsigned char *D, offset_type N) {
    319     return StringRef((const char *)D, N);
    320   }
    321 
    322   bool readValueProfilingData(const unsigned char *&D,
    323                               const unsigned char *const End);
    324   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
    325 
    326   // Used for testing purpose only.
    327   void setValueProfDataEndianness(support::endianness Endianness) {
    328     ValueProfDataEndianness = Endianness;
    329   }
    330 };
    331 
    332 struct InstrProfReaderIndexBase {
    333   virtual ~InstrProfReaderIndexBase() = default;
    334 
    335   // Read all the profile records with the same key pointed to the current
    336   // iterator.
    337   virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
    338 
    339   // Read all the profile records with the key equal to FuncName
    340   virtual Error getRecords(StringRef FuncName,
    341                                      ArrayRef<NamedInstrProfRecord> &Data) = 0;
    342   virtual void advanceToNextKey() = 0;
    343   virtual bool atEnd() const = 0;
    344   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
    345   virtual uint64_t getVersion() const = 0;
    346   virtual bool isIRLevelProfile() const = 0;
    347   virtual Error populateSymtab(InstrProfSymtab &) = 0;
    348 };
    349 
    350 using OnDiskHashTableImplV3 =
    351     OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
    352 
    353 template <typename HashTableImpl>
    354 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
    355 private:
    356   std::unique_ptr<HashTableImpl> HashTable;
    357   typename HashTableImpl::data_iterator RecordIterator;
    358   uint64_t FormatVersion;
    359 
    360 public:
    361   InstrProfReaderIndex(const unsigned char *Buckets,
    362                        const unsigned char *const Payload,
    363                        const unsigned char *const Base,
    364                        IndexedInstrProf::HashT HashType, uint64_t Version);
    365   ~InstrProfReaderIndex() override = default;
    366 
    367   Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
    368   Error getRecords(StringRef FuncName,
    369                    ArrayRef<NamedInstrProfRecord> &Data) override;
    370   void advanceToNextKey() override { RecordIterator++; }
    371 
    372   bool atEnd() const override {
    373     return RecordIterator == HashTable->data_end();
    374   }
    375 
    376   void setValueProfDataEndianness(support::endianness Endianness) override {
    377     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
    378   }
    379 
    380   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
    381 
    382   bool isIRLevelProfile() const override {
    383     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
    384   }
    385 
    386   Error populateSymtab(InstrProfSymtab &Symtab) override {
    387     return Symtab.create(HashTable->keys());
    388   }
    389 };
    390 
    391 /// Reader for the indexed binary instrprof format.
    392 class IndexedInstrProfReader : public InstrProfReader {
    393 private:
    394   /// The profile data file contents.
    395   std::unique_ptr<MemoryBuffer> DataBuffer;
    396   /// The index into the profile data.
    397   std::unique_ptr<InstrProfReaderIndexBase> Index;
    398   /// Profile summary data.
    399   std::unique_ptr<ProfileSummary> Summary;
    400   // Index to the current record in the record array.
    401   unsigned RecordIndex;
    402 
    403   // Read the profile summary. Return a pointer pointing to one byte past the
    404   // end of the summary data if it exists or the input \c Cur.
    405   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
    406                                    const unsigned char *Cur);
    407 
    408 public:
    409   IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
    410       : DataBuffer(std::move(DataBuffer)), RecordIndex(0) {}
    411   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
    412   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
    413 
    414   /// Return the profile version.
    415   uint64_t getVersion() const { return Index->getVersion(); }
    416   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
    417 
    418   /// Return true if the given buffer is in an indexed instrprof format.
    419   static bool hasFormat(const MemoryBuffer &DataBuffer);
    420 
    421   /// Read the file header.
    422   Error readHeader() override;
    423   /// Read a single record.
    424   Error readNextRecord(NamedInstrProfRecord &Record) override;
    425 
    426   /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
    427   Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
    428                                                uint64_t FuncHash);
    429 
    430   /// Fill Counts with the profile data for the given function name.
    431   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
    432                           std::vector<uint64_t> &Counts);
    433 
    434   /// Return the maximum of all known function counts.
    435   uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); }
    436 
    437   /// Factory method to create an indexed reader.
    438   static Expected<std::unique_ptr<IndexedInstrProfReader>>
    439   create(const Twine &Path);
    440 
    441   static Expected<std::unique_ptr<IndexedInstrProfReader>>
    442   create(std::unique_ptr<MemoryBuffer> Buffer);
    443 
    444   // Used for testing purpose only.
    445   void setValueProfDataEndianness(support::endianness Endianness) {
    446     Index->setValueProfDataEndianness(Endianness);
    447   }
    448 
    449   // See description in the base class. This interface is designed
    450   // to be used by llvm-profdata (for dumping). Avoid using this when
    451   // the client is the compiler.
    452   InstrProfSymtab &getSymtab() override;
    453   ProfileSummary &getSummary() { return *(Summary.get()); }
    454 };
    455 
    456 } // end namespace llvm
    457 
    458 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
    459