Home | History | Annotate | Download | only in ProfileData
      1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains support for reading profiling data for instrumentation
     11 // based PGO and coverage.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
     16 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
     17 
     18 #include "llvm/ADT/ArrayRef.h"
     19 #include "llvm/ADT/StringRef.h"
     20 #include "llvm/IR/ProfileSummary.h"
     21 #include "llvm/ProfileData/InstrProf.h"
     22 #include "llvm/Support/Endian.h"
     23 #include "llvm/Support/Error.h"
     24 #include "llvm/Support/LineIterator.h"
     25 #include "llvm/Support/MemoryBuffer.h"
     26 #include "llvm/Support/OnDiskHashTable.h"
     27 #include "llvm/Support/SwapByteOrder.h"
     28 #include <algorithm>
     29 #include <cassert>
     30 #include <cstddef>
     31 #include <cstdint>
     32 #include <iterator>
     33 #include <memory>
     34 #include <utility>
     35 #include <vector>
     36 
     37 namespace llvm {
     38 
     39 class InstrProfReader;
     40 
     41 /// A file format agnostic iterator over profiling data.
     42 class InstrProfIterator : public std::iterator<std::input_iterator_tag,
     43                                                InstrProfRecord> {
     44   InstrProfReader *Reader = nullptr;
     45   InstrProfRecord Record;
     46 
     47   void Increment();
     48 
     49 public:
     50   InstrProfIterator() = default;
     51   InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
     52 
     53   InstrProfIterator &operator++() { Increment(); return *this; }
     54   bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
     55   bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
     56   InstrProfRecord &operator*() { return Record; }
     57   InstrProfRecord *operator->() { return &Record; }
     58 };
     59 
     60 /// Base class and interface for reading profiling data of any known instrprof
     61 /// format. Provides an iterator over InstrProfRecords.
     62 class InstrProfReader {
     63   instrprof_error LastError = instrprof_error::success;
     64 
     65 public:
     66   InstrProfReader() = default;
     67   virtual ~InstrProfReader() = default;
     68 
     69   /// Read the header.  Required before reading first record.
     70   virtual Error readHeader() = 0;
     71 
     72   /// Read a single record.
     73   virtual Error readNextRecord(InstrProfRecord &Record) = 0;
     74 
     75   /// Iterator over profile data.
     76   InstrProfIterator begin() { return InstrProfIterator(this); }
     77   InstrProfIterator end() { return InstrProfIterator(); }
     78 
     79   virtual bool isIRLevelProfile() const = 0;
     80 
     81   /// Return the PGO symtab. There are three different readers:
     82   /// Raw, Text, and Indexed profile readers. The first two types
     83   /// of readers are used only by llvm-profdata tool, while the indexed
     84   /// profile reader is also used by llvm-cov tool and the compiler (
     85   /// backend or frontend). Since creating PGO symtab can create
     86   /// significant runtime and memory overhead (as it touches data
     87   /// for the whole program), InstrProfSymtab for the indexed profile
     88   /// reader should be created on demand and it is recommended to be
     89   /// only used for dumping purpose with llvm-proftool, not with the
     90   /// compiler.
     91   virtual InstrProfSymtab &getSymtab() = 0;
     92 
     93 protected:
     94   std::unique_ptr<InstrProfSymtab> Symtab;
     95   /// Set the current error and return same.
     96   Error error(instrprof_error Err) {
     97     LastError = Err;
     98     if (Err == instrprof_error::success)
     99       return Error::success();
    100     return make_error<InstrProfError>(Err);
    101   }
    102 
    103   Error error(Error E) { return error(InstrProfError::take(std::move(E))); }
    104 
    105   /// Clear the current error and return a successful one.
    106   Error success() { return error(instrprof_error::success); }
    107 
    108 public:
    109   /// Return true if the reader has finished reading the profile data.
    110   bool isEOF() { return LastError == instrprof_error::eof; }
    111 
    112   /// Return true if the reader encountered an error reading profiling data.
    113   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
    114 
    115   /// Get the current error.
    116   Error getError() {
    117     if (hasError())
    118       return make_error<InstrProfError>(LastError);
    119     return Error::success();
    120   }
    121 
    122   /// Factory method to create an appropriately typed reader for the given
    123   /// instrprof file.
    124   static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
    125 
    126   static Expected<std::unique_ptr<InstrProfReader>>
    127   create(std::unique_ptr<MemoryBuffer> Buffer);
    128 };
    129 
    130 /// Reader for the simple text based instrprof format.
    131 ///
    132 /// This format is a simple text format that's suitable for test data. Records
    133 /// are separated by one or more blank lines, and record fields are separated by
    134 /// new lines.
    135 ///
    136 /// Each record consists of a function name, a function hash, a number of
    137 /// counters, and then each counter value, in that order.
    138 class TextInstrProfReader : public InstrProfReader {
    139 private:
    140   /// The profile data file contents.
    141   std::unique_ptr<MemoryBuffer> DataBuffer;
    142   /// Iterator over the profile data.
    143   line_iterator Line;
    144   bool IsIRLevelProfile = false;
    145 
    146   Error readValueProfileData(InstrProfRecord &Record);
    147 
    148 public:
    149   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
    150       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
    151   TextInstrProfReader(const TextInstrProfReader &) = delete;
    152   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
    153 
    154   /// Return true if the given buffer is in text instrprof format.
    155   static bool hasFormat(const MemoryBuffer &Buffer);
    156 
    157   bool isIRLevelProfile() const override { return IsIRLevelProfile; }
    158 
    159   /// Read the header.
    160   Error readHeader() override;
    161 
    162   /// Read a single record.
    163   Error readNextRecord(InstrProfRecord &Record) override;
    164 
    165   InstrProfSymtab &getSymtab() override {
    166     assert(Symtab.get());
    167     return *Symtab.get();
    168   }
    169 };
    170 
    171 /// Reader for the raw instrprof binary format from runtime.
    172 ///
    173 /// This format is a raw memory dump of the instrumentation-baed profiling data
    174 /// from the runtime.  It has no index.
    175 ///
    176 /// Templated on the unsigned type whose size matches pointers on the platform
    177 /// that wrote the profile.
    178 template <class IntPtrT>
    179 class RawInstrProfReader : public InstrProfReader {
    180 private:
    181   /// The profile data file contents.
    182   std::unique_ptr<MemoryBuffer> DataBuffer;
    183   bool ShouldSwapBytes;
    184   // The value of the version field of the raw profile data header. The lower 56
    185   // bits specifies the format version and the most significant 8 bits specify
    186   // the variant types of the profile.
    187   uint64_t Version;
    188   uint64_t CountersDelta;
    189   uint64_t NamesDelta;
    190   const RawInstrProf::ProfileData<IntPtrT> *Data;
    191   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
    192   const uint64_t *CountersStart;
    193   const char *NamesStart;
    194   uint64_t NamesSize;
    195   // After value profile is all read, this pointer points to
    196   // the header of next profile data (if exists)
    197   const uint8_t *ValueDataStart;
    198   uint32_t ValueKindLast;
    199   uint32_t CurValueDataSize;
    200 
    201   InstrProfRecord::ValueMapType FunctionPtrToNameMap;
    202 
    203 public:
    204   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
    205       : DataBuffer(std::move(DataBuffer)) { }
    206   RawInstrProfReader(const RawInstrProfReader &) = delete;
    207   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
    208 
    209   static bool hasFormat(const MemoryBuffer &DataBuffer);
    210   Error readHeader() override;
    211   Error readNextRecord(InstrProfRecord &Record) override;
    212 
    213   bool isIRLevelProfile() const override {
    214     return (Version & VARIANT_MASK_IR_PROF) != 0;
    215   }
    216 
    217   InstrProfSymtab &getSymtab() override {
    218     assert(Symtab.get());
    219     return *Symtab.get();
    220   }
    221 
    222 private:
    223   Error createSymtab(InstrProfSymtab &Symtab);
    224   Error readNextHeader(const char *CurrentPos);
    225   Error readHeader(const RawInstrProf::Header &Header);
    226 
    227   template <class IntT> IntT swap(IntT Int) const {
    228     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
    229   }
    230 
    231   support::endianness getDataEndianness() const {
    232     support::endianness HostEndian = getHostEndianness();
    233     if (!ShouldSwapBytes)
    234       return HostEndian;
    235     if (HostEndian == support::little)
    236       return support::big;
    237     else
    238       return support::little;
    239   }
    240 
    241   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
    242     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
    243   }
    244 
    245   Error readName(InstrProfRecord &Record);
    246   Error readFuncHash(InstrProfRecord &Record);
    247   Error readRawCounts(InstrProfRecord &Record);
    248   Error readValueProfilingData(InstrProfRecord &Record);
    249   bool atEnd() const { return Data == DataEnd; }
    250 
    251   void advanceData() {
    252     Data++;
    253     ValueDataStart += CurValueDataSize;
    254   }
    255 
    256   const char *getNextHeaderPos() const {
    257       assert(atEnd());
    258       return (const char *)ValueDataStart;
    259   }
    260 
    261   const uint64_t *getCounter(IntPtrT CounterPtr) const {
    262     ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
    263     return CountersStart + Offset;
    264   }
    265 
    266   StringRef getName(uint64_t NameRef) const {
    267     return Symtab->getFuncName(swap(NameRef));
    268   }
    269 };
    270 
    271 typedef RawInstrProfReader<uint32_t> RawInstrProfReader32;
    272 typedef RawInstrProfReader<uint64_t> RawInstrProfReader64;
    273 
    274 namespace IndexedInstrProf {
    275 
    276 enum class HashT : uint32_t;
    277 
    278 } // end namespace IndexedInstrProf
    279 
    280 /// Trait for lookups into the on-disk hash table for the binary instrprof
    281 /// format.
    282 class InstrProfLookupTrait {
    283   std::vector<InstrProfRecord> DataBuffer;
    284   IndexedInstrProf::HashT HashType;
    285   unsigned FormatVersion;
    286   // Endianness of the input value profile data.
    287   // It should be LE by default, but can be changed
    288   // for testing purpose.
    289   support::endianness ValueProfDataEndianness = support::little;
    290 
    291 public:
    292   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
    293       : HashType(HashType), FormatVersion(FormatVersion) {}
    294 
    295   typedef ArrayRef<InstrProfRecord> data_type;
    296 
    297   typedef StringRef internal_key_type;
    298   typedef StringRef external_key_type;
    299   typedef uint64_t hash_value_type;
    300   typedef uint64_t offset_type;
    301 
    302   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
    303   static StringRef GetInternalKey(StringRef K) { return K; }
    304   static StringRef GetExternalKey(StringRef K) { return K; }
    305 
    306   hash_value_type ComputeHash(StringRef K);
    307 
    308   static std::pair<offset_type, offset_type>
    309   ReadKeyDataLength(const unsigned char *&D) {
    310     using namespace support;
    311 
    312     offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
    313     offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
    314     return std::make_pair(KeyLen, DataLen);
    315   }
    316 
    317   StringRef ReadKey(const unsigned char *D, offset_type N) {
    318     return StringRef((const char *)D, N);
    319   }
    320 
    321   bool readValueProfilingData(const unsigned char *&D,
    322                               const unsigned char *const End);
    323   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
    324 
    325   // Used for testing purpose only.
    326   void setValueProfDataEndianness(support::endianness Endianness) {
    327     ValueProfDataEndianness = Endianness;
    328   }
    329 };
    330 
    331 struct InstrProfReaderIndexBase {
    332   virtual ~InstrProfReaderIndexBase() = default;
    333 
    334   // Read all the profile records with the same key pointed to the current
    335   // iterator.
    336   virtual Error getRecords(ArrayRef<InstrProfRecord> &Data) = 0;
    337 
    338   // Read all the profile records with the key equal to FuncName
    339   virtual Error getRecords(StringRef FuncName,
    340                                      ArrayRef<InstrProfRecord> &Data) = 0;
    341   virtual void advanceToNextKey() = 0;
    342   virtual bool atEnd() const = 0;
    343   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
    344   virtual uint64_t getVersion() const = 0;
    345   virtual bool isIRLevelProfile() const = 0;
    346   virtual void populateSymtab(InstrProfSymtab &) = 0;
    347 };
    348 
    349 typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
    350     OnDiskHashTableImplV3;
    351 
    352 template <typename HashTableImpl>
    353 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
    354 
    355 private:
    356   std::unique_ptr<HashTableImpl> HashTable;
    357   typename HashTableImpl::data_iterator RecordIterator;
    358   uint64_t FormatVersion;
    359 
    360 public:
    361   InstrProfReaderIndex(const unsigned char *Buckets,
    362                        const unsigned char *const Payload,
    363                        const unsigned char *const Base,
    364                        IndexedInstrProf::HashT HashType, uint64_t Version);
    365   ~InstrProfReaderIndex() override = default;
    366 
    367   Error getRecords(ArrayRef<InstrProfRecord> &Data) override;
    368   Error getRecords(StringRef FuncName,
    369                    ArrayRef<InstrProfRecord> &Data) override;
    370   void advanceToNextKey() override { RecordIterator++; }
    371 
    372   bool atEnd() const override {
    373     return RecordIterator == HashTable->data_end();
    374   }
    375 
    376   void setValueProfDataEndianness(support::endianness Endianness) override {
    377     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
    378   }
    379 
    380   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
    381 
    382   bool isIRLevelProfile() const override {
    383     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
    384   }
    385 
    386   void populateSymtab(InstrProfSymtab &Symtab) override {
    387     Symtab.create(HashTable->keys());
    388   }
    389 };
    390 
    391 /// Reader for the indexed binary instrprof format.
    392 class IndexedInstrProfReader : public InstrProfReader {
    393 private:
    394   /// The profile data file contents.
    395   std::unique_ptr<MemoryBuffer> DataBuffer;
    396   /// The index into the profile data.
    397   std::unique_ptr<InstrProfReaderIndexBase> Index;
    398   /// Profile summary data.
    399   std::unique_ptr<ProfileSummary> Summary;
    400 
    401   // Read the profile summary. Return a pointer pointing to one byte past the
    402   // end of the summary data if it exists or the input \c Cur.
    403   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
    404                                    const unsigned char *Cur);
    405 
    406 public:
    407   IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
    408       : DataBuffer(std::move(DataBuffer)) {}
    409   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
    410   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
    411 
    412   /// Return the profile version.
    413   uint64_t getVersion() const { return Index->getVersion(); }
    414   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
    415 
    416   /// Return true if the given buffer is in an indexed instrprof format.
    417   static bool hasFormat(const MemoryBuffer &DataBuffer);
    418 
    419   /// Read the file header.
    420   Error readHeader() override;
    421   /// Read a single record.
    422   Error readNextRecord(InstrProfRecord &Record) override;
    423 
    424   /// Return the pointer to InstrProfRecord associated with FuncName
    425   /// and FuncHash
    426   Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
    427                                                uint64_t FuncHash);
    428 
    429   /// Fill Counts with the profile data for the given function name.
    430   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
    431                           std::vector<uint64_t> &Counts);
    432 
    433   /// Return the maximum of all known function counts.
    434   uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); }
    435 
    436   /// Factory method to create an indexed reader.
    437   static Expected<std::unique_ptr<IndexedInstrProfReader>>
    438   create(const Twine &Path);
    439 
    440   static Expected<std::unique_ptr<IndexedInstrProfReader>>
    441   create(std::unique_ptr<MemoryBuffer> Buffer);
    442 
    443   // Used for testing purpose only.
    444   void setValueProfDataEndianness(support::endianness Endianness) {
    445     Index->setValueProfDataEndianness(Endianness);
    446   }
    447 
    448   // See description in the base class. This interface is designed
    449   // to be used by llvm-profdata (for dumping). Avoid using this when
    450   // the client is the compiler.
    451   InstrProfSymtab &getSymtab() override;
    452   ProfileSummary &getSummary() { return *(Summary.get()); }
    453 };
    454 
    455 } // end namespace llvm
    456 
    457 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
    458