1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains support for reading profiling data for instrumentation 11 // based PGO and coverage. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H 16 #define LLVM_PROFILEDATA_INSTRPROFREADER_H 17 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/IR/ProfileSummary.h" 21 #include "llvm/ProfileData/InstrProf.h" 22 #include "llvm/Support/Endian.h" 23 #include "llvm/Support/Error.h" 24 #include "llvm/Support/LineIterator.h" 25 #include "llvm/Support/MemoryBuffer.h" 26 #include "llvm/Support/OnDiskHashTable.h" 27 #include "llvm/Support/SwapByteOrder.h" 28 #include <algorithm> 29 #include <cassert> 30 #include <cstddef> 31 #include <cstdint> 32 #include <iterator> 33 #include <memory> 34 #include <utility> 35 #include <vector> 36 37 namespace llvm { 38 39 class InstrProfReader; 40 41 /// A file format agnostic iterator over profiling data. 42 class InstrProfIterator : public std::iterator<std::input_iterator_tag, 43 InstrProfRecord> { 44 InstrProfReader *Reader = nullptr; 45 InstrProfRecord Record; 46 47 void Increment(); 48 49 public: 50 InstrProfIterator() = default; 51 InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); } 52 53 InstrProfIterator &operator++() { Increment(); return *this; } 54 bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; } 55 bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; } 56 InstrProfRecord &operator*() { return Record; } 57 InstrProfRecord *operator->() { return &Record; } 58 }; 59 60 /// Base class and interface for reading profiling data of any known instrprof 61 /// format. Provides an iterator over InstrProfRecords. 62 class InstrProfReader { 63 instrprof_error LastError = instrprof_error::success; 64 65 public: 66 InstrProfReader() = default; 67 virtual ~InstrProfReader() = default; 68 69 /// Read the header. Required before reading first record. 70 virtual Error readHeader() = 0; 71 72 /// Read a single record. 73 virtual Error readNextRecord(InstrProfRecord &Record) = 0; 74 75 /// Iterator over profile data. 76 InstrProfIterator begin() { return InstrProfIterator(this); } 77 InstrProfIterator end() { return InstrProfIterator(); } 78 79 virtual bool isIRLevelProfile() const = 0; 80 81 /// Return the PGO symtab. There are three different readers: 82 /// Raw, Text, and Indexed profile readers. The first two types 83 /// of readers are used only by llvm-profdata tool, while the indexed 84 /// profile reader is also used by llvm-cov tool and the compiler ( 85 /// backend or frontend). Since creating PGO symtab can create 86 /// significant runtime and memory overhead (as it touches data 87 /// for the whole program), InstrProfSymtab for the indexed profile 88 /// reader should be created on demand and it is recommended to be 89 /// only used for dumping purpose with llvm-proftool, not with the 90 /// compiler. 91 virtual InstrProfSymtab &getSymtab() = 0; 92 93 protected: 94 std::unique_ptr<InstrProfSymtab> Symtab; 95 /// Set the current error and return same. 96 Error error(instrprof_error Err) { 97 LastError = Err; 98 if (Err == instrprof_error::success) 99 return Error::success(); 100 return make_error<InstrProfError>(Err); 101 } 102 103 Error error(Error E) { return error(InstrProfError::take(std::move(E))); } 104 105 /// Clear the current error and return a successful one. 106 Error success() { return error(instrprof_error::success); } 107 108 public: 109 /// Return true if the reader has finished reading the profile data. 110 bool isEOF() { return LastError == instrprof_error::eof; } 111 112 /// Return true if the reader encountered an error reading profiling data. 113 bool hasError() { return LastError != instrprof_error::success && !isEOF(); } 114 115 /// Get the current error. 116 Error getError() { 117 if (hasError()) 118 return make_error<InstrProfError>(LastError); 119 return Error::success(); 120 } 121 122 /// Factory method to create an appropriately typed reader for the given 123 /// instrprof file. 124 static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path); 125 126 static Expected<std::unique_ptr<InstrProfReader>> 127 create(std::unique_ptr<MemoryBuffer> Buffer); 128 }; 129 130 /// Reader for the simple text based instrprof format. 131 /// 132 /// This format is a simple text format that's suitable for test data. Records 133 /// are separated by one or more blank lines, and record fields are separated by 134 /// new lines. 135 /// 136 /// Each record consists of a function name, a function hash, a number of 137 /// counters, and then each counter value, in that order. 138 class TextInstrProfReader : public InstrProfReader { 139 private: 140 /// The profile data file contents. 141 std::unique_ptr<MemoryBuffer> DataBuffer; 142 /// Iterator over the profile data. 143 line_iterator Line; 144 bool IsIRLevelProfile = false; 145 146 Error readValueProfileData(InstrProfRecord &Record); 147 148 public: 149 TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_) 150 : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {} 151 TextInstrProfReader(const TextInstrProfReader &) = delete; 152 TextInstrProfReader &operator=(const TextInstrProfReader &) = delete; 153 154 /// Return true if the given buffer is in text instrprof format. 155 static bool hasFormat(const MemoryBuffer &Buffer); 156 157 bool isIRLevelProfile() const override { return IsIRLevelProfile; } 158 159 /// Read the header. 160 Error readHeader() override; 161 162 /// Read a single record. 163 Error readNextRecord(InstrProfRecord &Record) override; 164 165 InstrProfSymtab &getSymtab() override { 166 assert(Symtab.get()); 167 return *Symtab.get(); 168 } 169 }; 170 171 /// Reader for the raw instrprof binary format from runtime. 172 /// 173 /// This format is a raw memory dump of the instrumentation-baed profiling data 174 /// from the runtime. It has no index. 175 /// 176 /// Templated on the unsigned type whose size matches pointers on the platform 177 /// that wrote the profile. 178 template <class IntPtrT> 179 class RawInstrProfReader : public InstrProfReader { 180 private: 181 /// The profile data file contents. 182 std::unique_ptr<MemoryBuffer> DataBuffer; 183 bool ShouldSwapBytes; 184 // The value of the version field of the raw profile data header. The lower 56 185 // bits specifies the format version and the most significant 8 bits specify 186 // the variant types of the profile. 187 uint64_t Version; 188 uint64_t CountersDelta; 189 uint64_t NamesDelta; 190 const RawInstrProf::ProfileData<IntPtrT> *Data; 191 const RawInstrProf::ProfileData<IntPtrT> *DataEnd; 192 const uint64_t *CountersStart; 193 const char *NamesStart; 194 uint64_t NamesSize; 195 // After value profile is all read, this pointer points to 196 // the header of next profile data (if exists) 197 const uint8_t *ValueDataStart; 198 uint32_t ValueKindLast; 199 uint32_t CurValueDataSize; 200 201 InstrProfRecord::ValueMapType FunctionPtrToNameMap; 202 203 public: 204 RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) 205 : DataBuffer(std::move(DataBuffer)) { } 206 RawInstrProfReader(const RawInstrProfReader &) = delete; 207 RawInstrProfReader &operator=(const RawInstrProfReader &) = delete; 208 209 static bool hasFormat(const MemoryBuffer &DataBuffer); 210 Error readHeader() override; 211 Error readNextRecord(InstrProfRecord &Record) override; 212 213 bool isIRLevelProfile() const override { 214 return (Version & VARIANT_MASK_IR_PROF) != 0; 215 } 216 217 InstrProfSymtab &getSymtab() override { 218 assert(Symtab.get()); 219 return *Symtab.get(); 220 } 221 222 private: 223 Error createSymtab(InstrProfSymtab &Symtab); 224 Error readNextHeader(const char *CurrentPos); 225 Error readHeader(const RawInstrProf::Header &Header); 226 227 template <class IntT> IntT swap(IntT Int) const { 228 return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int; 229 } 230 231 support::endianness getDataEndianness() const { 232 support::endianness HostEndian = getHostEndianness(); 233 if (!ShouldSwapBytes) 234 return HostEndian; 235 if (HostEndian == support::little) 236 return support::big; 237 else 238 return support::little; 239 } 240 241 inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) { 242 return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t)); 243 } 244 245 Error readName(InstrProfRecord &Record); 246 Error readFuncHash(InstrProfRecord &Record); 247 Error readRawCounts(InstrProfRecord &Record); 248 Error readValueProfilingData(InstrProfRecord &Record); 249 bool atEnd() const { return Data == DataEnd; } 250 251 void advanceData() { 252 Data++; 253 ValueDataStart += CurValueDataSize; 254 } 255 256 const char *getNextHeaderPos() const { 257 assert(atEnd()); 258 return (const char *)ValueDataStart; 259 } 260 261 const uint64_t *getCounter(IntPtrT CounterPtr) const { 262 ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); 263 return CountersStart + Offset; 264 } 265 266 StringRef getName(uint64_t NameRef) const { 267 return Symtab->getFuncName(swap(NameRef)); 268 } 269 }; 270 271 typedef RawInstrProfReader<uint32_t> RawInstrProfReader32; 272 typedef RawInstrProfReader<uint64_t> RawInstrProfReader64; 273 274 namespace IndexedInstrProf { 275 276 enum class HashT : uint32_t; 277 278 } // end namespace IndexedInstrProf 279 280 /// Trait for lookups into the on-disk hash table for the binary instrprof 281 /// format. 282 class InstrProfLookupTrait { 283 std::vector<InstrProfRecord> DataBuffer; 284 IndexedInstrProf::HashT HashType; 285 unsigned FormatVersion; 286 // Endianness of the input value profile data. 287 // It should be LE by default, but can be changed 288 // for testing purpose. 289 support::endianness ValueProfDataEndianness = support::little; 290 291 public: 292 InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) 293 : HashType(HashType), FormatVersion(FormatVersion) {} 294 295 typedef ArrayRef<InstrProfRecord> data_type; 296 297 typedef StringRef internal_key_type; 298 typedef StringRef external_key_type; 299 typedef uint64_t hash_value_type; 300 typedef uint64_t offset_type; 301 302 static bool EqualKey(StringRef A, StringRef B) { return A == B; } 303 static StringRef GetInternalKey(StringRef K) { return K; } 304 static StringRef GetExternalKey(StringRef K) { return K; } 305 306 hash_value_type ComputeHash(StringRef K); 307 308 static std::pair<offset_type, offset_type> 309 ReadKeyDataLength(const unsigned char *&D) { 310 using namespace support; 311 312 offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D); 313 offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D); 314 return std::make_pair(KeyLen, DataLen); 315 } 316 317 StringRef ReadKey(const unsigned char *D, offset_type N) { 318 return StringRef((const char *)D, N); 319 } 320 321 bool readValueProfilingData(const unsigned char *&D, 322 const unsigned char *const End); 323 data_type ReadData(StringRef K, const unsigned char *D, offset_type N); 324 325 // Used for testing purpose only. 326 void setValueProfDataEndianness(support::endianness Endianness) { 327 ValueProfDataEndianness = Endianness; 328 } 329 }; 330 331 struct InstrProfReaderIndexBase { 332 virtual ~InstrProfReaderIndexBase() = default; 333 334 // Read all the profile records with the same key pointed to the current 335 // iterator. 336 virtual Error getRecords(ArrayRef<InstrProfRecord> &Data) = 0; 337 338 // Read all the profile records with the key equal to FuncName 339 virtual Error getRecords(StringRef FuncName, 340 ArrayRef<InstrProfRecord> &Data) = 0; 341 virtual void advanceToNextKey() = 0; 342 virtual bool atEnd() const = 0; 343 virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; 344 virtual uint64_t getVersion() const = 0; 345 virtual bool isIRLevelProfile() const = 0; 346 virtual void populateSymtab(InstrProfSymtab &) = 0; 347 }; 348 349 typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait> 350 OnDiskHashTableImplV3; 351 352 template <typename HashTableImpl> 353 class InstrProfReaderIndex : public InstrProfReaderIndexBase { 354 355 private: 356 std::unique_ptr<HashTableImpl> HashTable; 357 typename HashTableImpl::data_iterator RecordIterator; 358 uint64_t FormatVersion; 359 360 public: 361 InstrProfReaderIndex(const unsigned char *Buckets, 362 const unsigned char *const Payload, 363 const unsigned char *const Base, 364 IndexedInstrProf::HashT HashType, uint64_t Version); 365 ~InstrProfReaderIndex() override = default; 366 367 Error getRecords(ArrayRef<InstrProfRecord> &Data) override; 368 Error getRecords(StringRef FuncName, 369 ArrayRef<InstrProfRecord> &Data) override; 370 void advanceToNextKey() override { RecordIterator++; } 371 372 bool atEnd() const override { 373 return RecordIterator == HashTable->data_end(); 374 } 375 376 void setValueProfDataEndianness(support::endianness Endianness) override { 377 HashTable->getInfoObj().setValueProfDataEndianness(Endianness); 378 } 379 380 uint64_t getVersion() const override { return GET_VERSION(FormatVersion); } 381 382 bool isIRLevelProfile() const override { 383 return (FormatVersion & VARIANT_MASK_IR_PROF) != 0; 384 } 385 386 void populateSymtab(InstrProfSymtab &Symtab) override { 387 Symtab.create(HashTable->keys()); 388 } 389 }; 390 391 /// Reader for the indexed binary instrprof format. 392 class IndexedInstrProfReader : public InstrProfReader { 393 private: 394 /// The profile data file contents. 395 std::unique_ptr<MemoryBuffer> DataBuffer; 396 /// The index into the profile data. 397 std::unique_ptr<InstrProfReaderIndexBase> Index; 398 /// Profile summary data. 399 std::unique_ptr<ProfileSummary> Summary; 400 401 // Read the profile summary. Return a pointer pointing to one byte past the 402 // end of the summary data if it exists or the input \c Cur. 403 const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, 404 const unsigned char *Cur); 405 406 public: 407 IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) 408 : DataBuffer(std::move(DataBuffer)) {} 409 IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; 410 IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; 411 412 /// Return the profile version. 413 uint64_t getVersion() const { return Index->getVersion(); } 414 bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); } 415 416 /// Return true if the given buffer is in an indexed instrprof format. 417 static bool hasFormat(const MemoryBuffer &DataBuffer); 418 419 /// Read the file header. 420 Error readHeader() override; 421 /// Read a single record. 422 Error readNextRecord(InstrProfRecord &Record) override; 423 424 /// Return the pointer to InstrProfRecord associated with FuncName 425 /// and FuncHash 426 Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName, 427 uint64_t FuncHash); 428 429 /// Fill Counts with the profile data for the given function name. 430 Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, 431 std::vector<uint64_t> &Counts); 432 433 /// Return the maximum of all known function counts. 434 uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); } 435 436 /// Factory method to create an indexed reader. 437 static Expected<std::unique_ptr<IndexedInstrProfReader>> 438 create(const Twine &Path); 439 440 static Expected<std::unique_ptr<IndexedInstrProfReader>> 441 create(std::unique_ptr<MemoryBuffer> Buffer); 442 443 // Used for testing purpose only. 444 void setValueProfDataEndianness(support::endianness Endianness) { 445 Index->setValueProfDataEndianness(Endianness); 446 } 447 448 // See description in the base class. This interface is designed 449 // to be used by llvm-profdata (for dumping). Avoid using this when 450 // the client is the compiler. 451 InstrProfSymtab &getSymtab() override; 452 ProfileSummary &getSummary() { return *(Summary.get()); } 453 }; 454 455 } // end namespace llvm 456 457 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H 458