1 //=-- InstrProf.h - Instrumented profiling format support ---------*- C++ -*-=// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Instrumentation-based profiling data is generated by instrumented 11 // binaries through library functions in compiler-rt, and read by the clang 12 // frontend to feed PGO. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_PROFILEDATA_INSTRPROF_H_ 17 #define LLVM_PROFILEDATA_INSTRPROF_H_ 18 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/IR/GlobalValue.h" 23 #include "llvm/ProfileData/InstrProfData.inc" 24 #include "llvm/Support/Endian.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include "llvm/Support/ErrorOr.h" 27 #include "llvm/Support/MD5.h" 28 #include <cstdint> 29 #include <list> 30 #include <system_error> 31 #include <vector> 32 33 #define INSTR_PROF_INDEX_VERSION 3 34 namespace llvm { 35 36 class Function; 37 class GlobalVariable; 38 class Module; 39 40 /// Return the name of data section containing profile counter variables. 41 inline StringRef getInstrProfCountersSectionName(bool AddSegment) { 42 return AddSegment ? "__DATA," INSTR_PROF_CNTS_SECT_NAME_STR 43 : INSTR_PROF_CNTS_SECT_NAME_STR; 44 } 45 46 /// Return the name of data section containing names of instrumented 47 /// functions. 48 inline StringRef getInstrProfNameSectionName(bool AddSegment) { 49 return AddSegment ? "__DATA," INSTR_PROF_NAME_SECT_NAME_STR 50 : INSTR_PROF_NAME_SECT_NAME_STR; 51 } 52 53 /// Return the name of the data section containing per-function control 54 /// data. 55 inline StringRef getInstrProfDataSectionName(bool AddSegment) { 56 return AddSegment ? 
"__DATA," INSTR_PROF_DATA_SECT_NAME_STR 57 : INSTR_PROF_DATA_SECT_NAME_STR; 58 } 59 60 /// Return the name profile runtime entry point to do value profiling 61 /// for a given site. 62 inline StringRef getInstrProfValueProfFuncName() { 63 return INSTR_PROF_VALUE_PROF_FUNC_STR; 64 } 65 66 /// Return the name of the section containing function coverage mapping 67 /// data. 68 inline StringRef getInstrProfCoverageSectionName(bool AddSegment) { 69 return AddSegment ? "__DATA,__llvm_covmap" : "__llvm_covmap"; 70 } 71 72 /// Return the name prefix of variables containing instrumented function names. 73 inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } 74 75 /// Return the name prefix of variables containing per-function control data. 76 inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; } 77 78 /// Return the name prefix of profile counter variables. 79 inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; } 80 81 /// Return the name prefix of the COMDAT group for instrumentation variables 82 /// associated with a COMDAT function. 83 inline StringRef getInstrProfComdatPrefix() { return "__profv_"; } 84 85 /// Return the name of a covarage mapping variable (internal linkage) 86 /// for each instrumented source module. Such variables are allocated 87 /// in the __llvm_covmap section. 88 inline StringRef getCoverageMappingVarName() { 89 return "__llvm_coverage_mapping"; 90 } 91 92 /// Return the name of function that registers all the per-function control 93 /// data at program startup time by calling __llvm_register_function. This 94 /// function has internal linkage and is called by __llvm_profile_init 95 /// runtime method. This function is not generated for these platforms: 96 /// Darwin, Linux, and FreeBSD. 
97 inline StringRef getInstrProfRegFuncsName() { 98 return "__llvm_profile_register_functions"; 99 } 100 101 /// Return the name of the runtime interface that registers per-function control 102 /// data for one instrumented function. 103 inline StringRef getInstrProfRegFuncName() { 104 return "__llvm_profile_register_function"; 105 } 106 107 /// Return the name of the runtime initialization method that is generated by 108 /// the compiler. The function calls __llvm_profile_register_functions and 109 /// __llvm_profile_override_default_filename functions if needed. This function 110 /// has internal linkage and invoked at startup time via init_array. 111 inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; } 112 113 /// Return the name of the hook variable defined in profile runtime library. 114 /// A reference to the variable causes the linker to link in the runtime 115 /// initialization module (which defines the hook variable). 116 inline StringRef getInstrProfRuntimeHookVarName() { 117 return "__llvm_profile_runtime"; 118 } 119 120 /// Return the name of the compiler generated function that references the 121 /// runtime hook variable. The function is a weak global. 122 inline StringRef getInstrProfRuntimeHookVarUseFuncName() { 123 return "__llvm_profile_runtime_user"; 124 } 125 126 /// Return the name of the profile runtime interface that overrides the default 127 /// profile data file name. 128 inline StringRef getInstrProfFileOverriderFuncName() { 129 return "__llvm_profile_override_default_filename"; 130 } 131 132 /// Return the modified name for function \c F suitable to be 133 /// used the key for profile lookup. 134 std::string getPGOFuncName(const Function &F, 135 uint64_t Version = INSTR_PROF_INDEX_VERSION); 136 137 /// Return the modified name for a function suitable to be 138 /// used the key for profile lookup. The function's original 139 /// name is \c RawFuncName and has linkage of type \c Linkage. 
140 /// The function is defined in module \c FileName. 141 std::string getPGOFuncName(StringRef RawFuncName, 142 GlobalValue::LinkageTypes Linkage, 143 StringRef FileName, 144 uint64_t Version = INSTR_PROF_INDEX_VERSION); 145 146 /// Create and return the global variable for function name used in PGO 147 /// instrumentation. \c FuncName is the name of the function returned 148 /// by \c getPGOFuncName call. 149 GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName); 150 151 /// Create and return the global variable for function name used in PGO 152 /// instrumentation. /// \c FuncName is the name of the function 153 /// returned by \c getPGOFuncName call, \c M is the owning module, 154 /// and \c Linkage is the linkage of the instrumented function. 155 GlobalVariable *createPGOFuncNameVar(Module &M, 156 GlobalValue::LinkageTypes Linkage, 157 StringRef FuncName); 158 159 /// Given a PGO function name, remove the filename prefix and return 160 /// the original (static) function name. 161 StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName); 162 163 const std::error_category &instrprof_category(); 164 165 enum class instrprof_error { 166 success = 0, 167 eof, 168 unrecognized_format, 169 bad_magic, 170 bad_header, 171 unsupported_version, 172 unsupported_hash_type, 173 too_large, 174 truncated, 175 malformed, 176 unknown_function, 177 hash_mismatch, 178 count_mismatch, 179 counter_overflow, 180 value_site_count_mismatch 181 }; 182 183 inline std::error_code make_error_code(instrprof_error E) { 184 return std::error_code(static_cast<int>(E), instrprof_category()); 185 } 186 187 inline instrprof_error MergeResult(instrprof_error &Accumulator, 188 instrprof_error Result) { 189 // Prefer first error encountered as later errors may be secondary effects of 190 // the initial problem. 
191 if (Accumulator == instrprof_error::success && 192 Result != instrprof_error::success) 193 Accumulator = Result; 194 return Accumulator; 195 } 196 197 enum InstrProfValueKind : uint32_t { 198 #define VALUE_PROF_KIND(Enumerator, Value) Enumerator = Value, 199 #include "llvm/ProfileData/InstrProfData.inc" 200 }; 201 202 namespace object { 203 class SectionRef; 204 } 205 206 namespace IndexedInstrProf { 207 uint64_t ComputeHash(StringRef K); 208 } 209 210 /// A symbol table used for function PGO name look-up with keys 211 /// (such as pointers, md5hash values) to the function. A function's 212 /// PGO name or name's md5hash are used in retrieving the profile 213 /// data of the function. See \c getPGOFuncName() method for details 214 /// on how PGO name is formed. 215 class InstrProfSymtab { 216 public: 217 typedef std::vector<std::pair<uint64_t, uint64_t>> AddrHashMap; 218 219 private: 220 StringRef Data; 221 uint64_t Address; 222 // A map from MD5 hash keys to function name strings. 223 std::vector<std::pair<uint64_t, std::string>> HashNameMap; 224 // A map from function runtime address to function name MD5 hash. 225 // This map is only populated and used by raw instr profile reader. 226 AddrHashMap AddrToMD5Map; 227 228 public: 229 InstrProfSymtab() : Data(), Address(0), HashNameMap(), AddrToMD5Map() {} 230 231 /// Create InstrProfSymtab from an object file section which 232 /// contains function PGO names that are uncompressed. 233 /// This interface is used by CoverageMappingReader. 234 std::error_code create(object::SectionRef &Section); 235 /// This interface is used by reader of CoverageMapping test 236 /// format. 237 inline std::error_code create(StringRef D, uint64_t BaseAddr); 238 /// Create InstrProfSymtab from a set of names iteratable from 239 /// \p IterRange. This interface is used by IndexedProfReader. 
240 template <typename NameIterRange> void create(const NameIterRange &IterRange); 241 // If the symtab is created by a series of calls to \c addFuncName, \c 242 // finalizeSymtab needs to be called before looking up function names. 243 // This is required because the underlying map is a vector (for space 244 // efficiency) which needs to be sorted. 245 inline void finalizeSymtab(); 246 /// Update the symtab by adding \p FuncName to the table. This interface 247 /// is used by the raw and text profile readers. 248 void addFuncName(StringRef FuncName) { 249 HashNameMap.push_back(std::make_pair( 250 IndexedInstrProf::ComputeHash(FuncName), FuncName.str())); 251 } 252 /// Map a function address to its name's MD5 hash. This interface 253 /// is only used by the raw profiler reader. 254 void mapAddress(uint64_t Addr, uint64_t MD5Val) { 255 AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val)); 256 } 257 AddrHashMap &getAddrHashMap() { return AddrToMD5Map; } 258 /// Return function's PGO name from the function name's symabol 259 /// address in the object file. If an error occurs, Return 260 /// an empty string. 261 StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize); 262 /// Return function's PGO name from the name's md5 hash value. 263 /// If not found, return an empty string. 
264 inline StringRef getFuncName(uint64_t FuncMD5Hash); 265 }; 266 267 std::error_code InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) { 268 Data = D; 269 Address = BaseAddr; 270 return std::error_code(); 271 } 272 273 template <typename NameIterRange> 274 void InstrProfSymtab::create(const NameIterRange &IterRange) { 275 for (auto Name : IterRange) 276 HashNameMap.push_back( 277 std::make_pair(IndexedInstrProf::ComputeHash(Name), Name.str())); 278 finalizeSymtab(); 279 } 280 281 void InstrProfSymtab::finalizeSymtab() { 282 std::sort(HashNameMap.begin(), HashNameMap.end(), less_first()); 283 HashNameMap.erase(std::unique(HashNameMap.begin(), HashNameMap.end()), 284 HashNameMap.end()); 285 std::sort(AddrToMD5Map.begin(), AddrToMD5Map.end(), less_first()); 286 AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()), 287 AddrToMD5Map.end()); 288 } 289 290 StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) { 291 auto Result = 292 std::lower_bound(HashNameMap.begin(), HashNameMap.end(), FuncMD5Hash, 293 [](const std::pair<uint64_t, std::string> &LHS, 294 uint64_t RHS) { return LHS.first < RHS; }); 295 if (Result != HashNameMap.end()) 296 return Result->second; 297 return StringRef(); 298 } 299 300 struct InstrProfValueSiteRecord { 301 /// Value profiling data pairs at a given value site. 302 std::list<InstrProfValueData> ValueData; 303 304 InstrProfValueSiteRecord() { ValueData.clear(); } 305 template <class InputIterator> 306 InstrProfValueSiteRecord(InputIterator F, InputIterator L) 307 : ValueData(F, L) {} 308 309 /// Sort ValueData ascending by Value 310 void sortByTargetValues() { 311 ValueData.sort( 312 [](const InstrProfValueData &left, const InstrProfValueData &right) { 313 return left.Value < right.Value; 314 }); 315 } 316 317 /// Merge data from another InstrProfValueSiteRecord 318 /// Optionally scale merged counts by \p Weight. 
319 instrprof_error mergeValueData(InstrProfValueSiteRecord &Input, 320 uint64_t Weight = 1); 321 }; 322 323 /// Profiling information for a single function. 324 struct InstrProfRecord { 325 InstrProfRecord() {} 326 InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t> Counts) 327 : Name(Name), Hash(Hash), Counts(std::move(Counts)) {} 328 StringRef Name; 329 uint64_t Hash; 330 std::vector<uint64_t> Counts; 331 332 typedef std::vector<std::pair<uint64_t, uint64_t>> ValueMapType; 333 334 /// Return the number of value profile kinds with non-zero number 335 /// of profile sites. 336 inline uint32_t getNumValueKinds() const; 337 /// Return the number of instrumented sites for ValueKind. 338 inline uint32_t getNumValueSites(uint32_t ValueKind) const; 339 /// Return the total number of ValueData for ValueKind. 340 inline uint32_t getNumValueData(uint32_t ValueKind) const; 341 /// Return the number of value data collected for ValueKind at profiling 342 /// site: Site. 343 inline uint32_t getNumValueDataForSite(uint32_t ValueKind, 344 uint32_t Site) const; 345 /// Return the array of profiled values at \p Site. 346 inline std::unique_ptr<InstrProfValueData[]> 347 getValueForSite(uint32_t ValueKind, uint32_t Site, 348 uint64_t (*ValueMapper)(uint32_t, uint64_t) = 0) const; 349 inline void 350 getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind, uint32_t Site, 351 uint64_t (*ValueMapper)(uint32_t, uint64_t) = 0) const; 352 /// Reserve space for NumValueSites sites. 353 inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites); 354 /// Add ValueData for ValueKind at value Site. 355 void addValueData(uint32_t ValueKind, uint32_t Site, 356 InstrProfValueData *VData, uint32_t N, 357 ValueMapType *ValueMap); 358 359 /// Merge the counts in \p Other into this one. 360 /// Optionally scale merged counts by \p Weight. 
361 instrprof_error merge(InstrProfRecord &Other, uint64_t Weight = 1); 362 363 /// Clear value data entries 364 void clearValueData() { 365 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 366 getValueSitesForKind(Kind).clear(); 367 } 368 369 private: 370 std::vector<InstrProfValueSiteRecord> IndirectCallSites; 371 const std::vector<InstrProfValueSiteRecord> & 372 getValueSitesForKind(uint32_t ValueKind) const { 373 switch (ValueKind) { 374 case IPVK_IndirectCallTarget: 375 return IndirectCallSites; 376 default: 377 llvm_unreachable("Unknown value kind!"); 378 } 379 return IndirectCallSites; 380 } 381 382 std::vector<InstrProfValueSiteRecord> & 383 getValueSitesForKind(uint32_t ValueKind) { 384 return const_cast<std::vector<InstrProfValueSiteRecord> &>( 385 const_cast<const InstrProfRecord *>(this) 386 ->getValueSitesForKind(ValueKind)); 387 } 388 389 // Map indirect call target name hash to name string. 390 uint64_t remapValue(uint64_t Value, uint32_t ValueKind, 391 ValueMapType *HashKeys); 392 393 // Merge Value Profile data from Src record to this record for ValueKind. 394 // Scale merged value counts by \p Weight. 
395 instrprof_error mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src, 396 uint64_t Weight); 397 }; 398 399 uint32_t InstrProfRecord::getNumValueKinds() const { 400 uint32_t NumValueKinds = 0; 401 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 402 NumValueKinds += !(getValueSitesForKind(Kind).empty()); 403 return NumValueKinds; 404 } 405 406 uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const { 407 uint32_t N = 0; 408 const std::vector<InstrProfValueSiteRecord> &SiteRecords = 409 getValueSitesForKind(ValueKind); 410 for (auto &SR : SiteRecords) { 411 N += SR.ValueData.size(); 412 } 413 return N; 414 } 415 416 uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const { 417 return getValueSitesForKind(ValueKind).size(); 418 } 419 420 uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind, 421 uint32_t Site) const { 422 return getValueSitesForKind(ValueKind)[Site].ValueData.size(); 423 } 424 425 std::unique_ptr<InstrProfValueData[]> InstrProfRecord::getValueForSite( 426 uint32_t ValueKind, uint32_t Site, 427 uint64_t (*ValueMapper)(uint32_t, uint64_t)) const { 428 uint32_t N = getNumValueDataForSite(ValueKind, Site); 429 if (N == 0) 430 return std::unique_ptr<InstrProfValueData[]>(nullptr); 431 432 auto VD = llvm::make_unique<InstrProfValueData[]>(N); 433 getValueForSite(VD.get(), ValueKind, Site, ValueMapper); 434 435 return VD; 436 } 437 438 void InstrProfRecord::getValueForSite(InstrProfValueData Dest[], 439 uint32_t ValueKind, uint32_t Site, 440 uint64_t (*ValueMapper)(uint32_t, 441 uint64_t)) const { 442 uint32_t I = 0; 443 for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) { 444 Dest[I].Value = ValueMapper ? 
ValueMapper(ValueKind, V.Value) : V.Value; 445 Dest[I].Count = V.Count; 446 I++; 447 } 448 } 449 450 void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) { 451 std::vector<InstrProfValueSiteRecord> &ValueSites = 452 getValueSitesForKind(ValueKind); 453 ValueSites.reserve(NumValueSites); 454 } 455 456 inline support::endianness getHostEndianness() { 457 return sys::IsLittleEndianHost ? support::little : support::big; 458 } 459 460 // Include definitions for value profile data 461 #define INSTR_PROF_VALUE_PROF_DATA 462 #include "llvm/ProfileData/InstrProfData.inc" 463 464 /* 465 * Initialize the record for runtime value profile data. 466 * Return 0 if the initialization is successful, otherwise 467 * return 1. 468 */ 469 int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord, 470 const uint16_t *NumValueSites, 471 ValueProfNode **Nodes); 472 473 /* Release memory allocated for the runtime record. */ 474 void finalizeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord); 475 476 /* Return the size of ValueProfData structure that can be used to store 477 the value profile data collected at runtime. */ 478 uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record); 479 480 /* Return a ValueProfData instance that stores the data collected at runtime. */ 481 ValueProfData * 482 serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record, 483 ValueProfData *Dst); 484 485 namespace IndexedInstrProf { 486 487 enum class HashT : uint32_t { 488 MD5, 489 490 Last = MD5 491 }; 492 493 static inline uint64_t MD5Hash(StringRef Str) { 494 MD5 Hash; 495 Hash.update(Str); 496 llvm::MD5::MD5Result Result; 497 Hash.final(Result); 498 // Return the least significant 8 bytes. Our MD5 implementation returns the 499 // result in little endian, so we may need to swap bytes. 
500 using namespace llvm::support; 501 return endian::read<uint64_t, little, unaligned>(Result); 502 } 503 504 inline uint64_t ComputeHash(HashT Type, StringRef K) { 505 switch (Type) { 506 case HashT::MD5: 507 return IndexedInstrProf::MD5Hash(K); 508 } 509 llvm_unreachable("Unhandled hash type"); 510 } 511 512 const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" 513 const uint64_t Version = INSTR_PROF_INDEX_VERSION; 514 const HashT HashType = HashT::MD5; 515 516 inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); } 517 518 // This structure defines the file header of the LLVM profile 519 // data file in indexed-format. 520 struct Header { 521 uint64_t Magic; 522 uint64_t Version; 523 uint64_t MaxFunctionCount; 524 uint64_t HashType; 525 uint64_t HashOffset; 526 }; 527 528 } // end namespace IndexedInstrProf 529 530 namespace RawInstrProf { 531 532 const uint64_t Version = INSTR_PROF_RAW_VERSION; 533 534 template <class IntPtrT> inline uint64_t getMagic(); 535 template <> inline uint64_t getMagic<uint64_t>() { 536 return INSTR_PROF_RAW_MAGIC_64; 537 } 538 539 template <> inline uint64_t getMagic<uint32_t>() { 540 return INSTR_PROF_RAW_MAGIC_32; 541 } 542 543 // Per-function profile data header/control structure. 544 // The definition should match the structure defined in 545 // compiler-rt/lib/profile/InstrProfiling.h. 546 // It should also match the synthesized type in 547 // Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters. 548 template <class IntPtrT> struct LLVM_ALIGNAS(8) ProfileData { 549 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name; 550 #include "llvm/ProfileData/InstrProfData.inc" 551 }; 552 553 // File header structure of the LLVM profile data in raw format. 554 // The definition should match the header referenced in 555 // compiler-rt/lib/profile/InstrProfilingFile.c and 556 // InstrProfilingBuffer.c. 
struct Header {
// Each INSTR_PROF_RAW_HEADER entry pulled in from InstrProfData.inc expands to
// one const member of type Type named Name; the Init argument is unused here.
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};

} // end namespace RawInstrProf

namespace coverage {

// Profile coverage map has the following layout:
// [CoverageMapFileHeader]
// [ArrayStart]
//  [CovMapFunctionRecord]
//  [CovMapFunctionRecord]
//  ...
// [ArrayEnd]
// [Encoded Region Mapping Data]
LLVM_PACKED_START
template <class IntPtrT> struct CovMapFunctionRecord {
// Each COVMAP_FUNC_RECORD entry pulled in from InstrProfData.inc expands to one
// member of type Type named Name; LLVMType and Init are unused here.
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};
LLVM_PACKED_END

} // end namespace coverage

} // end namespace llvm

namespace std {
// Mark llvm::instrprof_error as an error-code enumeration for <system_error>.
template <>
struct is_error_code_enum<llvm::instrprof_error> : std::true_type {};
} // end namespace std

#endif // LLVM_PROFILEDATA_INSTRPROF_H_