1 //===- InstrProf.h - Instrumented profiling format support ------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Instrumentation-based profiling data is generated by instrumented 11 // binaries through library functions in compiler-rt, and read by the clang 12 // frontend to feed PGO. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_PROFILEDATA_INSTRPROF_H 17 #define LLVM_PROFILEDATA_INSTRPROF_H 18 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/IR/GlobalValue.h" 24 #include "llvm/IR/ProfileSummary.h" 25 #include "llvm/ProfileData/InstrProfData.inc" 26 #include "llvm/Support/Compiler.h" 27 #include "llvm/Support/Endian.h" 28 #include "llvm/Support/Error.h" 29 #include "llvm/Support/ErrorHandling.h" 30 #include "llvm/Support/Host.h" 31 #include "llvm/Support/MD5.h" 32 #include "llvm/Support/MathExtras.h" 33 #include "llvm/Support/raw_ostream.h" 34 #include <algorithm> 35 #include <cassert> 36 #include <cstddef> 37 #include <cstdint> 38 #include <cstring> 39 #include <list> 40 #include <memory> 41 #include <string> 42 #include <system_error> 43 #include <utility> 44 #include <vector> 45 46 namespace llvm { 47 48 class Function; 49 class GlobalVariable; 50 struct InstrProfRecord; 51 class InstrProfSymtab; 52 class Instruction; 53 class MDNode; 54 class Module; 55 56 /// Return the name of data section containing profile counter variables. 57 inline StringRef getInstrProfCountersSectionName(bool AddSegment) { 58 return AddSegment ? 
"__DATA," INSTR_PROF_CNTS_SECT_NAME_STR 59 : INSTR_PROF_CNTS_SECT_NAME_STR; 60 } 61 62 /// Return the name of data section containing names of instrumented 63 /// functions. 64 inline StringRef getInstrProfNameSectionName(bool AddSegment) { 65 return AddSegment ? "__DATA," INSTR_PROF_NAME_SECT_NAME_STR 66 : INSTR_PROF_NAME_SECT_NAME_STR; 67 } 68 69 /// Return the name of the data section containing per-function control 70 /// data. 71 inline StringRef getInstrProfDataSectionName(bool AddSegment) { 72 return AddSegment ? "__DATA," INSTR_PROF_DATA_SECT_NAME_STR 73 ",regular,live_support" 74 : INSTR_PROF_DATA_SECT_NAME_STR; 75 } 76 77 /// Return the name of data section containing pointers to value profile 78 /// counters/nodes. 79 inline StringRef getInstrProfValuesSectionName(bool AddSegment) { 80 return AddSegment ? "__DATA," INSTR_PROF_VALS_SECT_NAME_STR 81 : INSTR_PROF_VALS_SECT_NAME_STR; 82 } 83 84 /// Return the name of data section containing nodes holdling value 85 /// profiling data. 86 inline StringRef getInstrProfVNodesSectionName(bool AddSegment) { 87 return AddSegment ? "__DATA," INSTR_PROF_VNODES_SECT_NAME_STR 88 : INSTR_PROF_VNODES_SECT_NAME_STR; 89 } 90 91 /// Return the name profile runtime entry point to do value profiling 92 /// for a given site. 93 inline StringRef getInstrProfValueProfFuncName() { 94 return INSTR_PROF_VALUE_PROF_FUNC_STR; 95 } 96 97 /// Return the name profile runtime entry point to do value range profiling. 98 inline StringRef getInstrProfValueRangeProfFuncName() { 99 return INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR; 100 } 101 102 /// Return the name of the section containing function coverage mapping 103 /// data. 104 inline StringRef getInstrProfCoverageSectionName(bool AddSegment) { 105 return AddSegment ? "__LLVM_COV," INSTR_PROF_COVMAP_SECT_NAME_STR 106 : INSTR_PROF_COVMAP_SECT_NAME_STR; 107 } 108 109 /// Return the name prefix of variables containing instrumented function names. 
110 inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } 111 112 /// Return the name prefix of variables containing per-function control data. 113 inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; } 114 115 /// Return the name prefix of profile counter variables. 116 inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; } 117 118 /// Return the name prefix of value profile variables. 119 inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; } 120 121 /// Return the name of value profile node array variables: 122 inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; } 123 124 /// Return the name prefix of the COMDAT group for instrumentation variables 125 /// associated with a COMDAT function. 126 inline StringRef getInstrProfComdatPrefix() { return "__profv_"; } 127 128 /// Return the name of the variable holding the strings (possibly compressed) 129 /// of all function's PGO names. 130 inline StringRef getInstrProfNamesVarName() { 131 return "__llvm_prf_nm"; 132 } 133 134 /// Return the name of a covarage mapping variable (internal linkage) 135 /// for each instrumented source module. Such variables are allocated 136 /// in the __llvm_covmap section. 137 inline StringRef getCoverageMappingVarName() { 138 return "__llvm_coverage_mapping"; 139 } 140 141 /// Return the name of the internal variable recording the array 142 /// of PGO name vars referenced by the coverage mapping. The owning 143 /// functions of those names are not emitted by FE (e.g, unused inline 144 /// functions.) 145 inline StringRef getCoverageUnusedNamesVarName() { 146 return "__llvm_coverage_names"; 147 } 148 149 /// Return the name of function that registers all the per-function control 150 /// data at program startup time by calling __llvm_register_function. This 151 /// function has internal linkage and is called by __llvm_profile_init 152 /// runtime method. 
This function is not generated for these platforms: 153 /// Darwin, Linux, and FreeBSD. 154 inline StringRef getInstrProfRegFuncsName() { 155 return "__llvm_profile_register_functions"; 156 } 157 158 /// Return the name of the runtime interface that registers per-function control 159 /// data for one instrumented function. 160 inline StringRef getInstrProfRegFuncName() { 161 return "__llvm_profile_register_function"; 162 } 163 164 /// Return the name of the runtime interface that registers the PGO name strings. 165 inline StringRef getInstrProfNamesRegFuncName() { 166 return "__llvm_profile_register_names_function"; 167 } 168 169 /// Return the name of the runtime initialization method that is generated by 170 /// the compiler. The function calls __llvm_profile_register_functions and 171 /// __llvm_profile_override_default_filename functions if needed. This function 172 /// has internal linkage and invoked at startup time via init_array. 173 inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; } 174 175 /// Return the name of the hook variable defined in profile runtime library. 176 /// A reference to the variable causes the linker to link in the runtime 177 /// initialization module (which defines the hook variable). 178 inline StringRef getInstrProfRuntimeHookVarName() { 179 return INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_RUNTIME_VAR); 180 } 181 182 /// Return the name of the compiler generated function that references the 183 /// runtime hook variable. The function is a weak global. 184 inline StringRef getInstrProfRuntimeHookVarUseFuncName() { 185 return "__llvm_profile_runtime_user"; 186 } 187 188 /// Return the marker used to separate PGO names during serialization. 189 inline StringRef getInstrProfNameSeparator() { return "\01"; } 190 191 /// Return the modified name for function \c F suitable to be 192 /// used the key for profile lookup. Variable \c InLTO indicates if this 193 /// is called in LTO optimization passes. 
std::string getPGOFuncName(const Function &F, bool InLTO = false,
                           uint64_t Version = INSTR_PROF_INDEX_VERSION);

/// Return the modified name for a function suitable to be
/// used as the key for profile lookup. The function's original
/// name is \c RawFuncName and has linkage of type \c Linkage.
/// The function is defined in module \c FileName.
std::string getPGOFuncName(StringRef RawFuncName,
                           GlobalValue::LinkageTypes Linkage,
                           StringRef FileName,
                           uint64_t Version = INSTR_PROF_INDEX_VERSION);

/// Return the name of the global variable used to store a function
/// name in PGO instrumentation. \c FuncName is the name of the function
/// returned by the \c getPGOFuncName call.
std::string getPGOFuncNameVarName(StringRef FuncName,
                                  GlobalValue::LinkageTypes Linkage);

/// Create and return the global variable for function name used in PGO
/// instrumentation. \c PGOFuncName is the name of the function returned
/// by the \c getPGOFuncName call.
GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName);

/// Create and return the global variable for function name used in PGO
/// instrumentation. \c PGOFuncName is the name of the function
/// returned by the \c getPGOFuncName call, \c M is the owning module,
/// and \c Linkage is the linkage of the instrumented function.
GlobalVariable *createPGOFuncNameVar(Module &M,
                                     GlobalValue::LinkageTypes Linkage,
                                     StringRef PGOFuncName);

/// Return the initializer, as a string, of the PGO name var \c NameVar.
StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar);

/// Given a PGO function name, remove the filename prefix and return
/// the original (static) function name.
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName,
                                   StringRef FileName = "<unknown>");

/// Given a vector of strings (function PGO names) \c NameStrs, the
/// method generates a combined string \c Result that is ready to be
/// serialized. The \c Result string is comprised of three fields:
/// The first field is the length of the uncompressed strings, and
/// the second field is the length of the zlib-compressed string.
/// Both fields are encoded in ULEB128. If \c doCompression is false, the
/// third field is the uncompressed strings; otherwise it is the
/// compressed string. When the string compression is off, the
/// second field will have value zero.
Error collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
                                bool doCompression, std::string &Result);

/// Produce \c Result string with the same format described above. The input
/// is a vector of PGO function name variables that are referenced.
Error collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
                                std::string &Result, bool doCompression = true);

/// \c NameStrings is a string composed of one or more sub-strings encoded in
/// the format described above. The substrings are separated by 0 or more zero
/// bytes. This method decodes the string and populates the \c Symtab.
Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);

/// Check if INSTR_PROF_RAW_VERSION_VAR is defined. This global is only being
/// set in IR PGO compilation.
bool isIRPGOFlagSet(const Module *M);

/// Check if we can safely rename this Comdat function. Instances of the same
/// comdat function may have different control flows thus can not share the
/// same counter variable.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken = false);

/// The kinds of value profile data. The enumerators and their values are
/// generated from the VALUE_PROF_KIND entries in InstrProfData.inc.
enum InstrProfValueKind : uint32_t {
#define VALUE_PROF_KIND(Enumerator, Value) Enumerator = Value,
#include "llvm/ProfileData/InstrProfData.inc"
};

/// Get the value profile data for value site \p SiteIndx from \p InstrProfR
/// and annotate the instruction \p Inst with the value profile meta data.
/// Annotate up to \p MaxMDCount (default 3) number of records per value site.
void annotateValueSite(Module &M, Instruction &Inst,
                       const InstrProfRecord &InstrProfR,
                       InstrProfValueKind ValueKind, uint32_t SiteIndx,
                       uint32_t MaxMDCount = 3);

/// Same as the above interface but using an ArrayRef, as well as \p Sum.
void annotateValueSite(Module &M, Instruction &Inst,
                       ArrayRef<InstrProfValueData> VDs,
                       uint64_t Sum, InstrProfValueKind ValueKind,
                       uint32_t MaxMDCount);

/// Extract the value profile data from \p Inst which is annotated with
/// value profile meta data. Return false if there is no value data annotated,
/// otherwise return true.
bool getValueProfDataFromInst(const Instruction &Inst,
                              InstrProfValueKind ValueKind,
                              uint32_t MaxNumValueData,
                              InstrProfValueData ValueData[],
                              uint32_t &ActualNumValueData, uint64_t &TotalC);

/// Return the name of the metadata kind that carries a function's PGO name.
inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; }

/// Return the PGOFuncName meta data associated with a function.
MDNode *getPGOFuncNameMetadata(const Function &F);

/// Create the PGOFuncName meta data if PGOFuncName is different from
/// function's raw name. This should only apply to internal linkage functions
/// declared by users only.
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);

/// Check if we can use Comdat for profile variables. This will eliminate
/// the duplicated profile variables for Comdat functions.
304 bool needsComdatForCounter(const Function &F, const Module &M); 305 306 const std::error_category &instrprof_category(); 307 308 enum class instrprof_error { 309 success = 0, 310 eof, 311 unrecognized_format, 312 bad_magic, 313 bad_header, 314 unsupported_version, 315 unsupported_hash_type, 316 too_large, 317 truncated, 318 malformed, 319 unknown_function, 320 hash_mismatch, 321 count_mismatch, 322 counter_overflow, 323 value_site_count_mismatch, 324 compress_failed, 325 uncompress_failed, 326 empty_raw_profile 327 }; 328 329 inline std::error_code make_error_code(instrprof_error E) { 330 return std::error_code(static_cast<int>(E), instrprof_category()); 331 } 332 333 class InstrProfError : public ErrorInfo<InstrProfError> { 334 public: 335 InstrProfError(instrprof_error Err) : Err(Err) { 336 assert(Err != instrprof_error::success && "Not an error"); 337 } 338 339 std::string message() const override; 340 341 void log(raw_ostream &OS) const override { OS << message(); } 342 343 std::error_code convertToErrorCode() const override { 344 return make_error_code(Err); 345 } 346 347 instrprof_error get() const { return Err; } 348 349 /// Consume an Error and return the raw enum value contained within it. The 350 /// Error must either be a success value, or contain a single InstrProfError. 351 static instrprof_error take(Error E) { 352 auto Err = instrprof_error::success; 353 handleAllErrors(std::move(E), [&Err](const InstrProfError &IPE) { 354 assert(Err == instrprof_error::success && "Multiple errors encountered"); 355 Err = IPE.get(); 356 }); 357 return Err; 358 } 359 360 static char ID; 361 362 private: 363 instrprof_error Err; 364 }; 365 366 class SoftInstrProfErrors { 367 /// Count the number of soft instrprof_errors encountered and keep track of 368 /// the first such error for reporting purposes. 369 370 /// The first soft error encountered. 371 instrprof_error FirstError = instrprof_error::success; 372 373 /// The number of hash mismatches. 
374 unsigned NumHashMismatches = 0; 375 376 /// The number of count mismatches. 377 unsigned NumCountMismatches = 0; 378 379 /// The number of counter overflows. 380 unsigned NumCounterOverflows = 0; 381 382 /// The number of value site count mismatches. 383 unsigned NumValueSiteCountMismatches = 0; 384 385 public: 386 SoftInstrProfErrors() = default; 387 388 ~SoftInstrProfErrors() { 389 assert(FirstError == instrprof_error::success && 390 "Unchecked soft error encountered"); 391 } 392 393 /// Track a soft error (\p IE) and increment its associated counter. 394 void addError(instrprof_error IE); 395 396 /// Get the number of hash mismatches. 397 unsigned getNumHashMismatches() const { return NumHashMismatches; } 398 399 /// Get the number of count mismatches. 400 unsigned getNumCountMismatches() const { return NumCountMismatches; } 401 402 /// Get the number of counter overflows. 403 unsigned getNumCounterOverflows() const { return NumCounterOverflows; } 404 405 /// Get the number of value site count mismatches. 406 unsigned getNumValueSiteCountMismatches() const { 407 return NumValueSiteCountMismatches; 408 } 409 410 /// Return the first encountered error and reset FirstError to a success 411 /// value. 412 Error takeError() { 413 if (FirstError == instrprof_error::success) 414 return Error::success(); 415 auto E = make_error<InstrProfError>(FirstError); 416 FirstError = instrprof_error::success; 417 return E; 418 } 419 }; 420 421 namespace object { 422 423 class SectionRef; 424 425 } // end namespace object 426 427 namespace IndexedInstrProf { 428 429 uint64_t ComputeHash(StringRef K); 430 431 } // end namespace IndexedInstrProf 432 433 /// A symbol table used for function PGO name look-up with keys 434 /// (such as pointers, md5hash values) to the function. A function's 435 /// PGO name or name's md5hash are used in retrieving the profile 436 /// data of the function. See \c getPGOFuncName() method for details 437 /// on how PGO name is formed. 
438 class InstrProfSymtab { 439 public: 440 typedef std::vector<std::pair<uint64_t, uint64_t>> AddrHashMap; 441 442 private: 443 StringRef Data; 444 uint64_t Address = 0; 445 // Unique name strings. 446 StringSet<> NameTab; 447 // A map from MD5 keys to function name strings. 448 std::vector<std::pair<uint64_t, StringRef>> MD5NameMap; 449 // A map from MD5 keys to function define. We only populate this map 450 // when build the Symtab from a Module. 451 std::vector<std::pair<uint64_t, Function *>> MD5FuncMap; 452 // A map from function runtime address to function name MD5 hash. 453 // This map is only populated and used by raw instr profile reader. 454 AddrHashMap AddrToMD5Map; 455 456 public: 457 InstrProfSymtab() = default; 458 459 /// Create InstrProfSymtab from an object file section which 460 /// contains function PGO names. When section may contain raw 461 /// string data or string data in compressed form. This method 462 /// only initialize the symtab with reference to the data and 463 /// the section base address. The decompression will be delayed 464 /// until before it is used. See also \c create(StringRef) method. 465 Error create(object::SectionRef &Section); 466 467 /// This interface is used by reader of CoverageMapping test 468 /// format. 469 inline Error create(StringRef D, uint64_t BaseAddr); 470 471 /// \c NameStrings is a string composed of one of more sub-strings 472 /// encoded in the format described in \c collectPGOFuncNameStrings. 473 /// This method is a wrapper to \c readPGOFuncNameStrings method. 474 inline Error create(StringRef NameStrings); 475 476 /// A wrapper interface to populate the PGO symtab with functions 477 /// decls from module \c M. This interface is used by transformation 478 /// passes such as indirect function call promotion. Variable \c InLTO 479 /// indicates if this is called from LTO optimization passes. 
480 void create(Module &M, bool InLTO = false); 481 482 /// Create InstrProfSymtab from a set of names iteratable from 483 /// \p IterRange. This interface is used by IndexedProfReader. 484 template <typename NameIterRange> void create(const NameIterRange &IterRange); 485 486 // If the symtab is created by a series of calls to \c addFuncName, \c 487 // finalizeSymtab needs to be called before looking up function names. 488 // This is required because the underlying map is a vector (for space 489 // efficiency) which needs to be sorted. 490 inline void finalizeSymtab(); 491 492 /// Update the symtab by adding \p FuncName to the table. This interface 493 /// is used by the raw and text profile readers. 494 void addFuncName(StringRef FuncName) { 495 auto Ins = NameTab.insert(FuncName); 496 if (Ins.second) 497 MD5NameMap.push_back(std::make_pair( 498 IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey())); 499 } 500 501 /// Map a function address to its name's MD5 hash. This interface 502 /// is only used by the raw profiler reader. 503 void mapAddress(uint64_t Addr, uint64_t MD5Val) { 504 AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val)); 505 } 506 507 AddrHashMap &getAddrHashMap() { return AddrToMD5Map; } 508 509 /// Return function's PGO name from the function name's symbol 510 /// address in the object file. If an error occurs, return 511 /// an empty string. 512 StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize); 513 514 /// Return function's PGO name from the name's md5 hash value. 515 /// If not found, return an empty string. 516 inline StringRef getFuncName(uint64_t FuncMD5Hash); 517 518 /// Return function from the name's md5 hash. Return nullptr if not found. 519 inline Function *getFunction(uint64_t FuncMD5Hash); 520 521 /// Return the function's original assembly name by stripping off 522 /// the prefix attached (to symbols with priviate linkage). For 523 /// global functions, it returns the same string as getFuncName. 
524 inline StringRef getOrigFuncName(uint64_t FuncMD5Hash); 525 526 /// Return the name section data. 527 inline StringRef getNameData() const { return Data; } 528 }; 529 530 Error InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) { 531 Data = D; 532 Address = BaseAddr; 533 return Error::success(); 534 } 535 536 Error InstrProfSymtab::create(StringRef NameStrings) { 537 return readPGOFuncNameStrings(NameStrings, *this); 538 } 539 540 template <typename NameIterRange> 541 void InstrProfSymtab::create(const NameIterRange &IterRange) { 542 for (auto Name : IterRange) 543 addFuncName(Name); 544 545 finalizeSymtab(); 546 } 547 548 void InstrProfSymtab::finalizeSymtab() { 549 std::sort(MD5NameMap.begin(), MD5NameMap.end(), less_first()); 550 std::sort(MD5FuncMap.begin(), MD5FuncMap.end(), less_first()); 551 std::sort(AddrToMD5Map.begin(), AddrToMD5Map.end(), less_first()); 552 AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()), 553 AddrToMD5Map.end()); 554 } 555 556 StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) { 557 auto Result = 558 std::lower_bound(MD5NameMap.begin(), MD5NameMap.end(), FuncMD5Hash, 559 [](const std::pair<uint64_t, std::string> &LHS, 560 uint64_t RHS) { return LHS.first < RHS; }); 561 if (Result != MD5NameMap.end() && Result->first == FuncMD5Hash) 562 return Result->second; 563 return StringRef(); 564 } 565 566 Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) { 567 auto Result = 568 std::lower_bound(MD5FuncMap.begin(), MD5FuncMap.end(), FuncMD5Hash, 569 [](const std::pair<uint64_t, Function*> &LHS, 570 uint64_t RHS) { return LHS.first < RHS; }); 571 if (Result != MD5FuncMap.end() && Result->first == FuncMD5Hash) 572 return Result->second; 573 return nullptr; 574 } 575 576 // See also getPGOFuncName implementation. These two need to be 577 // matched. 
578 StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) { 579 StringRef PGOName = getFuncName(FuncMD5Hash); 580 size_t S = PGOName.find_first_of(':'); 581 if (S == StringRef::npos) 582 return PGOName; 583 return PGOName.drop_front(S + 1); 584 } 585 586 struct InstrProfValueSiteRecord { 587 /// Value profiling data pairs at a given value site. 588 std::list<InstrProfValueData> ValueData; 589 590 InstrProfValueSiteRecord() { ValueData.clear(); } 591 template <class InputIterator> 592 InstrProfValueSiteRecord(InputIterator F, InputIterator L) 593 : ValueData(F, L) {} 594 595 /// Sort ValueData ascending by Value 596 void sortByTargetValues() { 597 ValueData.sort( 598 [](const InstrProfValueData &left, const InstrProfValueData &right) { 599 return left.Value < right.Value; 600 }); 601 } 602 /// Sort ValueData Descending by Count 603 inline void sortByCount(); 604 605 /// Merge data from another InstrProfValueSiteRecord 606 /// Optionally scale merged counts by \p Weight. 607 void merge(SoftInstrProfErrors &SIPE, InstrProfValueSiteRecord &Input, 608 uint64_t Weight = 1); 609 /// Scale up value profile data counts. 610 void scale(SoftInstrProfErrors &SIPE, uint64_t Weight); 611 }; 612 613 /// Profiling information for a single function. 614 struct InstrProfRecord { 615 StringRef Name; 616 uint64_t Hash; 617 std::vector<uint64_t> Counts; 618 SoftInstrProfErrors SIPE; 619 620 InstrProfRecord() = default; 621 InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t> Counts) 622 : Name(Name), Hash(Hash), Counts(std::move(Counts)) {} 623 624 typedef std::vector<std::pair<uint64_t, uint64_t>> ValueMapType; 625 626 /// Return the number of value profile kinds with non-zero number 627 /// of profile sites. 628 inline uint32_t getNumValueKinds() const; 629 630 /// Return the number of instrumented sites for ValueKind. 631 inline uint32_t getNumValueSites(uint32_t ValueKind) const; 632 633 /// Return the total number of ValueData for ValueKind. 
634 inline uint32_t getNumValueData(uint32_t ValueKind) const; 635 636 /// Return the number of value data collected for ValueKind at profiling 637 /// site: Site. 638 inline uint32_t getNumValueDataForSite(uint32_t ValueKind, 639 uint32_t Site) const; 640 641 /// Return the array of profiled values at \p Site. If \p TotalC 642 /// is not null, the total count of all target values at this site 643 /// will be stored in \c *TotalC. 644 inline std::unique_ptr<InstrProfValueData[]> 645 getValueForSite(uint32_t ValueKind, uint32_t Site, 646 uint64_t *TotalC = nullptr) const; 647 648 /// Get the target value/counts of kind \p ValueKind collected at site 649 /// \p Site and store the result in array \p Dest. Return the total 650 /// counts of all target values at this site. 651 inline uint64_t getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind, 652 uint32_t Site) const; 653 654 /// Reserve space for NumValueSites sites. 655 inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites); 656 657 /// Add ValueData for ValueKind at value Site. 658 void addValueData(uint32_t ValueKind, uint32_t Site, 659 InstrProfValueData *VData, uint32_t N, 660 ValueMapType *ValueMap); 661 662 /// Merge the counts in \p Other into this one. 663 /// Optionally scale merged counts by \p Weight. 664 void merge(InstrProfRecord &Other, uint64_t Weight = 1); 665 666 /// Scale up profile counts (including value profile data) by 667 /// \p Weight. 668 void scale(uint64_t Weight); 669 670 /// Sort value profile data (per site) by count. 671 void sortValueData() { 672 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 673 std::vector<InstrProfValueSiteRecord> &SiteRecords = 674 getValueSitesForKind(Kind); 675 for (auto &SR : SiteRecords) 676 SR.sortByCount(); 677 } 678 } 679 680 /// Clear value data entries and edge counters. 
681 void Clear() { 682 Counts.clear(); 683 clearValueData(); 684 } 685 686 /// Clear value data entries 687 void clearValueData() { 688 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 689 getValueSitesForKind(Kind).clear(); 690 } 691 692 /// Get the error contained within the record's soft error counter. 693 Error takeError() { return SIPE.takeError(); } 694 695 private: 696 std::vector<InstrProfValueSiteRecord> IndirectCallSites; 697 std::vector<InstrProfValueSiteRecord> MemOPSizes; 698 const std::vector<InstrProfValueSiteRecord> & 699 700 getValueSitesForKind(uint32_t ValueKind) const { 701 switch (ValueKind) { 702 case IPVK_IndirectCallTarget: 703 return IndirectCallSites; 704 case IPVK_MemOPSize: 705 return MemOPSizes; 706 default: 707 llvm_unreachable("Unknown value kind!"); 708 } 709 return IndirectCallSites; 710 } 711 712 std::vector<InstrProfValueSiteRecord> & 713 getValueSitesForKind(uint32_t ValueKind) { 714 return const_cast<std::vector<InstrProfValueSiteRecord> &>( 715 const_cast<const InstrProfRecord *>(this) 716 ->getValueSitesForKind(ValueKind)); 717 } 718 719 // Map indirect call target name hash to name string. 720 uint64_t remapValue(uint64_t Value, uint32_t ValueKind, 721 ValueMapType *HashKeys); 722 723 // Merge Value Profile data from Src record to this record for ValueKind. 724 // Scale merged value counts by \p Weight. 725 void mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src, 726 uint64_t Weight); 727 728 // Scale up value profile data count. 
729 void scaleValueProfData(uint32_t ValueKind, uint64_t Weight); 730 }; 731 732 uint32_t InstrProfRecord::getNumValueKinds() const { 733 uint32_t NumValueKinds = 0; 734 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 735 NumValueKinds += !(getValueSitesForKind(Kind).empty()); 736 return NumValueKinds; 737 } 738 739 uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const { 740 uint32_t N = 0; 741 const std::vector<InstrProfValueSiteRecord> &SiteRecords = 742 getValueSitesForKind(ValueKind); 743 for (auto &SR : SiteRecords) { 744 N += SR.ValueData.size(); 745 } 746 return N; 747 } 748 749 uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const { 750 return getValueSitesForKind(ValueKind).size(); 751 } 752 753 uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind, 754 uint32_t Site) const { 755 return getValueSitesForKind(ValueKind)[Site].ValueData.size(); 756 } 757 758 std::unique_ptr<InstrProfValueData[]> 759 InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site, 760 uint64_t *TotalC) const { 761 uint64_t Dummy; 762 uint64_t &TotalCount = (TotalC == nullptr ? 
Dummy : *TotalC); 763 uint32_t N = getNumValueDataForSite(ValueKind, Site); 764 if (N == 0) { 765 TotalCount = 0; 766 return std::unique_ptr<InstrProfValueData[]>(nullptr); 767 } 768 769 auto VD = llvm::make_unique<InstrProfValueData[]>(N); 770 TotalCount = getValueForSite(VD.get(), ValueKind, Site); 771 772 return VD; 773 } 774 775 uint64_t InstrProfRecord::getValueForSite(InstrProfValueData Dest[], 776 uint32_t ValueKind, 777 uint32_t Site) const { 778 uint32_t I = 0; 779 uint64_t TotalCount = 0; 780 for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) { 781 Dest[I].Value = V.Value; 782 Dest[I].Count = V.Count; 783 TotalCount = SaturatingAdd(TotalCount, V.Count); 784 I++; 785 } 786 return TotalCount; 787 } 788 789 void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) { 790 std::vector<InstrProfValueSiteRecord> &ValueSites = 791 getValueSitesForKind(ValueKind); 792 ValueSites.reserve(NumValueSites); 793 } 794 795 inline support::endianness getHostEndianness() { 796 return sys::IsLittleEndianHost ? support::little : support::big; 797 } 798 799 // Include definitions for value profile data 800 #define INSTR_PROF_VALUE_PROF_DATA 801 #include "llvm/ProfileData/InstrProfData.inc" 802 803 void InstrProfValueSiteRecord::sortByCount() { 804 ValueData.sort( 805 [](const InstrProfValueData &left, const InstrProfValueData &right) { 806 return left.Count > right.Count; 807 }); 808 // Now truncate 809 size_t max_s = INSTR_PROF_MAX_NUM_VAL_PER_SITE; 810 if (ValueData.size() > max_s) 811 ValueData.resize(max_s); 812 } 813 814 namespace IndexedInstrProf { 815 816 enum class HashT : uint32_t { 817 MD5, 818 Last = MD5 819 }; 820 821 inline uint64_t ComputeHash(HashT Type, StringRef K) { 822 switch (Type) { 823 case HashT::MD5: 824 return MD5Hash(K); 825 } 826 llvm_unreachable("Unhandled hash type"); 827 } 828 829 const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" 830 831 enum ProfVersion { 832 // Version 1 is the first version. 
In this version, the value of 833 // a key/value pair can only include profile data of a single function. 834 // Due to this restriction, the number of block counters for a given 835 // function is not recorded but derived from the length of the value. 836 Version1 = 1, 837 // The version 2 format supports recording profile data of multiple 838 // functions which share the same key in one value field. To support this, 839 // the number block counters is recorded as an uint64_t field right after the 840 // function structural hash. 841 Version2 = 2, 842 // Version 3 supports value profile data. The value profile data is expected 843 // to follow the block counter profile data. 844 Version3 = 3, 845 // In this version, profile summary data \c IndexedInstrProf::Summary is 846 // stored after the profile header. 847 Version4 = 4, 848 // The current version is 4. 849 CurrentVersion = INSTR_PROF_INDEX_VERSION 850 }; 851 const uint64_t Version = ProfVersion::CurrentVersion; 852 853 const HashT HashType = HashT::MD5; 854 855 inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); } 856 857 // This structure defines the file header of the LLVM profile 858 // data file in indexed-format. 859 struct Header { 860 uint64_t Magic; 861 uint64_t Version; 862 uint64_t Unused; // Becomes unused since version 4 863 uint64_t HashType; 864 uint64_t HashOffset; 865 }; 866 867 // Profile summary data recorded in the profile data file in indexed 868 // format. It is introduced in version 4. The summary data follows 869 // right after the profile file header. 870 struct Summary { 871 struct Entry { 872 uint64_t Cutoff; ///< The required percentile of total execution count. 873 uint64_t 874 MinBlockCount; ///< The minimum execution count for this percentile. 875 uint64_t NumBlocks; ///< Number of blocks >= the minumum execution count. 
876 }; 877 // The field kind enumerator to assigned value mapping should remain 878 // unchanged when a new kind is added or an old kind gets deleted in 879 // the future. 880 enum SummaryFieldKind { 881 /// The total number of functions instrumented. 882 TotalNumFunctions = 0, 883 /// Total number of instrumented blocks/edges. 884 TotalNumBlocks = 1, 885 /// The maximal execution count among all functions. 886 /// This field does not exist for profile data from IR based 887 /// instrumentation. 888 MaxFunctionCount = 2, 889 /// Max block count of the program. 890 MaxBlockCount = 3, 891 /// Max internal block count of the program (excluding entry blocks). 892 MaxInternalBlockCount = 4, 893 /// The sum of all instrumented block counts. 894 TotalBlockCount = 5, 895 NumKinds = TotalBlockCount + 1 896 }; 897 898 // The number of summmary fields following the summary header. 899 uint64_t NumSummaryFields; 900 // The number of Cutoff Entries (Summary::Entry) following summary fields. 901 uint64_t NumCutoffEntries; 902 903 static uint32_t getSize(uint32_t NumSumFields, uint32_t NumCutoffEntries) { 904 return sizeof(Summary) + NumCutoffEntries * sizeof(Entry) + 905 NumSumFields * sizeof(uint64_t); 906 } 907 908 const uint64_t *getSummaryDataBase() const { 909 return reinterpret_cast<const uint64_t *>(this + 1); 910 } 911 912 uint64_t *getSummaryDataBase() { 913 return reinterpret_cast<uint64_t *>(this + 1); 914 } 915 916 const Entry *getCutoffEntryBase() const { 917 return reinterpret_cast<const Entry *>( 918 &getSummaryDataBase()[NumSummaryFields]); 919 } 920 921 Entry *getCutoffEntryBase() { 922 return reinterpret_cast<Entry *>(&getSummaryDataBase()[NumSummaryFields]); 923 } 924 925 uint64_t get(SummaryFieldKind K) const { 926 return getSummaryDataBase()[K]; 927 } 928 929 void set(SummaryFieldKind K, uint64_t V) { 930 getSummaryDataBase()[K] = V; 931 } 932 933 const Entry &getEntry(uint32_t I) const { return getCutoffEntryBase()[I]; } 934 935 void setEntry(uint32_t I, 
const ProfileSummaryEntry &E) { 936 Entry &ER = getCutoffEntryBase()[I]; 937 ER.Cutoff = E.Cutoff; 938 ER.MinBlockCount = E.MinCount; 939 ER.NumBlocks = E.NumCounts; 940 } 941 942 Summary(uint32_t Size) { memset(this, 0, Size); } 943 void operator delete(void *ptr) { ::operator delete(ptr); } 944 945 Summary() = delete; 946 }; 947 948 inline std::unique_ptr<Summary> allocSummary(uint32_t TotalSize) { 949 return std::unique_ptr<Summary>(new (::operator new(TotalSize)) 950 Summary(TotalSize)); 951 } 952 953 } // end namespace IndexedInstrProf 954 955 namespace RawInstrProf { 956 957 // Version 1: First version 958 // Version 2: Added value profile data section. Per-function control data 959 // struct has more fields to describe value profile information. 960 // Version 3: Compressed name section support. Function PGO name reference 961 // from control data struct is changed from raw pointer to Name's MD5 value. 962 // Version 4: ValueDataBegin and ValueDataSizes fields are removed from the 963 // raw header. 964 const uint64_t Version = INSTR_PROF_RAW_VERSION; 965 966 template <class IntPtrT> inline uint64_t getMagic(); 967 template <> inline uint64_t getMagic<uint64_t>() { 968 return INSTR_PROF_RAW_MAGIC_64; 969 } 970 971 template <> inline uint64_t getMagic<uint32_t>() { 972 return INSTR_PROF_RAW_MAGIC_32; 973 } 974 975 // Per-function profile data header/control structure. 976 // The definition should match the structure defined in 977 // compiler-rt/lib/profile/InstrProfiling.h. 978 // It should also match the synthesized type in 979 // Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters. 980 template <class IntPtrT> struct LLVM_ALIGNAS(8) ProfileData { 981 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name; 982 #include "llvm/ProfileData/InstrProfData.inc" 983 }; 984 985 // File header structure of the LLVM profile data in raw format. 
// The definition should match the header referenced in
// compiler-rt/lib/profile/InstrProfilingFile.c and
// InstrProfilingBuffer.c.
struct Header {
// Each INSTR_PROF_RAW_HEADER(Type, Name, Init) entry seen while including
// InstrProfData.inc expands to one const member `const Type Name;`.
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};

} // end namespace RawInstrProf

// Parse MemOP Size range option.
// NOTE(review): only the declaration is visible here; presumably \p Str is
// the option text and the parsed bounds are returned through \p RangeStart
// and \p RangeLast -- confirm against the definition.
void getMemOPSizeRangeFromOption(std::string Str, int64_t &RangeStart,
                                 int64_t &RangeLast);

} // end namespace llvm

#endif // LLVM_PROFILEDATA_INSTRPROF_H