//===- subzero/src/IceGlobalContext.cpp - Global context defs -------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Defines aspects of the compilation that persist across multiple
/// functions.
///
//===----------------------------------------------------------------------===//

#include "IceGlobalContext.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceRevision.h"
#include "IceTargetLowering.h"
#include "IceTimerTree.h"
#include "IceTypes.def"
#include "IceTypes.h"

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
#endif // __clang__

#include "llvm/Support/Timer.h"

#ifdef __clang__
#pragma clang diagnostic pop
#endif // __clang__

#include <algorithm> // max()

namespace std {
template <> struct hash<Ice::RelocatableTuple> {
  size_t operator()(const Ice::RelocatableTuple &Key) const {
    // Use the hash of the relocatable's name, plus the hash of a combination
    // of the number of OffsetExprs and the known, fixed offset for the reloc.
    // We left-shift the known offset by 5 to minimize the interaction between
    // the bits of OffsetExpr.size() and Key.Offset.
    return hash<Ice::SizeT>()(Key.Name.getID()) +
           hash<std::size_t>()(Key.OffsetExpr.size() + (Key.Offset << 5));
  }
};
} // end of namespace std

namespace Ice {

namespace {

// Define the key comparison function for the constant pool's unordered_map,
// but only for key types of interest: integer types, floating point types, and
// the special RelocatableTuple.
template <typename KeyType, class Enable = void> struct KeyCompare {};

template <typename KeyType>
struct KeyCompare<KeyType,
                  typename std::enable_if<
                      std::is_integral<KeyType>::value ||
                      std::is_same<KeyType, RelocatableTuple>::value>::type> {
  bool operator()(const KeyType &Value1, const KeyType &Value2) const {
    return Value1 == Value2;
  }
};
template <typename KeyType>
struct KeyCompare<KeyType, typename std::enable_if<
                               std::is_floating_point<KeyType>::value>::type> {
  bool operator()(const KeyType &Value1, const KeyType &Value2) const {
    return !memcmp(&Value1, &Value2, sizeof(KeyType));
  }
};
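
// Note on the floating-point specialization above: comparing raw bytes means
// that +0.0 and -0.0 are distinct pool keys, and that a NaN key matches only
// an entry with the identical bit pattern, unlike the IEEE "==" operator.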

// Define a key comparison function for sorting the constant pool's values
// after they are dumped to a vector. This covers integer types, floating point
// types, and ConstantRelocatable values.
template <typename ValueType, class Enable = void> struct KeyCompareLess {};

template <typename ValueType>
struct KeyCompareLess<ValueType,
                      typename std::enable_if<std::is_floating_point<
                          typename ValueType::PrimType>::value>::type> {
  bool operator()(const Constant *Const1, const Constant *Const2) const {
    using CompareType = uint64_t;
    static_assert(sizeof(typename ValueType::PrimType) <= sizeof(CompareType),
                  "Expected floating-point type of width 64-bit or less");
    typename ValueType::PrimType V1 = llvm::cast<ValueType>(Const1)->getValue();
    typename ValueType::PrimType V2 = llvm::cast<ValueType>(Const2)->getValue();
    // We avoid "V1<V2" because of NaN.
    // We avoid "memcmp(&V1,&V2,sizeof(V1))<0" which depends on the
    // endian-ness of the host system running Subzero.
    // Instead, compare the result of bit_cast to uint64_t.
    uint64_t I1 = 0, I2 = 0;
    memcpy(&I1, &V1, sizeof(V1));
    memcpy(&I2, &V2, sizeof(V2));
    return I1 < I2;
  }
};
template <typename ValueType>
struct KeyCompareLess<ValueType,
                      typename std::enable_if<std::is_integral<
                          typename ValueType::PrimType>::value>::type> {
  bool operator()(const Constant *Const1, const Constant *Const2) const {
    typename ValueType::PrimType V1 = llvm::cast<ValueType>(Const1)->getValue();
    typename ValueType::PrimType V2 = llvm::cast<ValueType>(Const2)->getValue();
    return V1 < V2;
  }
};
template <typename ValueType>
struct KeyCompareLess<
    ValueType, typename std::enable_if<
                   std::is_same<ValueType, ConstantRelocatable>::value>::type> {
  bool operator()(const Constant *Const1, const Constant *Const2) const {
    auto *V1 = llvm::cast<ValueType>(Const1);
    auto *V2 = llvm::cast<ValueType>(Const2);
    if (V1->getName() == V2->getName())
      return V1->getOffset() < V2->getOffset();
    return V1->getName() < V2->getName();
  }
};
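
// Under the floating-point ordering above, +0.0f (bit pattern 0x00000000)
// sorts before -0.0f (0x80000000), and each NaN gets a stable position
// determined by its bit pattern, so pool dumps are fully deterministic.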

// TypePool maps constants of type KeyType (e.g. float) to pointers to
// type ValueType (e.g. ConstantFloat).
template <Type Ty, typename KeyType, typename ValueType> class TypePool {
  TypePool(const TypePool &) = delete;
  TypePool &operator=(const TypePool &) = delete;

public:
  TypePool() = default;
  ValueType *getOrAdd(GlobalContext *Ctx, KeyType Key) {
    auto Iter = Pool.find(Key);
    if (Iter != Pool.end()) {
      Iter->second->updateLookupCount();
      return Iter->second;
    }
    auto *Result = ValueType::create(Ctx, Ty, Key);
    Pool[Key] = Result;
    Result->updateLookupCount();
    return Result;
  }
  ConstantList getConstantPool() const {
    ConstantList Constants;
    Constants.reserve(Pool.size());
    for (auto &I : Pool)
      Constants.push_back(I.second);
    // The sort (and its KeyCompareLess machinery) is not strictly necessary,
    // but is desirable for producing output that is deterministic across
    // unordered_map::iterator implementations.
    std::sort(Constants.begin(), Constants.end(), KeyCompareLess<ValueType>());
    return Constants;
  }
  size_t size() const { return Pool.size(); }

private:
  // Use the default hash function, and a custom key comparison function. The
  // key comparison function for floating point variables can't use the
  // default ==-based implementation because of special C++ semantics
  // regarding +0.0, -0.0, and NaN comparison. However, it's OK to use the
  // default hash for floating point values, because KeyCompare is the final
  // source of truth: in the worst case a "false" collision must be resolved.
  using ContainerType =
      std::unordered_map<KeyType, ValueType *, std::hash<KeyType>,
                         KeyCompare<KeyType>>;
  ContainerType Pool;
};

// UndefPool maps ICE types to the corresponding ConstantUndef values.
class UndefPool {
  UndefPool(const UndefPool &) = delete;
  UndefPool &operator=(const UndefPool &) = delete;

public:
  UndefPool() : Pool(IceType_NUM) {}

  ConstantUndef *getOrAdd(GlobalContext *Ctx, Type Ty) {
    if (Pool[Ty] == nullptr)
      Pool[Ty] = ConstantUndef::create(Ctx, Ty);
    return Pool[Ty];
  }

private:
  std::vector<ConstantUndef *> Pool;
};

} // end of anonymous namespace

// The global constant pool bundles individual pools of each type of
// interest.
class ConstantPool {
  ConstantPool(const ConstantPool &) = delete;
  ConstantPool &operator=(const ConstantPool &) = delete;

public:
  ConstantPool() = default;
  TypePool<IceType_f32, float, ConstantFloat> Floats;
  TypePool<IceType_f64, double, ConstantDouble> Doubles;
  TypePool<IceType_i1, int8_t, ConstantInteger32> Integers1;
  TypePool<IceType_i8, int8_t, ConstantInteger32> Integers8;
  TypePool<IceType_i16, int16_t, ConstantInteger32> Integers16;
  TypePool<IceType_i32, int32_t, ConstantInteger32> Integers32;
  TypePool<IceType_i64, int64_t, ConstantInteger64> Integers64;
  TypePool<IceType_i32, RelocatableTuple, ConstantRelocatable> Relocatables;
  TypePool<IceType_i32, RelocatableTuple, ConstantRelocatable>
      ExternRelocatables;
  UndefPool Undefs;
};
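
// For example, GlobalContext::getConstantFloat(1.5f) (defined below) resolves
// to getConstPool()->Floats.getOrAdd(this, 1.5f): repeated lookups return the
// same pooled ConstantFloat and bump its lookup count.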
"_FINAL_" : Func->getFunctionNameAndSize()); 256 #define X(str, tag) \ 257 Str << "|" << Name << "|" str "|" << Stats[CS_##tag] << "\n"; 258 CODESTATS_TABLE 259 #undef X 260 Str << "|" << Name << "|Spills+Fills|" 261 << Stats[CS_NumSpills] + Stats[CS_NumFills] << "\n"; 262 Str << "|" << Name << "|Memory Usage |"; 263 if (const auto MemUsed = static_cast<size_t>( 264 llvm::TimeRecord::getCurrentTime(false).getMemUsed())) { 265 static constexpr size_t _1MB = 1024 * 1024; 266 Str << (MemUsed / _1MB) << " MB"; 267 } else { 268 Str << "(requires '-track-memory')"; 269 } 270 Str << "\n"; 271 Str << "|" << Name << "|CPool Sizes "; 272 { 273 auto Pool = Ctx->getConstPool(); 274 Str << "|f32=" << Pool->Floats.size(); 275 Str << "|f64=" << Pool->Doubles.size(); 276 Str << "|i1=" << Pool->Integers1.size(); 277 Str << "|i8=" << Pool->Integers8.size(); 278 Str << "|i16=" << Pool->Integers16.size(); 279 Str << "|i32=" << Pool->Integers32.size(); 280 Str << "|i64=" << Pool->Integers64.size(); 281 Str << "|Rel=" << Pool->Relocatables.size(); 282 Str << "|ExtRel=" << Pool->ExternRelocatables.size(); 283 } 284 Str << "\n"; 285 if (Func != nullptr) { 286 Str << "|" << Name << "|Cfg Memory |" << Func->getTotalMemoryMB() 287 << " MB\n"; 288 Str << "|" << Name << "|Liveness Memory |" << Func->getLivenessMemoryMB() 289 << " MB\n"; 290 } 291 } 292 293 namespace { 294 295 // By default, wake up the main parser thread when the OptQ gets half empty. 296 static constexpr size_t DefaultOptQWakeupSize = GlobalContext::MaxOptQSize >> 1; 297 298 } // end of anonymous namespace 299 300 GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError, 301 ELFStreamer *ELFStr) 302 : Strings(new StringPool()), ConstPool(new ConstantPool()), ErrorStatus(), 303 StrDump(OsDump), StrEmit(OsEmit), StrError(OsError), IntrinsicsInfo(this), 304 ObjectWriter(), 305 OptQWakeupSize(std::max(DefaultOptQWakeupSize, 306 size_t(getFlags().getNumTranslationThreads()))), 307 OptQ(/*Sequential=*/getFlags().isSequential(), 308 /*MaxSize=*/ 309 getFlags().isParseParallel() 310 ? MaxOptQSize 311 : getFlags().getNumTranslationThreads()), 312 // EmitQ is allowed unlimited size. 313 EmitQ(/*Sequential=*/getFlags().isSequential()), 314 DataLowering(TargetDataLowering::createLowering(this)) { 315 assert(OsDump && "OsDump is not defined for GlobalContext"); 316 assert(OsEmit && "OsEmit is not defined for GlobalContext"); 317 assert(OsError && "OsError is not defined for GlobalContext"); 318 // Make sure thread_local fields are properly initialized before any 319 // accesses are made. Do this here instead of at the start of 320 // main() so that all clients (e.g. unit tests) can benefit for 321 // free. 322 GlobalContext::TlsInit(); 323 Cfg::TlsInit(); 324 Liveness::TlsInit(); 325 // Create a new ThreadContext for the current thread. No need to 326 // lock AllThreadContexts at this point since no other threads have 327 // access yet to this GlobalContext object. 328 ThreadContext *MyTLS = new ThreadContext(); 329 AllThreadContexts.push_back(MyTLS); 330 ICE_TLS_SET_FIELD(TLS, MyTLS); 331 // Pre-register built-in stack names. 332 if (BuildDefs::timers()) { 333 // TODO(stichnot): There needs to be a strong relationship between 334 // the newTimerStackID() return values and TSK_Default/TSK_Funcs. 
void GlobalContext::CodeStats::dump(const Cfg *Func, GlobalContext *Ctx) {
  if (!BuildDefs::dump())
    return;
  OstreamLocker _(Ctx);
  Ostream &Str = Ctx->getStrDump();
  const std::string Name =
      (Func == nullptr ? "_FINAL_" : Func->getFunctionNameAndSize());
#define X(str, tag)                                                            \
  Str << "|" << Name << "|" str "|" << Stats[CS_##tag] << "\n";
  CODESTATS_TABLE
#undef X
  Str << "|" << Name << "|Spills+Fills|"
      << Stats[CS_NumSpills] + Stats[CS_NumFills] << "\n";
  Str << "|" << Name << "|Memory Usage |";
  if (const auto MemUsed = static_cast<size_t>(
          llvm::TimeRecord::getCurrentTime(false).getMemUsed())) {
    static constexpr size_t _1MB = 1024 * 1024;
    Str << (MemUsed / _1MB) << " MB";
  } else {
    Str << "(requires '-track-memory')";
  }
  Str << "\n";
  Str << "|" << Name << "|CPool Sizes ";
  {
    auto Pool = Ctx->getConstPool();
    Str << "|f32=" << Pool->Floats.size();
    Str << "|f64=" << Pool->Doubles.size();
    Str << "|i1=" << Pool->Integers1.size();
    Str << "|i8=" << Pool->Integers8.size();
    Str << "|i16=" << Pool->Integers16.size();
    Str << "|i32=" << Pool->Integers32.size();
    Str << "|i64=" << Pool->Integers64.size();
    Str << "|Rel=" << Pool->Relocatables.size();
    Str << "|ExtRel=" << Pool->ExternRelocatables.size();
  }
  Str << "\n";
  if (Func != nullptr) {
    Str << "|" << Name << "|Cfg Memory |" << Func->getTotalMemoryMB()
        << " MB\n";
    Str << "|" << Name << "|Liveness Memory |" << Func->getLivenessMemoryMB()
        << " MB\n";
  }
}

namespace {

// By default, wake up the main parser thread when the OptQ gets half empty.
static constexpr size_t DefaultOptQWakeupSize = GlobalContext::MaxOptQSize >> 1;

} // end of anonymous namespace

GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError,
                             ELFStreamer *ELFStr)
    : Strings(new StringPool()), ConstPool(new ConstantPool()), ErrorStatus(),
      StrDump(OsDump), StrEmit(OsEmit), StrError(OsError), IntrinsicsInfo(this),
      ObjectWriter(),
      OptQWakeupSize(std::max(DefaultOptQWakeupSize,
                              size_t(getFlags().getNumTranslationThreads()))),
      OptQ(/*Sequential=*/getFlags().isSequential(),
           /*MaxSize=*/
           getFlags().isParseParallel()
               ? MaxOptQSize
               : getFlags().getNumTranslationThreads()),
      // EmitQ is allowed unlimited size.
      EmitQ(/*Sequential=*/getFlags().isSequential()),
      DataLowering(TargetDataLowering::createLowering(this)) {
  assert(OsDump && "OsDump is not defined for GlobalContext");
  assert(OsEmit && "OsEmit is not defined for GlobalContext");
  assert(OsError && "OsError is not defined for GlobalContext");
  // Make sure thread_local fields are properly initialized before any
  // accesses are made. Do this here instead of at the start of main() so that
  // all clients (e.g. unit tests) can benefit for free.
  GlobalContext::TlsInit();
  Cfg::TlsInit();
  Liveness::TlsInit();
  // Create a new ThreadContext for the current thread. No need to lock
  // AllThreadContexts at this point since no other threads have access yet to
  // this GlobalContext object.
  ThreadContext *MyTLS = new ThreadContext();
  AllThreadContexts.push_back(MyTLS);
  ICE_TLS_SET_FIELD(TLS, MyTLS);
  // Pre-register built-in stack names.
  if (BuildDefs::timers()) {
    // TODO(stichnot): There needs to be a strong relationship between
    // the newTimerStackID() return values and TSK_Default/TSK_Funcs.
    newTimerStackID("Total across all functions");
    newTimerStackID("Per-function summary");
  }
  Timers.initInto(MyTLS->Timers);
  switch (getFlags().getOutFileType()) {
  case FT_Elf:
    ObjectWriter.reset(new ELFObjectWriter(*this, *ELFStr));
    break;
  case FT_Asm:
  case FT_Iasm:
    break;
  }
// Cache common constants up front.
#define X(tag, sizeLog2, align, elts, elty, str, rcstr)                        \
  ConstZeroForType[IceType_##tag] = getConstantZeroInternal(IceType_##tag);
  ICETYPE_TABLE;
#undef X
  ConstantTrue = getConstantInt1Internal(1);
// Define runtime helper functions.
#define X(Tag, Name)                                                           \
  RuntimeHelperFunc[static_cast<size_t>(RuntimeHelper::H_##Tag)] =             \
      getConstantExternSym(getGlobalString(Name));
  RUNTIME_HELPER_FUNCTIONS_TABLE
#undef X

  TargetLowering::staticInit(this);

  if (getFlags().getEmitRevision()) {
    // Embed the Subzero revision into the compiled binary by creating a
    // special global variable initialized with the revision string.
    auto *Revision = VariableDeclaration::create(&Globals, true);
    Revision->setName(this, "__Sz_revision");
    Revision->setIsConstant(true);
    const char *RevisionString = getSubzeroRevision();
    Revision->addInitializer(VariableDeclaration::DataInitializer::create(
        &Globals, RevisionString, 1 + strlen(RevisionString)));
    Globals.push_back(Revision);
  }
}
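
// translateFunctions() is the translation worker loop (also invoked inline
// from optQueueBlockingPush() in sequential mode): it pops parsed Cfgs from
// the optimization queue, translates them, and pushes the resulting
// EmitterWorkItems, tagged with each Cfg's sequence number, onto the emit
// queue.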
void GlobalContext::translateFunctions() {
  TimerMarker Timer(TimerStack::TT_translateFunctions, this);
  while (std::unique_ptr<OptWorkItem> OptItem = optQueueBlockingPop()) {
    std::unique_ptr<EmitterWorkItem> Item;
    auto Func = OptItem->getParsedCfg();
    // Install Func in TLS for Cfg-specific container allocators.
    CfgLocalAllocatorScope _(Func.get());
    // Reset per-function stats being accumulated in TLS.
    resetStats();
    // Set verbose level to none if the current function does NOT match the
    // -verbose-focus command-line option.
    if (!getFlags().matchVerboseFocusOn(Func->getFunctionName(),
                                        Func->getSequenceNumber()))
      Func->setVerbose(IceV_None);
    // Disable translation if -notranslate is specified, or if the current
    // function does not match the -translate-only option. If translation is
    // disabled, just dump the high-level IR and continue.
    if (getFlags().getDisableTranslation() ||
        !getFlags().matchTranslateOnly(Func->getFunctionName(),
                                       Func->getSequenceNumber())) {
      Func->dump();
      // Add a dummy work item as a placeholder. This maintains sequence
      // numbers so that the emitter thread will emit subsequent functions.
      Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber());
      emitQueueBlockingPush(std::move(Item));
      continue; // Func goes out of scope and gets deleted
    }

    Func->translate();
    if (Func->hasError()) {
      getErrorStatus()->assign(EC_Translation);
      OstreamLocker L(this);
      getStrError() << "ICE translation error: " << Func->getFunctionName()
                    << ": " << Func->getError() << ": "
                    << Func->getFunctionNameAndSize() << "\n";
      Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber());
    } else {
      Func->getAssembler<>()->setInternal(Func->getInternal());
      switch (getFlags().getOutFileType()) {
      case FT_Elf:
      case FT_Iasm: {
        Func->emitIAS();
        // The Cfg has already emitted into the assembly buffer, so stats have
        // been fully collected into this thread's TLS. Dump them before TLS
        // is reset for the next Cfg.
        if (BuildDefs::dump())
          dumpStats(Func.get());
        auto Asm = Func->releaseAssembler();
        // Copy relevant fields into Asm before Func is deleted.
        Asm->setFunctionName(Func->getFunctionName());
        Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber(),
                                           std::move(Asm));
        Item->setGlobalInits(Func->getGlobalInits());
      } break;
      case FT_Asm:
        // The Cfg has not been emitted yet, so stats are not ready to be
        // dumped.
        std::unique_ptr<VariableDeclarationList> GlobalInits =
            Func->getGlobalInits();
        Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber(),
                                           std::move(Func));
        Item->setGlobalInits(std::move(GlobalInits));
        break;
      }
    }
    assert(Item != nullptr);
    emitQueueBlockingPush(std::move(Item));
    // The Cfg now gets deleted as Func goes out of scope.
  }
}
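
// Note that translateFunctions() pushes a placeholder work item even for
// functions it skips, so the emitter always sees a dense range of sequence
// numbers and never stalls waiting for a gap to fill.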

namespace {

// Ensure Pending is large enough that Pending[Index] is valid.
void resizePending(std::vector<std::unique_ptr<EmitterWorkItem>> *Pending,
                   uint32_t Index) {
  if (Index >= Pending->size())
    Utils::reserveAndResize(*Pending, Index + 1);
}

} // end of anonymous namespace

void GlobalContext::emitFileHeader() {
  TimerMarker T1(Ice::TimerStack::TT_emitAsm, this);
  if (getFlags().getOutFileType() == FT_Elf) {
    getObjectWriter()->writeInitialELFHeader();
  } else {
    if (!BuildDefs::dump()) {
      getStrError() << "emitFileHeader for non-ELF";
      getErrorStatus()->assign(EC_Translation);
    }
    TargetHeaderLowering::createLowering(this)->lower();
  }
}

void GlobalContext::lowerConstants() { DataLowering->lowerConstants(); }

void GlobalContext::lowerJumpTables() { DataLowering->lowerJumpTables(); }

void GlobalContext::emitTargetRODataSections() {
  DataLowering->emitTargetRODataSections();
}

void GlobalContext::saveBlockInfoPtrs() {
  for (VariableDeclaration *Global : Globals) {
    if (Cfg::isProfileGlobal(*Global)) {
      ProfileBlockInfos.push_back(Global);
    }
  }
}

void GlobalContext::lowerGlobals(const std::string &SectionSuffix) {
  TimerMarker T(TimerStack::TT_emitGlobalInitializers, this);
  const bool DumpGlobalVariables =
      BuildDefs::dump() && (getFlags().getVerbose() & IceV_GlobalInit) &&
      getFlags().matchVerboseFocusOn("", 0);
  if (DumpGlobalVariables) {
    OstreamLocker L(this);
    Ostream &Stream = getStrDump();
    for (const Ice::VariableDeclaration *Global : Globals) {
      Global->dump(Stream);
    }
  }
  if (getFlags().getDisableTranslation())
    return;

  saveBlockInfoPtrs();
  // If we need to shuffle the layout of global variables, shuffle them now.
  if (getFlags().getReorderGlobalVariables()) {
    // Create a random number generator for global variable reordering.
    RandomNumberGenerator RNG(getFlags().getRandomSeed(),
                              RPE_GlobalVariableReordering);
    RandomShuffle(Globals.begin(), Globals.end(),
                  [&RNG](int N) { return (uint32_t)RNG.next(N); });
  }

  if (!BuildDefs::minimal() && Instrumentor)
    Instrumentor->instrumentGlobals(Globals);

  DataLowering->lowerGlobals(Globals, SectionSuffix);
  if (ProfileBlockInfos.empty() && DisposeGlobalVariablesAfterLowering) {
    Globals.clearAndPurge();
  } else {
    Globals.clear();
  }
}

void GlobalContext::lowerProfileData() {
  // ProfileBlockInfoVarDecl is initialized to nullptr in the constructor, and
  // becomes non-null only after this method completes. This assertion is thus
  // a convoluted way of ensuring lowerProfileData is invoked a single time.
  assert(ProfileBlockInfoVarDecl == nullptr);

  auto GlobalVariablePool = getInitializerAllocator();
  ProfileBlockInfoVarDecl =
      VariableDeclaration::createExternal(GlobalVariablePool.get());
  ProfileBlockInfoVarDecl->setAlignment(typeWidthInBytes(IceType_i64));
  ProfileBlockInfoVarDecl->setIsConstant(true);

  // Note: if you change this symbol, make sure to update
  // runtime/szrt_profiler.c as well.
  ProfileBlockInfoVarDecl->setName(this, "__Sz_block_profile_info");

  for (const VariableDeclaration *PBI : ProfileBlockInfos) {
    if (Cfg::isProfileGlobal(*PBI)) {
      constexpr RelocOffsetT BlockExecutionCounterOffset = 0;
      ProfileBlockInfoVarDecl->addInitializer(
          VariableDeclaration::RelocInitializer::create(
              GlobalVariablePool.get(), PBI,
              {RelocOffset::create(this, BlockExecutionCounterOffset)}));
    }
  }

  // This adds a 64-bit sentinel entry to the end of our array. For 32-bit
  // architectures this will waste 4 bytes.
  const SizeT Sizeof64BitNullPtr = typeWidthInBytes(IceType_i64);
  ProfileBlockInfoVarDecl->addInitializer(
      VariableDeclaration::ZeroInitializer::create(GlobalVariablePool.get(),
                                                   Sizeof64BitNullPtr));
  Globals.push_back(ProfileBlockInfoVarDecl);
  constexpr char ProfileDataSection[] = "$sz_profiler$";
  lowerGlobals(ProfileDataSection);
}

void GlobalContext::emitItems() {
  const bool Threaded = !getFlags().isSequential();
  // Pending is a vector containing the reassembled, ordered list of work
  // items. When we're ready for the next item, we first check whether it's in
  // the Pending list. If not, we take an item from the work queue, and if
  // it's not the item we're waiting for, we insert it into Pending and
  // repeat. The work item is deleted after it is processed.
  std::vector<std::unique_ptr<EmitterWorkItem>> Pending;
  uint32_t DesiredSequenceNumber = getFirstSequenceNumber();
  uint32_t ShuffleStartIndex = DesiredSequenceNumber;
  uint32_t ShuffleEndIndex = DesiredSequenceNumber;
  bool EmitQueueEmpty = false;
  const uint32_t ShuffleWindowSize =
      std::max(1u, getFlags().getReorderFunctionsWindowSize());
  bool Shuffle = Threaded && getFlags().getReorderFunctions();
  // Create a random number generator for function reordering.
  RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_FunctionReordering);
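
  // For example, with multiple translation threads the emitter might receive
  // sequence numbers 2, 0, 1: item 2 is parked in Pending, item 0 is emitted
  // as soon as it arrives, and item 1 followed by the parked item 2 are
  // emitted once the desired sequence number catches up.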
  while (!EmitQueueEmpty) {
    resizePending(&Pending, DesiredSequenceNumber);
    // See if Pending contains DesiredSequenceNumber.
    if (Pending[DesiredSequenceNumber] == nullptr) {
      // We need to fetch an EmitterWorkItem from the queue.
      auto RawItem = emitQueueBlockingPop();
      if (RawItem == nullptr) {
        // This is the notifier for an empty queue.
        EmitQueueEmpty = true;
      } else {
        // We got an EmitterWorkItem; add it to Pending.
        uint32_t ItemSeq = RawItem->getSequenceNumber();
        if (Threaded && ItemSeq != DesiredSequenceNumber) {
          // Not the desired one; add it to Pending but do not increase
          // DesiredSequenceNumber. Continue the loop; do not emit the item.
          resizePending(&Pending, ItemSeq);
          Pending[ItemSeq] = std::move(RawItem);
          continue;
        }
        // ItemSeq == DesiredSequenceNumber (or !Threaded, in which case an
        // out-of-order ItemSeq is OK), so store the item for emission.
        Pending[DesiredSequenceNumber] = std::move(RawItem);
      }
    }
    const auto *CurrentWorkItem = Pending[DesiredSequenceNumber].get();

    // We have the desired EmitterWorkItem or nullptr as the end notifier. If
    // the emitter queue is not empty, increase DesiredSequenceNumber and
    // ShuffleEndIndex.
    if (!EmitQueueEmpty) {
      DesiredSequenceNumber++;
      ShuffleEndIndex++;
    }

    if (Shuffle) {
      // Keep fetching EmitterWorkItems while function reordering is turned
      // on, the emit queue is not empty, the number of consecutive pending
      // items is smaller than the window size, and the current item is not of
      // WI_GlobalInits kind. WI_GlobalInits items are emitted immediately to
      // avoid holding an arbitrarily large GlobalDeclarationList.
      if (!EmitQueueEmpty &&
          ShuffleEndIndex - ShuffleStartIndex < ShuffleWindowSize &&
          CurrentWorkItem->getKind() != EmitterWorkItem::WI_GlobalInits)
        continue;

      // Shuffle the pending items in the window from
      // Pending[ShuffleStartIndex] to Pending[ShuffleEndIndex] before
      // emitting them.
      RandomShuffle(Pending.begin() + ShuffleStartIndex,
                    Pending.begin() + ShuffleEndIndex,
                    [&RNG](uint64_t N) { return (uint32_t)RNG.next(N); });
    }

    // Emit the items from ShuffleStartIndex to ShuffleEndIndex.
    for (uint32_t I = ShuffleStartIndex; I < ShuffleEndIndex; I++) {
      std::unique_ptr<EmitterWorkItem> Item = std::move(Pending[I]);

      switch (Item->getKind()) {
      case EmitterWorkItem::WI_Nop:
        break;
      case EmitterWorkItem::WI_GlobalInits: {
        accumulateGlobals(Item->getGlobalInits());
      } break;
      case EmitterWorkItem::WI_Asm: {
        lowerGlobalsIfNoCodeHasBeenSeen();
        accumulateGlobals(Item->getGlobalInits());

        std::unique_ptr<Assembler> Asm = Item->getAsm();
        Asm->alignFunction();
        GlobalString Name = Asm->getFunctionName();
        switch (getFlags().getOutFileType()) {
        case FT_Elf:
          getObjectWriter()->writeFunctionCode(Name, Asm->getInternal(),
                                               Asm.get());
          break;
        case FT_Iasm: {
          OstreamLocker L(this);
          Cfg::emitTextHeader(Name, this, Asm.get());
          Asm->emitIASBytes(this);
        } break;
        case FT_Asm:
          llvm::report_fatal_error("Unexpected FT_Asm");
          break;
        }
      } break;
      case EmitterWorkItem::WI_Cfg: {
        if (!BuildDefs::dump())
          llvm::report_fatal_error("WI_Cfg work item created inappropriately");
        lowerGlobalsIfNoCodeHasBeenSeen();
        accumulateGlobals(Item->getGlobalInits());

        assert(getFlags().getOutFileType() == FT_Asm);
        std::unique_ptr<Cfg> Func = Item->getCfg();
        // Unfortunately, we have to temporarily install the Cfg in TLS
        // because Variable::asType() uses the allocator to create the
        // differently-typed copy.
        CfgLocalAllocatorScope _(Func.get());
        Func->emit();
        dumpStats(Func.get());
      } break;
      }
    }
    // Update the start index for the next shuffle window.
    ShuffleStartIndex = ShuffleEndIndex;
  }

  // In case no code was generated, invoke the conditional lowerGlobals one
  // last time -- this is a no-op if code has been emitted.
  lowerGlobalsIfNoCodeHasBeenSeen();
}

GlobalContext::~GlobalContext() {
  llvm::DeleteContainerPointers(AllThreadContexts);
  LockedPtr<DestructorArray> Dtors = getDestructors();
  // Destructors are invoked in reverse object construction order.
  for (const auto &Dtor : reverse_range(*Dtors))
    Dtor();
}

void GlobalContext::dumpStrings() {
  if (!getFlags().getDumpStrings())
    return;
  OstreamLocker _(this);
  Ostream &Str = getStrDump();
  Str << "GlobalContext strings:\n";
  getStrings()->dump(Str);
}

void GlobalContext::dumpConstantLookupCounts() {
  if (!BuildDefs::dump())
    return;
  const bool DumpCounts = (getFlags().getVerbose() & IceV_ConstPoolStats) &&
                          getFlags().matchVerboseFocusOn("", 0);
  if (!DumpCounts)
    return;

  OstreamLocker _(this);
  Ostream &Str = getStrDump();
  Str << "Constant pool use stats: count+value+type\n";
#define X(WhichPool)                                                           \
  for (auto *C : getConstPool()->WhichPool.getConstantPool()) {                \
    Str << C->getLookupCount() << " ";                                        \
    C->dump(Str);                                                             \
    Str << " " << C->getType() << "\n";                                       \
  }
  X(Integers1);
  X(Integers8);
  X(Integers16);
  X(Integers32);
  X(Integers64);
  X(Floats);
  X(Doubles);
  X(Relocatables);
  X(ExternRelocatables);
#undef X
}

// TODO(stichnot): Consider adding thread-local caches of constant pool entries
// to reduce contention.

// All locking is done by the getConstantInt[0-9]+() target function.
Constant *GlobalContext::getConstantInt(Type Ty, int64_t Value) {
  switch (Ty) {
  case IceType_i1:
    return getConstantInt1(Value);
  case IceType_i8:
    return getConstantInt8(Value);
  case IceType_i16:
    return getConstantInt16(Value);
  case IceType_i32:
    return getConstantInt32(Value);
  case IceType_i64:
    return getConstantInt64(Value);
  default:
    llvm_unreachable("Bad integer type for getConstant");
  }
  return nullptr;
}

Constant *GlobalContext::getConstantInt1Internal(int8_t ConstantInt1) {
  ConstantInt1 &= INT8_C(1);
  return getConstPool()->Integers1.getOrAdd(this, ConstantInt1);
}

Constant *GlobalContext::getConstantInt8Internal(int8_t ConstantInt8) {
  return getConstPool()->Integers8.getOrAdd(this, ConstantInt8);
}

Constant *GlobalContext::getConstantInt16Internal(int16_t ConstantInt16) {
  return getConstPool()->Integers16.getOrAdd(this, ConstantInt16);
}

Constant *GlobalContext::getConstantInt32Internal(int32_t ConstantInt32) {
  return getConstPool()->Integers32.getOrAdd(this, ConstantInt32);
}

Constant *GlobalContext::getConstantInt64Internal(int64_t ConstantInt64) {
  return getConstPool()->Integers64.getOrAdd(this, ConstantInt64);
}

Constant *GlobalContext::getConstantFloat(float ConstantFloat) {
  return getConstPool()->Floats.getOrAdd(this, ConstantFloat);
}

Constant *GlobalContext::getConstantDouble(double ConstantDouble) {
  return getConstPool()->Doubles.getOrAdd(this, ConstantDouble);
}

Constant *GlobalContext::getConstantSymWithEmitString(
    const RelocOffsetT Offset, const RelocOffsetArray &OffsetExpr,
    GlobalString Name, const std::string &EmitString) {
  return getConstPool()->Relocatables.getOrAdd(
      this, RelocatableTuple(Offset, OffsetExpr, Name, EmitString));
}

Constant *GlobalContext::getConstantSym(RelocOffsetT Offset,
                                        GlobalString Name) {
  constexpr char EmptyEmitString[] = "";
  return getConstantSymWithEmitString(Offset, {}, Name, EmptyEmitString);
}
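
// External symbols get their own pool so that, e.g., the runtime helper
// functions registered in the constructor can later be enumerated separately
// via getConstantExternSyms().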
Constant *GlobalContext::getConstantExternSym(GlobalString Name) {
  constexpr RelocOffsetT Offset = 0;
  return getConstPool()->ExternRelocatables.getOrAdd(
      this, RelocatableTuple(Offset, {}, Name));
}

Constant *GlobalContext::getConstantUndef(Type Ty) {
  return getConstPool()->Undefs.getOrAdd(this, Ty);
}

Constant *GlobalContext::getConstantZero(Type Ty) {
  Constant *Zero = ConstZeroForType[Ty];
  if (Zero == nullptr)
    llvm::report_fatal_error("Unsupported constant type: " + typeStdString(Ty));
  return Zero;
}

// All locking is done by the getConstant*() target function.
Constant *GlobalContext::getConstantZeroInternal(Type Ty) {
  switch (Ty) {
  case IceType_i1:
    return getConstantInt1Internal(0);
  case IceType_i8:
    return getConstantInt8Internal(0);
  case IceType_i16:
    return getConstantInt16Internal(0);
  case IceType_i32:
    return getConstantInt32Internal(0);
  case IceType_i64:
    return getConstantInt64Internal(0);
  case IceType_f32:
    return getConstantFloat(0);
  case IceType_f64:
    return getConstantDouble(0);
  default:
    return nullptr;
  }
}

ConstantList GlobalContext::getConstantPool(Type Ty) {
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
    return getConstPool()->Integers8.getConstantPool();
  case IceType_i16:
    return getConstPool()->Integers16.getConstantPool();
  case IceType_i32:
    return getConstPool()->Integers32.getConstantPool();
  case IceType_i64:
    return getConstPool()->Integers64.getConstantPool();
  case IceType_f32:
    return getConstPool()->Floats.getConstantPool();
  case IceType_f64:
    return getConstPool()->Doubles.getConstantPool();
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32:
    llvm::report_fatal_error("Unsupported constant type: " + typeStdString(Ty));
    break;
  case IceType_void:
  case IceType_NUM:
    break;
  }
  llvm_unreachable("Unknown type");
}

ConstantList GlobalContext::getConstantExternSyms() {
  return getConstPool()->ExternRelocatables.getConstantPool();
}

GlobalString GlobalContext::getGlobalString(const std::string &Name) {
  return GlobalString::createWithString(this, Name);
}
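
// Jump tables are accumulated via addJumpTableData() during translation and
// retrieved here in a deterministic order for emission.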
JumpTableDataList GlobalContext::getJumpTables() {
  JumpTableDataList JumpTables(*getJumpTableList());
  // Make the order deterministic by sorting by function name and then by the
  // ID of the jump table within that function.
  std::sort(JumpTables.begin(), JumpTables.end(),
            [](const JumpTableData &A, const JumpTableData &B) {
              if (A.getFunctionName() != B.getFunctionName())
                return A.getFunctionName() < B.getFunctionName();
              return A.getId() < B.getId();
            });

  if (getFlags().getReorderPooledConstants()) {
    // If the reorder-pooled-constants option is set, also shuffle the jump
    // tables before emitting them.

    // Create a random number generator for jump table reordering, treating
    // jump tables as pooled constants.
    RandomNumberGenerator RNG(getFlags().getRandomSeed(),
                              RPE_PooledConstantReordering);
    RandomShuffle(JumpTables.begin(), JumpTables.end(),
                  [&RNG](uint64_t N) { return (uint32_t)RNG.next(N); });
  }
  return JumpTables;
}

void GlobalContext::addJumpTableData(JumpTableData JumpTable) {
  getJumpTableList()->emplace_back(std::move(JumpTable));
}

TimerStackIdT GlobalContext::newTimerStackID(const std::string &Name) {
  if (!BuildDefs::timers())
    return 0;
  auto Timers = getTimers();
  TimerStackIdT NewID = Timers->size();
  Timers->push_back(TimerStack(Name));
  return NewID;
}

TimerIdT GlobalContext::getTimerID(TimerStackIdT StackID,
                                   const std::string &Name) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  return Timers->at(StackID).getTimerID(Name);
}

void GlobalContext::pushTimer(TimerIdT ID, TimerStackIdT StackID) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  Timers->at(StackID).push(ID);
}

void GlobalContext::popTimer(TimerIdT ID, TimerStackIdT StackID) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  Timers->at(StackID).pop(ID);
}

void GlobalContext::resetTimer(TimerStackIdT StackID) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  Timers->at(StackID).reset();
}

std::string GlobalContext::getTimerName(TimerStackIdT StackID) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  return Timers->at(StackID).getName();
}

void GlobalContext::setTimerName(TimerStackIdT StackID,
                                 const std::string &NewName) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  Timers->at(StackID).setName(NewName);
}
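
// The timer stacks above are typically driven through the TimerMarker RAII
// helper (see TimerMarker::push() below), e.g.
//   TimerMarker T(TimerStack::TT_translateFunctions, this);
// which pushes the timer on construction and pops it when T goes out of
// scope.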

// Note: optQueueBlockingPush and optQueueBlockingPop use unique_ptr at the
// interface to take and transfer ownership, but they internally store the raw
// Cfg pointer in the work queue. This allows e.g. future queue optimizations
// such as the use of atomics to modify queue elements.
void GlobalContext::optQueueBlockingPush(std::unique_ptr<OptWorkItem> Item) {
  assert(Item);
  {
    TimerMarker _(TimerStack::TT_qTransPush, this);
    OptQ.blockingPush(std::move(Item));
  }
  if (getFlags().isSequential())
    translateFunctions();
}

std::unique_ptr<OptWorkItem> GlobalContext::optQueueBlockingPop() {
  TimerMarker _(TimerStack::TT_qTransPop, this);
  return OptQ.blockingPop(OptQWakeupSize);
}

void GlobalContext::emitQueueBlockingPush(
    std::unique_ptr<EmitterWorkItem> Item) {
  assert(Item);
  {
    TimerMarker _(TimerStack::TT_qEmitPush, this);
    EmitQ.blockingPush(std::move(Item));
  }
  if (getFlags().isSequential())
    emitItems();
}

std::unique_ptr<EmitterWorkItem> GlobalContext::emitQueueBlockingPop() {
  TimerMarker _(TimerStack::TT_qEmitPop, this);
  return EmitQ.blockingPop();
}

void GlobalContext::dumpStats(const Cfg *Func) {
  if (!getFlags().getDumpStats())
    return;
  if (Func == nullptr) {
    getStatsCumulative()->dump(Func, this);
  } else {
    ICE_TLS_GET_FIELD(TLS)->StatsFunction.dump(Func, this);
  }
}

void GlobalContext::dumpTimers(TimerStackIdT StackID, bool DumpCumulative) {
  if (!BuildDefs::timers())
    return;
  auto Timers = getTimers();
  assert(Timers->size() > StackID);
  OstreamLocker L(this);
  Timers->at(StackID).dump(getStrDump(), DumpCumulative);
}

void GlobalContext::dumpLocalTimers(const std::string &TimerNameOverride,
                                    TimerStackIdT StackID,
                                    bool DumpCumulative) {
  if (!BuildDefs::timers())
    return;
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(Timers->size() > StackID);
  // Temporarily override the thread-local timer name with the given name.
  // Don't do it permanently, because the final timer merge at the end expects
  // the thread-local timer names to be the same as the global timer name.
  auto OrigName = getTimerName(StackID);
  setTimerName(StackID, TimerNameOverride);
  {
    OstreamLocker _(this);
    Timers->at(StackID).dump(getStrDump(), DumpCumulative);
  }
  setTimerName(StackID, OrigName);
}

LockedPtr<StringPool>
GlobalStringPoolTraits::getStrings(const GlobalContext *PoolOwner) {
  return PoolOwner->getStrings();
}

TimerIdT TimerMarker::getTimerIdFromFuncName(GlobalContext *Ctx,
                                             const std::string &FuncName) {
  if (!BuildDefs::timers())
    return 0;
  if (!getFlags().getTimeEachFunction())
    return 0;
  return Ctx->getTimerID(GlobalContext::TSK_Funcs, FuncName);
}

void TimerMarker::push() {
  switch (StackID) {
  case GlobalContext::TSK_Default:
    Active = getFlags().getSubzeroTimingEnabled() ||
             !getFlags().getTimingFocusOnString().empty();
    break;
  case GlobalContext::TSK_Funcs:
    Active = getFlags().getTimeEachFunction();
    break;
  default:
    break;
  }
  if (Active)
    Ctx->pushTimer(ID, StackID);
}

void TimerMarker::pushCfg(const Cfg *Func) {
  Ctx = Func->getContext();
  Active = Func->getFocusedTiming() || getFlags().getSubzeroTimingEnabled();
  if (Active)
    Ctx->pushTimer(ID, StackID);
}

ICE_TLS_DEFINE_FIELD(GlobalContext::ThreadContext *, GlobalContext, TLS);

} // end of namespace Ice