//===- subzero/src/IceTargetLowering.cpp - Basic lowering implementation --===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the skeleton of the TargetLowering class.
///
/// Specifically, this invokes the appropriate lowering method for a given
/// instruction kind and drives global register allocation. It also implements
/// the non-deleted instruction iteration in LoweringContext.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLowering.h"

#include "IceBitVector.h"
#include "IceCfg.h" // setError()
#include "IceCfgNode.h"
#include "IceGlobalContext.h"
#include "IceGlobalInits.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceRegAlloc.h"

#include <string>
#include <vector>

#define TARGET_LOWERING_CLASS_FOR(t) Target_##t

// We prevent target-specific implementation details from leaking outside their
// implementations by forbidding #include of target-specific header files
// anywhere outside their own files. To create target-specific objects
// (TargetLowering, TargetDataLowering, and TargetHeaderLowering) we use the
// following named constructors. For reference, each target Foo needs to
// implement the following named constructors and initializer:
//
// namespace Foo {
//   unique_ptr<Ice::TargetLowering> createTargetLowering(Ice::Cfg *);
//   unique_ptr<Ice::TargetDataLowering>
//       createTargetDataLowering(Ice::GlobalContext*);
//   unique_ptr<Ice::TargetHeaderLowering>
//       createTargetHeaderLowering(Ice::GlobalContext *);
//   void staticInit(::Ice::GlobalContext *);
// }
#define SUBZERO_TARGET(X) \
  namespace X { \
  std::unique_ptr<::Ice::TargetLowering> \
  createTargetLowering(::Ice::Cfg *Func); \
  std::unique_ptr<::Ice::TargetDataLowering> \
  createTargetDataLowering(::Ice::GlobalContext *Ctx); \
  std::unique_ptr<::Ice::TargetHeaderLowering> \
  createTargetHeaderLowering(::Ice::GlobalContext *Ctx); \
  void staticInit(::Ice::GlobalContext *Ctx); \
  bool shouldBePooled(const ::Ice::Constant *C); \
  ::Ice::Type getPointerType(); \
  } // end of namespace X
#include "SZTargets.def"
#undef SUBZERO_TARGET

namespace Ice {
void LoweringContext::init(CfgNode *N) {
  Node = N;
  End = getNode()->getInsts().end();
  rewind();
  advanceForward(Next);
}

void LoweringContext::rewind() {
  Begin = getNode()->getInsts().begin();
  Cur = Begin;
  skipDeleted(Cur);
  Next = Cur;
  availabilityReset();
}

void LoweringContext::insert(Inst *Instr) {
  getNode()->getInsts().insert(Next, Instr);
  LastInserted = Instr;
}

void LoweringContext::skipDeleted(InstList::iterator &I) const {
  while (I != End && I->isDeleted())
    ++I;
}

void LoweringContext::advanceForward(InstList::iterator &I) const {
  if (I != End) {
    ++I;
    skipDeleted(I);
  }
}

Inst *LoweringContext::getLastInserted() const {
  assert(LastInserted);
  return LastInserted;
}

void LoweringContext::availabilityReset() {
  LastDest = nullptr;
  LastSrc = nullptr;
}

void LoweringContext::availabilityUpdate() {
  availabilityReset();
  Inst *Instr = LastInserted;
  if (Instr == nullptr)
    return;
  if (!Instr->isVarAssign())
    return;
  // Since isVarAssign() is true, the source operand must be a Variable.
  LastDest = Instr->getDest();
  LastSrc = llvm::cast<Variable>(Instr->getSrc(0));
}

Variable *LoweringContext::availabilityGet(Operand *Src) const {
  assert(Src);
  if (Src == LastDest)
    return LastSrc;
  return nullptr;
}

namespace {

void printRegisterSet(Ostream &Str, const SmallBitVector &Bitset,
                      std::function<std::string(RegNumT)> getRegName,
                      const std::string &LineIndentString) {
  constexpr size_t RegistersPerLine = 16;
  size_t Count = 0;
  for (RegNumT RegNum : RegNumBVIter(Bitset)) {
    if (Count == 0) {
      Str << LineIndentString;
    } else {
      Str << ",";
    }
    if (Count > 0 && Count % RegistersPerLine == 0)
      Str << "\n" << LineIndentString;
    ++Count;
    Str << getRegName(RegNum);
  }
  if (Count)
    Str << "\n";
}

// Splits "<class>:<reg>" into "<class>" plus "<reg>". If there is no <class>
// component, the result is "" plus "<reg>".
void splitToClassAndName(const std::string &RegName, std::string *SplitRegClass,
                         std::string *SplitRegName) {
  constexpr const char Separator[] = ":";
  constexpr size_t SeparatorWidth = llvm::array_lengthof(Separator) - 1;
  size_t Pos = RegName.find(Separator);
  if (Pos == std::string::npos) {
    *SplitRegClass = "";
    *SplitRegName = RegName;
  } else {
    *SplitRegClass = RegName.substr(0, Pos);
    *SplitRegName = RegName.substr(Pos + SeparatorWidth);
  }
}

LLVM_ATTRIBUTE_NORETURN void badTargetFatalError(TargetArch Target) {
  llvm::report_fatal_error("Unsupported target: " +
                           std::string(targetArchString(Target)));
}

} // end of anonymous namespace

void TargetLowering::filterTypeToRegisterSet(
    GlobalContext *Ctx, int32_t NumRegs, SmallBitVector TypeToRegisterSet[],
    size_t TypeToRegisterSetSize,
    std::function<std::string(RegNumT)> getRegName,
    std::function<const char *(RegClass)> getRegClassName) {
  std::vector<SmallBitVector> UseSet(TypeToRegisterSetSize,
                                     SmallBitVector(NumRegs));
  std::vector<SmallBitVector> ExcludeSet(TypeToRegisterSetSize,
                                         SmallBitVector(NumRegs));

  std::unordered_map<std::string, RegNumT> RegNameToIndex;
  for (int32_t RegIndex = 0; RegIndex < NumRegs; ++RegIndex) {
    const auto RegNum = RegNumT::fromInt(RegIndex);
    RegNameToIndex[getRegName(RegNum)] = RegNum;
  }

  std::vector<std::string> BadRegNames;

  // The processRegList function iterates across the RegNames vector. Each
  // entry in the vector is a string of the form "<reg>" or "<class>:<reg>".
  // The register class and register number are computed, and the corresponding
  // bit is set in RegSet[][]. If "<class>:" is missing, then the bit is set
  // for all classes.
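  // For example, an entry such as "i32:eax" would affect only the register
  // class named "i32", while a bare "eax" would affect every class (the names
  // here are illustrative; actual names come from getRegName/getRegClassName).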
  auto processRegList = [&](const std::vector<std::string> &RegNames,
                            std::vector<SmallBitVector> &RegSet) {
    for (const std::string &RegClassAndName : RegNames) {
      std::string RClass;
      std::string RName;
      splitToClassAndName(RegClassAndName, &RClass, &RName);
      if (!RegNameToIndex.count(RName)) {
        BadRegNames.push_back(RName);
        continue;
      }
      const int32_t RegIndex = RegNameToIndex.at(RName);
      for (SizeT TypeIndex = 0; TypeIndex < TypeToRegisterSetSize;
           ++TypeIndex) {
        if (RClass.empty() ||
            RClass == getRegClassName(static_cast<RegClass>(TypeIndex))) {
          RegSet[TypeIndex][RegIndex] = TypeToRegisterSet[TypeIndex][RegIndex];
        }
      }
    }
  };

  processRegList(getFlags().getUseRestrictedRegisters(), UseSet);
  processRegList(getFlags().getExcludedRegisters(), ExcludeSet);

  if (!BadRegNames.empty()) {
    std::string Buffer;
    llvm::raw_string_ostream StrBuf(Buffer);
    StrBuf << "Unrecognized use/exclude registers:";
    for (const auto &RegName : BadRegNames)
      StrBuf << " " << RegName;
    llvm::report_fatal_error(StrBuf.str());
  }

  // Apply filters.
  for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) {
    SmallBitVector *TypeBitSet = &TypeToRegisterSet[TypeIndex];
    SmallBitVector *UseBitSet = &UseSet[TypeIndex];
    SmallBitVector *ExcludeBitSet = &ExcludeSet[TypeIndex];
    if (UseBitSet->any())
      *TypeBitSet = *UseBitSet;
    (*TypeBitSet).reset(*ExcludeBitSet);
  }

  // Display filtered register sets, if requested.
  if (BuildDefs::dump() && NumRegs &&
      (getFlags().getVerbose() & IceV_AvailableRegs)) {
    Ostream &Str = Ctx->getStrDump();
    const std::string Indent = "  ";
    const std::string IndentTwice = Indent + Indent;
    Str << "Registers available for register allocation:\n";
    for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) {
      Str << Indent << getRegClassName(static_cast<RegClass>(TypeIndex))
          << ":\n";
      printRegisterSet(Str, TypeToRegisterSet[TypeIndex], getRegName,
                       IndentTwice);
    }
    Str << "\n";
  }
}

std::unique_ptr<TargetLowering>
TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
  switch (Target) {
  default:
    badTargetFatalError(Target);
#define SUBZERO_TARGET(X) \
  case TARGET_LOWERING_CLASS_FOR(X): \
    return ::X::createTargetLowering(Func);
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

void TargetLowering::staticInit(GlobalContext *Ctx) {
  const TargetArch Target = getFlags().getTargetArch();
  // Call the specified target's static initializer.
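  // The per-target InitGuard in the expansion below ensures each target's
  // staticInit() runs at most once per process.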
  switch (Target) {
  default:
    badTargetFatalError(Target);
#define SUBZERO_TARGET(X) \
  case TARGET_LOWERING_CLASS_FOR(X): { \
    static bool InitGuard##X = false; \
    if (InitGuard##X) { \
      return; \
    } \
    InitGuard##X = true; \
    ::X::staticInit(Ctx); \
  } break;
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

bool TargetLowering::shouldBePooled(const Constant *C) {
  const TargetArch Target = getFlags().getTargetArch();
  switch (Target) {
  default:
    return false;
#define SUBZERO_TARGET(X) \
  case TARGET_LOWERING_CLASS_FOR(X): \
    return ::X::shouldBePooled(C);
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

::Ice::Type TargetLowering::getPointerType() {
  const TargetArch Target = getFlags().getTargetArch();
  switch (Target) {
  default:
    return ::Ice::IceType_void;
#define SUBZERO_TARGET(X) \
  case TARGET_LOWERING_CLASS_FOR(X): \
    return ::X::getPointerType();
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

TargetLowering::SandboxType
TargetLowering::determineSandboxTypeFromFlags(const ClFlags &Flags) {
  assert(!Flags.getUseSandboxing() || !Flags.getUseNonsfi());
  if (Flags.getUseNonsfi()) {
    return TargetLowering::ST_Nonsfi;
  }
  if (Flags.getUseSandboxing()) {
    return TargetLowering::ST_NaCl;
  }
  return TargetLowering::ST_None;
}

TargetLowering::TargetLowering(Cfg *Func)
    : Func(Func), Ctx(Func->getContext()),
      SandboxingType(determineSandboxTypeFromFlags(getFlags())) {}

TargetLowering::AutoBundle::AutoBundle(TargetLowering *Target,
                                       InstBundleLock::Option Option)
    : Target(Target), NeedSandboxing(getFlags().getUseSandboxing()) {
  assert(!Target->AutoBundling);
  Target->AutoBundling = true;
  if (NeedSandboxing) {
    Target->_bundle_lock(Option);
  }
}

TargetLowering::AutoBundle::~AutoBundle() {
  assert(Target->AutoBundling);
  Target->AutoBundling = false;
  if (NeedSandboxing) {
    Target->_bundle_unlock();
  }
}

void TargetLowering::genTargetHelperCalls() {
  TimerMarker T(TimerStack::TT_genHelpers, Func);
  Utils::BoolFlagSaver _(GeneratingTargetHelpers, true);
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      PostIncrLoweringContext _(Context);
      genTargetHelperCallFor(iteratorToInst(Context.getCur()));
    }
  }
}

void TargetLowering::doAddressOpt() {
  doAddressOptOther();
  if (llvm::isa<InstLoad>(*Context.getCur()))
    doAddressOptLoad();
  else if (llvm::isa<InstStore>(*Context.getCur()))
    doAddressOptStore();
  else if (auto *Intrinsic =
               llvm::dyn_cast<InstIntrinsicCall>(&*Context.getCur())) {
    if (Intrinsic->getIntrinsicInfo().ID == Intrinsics::LoadSubVector)
      doAddressOptLoadSubVector();
    else if (Intrinsic->getIntrinsicInfo().ID == Intrinsics::StoreSubVector)
      doAddressOptStoreSubVector();
  }
  Context.advanceCur();
  Context.advanceNext();
}

void TargetLowering::doNopInsertion(RandomNumberGenerator &RNG) {
  Inst *I = iteratorToInst(Context.getCur());
  bool ShouldSkip = llvm::isa<InstFakeUse>(I) || llvm::isa<InstFakeDef>(I) ||
                    llvm::isa<InstFakeKill>(I) || I->isRedundantAssign() ||
                    I->isDeleted();
  if (!ShouldSkip) {
    int Probability = getFlags().getNopProbabilityAsPercentage();
    for (int I = 0; I < getFlags().getMaxNopsPerInstruction(); ++I) {
      randomlyInsertNop(Probability / 100.0, RNG);
    }
  }
}

// Lowers a single instruction according to the information in Context, by
// checking the Context.Cur instruction kind and calling the appropriate
// lowering method. The lowering method should insert target instructions at
// the Cur.Next insertion point, and should not delete the Context.Cur
// instruction or advance Context.Cur.
//
// The lowering method may look ahead in the instruction stream as desired, and
// lower additional instructions in conjunction with the current one, for
// example fusing a compare and branch. If it does, it should advance
// Context.Cur to point to the next non-deleted instruction to process, and it
// should delete any additional instructions it consumes.
void TargetLowering::lower() {
  assert(!Context.atEnd());
  Inst *Instr = iteratorToInst(Context.getCur());
  Instr->deleteIfDead();
  if (!Instr->isDeleted() && !llvm::isa<InstFakeDef>(Instr) &&
      !llvm::isa<InstFakeUse>(Instr)) {
    // Mark the current instruction as deleted before lowering, otherwise the
    // Dest variable will likely get marked as non-SSA. See
    // Variable::setDefinition(). However, just pass-through FakeDef and
    // FakeUse instructions that might have been inserted prior to lowering.
    Instr->setDeleted();
    switch (Instr->getKind()) {
    case Inst::Alloca:
      lowerAlloca(llvm::cast<InstAlloca>(Instr));
      break;
    case Inst::Arithmetic:
      lowerArithmetic(llvm::cast<InstArithmetic>(Instr));
      break;
    case Inst::Assign:
      lowerAssign(llvm::cast<InstAssign>(Instr));
      break;
    case Inst::Br:
      lowerBr(llvm::cast<InstBr>(Instr));
      break;
    case Inst::Breakpoint:
      lowerBreakpoint(llvm::cast<InstBreakpoint>(Instr));
      break;
    case Inst::Call:
      lowerCall(llvm::cast<InstCall>(Instr));
      break;
    case Inst::Cast:
      lowerCast(llvm::cast<InstCast>(Instr));
      break;
    case Inst::ExtractElement:
      lowerExtractElement(llvm::cast<InstExtractElement>(Instr));
      break;
    case Inst::Fcmp:
      lowerFcmp(llvm::cast<InstFcmp>(Instr));
      break;
    case Inst::Icmp:
      lowerIcmp(llvm::cast<InstIcmp>(Instr));
      break;
    case Inst::InsertElement:
      lowerInsertElement(llvm::cast<InstInsertElement>(Instr));
      break;
    case Inst::IntrinsicCall: {
      auto *Call = llvm::cast<InstIntrinsicCall>(Instr);
      if (Call->getIntrinsicInfo().ReturnsTwice)
        setCallsReturnsTwice(true);
      lowerIntrinsicCall(Call);
      break;
    }
    case Inst::Load:
      lowerLoad(llvm::cast<InstLoad>(Instr));
      break;
    case Inst::Phi:
      lowerPhi(llvm::cast<InstPhi>(Instr));
      break;
    case Inst::Ret:
      lowerRet(llvm::cast<InstRet>(Instr));
      break;
    case Inst::Select:
      lowerSelect(llvm::cast<InstSelect>(Instr));
      break;
    case Inst::ShuffleVector:
      lowerShuffleVector(llvm::cast<InstShuffleVector>(Instr));
      break;
    case Inst::Store:
      lowerStore(llvm::cast<InstStore>(Instr));
      break;
    case Inst::Switch:
      lowerSwitch(llvm::cast<InstSwitch>(Instr));
      break;
    case Inst::Unreachable:
      lowerUnreachable(llvm::cast<InstUnreachable>(Instr));
      break;
    default:
      lowerOther(Instr);
      break;
    }

    postLower();
  }

  Context.advanceCur();
  Context.advanceNext();
}

void TargetLowering::lowerInst(CfgNode *Node, InstList::iterator Next,
                               InstHighLevel *Instr) {
  // TODO(stichnot): Consider modifying the design/implementation to avoid
  // multiple init() calls when using lowerInst() to lower several instructions
  // in the same node.
  Context.init(Node);
  Context.setNext(Next);
  Context.insert(Instr);
  --Next;
  assert(iteratorToInst(Next) == Instr);
  Context.setCur(Next);
  lower();
}

void TargetLowering::lowerOther(const Inst *Instr) {
  (void)Instr;
  Func->setError("Can't lower unsupported instruction type");
}

// Drives register allocation, allowing all physical registers (except perhaps
// for the frame pointer) to be allocated. This set of registers could
// potentially be parameterized if we want to restrict registers e.g. for
// performance testing.
void TargetLowering::regAlloc(RegAllocKind Kind) {
  TimerMarker T(TimerStack::TT_regAlloc, Func);
  LinearScan LinearScan(Func);
  RegSetMask RegInclude = RegSet_None;
  RegSetMask RegExclude = RegSet_None;
  RegInclude |= RegSet_CallerSave;
  RegInclude |= RegSet_CalleeSave;
  if (hasFramePointer())
    RegExclude |= RegSet_FramePointer;
  SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
  bool Repeat = (Kind == RAK_Global && getFlags().getRepeatRegAlloc());
  CfgSet<Variable *> EmptySet;
  do {
    LinearScan.init(Kind, EmptySet);
    LinearScan.scan(RegMask, getFlags().getRandomizeRegisterAllocation());
    if (!LinearScan.hasEvictions())
      Repeat = false;
    Kind = RAK_SecondChance;
  } while (Repeat);
  // TODO(stichnot): Run the register allocator one more time to do stack slot
  // coalescing. The idea would be to initialize the Unhandled list with the
  // set of Variables that have no register and a non-empty live range, and
  // model an infinite number of registers. Maybe use the register aliasing
  // mechanism to get better packing of narrower slots.
  if (getFlags().getSplitGlobalVars())
    postRegallocSplitting(RegMask);
}

namespace {
CfgVector<Inst *> getInstructionsInRange(CfgNode *Node, InstNumberT Start,
                                         InstNumberT End) {
  CfgVector<Inst *> Result;
  bool Started = false;
  auto Process = [&](InstList &Insts) {
    for (auto &Instr : Insts) {
      if (Instr.isDeleted()) {
        continue;
      }
      if (Instr.getNumber() == Start) {
        Started = true;
      }
      if (Started) {
        Result.emplace_back(&Instr);
      }
      if (Instr.getNumber() == End) {
        break;
      }
    }
  };
  Process(Node->getPhis());
  Process(Node->getInsts());
  // TODO(manasijm): Investigate why checking >= End significantly changes
  // output. Should not happen when renumbering produces monotonically
  // increasing instruction numbers and live ranges begin and end on
  // non-deleted instructions.
  return Result;
}
}

void TargetLowering::postRegallocSplitting(const SmallBitVector &RegMask) {
  // Splits the live ranges of global (multi-block) variables and runs the
  // register allocator to find registers for as many of the new variables as
  // possible.
  // TODO(manasijm): Merge the small live ranges back into multi-block ones
  // when the variables get the same register. This will reduce the amount of
  // new instructions inserted. This might involve a full dataflow analysis.
  // Also, modify the preference mechanism in the register allocator to match.

  TimerMarker _(TimerStack::TT_splitGlobalVars, Func);
  CfgSet<Variable *> SplitCandidates;

  // Find variables that do not have registers but are allowed to. Also skip
  // variables with single-segment live ranges, as they are not split further
  // in this function.
  for (Variable *Var : Func->getVariables()) {
    if (!Var->mustNotHaveReg() && !Var->hasReg()) {
      if (Var->getLiveRange().getNumSegments() > 1)
        SplitCandidates.insert(Var);
    }
  }
  if (SplitCandidates.empty())
    return;

  CfgSet<Variable *> ExtraVars;

  struct UseInfo {
    Variable *Replacing = nullptr;
    Inst *FirstUse = nullptr;
    Inst *LastDef = nullptr;
    SizeT UseCount = 0;
  };
  CfgUnorderedMap<Variable *, UseInfo> VarInfo;
  // Split the live ranges of the viable variables by node.
  // Compute metadata (UseInfo) for each of the resulting variables.
  for (auto *Var : SplitCandidates) {
    for (auto &Segment : Var->getLiveRange().getSegments()) {
      UseInfo Info;
      Info.Replacing = Var;
      auto *Node = Var->getLiveRange().getNodeForSegment(Segment.first);

      for (auto *Instr :
           getInstructionsInRange(Node, Segment.first, Segment.second)) {
        for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
          // It's safe to iterate over the top-level src operands rather than
          // using FOREACH_VAR_IN_INST(), because any variables inside e.g.
          // mem operands should already have registers.
          if (auto *Var = llvm::dyn_cast<Variable>(Instr->getSrc(i))) {
            if (Var == Info.Replacing) {
              if (Info.FirstUse == nullptr && !llvm::isa<InstPhi>(Instr)) {
                Info.FirstUse = Instr;
              }
              Info.UseCount++;
            }
          }
        }
        if (Instr->getDest() == Info.Replacing && !llvm::isa<InstPhi>(Instr)) {
          Info.LastDef = Instr;
        }
      }

      static constexpr SizeT MinUseThreshold = 3;
      // Skip if the variable has fewer than `MinUseThreshold` uses in the
      // segment.
      if (Info.UseCount < MinUseThreshold)
        continue;

      if (!Info.FirstUse && !Info.LastDef) {
        continue;
      }

      LiveRange LR;
      LR.addSegment(Segment);
      Variable *NewVar = Func->makeVariable(Var->getType());

      NewVar->setLiveRange(LR);

      VarInfo[NewVar] = Info;

      ExtraVars.insert(NewVar);
    }
  }
  // Run the register allocator with all these new variables included.
  LinearScan RegAlloc(Func);
  RegAlloc.init(RAK_Global, SplitCandidates);
  RegAlloc.scan(RegMask, getFlags().getRandomizeRegisterAllocation());

  // Modify the Cfg to use the new variables that now have registers.
  for (auto *ExtraVar : ExtraVars) {
    if (!ExtraVar->hasReg()) {
      continue;
    }

    auto &Info = VarInfo[ExtraVar];

    assert(ExtraVar->getLiveRange().getSegments().size() == 1);
    auto Segment = ExtraVar->getLiveRange().getSegments()[0];

    auto *Node =
        Info.Replacing->getLiveRange().getNodeForSegment(Segment.first);

    auto RelevantInsts =
        getInstructionsInRange(Node, Segment.first, Segment.second);

    if (RelevantInsts.empty())
      continue;

    // Replace the old variables.
    for (auto *Instr : RelevantInsts) {
      if (llvm::isa<InstPhi>(Instr))
        continue;
      // TODO(manasijm): Figure out how to safely enable replacing phi dest
      // variables. The issue is that we cannot insert low level mov
      // instructions into the PhiList.
      for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
        // FOREACH_VAR_IN_INST() not needed. Same logic as above.
        if (auto *Var = llvm::dyn_cast<Variable>(Instr->getSrc(i))) {
          if (Var == Info.Replacing) {
            Instr->replaceSource(i, ExtraVar);
          }
        }
      }
      if (Instr->getDest() == Info.Replacing) {
        Instr->replaceDest(ExtraVar);
      }
    }

    assert(Info.FirstUse != Info.LastDef);
    assert(Info.FirstUse || Info.LastDef);

    // Insert spill code
    if (Info.FirstUse != nullptr) {
      auto *NewInst =
          Func->getTarget()->createLoweredMove(ExtraVar, Info.Replacing);
      Node->getInsts().insert(instToIterator(Info.FirstUse), NewInst);
    }
    if (Info.LastDef != nullptr) {
      auto *NewInst =
          Func->getTarget()->createLoweredMove(Info.Replacing, ExtraVar);
      Node->getInsts().insertAfter(instToIterator(Info.LastDef), NewInst);
    }
  }
}

void TargetLowering::markRedefinitions() {
  // Find (non-SSA) instructions where the Dest variable appears in some source
  // operand, and set the IsDestRedefined flag to keep liveness analysis
  // consistent.
  for (auto Instr = Context.getCur(), E = Context.getNext(); Instr != E;
       ++Instr) {
    if (Instr->isDeleted())
      continue;
    Variable *Dest = Instr->getDest();
    if (Dest == nullptr)
      continue;
    FOREACH_VAR_IN_INST(Var, *Instr) {
      if (Var == Dest) {
        Instr->setDestRedefined();
        break;
      }
    }
  }
}

void TargetLowering::addFakeDefUses(const Inst *Instr) {
  FOREACH_VAR_IN_INST(Var, *Instr) {
    if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Var)) {
      Context.insert<InstFakeUse>(Var64->getLo());
      Context.insert<InstFakeUse>(Var64->getHi());
    } else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Var)) {
      for (Variable *Var : VarVec->getContainers()) {
        Context.insert<InstFakeUse>(Var);
      }
    } else {
      Context.insert<InstFakeUse>(Var);
    }
  }
  Variable *Dest = Instr->getDest();
  if (Dest == nullptr)
    return;
  if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Dest)) {
    Context.insert<InstFakeDef>(Var64->getLo());
    Context.insert<InstFakeDef>(Var64->getHi());
  } else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Dest)) {
    for (Variable *Var : VarVec->getContainers()) {
      Context.insert<InstFakeDef>(Var);
    }
  } else {
    Context.insert<InstFakeDef>(Dest);
  }
}

void TargetLowering::sortVarsByAlignment(VarList &Dest,
                                         const VarList &Source) const {
  Dest = Source;
  // Instead of std::sort, we could do a bucket sort with log2(alignment) as
  // the buckets, if performance is an issue.
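  // The comparator below orders wider (and hence more strictly aligned) stack
  // types first and breaks ties by variable index so the order is
  // deterministic.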
  std::sort(Dest.begin(), Dest.end(),
            [this](const Variable *V1, const Variable *V2) {
              const size_t WidthV1 = typeWidthInBytesOnStack(V1->getType());
              const size_t WidthV2 = typeWidthInBytesOnStack(V2->getType());
              if (WidthV1 == WidthV2)
                return V1->getIndex() < V2->getIndex();
              return WidthV1 > WidthV2;
            });
}

void TargetLowering::getVarStackSlotParams(
    VarList &SortedSpilledVariables, SmallBitVector &RegsUsed,
    size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
    uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes,
    std::function<bool(Variable *)> TargetVarHook) {
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  BitVector IsVarReferenced(Func->getNumVariables());
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &Instr : Node->getInsts()) {
      if (Instr.isDeleted())
        continue;
      if (const Variable *Var = Instr.getDest())
        IsVarReferenced[Var->getIndex()] = true;
      FOREACH_VAR_IN_INST(Var, Instr) {
        IsVarReferenced[Var->getIndex()] = true;
      }
    }
  }

  // If SimpleCoalescing is false, each variable without a register gets its
  // own unique stack slot, which leads to large stack frames. If
  // SimpleCoalescing is true, then each "global" variable without a register
  // gets its own slot, but "local" variable slots are reused across basic
  // blocks. E.g., if A and B are local to block 1 and C is local to block 2,
  // then C may share a slot with A or B.
  //
  // We cannot coalesce stack slots if this function calls a "returns twice"
  // function. In that case, basic blocks may be revisited, and variables local
  // to those basic blocks are actually live until after the called function
  // returns a second time.
  const bool SimpleCoalescing = !callsReturnsTwice();

  CfgVector<size_t> LocalsSize(Func->getNumNodes());
  const VarList &Variables = Func->getVariables();
  VarList SpilledVariables;
  for (Variable *Var : Variables) {
    if (Var->hasReg()) {
      // Don't consider a rematerializable variable to be an actual register
      // use (specifically of the frame pointer). Otherwise, the prolog may
      // decide to save the frame pointer twice - once because of the explicit
      // need for a frame pointer, and once because of an active use of a
      // callee-save register.
      if (!Var->isRematerializable())
        RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    // An argument either does not need a stack slot (if passed in a register)
    // or already has one (if passed on the stack).
    if (Var->getIsArg()) {
      if (!Var->hasReg()) {
        assert(!Var->hasStackOffset());
        Var->setHasStackOffset();
      }
      continue;
    }
    // An unreferenced variable doesn't need a stack slot.
    if (!IsVarReferenced[Var->getIndex()])
      continue;
    // Let the target check the variable; a target-specific variable may end up
    // sharing stack slots and not need accounting here.
    if (TargetVarHook(Var))
      continue;
    assert(!Var->hasStackOffset());
    Var->setHasStackOffset();
    SpilledVariables.push_back(Var);
  }

  SortedSpilledVariables.reserve(SpilledVariables.size());
  sortVarsByAlignment(SortedSpilledVariables, SpilledVariables);

  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    // We have sorted by alignment, so the first variable we encounter that is
    // located in each area determines the max alignment for the area.
    if (!*SpillAreaAlignmentBytes)
      *SpillAreaAlignmentBytes = Increment;
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        *GlobalsSize += Increment;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        if (LocalsSize[NodeIndex] > *SpillAreaSizeBytes)
          *SpillAreaSizeBytes = LocalsSize[NodeIndex];
        if (!*LocalsSlotsAlignmentBytes)
          *LocalsSlotsAlignmentBytes = Increment;
      }
    } else {
      *SpillAreaSizeBytes += Increment;
    }
  }
  // For testing legalization of large stack offsets on targets with limited
  // offset bits in instruction encodings, add some padding.
  *SpillAreaSizeBytes += getFlags().getTestStackExtra();
}

void TargetLowering::alignStackSpillAreas(uint32_t SpillAreaStartOffset,
                                          uint32_t SpillAreaAlignmentBytes,
                                          size_t GlobalsSize,
                                          uint32_t LocalsSlotsAlignmentBytes,
                                          uint32_t *SpillAreaPaddingBytes,
                                          uint32_t *LocalsSlotsPaddingBytes) {
  if (SpillAreaAlignmentBytes) {
    uint32_t PaddingStart = SpillAreaStartOffset;
    uint32_t SpillAreaStart =
        Utils::applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
    *SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
  }

  // If there are separate globals and locals areas, make sure the locals area
  // is aligned by padding the end of the globals area.
  if (LocalsSlotsAlignmentBytes) {
    uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
    GlobalsAndSubsequentPaddingSize =
        Utils::applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
    *LocalsSlotsPaddingBytes = GlobalsAndSubsequentPaddingSize - GlobalsSize;
  }
}

void TargetLowering::assignVarStackSlots(VarList &SortedSpilledVariables,
                                         size_t SpillAreaPaddingBytes,
                                         size_t SpillAreaSizeBytes,
                                         size_t GlobalsAndSubsequentPaddingSize,
                                         bool UsesFramePointer) {
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  // For testing legalization of large stack offsets on targets with limited
  // offset bits in instruction encodings, add some padding. This assumes that
  // SpillAreaSizeBytes has accounted for the extra test padding. When
  // UseFramePointer is true, the offset depends on the padding, not just the
  // SpillAreaSizeBytes. On the other hand, when UseFramePointer is false, the
  // offsets depend on the gap between SpillAreaSizeBytes and
  // SpillAreaPaddingBytes, so we don't increment that.
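  // Note: when UsesFramePointer is true, the offsets assigned below are
  // negative (frame-pointer relative); otherwise they are computed relative to
  // SpillAreaSizeBytes.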
  size_t TestPadding = getFlags().getTestStackExtra();
  if (UsesFramePointer)
    SpillAreaPaddingBytes += TestPadding;
  size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
  size_t NextStackOffset = SpillAreaPaddingBytes;
  CfgVector<size_t> LocalsSize(Func->getNumNodes());
  const bool SimpleCoalescing = !callsReturnsTwice();

  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSpaceUsed += Increment;
        NextStackOffset = GlobalsSpaceUsed;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = SpillAreaPaddingBytes +
                          GlobalsAndSubsequentPaddingSize +
                          LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    if (UsesFramePointer)
      Var->setStackOffset(-NextStackOffset);
    else
      Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
  }
}

InstCall *TargetLowering::makeHelperCall(RuntimeHelper FuncID, Variable *Dest,
                                         SizeT MaxSrcs) {
  constexpr bool HasTailCall = false;
  Constant *CallTarget = Ctx->getRuntimeHelperFunc(FuncID);
  InstCall *Call =
      InstCall::create(Func, MaxSrcs, Dest, CallTarget, HasTailCall);
  return Call;
}

bool TargetLowering::shouldOptimizeMemIntrins() {
  return Func->getOptLevel() >= Opt_1 || getFlags().getForceMemIntrinOpt();
}

void TargetLowering::scalarizeArithmetic(InstArithmetic::OpKind Kind,
                                         Variable *Dest, Operand *Src0,
                                         Operand *Src1) {
  scalarizeInstruction(
      Dest, [this, Kind](Variable *Dest, Operand *Src0, Operand *Src1) {
        return Context.insert<InstArithmetic>(Kind, Dest, Src0, Src1);
      }, Src0, Src1);
}

void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C,
                                       const char *Suffix) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  const std::string &EmitStr = C->getEmitString();
  if (!EmitStr.empty()) {
    // C has a custom emit string, so we use it instead of the canonical
    // Name + Offset form.
    Str << EmitStr;
    return;
  }
  Str << C->getName() << Suffix;
  RelocOffsetT Offset = C->getOffset();
  if (Offset) {
    if (Offset > 0)
      Str << "+";
    Str << Offset;
  }
}

std::unique_ptr<TargetDataLowering>
TargetDataLowering::createLowering(GlobalContext *Ctx) {
  TargetArch Target = getFlags().getTargetArch();
  switch (Target) {
  default:
    badTargetFatalError(Target);
#define SUBZERO_TARGET(X) \
  case TARGET_LOWERING_CLASS_FOR(X): \
    return ::X::createTargetDataLowering(Ctx);
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

TargetDataLowering::~TargetDataLowering() = default;

namespace {

// dataSectionSuffix decides whether to use SectionSuffix or VarName as the
// data section suffix. Essentially, when using separate data sections for
// globals, SectionSuffix is not necessary.
std::string dataSectionSuffix(const std::string &SectionSuffix,
                              const std::string &VarName,
                              const bool DataSections) {
  if (SectionSuffix.empty() && !DataSections) {
    return "";
  }

  if (DataSections) {
    // With data sections we don't need to use the SectionSuffix.
    return "." + VarName;
  }

  assert(!SectionSuffix.empty());
  return "." + SectionSuffix;
}

} // end of anonymous namespace

void TargetDataLowering::emitGlobal(const VariableDeclaration &Var,
                                    const std::string &SectionSuffix) {
  if (!BuildDefs::dump())
    return;

  // If external and not initialized, this must be a cross test. Don't generate
  // a declaration for such cases.
  const bool IsExternal = Var.isExternal() || getFlags().getDisableInternal();
  if (IsExternal && !Var.hasInitializer())
    return;

  Ostream &Str = Ctx->getStrEmit();
  const bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
  const bool IsConstant = Var.getIsConstant();
  const SizeT Size = Var.getNumBytes();
  const std::string Name = Var.getName().toString();

  Str << "\t.type\t" << Name << ",%object\n";

  const bool UseDataSections = getFlags().getDataSections();
  const bool UseNonsfi = getFlags().getUseNonsfi();
  const std::string Suffix =
      dataSectionSuffix(SectionSuffix, Name, UseDataSections);
  if (IsConstant && UseNonsfi)
    Str << "\t.section\t.data.rel.ro" << Suffix << ",\"aw\",%progbits\n";
  else if (IsConstant)
    Str << "\t.section\t.rodata" << Suffix << ",\"a\",%progbits\n";
  else if (HasNonzeroInitializer)
    Str << "\t.section\t.data" << Suffix << ",\"aw\",%progbits\n";
  else
    Str << "\t.section\t.bss" << Suffix << ",\"aw\",%nobits\n";

  if (IsExternal)
    Str << "\t.globl\t" << Name << "\n";

  const uint32_t Align = Var.getAlignment();
  if (Align > 1) {
    assert(llvm::isPowerOf2_32(Align));
    // Use the .p2align directive, since the .align N directive can either
    // interpret N as bytes, or power of 2 bytes, depending on the target.
    Str << "\t.p2align\t" << llvm::Log2_32(Align) << "\n";
  }

  Str << Name << ":\n";

  if (HasNonzeroInitializer) {
    for (const auto *Init : Var.getInitializers()) {
      switch (Init->getKind()) {
      case VariableDeclaration::Initializer::DataInitializerKind: {
        const auto &Data =
            llvm::cast<VariableDeclaration::DataInitializer>(Init)
                ->getContents();
        for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
          Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
        }
        break;
      }
      case VariableDeclaration::Initializer::ZeroInitializerKind:
        Str << "\t.zero\t" << Init->getNumBytes() << "\n";
        break;
      case VariableDeclaration::Initializer::RelocInitializerKind: {
        const auto *Reloc =
            llvm::cast<VariableDeclaration::RelocInitializer>(Init);
        Str << "\t" << getEmit32Directive() << "\t";
        Str << Reloc->getDeclaration()->getName();
        if (Reloc->hasFixup()) {
          // TODO(jpp): this is ARM32 specific.
          Str << "(GOTOFF)";
        }
        if (RelocOffsetT Offset = Reloc->getOffset()) {
          if (Offset >= 0 || (Offset == INT32_MIN))
            Str << " + " << Offset;
          else
            Str << " - " << -Offset;
        }
        Str << "\n";
        break;
      }
      }
    }
  } else {
    // NOTE: for non-constant zero initializers, this is BSS (no bits), so an
    // ELF writer would not write to the file, and only track virtual offsets,
    // but the .s writer still needs this .zero and cannot simply use the .size
    // to advance offsets.
    Str << "\t.zero\t" << Size << "\n";
  }

  Str << "\t.size\t" << Name << ", " << Size << "\n";
}

std::unique_ptr<TargetHeaderLowering>
TargetHeaderLowering::createLowering(GlobalContext *Ctx) {
  TargetArch Target = getFlags().getTargetArch();
  switch (Target) {
  default:
    badTargetFatalError(Target);
#define SUBZERO_TARGET(X) \
  case TARGET_LOWERING_CLASS_FOR(X): \
    return ::X::createTargetHeaderLowering(Ctx);
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

TargetHeaderLowering::~TargetHeaderLowering() = default;

} // end of namespace Ice