//===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the CfgNode class, including the complexities of
/// instruction insertion and in-edge calculation.
///
//===----------------------------------------------------------------------===//

#include "IceCfgNode.h"

#include "IceAssembler.h"
#include "IceCfg.h"
#include "IceGlobalInits.h"
#include "IceInst.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceTargetLowering.h"

namespace Ice {

// Adds an instruction to either the Phi list or the regular instruction list.
// Validates that all Phis are added before all regular instructions.
void CfgNode::appendInst(Inst *Instr) {
  ++InstCountEstimate;

  if (BuildDefs::wasm()) {
    if (llvm::isa<InstSwitch>(Instr) || llvm::isa<InstBr>(Instr)) {
      for (auto *N : Instr->getTerminatorEdges()) {
        N->addInEdge(this);
        addOutEdge(N);
      }
    }
  }

  if (auto *Phi = llvm::dyn_cast<InstPhi>(Instr)) {
    if (!Insts.empty()) {
      Func->setError("Phi instruction added to the middle of a block");
      return;
    }
    Phis.push_back(Phi);
  } else {
    Insts.push_back(Instr);
  }
}

void CfgNode::replaceInEdge(CfgNode *Old, CfgNode *New) {
  for (SizeT i = 0; i < InEdges.size(); ++i) {
    if (InEdges[i] == Old) {
      InEdges[i] = New;
    }
  }
  for (auto &Inst : getPhis()) {
    auto &Phi = llvm::cast<InstPhi>(Inst);
    for (SizeT i = 0; i < Phi.getSrcSize(); ++i) {
      if (Phi.getLabel(i) == Old) {
        Phi.setLabel(i, New);
      }
    }
  }
}

namespace {
template <typename List> void removeDeletedAndRenumber(List *L, Cfg *Func) {
  const bool DoDelete =
      BuildDefs::minimal() || !getFlags().getKeepDeletedInsts();
  auto I = L->begin(), E = L->end(), Next = I;
  for (++Next; I != E; I = Next++) {
    if (DoDelete && I->isDeleted()) {
      L->remove(I);
    } else {
      I->renumber(Func);
    }
  }
}
} // end of anonymous namespace

void CfgNode::renumberInstructions() {
  InstNumberT FirstNumber = Func->getNextInstNumber();
  removeDeletedAndRenumber(&Phis, Func);
  removeDeletedAndRenumber(&Insts, Func);
  InstCountEstimate = Func->getNextInstNumber() - FirstNumber;
}

// When a node is created, the OutEdges are immediately known, but the InEdges
// have to be built up incrementally. After the CFG has been constructed, the
// computePredecessors() pass finalizes it by creating the InEdges list.
void CfgNode::computePredecessors() {
  for (CfgNode *Succ : OutEdges)
    Succ->InEdges.push_back(this);
}

void CfgNode::computeSuccessors() {
  OutEdges.clear();
  InEdges.clear();
  assert(!Insts.empty());
  OutEdges = Insts.rbegin()->getTerminatorEdges();
}

// Ensure each Phi instruction in the node is consistent with respect to
// control flow. For each predecessor, there must be a phi argument with that
// label. If a phi argument's label doesn't appear in the predecessor list
// (which can happen as a result of e.g. unreachable node elimination), its
// value is modified to be zero, to maintain consistency in liveness analysis.
// This allows us to remove some dead control flow without a major rework of
// the phi instructions. We don't check that phi arguments with the same label
// have the same value.
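// For example, if predecessor node %n was eliminated as unreachable but a phi
// in this node still carries an argument [ %v, %n ], clearOperandForTarget()
// rewrites that argument's value to zero so that liveness analysis stays
// consistent.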
void CfgNode::enforcePhiConsistency() {
  for (Inst &Instr : Phis) {
    auto *Phi = llvm::cast<InstPhi>(&Instr);
    // We do a simple O(N^2) algorithm to check for consistency. Even so, it
    // shows up as only about 0.2% of the total translation time. But if
    // necessary, we could improve the complexity by using a hash table to
    // count how many times each node is referenced in the Phi instruction, and
    // how many times each node is referenced in the incoming edge list, and
    // compare the two for equality.
    for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
      CfgNode *Label = Phi->getLabel(i);
      bool Found = false;
      for (CfgNode *InNode : getInEdges()) {
        if (InNode == Label) {
          Found = true;
          break;
        }
      }
      if (!Found) {
        // Predecessor was unreachable, so if (impossibly) the control flow
        // enters from that predecessor, the value should be zero.
        Phi->clearOperandForTarget(Label);
      }
    }
    for (CfgNode *InNode : getInEdges()) {
      bool Found = false;
      for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
        CfgNode *Label = Phi->getLabel(i);
        if (InNode == Label) {
          Found = true;
          break;
        }
      }
      if (!Found)
        llvm::report_fatal_error("Phi error: missing label for incoming edge");
    }
  }
}

// This does part 1 of Phi lowering, by creating a new dest variable for each
// Phi instruction, replacing the Phi instruction's dest with that variable,
// and adding an explicit assignment of the old dest to the new dest. For
// example,
//   a=phi(...)
// changes to
//   "a_phi=phi(...); a=a_phi".
//
// This is in preparation for part 2 which deletes the Phi instructions and
// appends assignment instructions to predecessor blocks. Note that this
// transformation preserves SSA form.
void CfgNode::placePhiLoads() {
  for (Inst &I : Phis) {
    auto *Phi = llvm::dyn_cast<InstPhi>(&I);
    Insts.insert(Insts.begin(), Phi->lower(Func));
  }
}

// This does part 2 of Phi lowering. For each Phi instruction at each out-edge,
// create a corresponding assignment instruction, and add all the assignments
// near the end of this block. They need to be added before any branch
// instruction. Also, if the block ends with a compare instruction followed by
// a branch instruction that we may want to fuse, it's better to insert the new
// assignments before the compare instruction. The tryOptimizedCmpxchgCmpBr()
// method assumes this ordering of instructions.
//
// Note that this transformation takes the Phi dest variables out of SSA form,
// as there may be assignments to the dest variable in multiple blocks.
void CfgNode::placePhiStores() {
  // Find the insertion point.
  InstList::iterator InsertionPoint = Insts.end();
  // Every block must end in a terminator instruction, and therefore must have
  // at least one instruction, so it's valid to decrement InsertionPoint (but
  // assert just in case).
  assert(InsertionPoint != Insts.begin());
  --InsertionPoint;
  // Confirm that InsertionPoint is a terminator instruction. Calling
  // getTerminatorEdges() on a non-terminator instruction will cause an
  // llvm_unreachable().
  (void)InsertionPoint->getTerminatorEdges();
  // SafeInsertionPoint is always immediately before the terminator
  // instruction. If the block ends in a compare and conditional branch, it's
  // better to place the Phi store before the compare so as not to interfere
  // with compare/branch fusing. However, if the compare instruction's dest
  // operand is the same as the new assignment statement's source operand, this
  // can't be done due to data dependences, so we need to fall back to the
  // SafeInsertionPoint. To illustrate:
  //   ; <label>:95
  //   %97 = load i8* %96, align 1
  //   %98 = icmp ne i8 %97, 0
  //   br i1 %98, label %99, label %2132
  //   ; <label>:99
  //   %100 = phi i8 [ %97, %95 ], [ %110, %108 ]
  //   %101 = phi i1 [ %98, %95 ], [ %111, %108 ]
  // would be Phi-lowered as:
  //   ; <label>:95
  //   %97 = load i8* %96, align 1
  //   %100_phi = %97 ; can be at InsertionPoint
  //   %98 = icmp ne i8 %97, 0
  //   %101_phi = %98 ; must be at SafeInsertionPoint
  //   br i1 %98, label %99, label %2132
  //   ; <label>:99
  //   %100 = %100_phi
  //   %101 = %101_phi
  //
  // TODO(stichnot): It may be possible to bypass this whole SafeInsertionPoint
  // mechanism. If a source basic block ends in a conditional branch:
  //   labelSource:
  //   ...
  //   br i1 %foo, label %labelTrue, label %labelFalse
  // and a branch target has a Phi involving the branch operand:
  //   labelTrue:
  //   %bar = phi i1 [ %foo, %labelSource ], ...
  // then we actually know the constant i1 value of the Phi operand:
  //   labelTrue:
  //   %bar = phi i1 [ true, %labelSource ], ...
  // It seems that this optimization should be done by clang or opt, but we
  // could also do it here.
  InstList::iterator SafeInsertionPoint = InsertionPoint;
  // Keep track of the dest variable of a compare instruction, so that we
  // insert the new instruction at the SafeInsertionPoint if the compare's dest
  // matches the Phi-lowered assignment's source.
  Variable *CmpInstDest = nullptr;
  // If the current insertion point is at a conditional branch instruction, and
  // the previous instruction is a compare instruction, then we move the
  // insertion point before the compare instruction so as not to interfere with
  // compare/branch fusing.
  if (auto *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) {
    if (!Branch->isUnconditional()) {
      if (InsertionPoint != Insts.begin()) {
        --InsertionPoint;
        if (llvm::isa<InstIcmp>(InsertionPoint) ||
            llvm::isa<InstFcmp>(InsertionPoint)) {
          CmpInstDest = InsertionPoint->getDest();
        } else {
          ++InsertionPoint;
        }
      }
    }
  }

  // Consider every out-edge.
  for (CfgNode *Succ : OutEdges) {
    // Consider every Phi instruction at the out-edge.
    for (Inst &I : Succ->Phis) {
      auto *Phi = llvm::dyn_cast<InstPhi>(&I);
      Operand *Operand = Phi->getOperandForTarget(this);
      assert(Operand);
      Variable *Dest = I.getDest();
      assert(Dest);
      auto *NewInst = InstAssign::create(Func, Dest, Operand);
      if (CmpInstDest == Operand)
        Insts.insert(SafeInsertionPoint, NewInst);
      else
        Insts.insert(InsertionPoint, NewInst);
    }
  }
}

// Deletes the phi instructions after the loads and stores are placed.
void CfgNode::deletePhis() {
  for (Inst &I : Phis)
    I.setDeleted();
}

// Splits the edge from Pred to this node by creating a new node and hooking up
// the in and out edges appropriately. (The EdgeIndex parameter is only used to
// make the new node's name unique when there are multiple edges between the
// same pair of nodes.) The new node's instruction list is initialized to the
// empty list, with no terminator instruction. There must not be multiple edges
// from Pred to this node, so all Inst::getTerminatorEdges implementations must
// not contain duplicates.
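//
// Schematically, the transformation is:
//   Pred --> this   becomes   Pred --> NewNode --> this
// where NewNode starts out empty; callers such as advancedPhiLowering() later
// fill it with the lowered Phi assignments for this edge.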
CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
  CfgNode *NewNode = Func->makeNode();
  // Depth is the minimum as it works if both are the same, but if one is
  // outside the loop and the other is inside, the new node should be placed
  // outside and not be executed multiple times within the loop.
  NewNode->setLoopNestDepth(
      std::min(getLoopNestDepth(), Pred->getLoopNestDepth()));
  if (BuildDefs::dump())
    NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
                     std::to_string(EdgeIndex));
  // The new node is added to the end of the node list, and will later need to
  // be sorted into a reasonable topological order.
  NewNode->setNeedsPlacement(true);
  // Repoint Pred's out-edge.
  bool Found = false;
  for (CfgNode *&I : Pred->OutEdges) {
    if (I == this) {
      I = NewNode;
      NewNode->InEdges.push_back(Pred);
      Found = true;
      break;
    }
  }
  assert(Found);
  (void)Found;
  // Repoint this node's in-edge.
  Found = false;
  for (CfgNode *&I : InEdges) {
    if (I == Pred) {
      I = NewNode;
      NewNode->OutEdges.push_back(this);
      Found = true;
      break;
    }
  }
  assert(Found);
  (void)Found;
  // Repoint all suitable branch instructions' target and return.
  Found = false;
  for (Inst &I : Pred->getInsts())
    if (!I.isDeleted() && I.repointEdges(this, NewNode))
      Found = true;
  assert(Found);
  (void)Found;
  return NewNode;
}

namespace {

// Helpers for advancedPhiLowering().

class PhiDesc {
  PhiDesc() = delete;
  PhiDesc(const PhiDesc &) = delete;
  PhiDesc &operator=(const PhiDesc &) = delete;

public:
  PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {}
  PhiDesc(PhiDesc &&) = default;
  InstPhi *Phi = nullptr;
  Variable *Dest = nullptr;
  Operand *Src = nullptr;
  bool Processed = false;
  size_t NumPred = 0; // number of entries whose Src is this Dest
  int32_t Weight = 0; // preference for topological order
};
using PhiDescList = llvm::SmallVector<PhiDesc, 32>;

// Always pick NumPred=0 over NumPred>0.
constexpr int32_t WeightNoPreds = 8;
// Prefer Src as a register because the register might free up.
constexpr int32_t WeightSrcIsReg = 4;
// Prefer Dest not as a register because the register stays free longer.
constexpr int32_t WeightDestNotReg = 2;
// Prefer NumPred=1 over NumPred>1. This is used as a tiebreaker when a
// dependency cycle must be broken so that hopefully only one temporary
// assignment has to be added to break the cycle.
constexpr int32_t WeightOnePred = 1;
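
// For example, an unprocessed assignment with no remaining predecessors
// (NumPred == 0), a register-allocated Src, and a stack-allocated Dest starts
// with Weight = WeightNoPreds + WeightSrcIsReg + WeightDestNotReg = 14, so it
// is scheduled ahead of a cycle member (NumPred == 1) whose operands are both
// in registers, with Weight = WeightOnePred + WeightSrcIsReg = 5.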

bool sameVarOrReg(TargetLowering *Target, const Variable *Var1,
                  const Operand *Opnd) {
  if (Var1 == Opnd)
    return true;
  const auto *Var2 = llvm::dyn_cast<Variable>(Opnd);
  if (Var2 == nullptr)
    return false;

  // If either operand lacks a register, they cannot be the same.
  if (!Var1->hasReg())
    return false;
  if (!Var2->hasReg())
    return false;

  const auto RegNum1 = Var1->getRegNum();
  const auto RegNum2 = Var2->getRegNum();
  // Quick common-case check.
  if (RegNum1 == RegNum2)
    return true;

  assert(Target->getAliasesForRegister(RegNum1)[RegNum2] ==
         Target->getAliasesForRegister(RegNum2)[RegNum1]);
  return Target->getAliasesForRegister(RegNum1)[RegNum2];
}

// Update NumPred for all Phi assignments using Var as their Dest variable.
// Also update Weight if NumPred dropped from 2 to 1, or 1 to 0.
void updatePreds(PhiDescList &Desc, TargetLowering *Target, Variable *Var) {
  for (PhiDesc &Item : Desc) {
    if (!Item.Processed && sameVarOrReg(Target, Var, Item.Dest)) {
      --Item.NumPred;
      if (Item.NumPred == 1) {
        // If NumPred changed from 2 to 1, add in WeightOnePred.
        Item.Weight += WeightOnePred;
      } else if (Item.NumPred == 0) {
        // If NumPred changed from 1 to 0, subtract WeightOnePred and add in
        // WeightNoPreds.
        Item.Weight += (WeightNoPreds - WeightOnePred);
      }
    }
  }
}

} // end of anonymous namespace

// This is the "advanced" version of Phi lowering for a basic block, in
// contrast to the simple version that lowers through assignments involving
// temporaries.
//
// All Phi instructions in a basic block are conceptually executed in parallel.
// However, if we lower Phis early and commit to a sequential ordering, we may
// end up creating unnecessary interferences which lead to worse register
// allocation. Delaying Phi scheduling until after register allocation can help
// unless there are no free registers for shuffling registers or stack slots
// and spilling becomes necessary.
//
// The advanced Phi lowering starts by finding a topological sort of the Phi
// instructions, where "A=B" comes before "B=C" due to the anti-dependence on
// B. Preexisting register assignments are considered in the topological sort.
// If a topological sort is not possible due to a cycle, the cycle is broken by
// introducing a non-parallel temporary. For example, a cycle arising from a
// permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being
// equal, prefer to schedule assignments with register-allocated Src operands
// earlier, in case that register becomes free afterwards, and prefer to
// schedule assignments with register-allocated Dest variables later, to keep
// that register free for longer.
//
// Once the ordering is determined, the Cfg edge is split and the assignment
// list is lowered by the target lowering layer. Since the assignment lowering
// may create new infinite-weight temporaries, a follow-on register allocation
// pass will be needed. To prepare for this, liveness (including live range
// calculation) of the split nodes needs to be calculated, and liveness of the
// original node needs to be updated to "undo" the effects of the phi
// assignments.
//
// The specific placement of the new node within the Cfg node list is deferred
// until later, including after empty node contraction.
//
// After phi assignments are lowered across all blocks, another register
// allocation pass is run, focusing only on pre-colored and infinite-weight
// variables, similar to Om1 register allocation (except without the need to
// specially compute these variables' live ranges, since they have already been
// precisely calculated). The register allocator in this mode needs the ability
// to forcibly spill and reload registers in case none are naturally available.
void CfgNode::advancedPhiLowering() {
  if (getPhis().empty())
    return;

  PhiDescList Desc;

  for (Inst &I : Phis) {
    auto *Phi = llvm::dyn_cast<InstPhi>(&I);
    if (!Phi->isDeleted()) {
      Variable *Dest = Phi->getDest();
      Desc.emplace_back(Phi, Dest);
      // Undo the effect of the phi instruction on this node's live-in set by
      // marking the phi dest variable as live on entry.
      SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex());
      auto &LiveIn = Func->getLiveness()->getLiveIn(this);
      if (VarNum < LiveIn.size()) {
        assert(!LiveIn[VarNum]);
        LiveIn[VarNum] = true;
      }
      Phi->setDeleted();
    }
  }
  if (Desc.empty())
    return;

  TargetLowering *Target = Func->getTarget();
  SizeT InEdgeIndex = 0;
  for (CfgNode *Pred : InEdges) {
    CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++);
    SizeT Remaining = Desc.size();

    // First pass computes Src and initializes NumPred.
    for (PhiDesc &Item : Desc) {
      Variable *Dest = Item.Dest;
      Operand *Src = Item.Phi->getOperandForTarget(Pred);
      Item.Src = Src;
      Item.Processed = false;
      Item.NumPred = 0;
      // Cherry-pick any trivial assignments, so that they don't contribute to
      // the running complexity of the topological sort.
      if (sameVarOrReg(Target, Dest, Src)) {
        Item.Processed = true;
        --Remaining;
        if (Dest != Src)
          // If Dest and Src are syntactically the same, don't bother adding
          // the assignment, because in all respects it would be redundant, and
          // if Dest/Src are on the stack, the target lowering may naively
          // decide to lower it using a temporary register.
          Split->appendInst(InstAssign::create(Func, Dest, Src));
      }
    }
    // Second pass computes NumPred by comparing every pair of Phi
    // instructions.
    for (PhiDesc &Item : Desc) {
      if (Item.Processed)
        continue;
      const Variable *Dest = Item.Dest;
      for (PhiDesc &Item2 : Desc) {
        if (Item2.Processed)
          continue;
        // There shouldn't be two different Phis with the same Dest variable or
        // register.
        assert((&Item == &Item2) || !sameVarOrReg(Target, Dest, Item2.Dest));
        if (sameVarOrReg(Target, Dest, Item2.Src))
          ++Item.NumPred;
      }
    }

    // Another pass to compute initial Weight values.
    for (PhiDesc &Item : Desc) {
      if (Item.Processed)
        continue;
      int32_t Weight = 0;
      if (Item.NumPred == 0)
        Weight += WeightNoPreds;
      if (Item.NumPred == 1)
        Weight += WeightOnePred;
      if (auto *Var = llvm::dyn_cast<Variable>(Item.Src))
        if (Var->hasReg())
          Weight += WeightSrcIsReg;
      if (!Item.Dest->hasReg())
        Weight += WeightDestNotReg;
      Item.Weight = Weight;
    }

    // Repeatedly choose and process the best candidate in the topological
    // sort, until no candidates remain. This implementation is O(N^2) where N
    // is the number of Phi instructions, but with a small constant factor
    // compared to a likely implementation of O(N) topological sort.
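    // To illustrate with the permutation "A=B; B=C; C=A" (each item starts
    // with NumPred == 1): suppose A=B is chosen first. Its Dest A still has a
    // predecessor, the assignment C=A, so a temporary is introduced ("T=A")
    // and C=A is rewritten as C=T. That drops A=B's NumPred to 0, and A=B,
    // B=C, and C=T can then be emitted in order, producing the sequence
    // "T=A; A=B; B=C; C=T" mentioned above.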
    for (; Remaining; --Remaining) {
      int32_t BestWeight = -1;
      PhiDesc *BestItem = nullptr;
      // Find the best candidate.
      for (PhiDesc &Item : Desc) {
        if (Item.Processed)
          continue;
        const int32_t Weight = Item.Weight;
        if (Weight > BestWeight) {
          BestItem = &Item;
          BestWeight = Weight;
        }
      }
      assert(BestWeight >= 0);
      Variable *Dest = BestItem->Dest;
      Operand *Src = BestItem->Src;
      assert(!sameVarOrReg(Target, Dest, Src));
      // Break a cycle by introducing a temporary.
      while (BestItem->NumPred > 0) {
        bool Found = false;
        // If the target instruction "A=B" is part of a cycle, find the "X=A"
        // assignment in the cycle because it will have to be rewritten as
        // "X=tmp".
        for (PhiDesc &Item : Desc) {
          if (Item.Processed)
            continue;
          Operand *OtherSrc = Item.Src;
          if (Item.NumPred && sameVarOrReg(Target, Dest, OtherSrc)) {
            SizeT VarNum = Func->getNumVariables();
            Variable *Tmp = Func->makeVariable(OtherSrc->getType());
            if (BuildDefs::dump())
              Tmp->setName(Func, "__split_" + std::to_string(VarNum));
            Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc));
            Item.Src = Tmp;
            updatePreds(Desc, Target, llvm::cast<Variable>(OtherSrc));
            Found = true;
            break;
          }
        }
        assert(Found);
        (void)Found;
      }
      // Now that a cycle (if any) has been broken, create the actual
      // assignment.
      Split->appendInst(InstAssign::create(Func, Dest, Src));
      if (auto *Var = llvm::dyn_cast<Variable>(Src))
        updatePreds(Desc, Target, Var);
      BestItem->Processed = true;
    }
    Split->appendInst(InstBr::create(Func, this));

    Split->genCode();
    Func->getVMetadata()->addNode(Split);
    // Validate to be safe. All items should be marked as processed, and have
    // no predecessors.
    if (BuildDefs::asserts()) {
      for (PhiDesc &Item : Desc) {
        (void)Item;
        assert(Item.Processed);
        assert(Item.NumPred == 0);
      }
    }
  }
}

// Does address mode optimization. Pass each instruction to the TargetLowering
// object. If it returns a new instruction (representing the optimized address
// mode), then insert the new instruction and delete the old.
void CfgNode::doAddressOpt() {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  Context.init(this);
  while (!Context.atEnd()) {
    Target->doAddressOpt();
  }
}

void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  Context.init(this);
  Context.setInsertPoint(Context.getCur());
  // Do not insert nops in bundle-locked instruction sequences.
  bool PauseNopInsertion = false;
  while (!Context.atEnd()) {
    if (llvm::isa<InstBundleLock>(Context.getCur())) {
      PauseNopInsertion = true;
    } else if (llvm::isa<InstBundleUnlock>(Context.getCur())) {
      PauseNopInsertion = false;
    }
    if (!PauseNopInsertion)
      Target->doNopInsertion(RNG);
    // Ensure Cur=Next, so that the nops are inserted before the current
    // instruction rather than after.
    Context.advanceCur();
    Context.advanceNext();
  }
}

// Drives the target lowering. Passes the current instruction and the next
// non-deleted instruction for target lowering.
void CfgNode::genCode() {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  // Lower the regular instructions.
  Context.init(this);
  Target->initNodeForLowering(this);
  while (!Context.atEnd()) {
    InstList::iterator Orig = Context.getCur();
    if (llvm::isa<InstRet>(*Orig))
      setHasReturn();
    Target->lower();
    // Ensure target lowering actually moved the cursor.
    assert(Context.getCur() != Orig);
  }
  Context.availabilityReset();
  // Do preliminary lowering of the Phi instructions.
  Target->prelowerPhis();
}

void CfgNode::livenessLightweight() {
  SizeT NumVars = Func->getNumVariables();
  LivenessBV Live(NumVars);
  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.livenessLightweight(Func, Live);
  }
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    I.livenessLightweight(Func, Live);
  }
}

// Performs liveness analysis on the block. Returns true if the incoming
// liveness changed from before, false if it stayed the same. (If it changes,
// the node's predecessors need to be processed again.)
bool CfgNode::liveness(Liveness *Liveness) {
  const SizeT NumVars = Liveness->getNumVarsInNode(this);
  const SizeT NumGlobalVars = Liveness->getNumGlobalVars();
  LivenessBV &Live = Liveness->getScratchBV();
  Live.clear();

  LiveBeginEndMap *LiveBegin = nullptr;
  LiveBeginEndMap *LiveEnd = nullptr;
  // Mark the beginning and ending of each variable's live range with the
  // sentinel instruction number 0.
  if (Liveness->getMode() == Liveness_Intervals) {
    LiveBegin = Liveness->getLiveBegin(this);
    LiveEnd = Liveness->getLiveEnd(this);
    LiveBegin->clear();
    LiveEnd->clear();
    // Guess that the number of live ranges beginning is roughly the number of
    // instructions, and same for live ranges ending.
    LiveBegin->reserve(getInstCountEstimate());
    LiveEnd->reserve(getInstCountEstimate());
  }

  // Initialize Live to be the union of all successors' LiveIn.
  for (CfgNode *Succ : OutEdges) {
    const LivenessBV &LiveIn = Liveness->getLiveIn(Succ);
    assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
    Live |= LiveIn;
    // Mark corresponding argument of phis in successor as live.
    for (Inst &I : Succ->Phis) {
      if (I.isDeleted())
        continue;
      auto *Phi = llvm::cast<InstPhi>(&I);
      Phi->livenessPhiOperand(Live, this, Liveness);
    }
  }
  assert(Live.empty() || Live.size() == NumGlobalVars);
  Liveness->getLiveOut(this) = Live;

  // Expand Live so it can hold locals in addition to globals.
  Live.resize(NumVars);
  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
  }
  // Process phis in forward order so that we can override the instruction
  // number to be that of the earliest phi instruction in the block.
  SizeT NumNonDeadPhis = 0;
  InstNumberT FirstPhiNumber = Inst::NumberSentinel;
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    if (FirstPhiNumber == Inst::NumberSentinel)
      FirstPhiNumber = I.getNumber();
    if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd))
      ++NumNonDeadPhis;
  }

  // When using the sparse representation, after traversing the instructions in
  // the block, the Live bitvector should only contain set bits for global
  // variables upon block entry. We validate this by testing the upper bits of
  // the Live bitvector.
  if (Live.find_next(NumGlobalVars) != -1) {
    if (BuildDefs::dump()) {
      // This is a fatal liveness consistency error. Print some diagnostics and
      // abort.
      Ostream &Str = Func->getContext()->getStrDump();
      Func->resetCurrentNode();
      Str << "Invalid Live =";
      for (SizeT i = NumGlobalVars; i < Live.size(); ++i) {
        if (Live.test(i)) {
          Str << " ";
          Liveness->getVariable(i, this)->dump(Func);
        }
      }
      Str << "\n";
    }
    llvm::report_fatal_error("Fatal inconsistency in liveness analysis");
  }
  // Now truncate Live to prevent LiveIn from growing.
  Live.resize(NumGlobalVars);

  bool Changed = false;
  LivenessBV &LiveIn = Liveness->getLiveIn(this);
  assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
  // Add in current LiveIn
  Live |= LiveIn;
  // Check result, set LiveIn=Live
  SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this);
  bool LiveInChanged = (Live != LiveIn);
  Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged);
  if (LiveInChanged)
    LiveIn = Live;
  PrevNumNonDeadPhis = NumNonDeadPhis;
  return Changed;
}

// Validate the integrity of the live ranges in this block. If there are any
// errors, it prints details and returns false. On success, it returns true.
bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
  if (!BuildDefs::asserts())
    return true;

  // Verify there are no duplicates.
  auto ComparePair =
      [](const LiveBeginEndMapEntry &A, const LiveBeginEndMapEntry &B) {
        return A.first == B.first;
      };
  LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
  LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
  if (std::adjacent_find(MapBegin.begin(), MapBegin.end(), ComparePair) ==
          MapBegin.end() &&
      std::adjacent_find(MapEnd.begin(), MapEnd.end(), ComparePair) ==
          MapEnd.end())
    return true;

  // There is definitely a liveness error. All paths from here return false.
  if (!BuildDefs::dump())
    return false;

  // Print all the errors.
  if (BuildDefs::dump()) {
    GlobalContext *Ctx = Func->getContext();
    OstreamLocker L(Ctx);
    Ostream &Str = Ctx->getStrDump();
    if (Func->isVerbose()) {
      Str << "Live range errors in the following block:\n";
      dump(Func);
    }
    for (auto Start = MapBegin.begin();
         (Start = std::adjacent_find(Start, MapBegin.end(), ComparePair)) !=
         MapBegin.end();
         ++Start) {
      auto Next = Start + 1;
      Str << "Duplicate LR begin, block " << getName() << ", instructions "
          << Start->second << " & " << Next->second << ", variable "
          << Liveness->getVariable(Start->first, this)->getName() << "\n";
    }
    for (auto Start = MapEnd.begin();
         (Start = std::adjacent_find(Start, MapEnd.end(), ComparePair)) !=
         MapEnd.end();
         ++Start) {
      auto Next = Start + 1;
      Str << "Duplicate LR end, block " << getName() << ", instructions "
          << Start->second << " & " << Next->second << ", variable "
          << Liveness->getVariable(Start->first, this)->getName() << "\n";
    }
  }

  return false;
}

// Once basic liveness is complete, compute actual live ranges. It is assumed
// that within a single basic block, a live range begins at most once and ends
// at most once. This is certainly true for pure SSA form. It is also true once
// phis are lowered, since each assignment to the phi-based temporary is in a
// different basic block, and there is a single read that ends the live range
// in the basic block that contained the actual phi instruction.
void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum,
                                   InstNumberT LastInstNum) {
  TimerMarker T1(TimerStack::TT_liveRange, Func);

  const SizeT NumVars = Liveness->getNumVarsInNode(this);
  const LivenessBV &LiveIn = Liveness->getLiveIn(this);
  const LivenessBV &LiveOut = Liveness->getLiveOut(this);
  LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
  LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
  std::sort(MapBegin.begin(), MapBegin.end());
  std::sort(MapEnd.begin(), MapEnd.end());

  if (!livenessValidateIntervals(Liveness)) {
    llvm::report_fatal_error("livenessAddIntervals: Liveness error");
    return;
  }

  LivenessBV &LiveInAndOut = Liveness->getScratchBV();
  LiveInAndOut = LiveIn;
  LiveInAndOut &= LiveOut;

  // Iterate in parallel across the sorted MapBegin[] and MapEnd[].
  auto IBB = MapBegin.begin(), IEB = MapEnd.begin();
  auto IBE = MapBegin.end(), IEE = MapEnd.end();
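  // For example, with FirstInstNum = 10 and LastInstNum = 20: if MapBegin
  // contains (Var 3, Inst 14) and MapEnd contains (Var 3, Inst 18) and
  // (Var 5, Inst 12), then Variable 3's range begins at instruction 14 and
  // ends at 18, while Variable 5 appears only in MapEnd, so its range begins
  // at the top of the block (instruction 10) and ends at 12.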
  while (IBB != IBE || IEB != IEE) {
    SizeT i1 = IBB == IBE ? NumVars : IBB->first;
    SizeT i2 = IEB == IEE ? NumVars : IEB->first;
    SizeT i = std::min(i1, i2);
    // i1 is the Variable number of the next MapBegin entry, and i2 is the
    // Variable number of the next MapEnd entry. If i1==i2, then the Variable's
    // live range begins and ends in this block. If i1<i2, then i1's live range
    // begins at instruction IBB->second and extends through the end of the
    // block. If i1>i2, then i2's live range begins at the first instruction of
    // the block and ends at IEB->second. In any case, we choose the lesser of
    // i1 and i2 and proceed accordingly.
    InstNumberT LB = i == i1 ? IBB->second : FirstInstNum;
    InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1;

    Variable *Var = Liveness->getVariable(i, this);
    if (LB > LE) {
      Var->addLiveRange(FirstInstNum, LE, this);
      Var->addLiveRange(LB, LastInstNum + 1, this);
      // Assert that Var is a global variable by checking that its liveness
      // index is less than the number of globals. This ensures that the
      // LiveInAndOut[] access is valid.
      assert(i < Liveness->getNumGlobalVars());
      LiveInAndOut[i] = false;
    } else {
      Var->addLiveRange(LB, LE, this);
    }
    if (i == i1)
      ++IBB;
    if (i == i2)
      ++IEB;
  }
  // Process the variables that are live across the entire block.
  for (int i = LiveInAndOut.find_first(); i != -1;
       i = LiveInAndOut.find_next(i)) {
    Variable *Var = Liveness->getVariable(i, this);
    if (Liveness->getRangeMask(Var->getIndex()))
      Var->addLiveRange(FirstInstNum, LastInstNum + 1, this);
  }
}

// If this node contains only deleted instructions, and ends in an
// unconditional branch, contract the node by repointing all its in-edges to
// its successor.
void CfgNode::contractIfEmpty() {
  if (InEdges.empty())
    return;
  Inst *Branch = nullptr;
  for (Inst &I : Insts) {
    if (I.isDeleted())
      continue;
    if (I.isUnconditionalBranch())
      Branch = &I;
    else if (!I.isRedundantAssign())
      return;
  }
  // Make sure there is actually a successor to repoint in-edges to.
  if (OutEdges.empty())
    return;
  assert(hasSingleOutEdge());
  // Don't try to delete a self-loop.
  if (OutEdges[0] == this)
    return;
  // Make sure the node actually contains (ends with) an unconditional branch.
  if (Branch == nullptr)
    return;

  Branch->setDeleted();
  CfgNode *Successor = OutEdges.front();
  // Repoint all this node's in-edges to this node's successor, unless this
  // node's successor is actually itself (in which case the statement
  // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator
  // over this->InEdges).
  if (Successor != this) {
    for (CfgNode *Pred : InEdges) {
      for (CfgNode *&I : Pred->OutEdges) {
        if (I == this) {
          I = Successor;
          Successor->InEdges.push_back(Pred);
        }
      }
      for (Inst &I : Pred->getInsts()) {
        if (!I.isDeleted())
          I.repointEdges(this, Successor);
      }
    }

    // Remove the in-edge to the successor to allow node reordering to make
    // better decisions. For example, it's more helpful to place a node after a
    // reachable predecessor than an unreachable one (like the one we just
    // contracted).
    Successor->InEdges.erase(
        std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this));
  }
  InEdges.clear();
}

void CfgNode::doBranchOpt(const CfgNode *NextNode) {
  TargetLowering *Target = Func->getTarget();
  // Find the first opportunity for branch optimization (which will be the last
  // instruction in the block) and stop. This is sufficient unless there is
  // some target lowering where we have the possibility of multiple
  // optimizations per block. Take care with switch lowering as there are
  // multiple unconditional branches and only the last can be deleted.
  for (Inst &I : reverse_range(Insts)) {
    if (!I.isDeleted()) {
      Target->doBranchOpt(&I, NextNode);
      return;
    }
  }
}

// ======================== Dump routines ======================== //

namespace {

// Helper functions for emit().

void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
                       bool IsLiveIn, CfgVector<SizeT> &LiveRegCount) {
  if (!BuildDefs::dump())
    return;
  Liveness *Liveness = Func->getLiveness();
  const LivenessBV *Live;
  const auto StackReg = Func->getTarget()->getStackReg();
  const auto FrameOrStackReg = Func->getTarget()->getFrameOrStackReg();
  if (IsLiveIn) {
    Live = &Liveness->getLiveIn(Node);
    Str << "\t\t\t\t/* LiveIn=";
  } else {
    Live = &Liveness->getLiveOut(Node);
    Str << "\t\t\t\t/* LiveOut=";
  }
  if (!Live->empty()) {
    CfgVector<Variable *> LiveRegs;
    for (SizeT i = 0; i < Live->size(); ++i) {
      if (!(*Live)[i])
        continue;
      Variable *Var = Liveness->getVariable(i, Node);
      if (!Var->hasReg())
        continue;
      const auto RegNum = Var->getRegNum();
      if (RegNum == StackReg || RegNum == FrameOrStackReg)
        continue;
      if (IsLiveIn)
        ++LiveRegCount[RegNum];
      LiveRegs.push_back(Var);
    }
    // Sort the variables by regnum so they are always printed in a familiar
    // order.
    std::sort(LiveRegs.begin(), LiveRegs.end(),
              [](const Variable *V1, const Variable *V2) {
                return unsigned(V1->getRegNum()) < unsigned(V2->getRegNum());
              });
    bool First = true;
    for (Variable *Var : LiveRegs) {
      if (!First)
        Str << ",";
      First = false;
      Var->emit(Func);
    }
  }
  Str << " */\n";
}

/// Returns true if some text was emitted, in which case the caller definitely
/// needs to emit a newline character.
bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
                         CfgVector<SizeT> &LiveRegCount) {
  bool Printed = false;
  if (!BuildDefs::dump())
    return Printed;
  Variable *Dest = Instr->getDest();
  // Normally we increment the live count for the dest register. But we
  // shouldn't if the instruction's IsDestRedefined flag is set, because this
  // means that the target lowering created this instruction as a non-SSA
  // assignment; i.e., a different, previous instruction started the dest
  // variable's live range.
  if (!Instr->isDestRedefined() && Dest && Dest->hasReg())
    ++LiveRegCount[Dest->getRegNum()];
  FOREACH_VAR_IN_INST(Var, *Instr) {
    bool ShouldReport = Instr->isLastUse(Var);
    if (ShouldReport && Var->hasReg()) {
      // Don't report end of live range until the live count reaches 0.
      SizeT NewCount = --LiveRegCount[Var->getRegNum()];
      if (NewCount)
        ShouldReport = false;
    }
    if (ShouldReport) {
      if (Printed)
        Str << ",";
      else
        Str << " \t/* END=";
      Var->emit(Func);
      Printed = true;
    }
  }
  if (Printed)
    Str << " */";
  return Printed;
}

void updateStats(Cfg *Func, const Inst *I) {
  if (!BuildDefs::dump())
    return;
  // Update emitted instruction count, plus fill/spill count for Variable
  // operands without a physical register.
  if (uint32_t Count = I->getEmitInstCount()) {
    Func->getContext()->statsUpdateEmitted(Count);
    if (Variable *Dest = I->getDest()) {
      if (!Dest->hasReg())
        Func->getContext()->statsUpdateFills();
    }
    for (SizeT S = 0; S < I->getSrcSize(); ++S) {
      if (auto *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
        if (!Src->hasReg())
          Func->getContext()->statsUpdateSpills();
      }
    }
  }
}

} // end of anonymous namespace

void CfgNode::emit(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->setCurrentNode(this);
  Ostream &Str = Func->getContext()->getStrEmit();
  Liveness *Liveness = Func->getLiveness();
  const bool DecorateAsm = Liveness && getFlags().getDecorateAsm();
  Str << getAsmName() << ":\n";
  // LiveRegCount keeps track of the number of currently live variables that
  // each register is assigned to. Normally that would be only 0 or 1, but the
  // register allocator's AllowOverlap inference allows it to be greater than 1
  // for short periods.
  CfgVector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
  if (DecorateAsm) {
    constexpr bool IsLiveIn = true;
    emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
    if (getInEdges().size()) {
      Str << "\t\t\t\t/* preds=";
      bool First = true;
      for (CfgNode *I : getInEdges()) {
        if (!First)
          Str << ",";
        First = false;
        Str << "$" << I->getName();
      }
      Str << " */\n";
    }
    if (getLoopNestDepth()) {
      Str << "\t\t\t\t/* loop depth=" << getLoopNestDepth() << " */\n";
    }
  }

  for (const Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    // Emitting a Phi instruction should cause an error.
    I.emit(Func);
  }
  for (const Inst &I : Insts) {
    if (I.isDeleted())
      continue;
    if (I.isRedundantAssign()) {
      // Usually, redundant assignments end the live range of the src variable
      // and begin the live range of the dest variable, with no net effect on
      // the liveness of their register. However, if the register allocator
      // infers the AllowOverlap condition, then this may be a redundant
      // assignment that does not end the src variable's live range, in which
      // case the active variable count for that register needs to be bumped.
      // That normally would have happened as part of emitLiveRangesEnded(),
      // but that isn't called for redundant assignments.
      Variable *Dest = I.getDest();
      if (DecorateAsm && Dest->hasReg()) {
        ++LiveRegCount[Dest->getRegNum()];
        if (I.isLastUse(I.getSrc(0)))
          --LiveRegCount[llvm::cast<Variable>(I.getSrc(0))->getRegNum()];
      }
      continue;
    }
    I.emit(Func);
    bool Printed = false;
    if (DecorateAsm)
      Printed = emitLiveRangesEnded(Str, Func, &I, LiveRegCount);
    if (Printed || llvm::isa<InstTarget>(&I))
      Str << "\n";
    updateStats(Func, &I);
  }
  if (DecorateAsm) {
    constexpr bool IsLiveIn = false;
    emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
  }
}

// Helper class for emitIAS().
namespace {
class BundleEmitHelper {
  BundleEmitHelper() = delete;
  BundleEmitHelper(const BundleEmitHelper &) = delete;
  BundleEmitHelper &operator=(const BundleEmitHelper &) = delete;

public:
  BundleEmitHelper(Assembler *Asm, const InstList &Insts)
      : Asm(Asm), End(Insts.end()), BundleLockStart(End),
        BundleSize(1 << Asm->getBundleAlignLog2Bytes()),
        BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {}
  // Check whether we're currently within a bundle_lock region.
  bool isInBundleLockRegion() const { return BundleLockStart != End; }
  // Check whether the current bundle_lock region has the align_to_end option.
  bool isAlignToEnd() const {
    assert(isInBundleLockRegion());
    return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
           InstBundleLock::Opt_AlignToEnd;
  }
  // Check whether the current bundle_lock region has the pad_to_end option.
  bool isPadToEnd() const {
    assert(isInBundleLockRegion());
    return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
           InstBundleLock::Opt_PadToEnd;
  }
  // Check whether the entire bundle_lock region falls within the same bundle.
  bool isSameBundle() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPre == SizeSnapshotPost ||
           (SizeSnapshotPre & BundleMaskHi) ==
               ((SizeSnapshotPost - 1) & BundleMaskHi);
  }
  // Get the bundle alignment of the first instruction of the bundle_lock
  // region.
  intptr_t getPreAlignment() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPre & BundleMaskLo;
  }
  // Get the bundle alignment of the first instruction past the bundle_lock
  // region.
  intptr_t getPostAlignment() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPost & BundleMaskLo;
  }
  // Get the iterator pointing to the bundle_lock instruction, e.g. to roll
  // back the instruction iteration to that point.
  InstList::const_iterator getBundleLockStart() const {
    assert(isInBundleLockRegion());
    return BundleLockStart;
  }
  // Set up bookkeeping when the bundle_lock instruction is first processed.
  void enterBundleLock(InstList::const_iterator I) {
    assert(!isInBundleLockRegion());
    BundleLockStart = I;
    SizeSnapshotPre = Asm->getBufferSize();
    Asm->setPreliminary(true);
    assert(isInBundleLockRegion());
  }
  // Update bookkeeping when the bundle_unlock instruction is processed.
  void enterBundleUnlock() {
    assert(isInBundleLockRegion());
    SizeSnapshotPost = Asm->getBufferSize();
  }
  // Update bookkeeping when we are completely finished with the bundle_lock
  // region.
  void leaveBundleLockRegion() { BundleLockStart = End; }
  // Check whether the instruction sequence fits within the current bundle, and
  // if not, add nop padding to the end of the current bundle.
  void padToNextBundle() {
    assert(isInBundleLockRegion());
    if (!isSameBundle()) {
      intptr_t PadToNextBundle = BundleSize - getPreAlignment();
      Asm->padWithNop(PadToNextBundle);
      SizeSnapshotPre += PadToNextBundle;
      SizeSnapshotPost += PadToNextBundle;
      assert((Asm->getBufferSize() & BundleMaskLo) == 0);
      assert(Asm->getBufferSize() == SizeSnapshotPre);
    }
  }
  // If align_to_end is specified, add padding such that the instruction
  // sequence ends precisely at a bundle boundary.
  void padForAlignToEnd() {
    assert(isInBundleLockRegion());
    if (isAlignToEnd()) {
      if (intptr_t Offset = getPostAlignment()) {
        Asm->padWithNop(BundleSize - Offset);
        SizeSnapshotPre = Asm->getBufferSize();
      }
    }
  }
  // If pad_to_end is specified, add padding such that the first instruction
  // after the instruction sequence starts at a bundle boundary.
  void padForPadToEnd() {
    assert(isInBundleLockRegion());
    if (isPadToEnd()) {
      if (intptr_t Offset = getPostAlignment()) {
        Asm->padWithNop(BundleSize - Offset);
        SizeSnapshotPre = Asm->getBufferSize();
      }
    }
  }
  // Update bookkeeping when rolling back for the second pass.
  void rollback() {
    assert(isInBundleLockRegion());
    Asm->setBufferSize(SizeSnapshotPre);
    Asm->setPreliminary(false);
  }

private:
  Assembler *const Asm;
  // End is a sentinel value such that BundleLockStart==End implies that we are
  // not in a bundle_lock region.
  const InstList::const_iterator End;
  InstList::const_iterator BundleLockStart;
  const intptr_t BundleSize;
  // Masking with BundleMaskLo identifies an address's bundle offset.
  const intptr_t BundleMaskLo;
  // Masking with BundleMaskHi identifies an address's bundle.
  const intptr_t BundleMaskHi;
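  // For example, with getBundleAlignLog2Bytes() == 5 (32-byte bundles):
  // BundleSize == 32, BundleMaskLo == 0x1f, and BundleMaskHi == ~0x1f, so a
  // buffer offset of 0x47 has bundle offset 0x47 & 0x1f == 0x07 and lies in
  // the bundle starting at 0x47 & ~0x1f == 0x40.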
  intptr_t SizeSnapshotPre = 0;
  intptr_t SizeSnapshotPost = 0;
};

} // end of anonymous namespace

void CfgNode::emitIAS(Cfg *Func) const {
  Func->setCurrentNode(this);
  Assembler *Asm = Func->getAssembler<>();
  // TODO(stichnot): When sandboxing, defer binding the node label until just
  // before the first instruction is emitted, to reduce the chance that a
  // padding nop is a branch target.
  Asm->bindCfgNodeLabel(this);
  for (const Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    // Emitting a Phi instruction should cause an error.
    I.emitIAS(Func);
  }

  // Do the simple emission if not sandboxed.
  if (!getFlags().getUseSandboxing()) {
    for (const Inst &I : Insts) {
      if (!I.isDeleted() && !I.isRedundantAssign()) {
        I.emitIAS(Func);
        updateStats(Func, &I);
      }
    }
    return;
  }

  // The remainder of the function handles emission with sandboxing. There are
  // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock
  // instructions. All other instructions are treated as an implicit
  // one-instruction bundle_lock region. Emission is done twice for each
  // bundle_lock region. The first pass is a preliminary pass, after which we
  // can figure out what nop padding is needed, then roll back, and make the
  // final pass.
  //
  // Ideally, the first pass would be speculative and the second pass would
  // only be done if nop padding were needed, but the structure of the
  // integrated assembler makes it hard to roll back the state of label
  // bindings, label links, and relocation fixups. Instead, the first pass just
  // disables all mutation of that state.

  BundleEmitHelper Helper(Asm, Insts);
  InstList::const_iterator End = Insts.end();
  // Retrying indicates that we had to roll back to the bundle_lock instruction
  // to apply padding before the bundle_lock sequence.
  bool Retrying = false;
  for (InstList::const_iterator I = Insts.begin(); I != End; ++I) {
    if (I->isDeleted() || I->isRedundantAssign())
      continue;

    if (llvm::isa<InstBundleLock>(I)) {
      // Set up the initial bundle_lock state. This should not happen while
      // retrying, because the retry rolls back to the instruction following
      // the bundle_lock instruction.
      assert(!Retrying);
      Helper.enterBundleLock(I);
      continue;
    }

    if (llvm::isa<InstBundleUnlock>(I)) {
      Helper.enterBundleUnlock();
      if (Retrying) {
        // Make sure all instructions are in the same bundle.
        assert(Helper.isSameBundle());
        // If align_to_end is specified, make sure the next instruction begins
        // the bundle.
        assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0);
        Helper.padForPadToEnd();
        Helper.leaveBundleLockRegion();
        Retrying = false;
      } else {
        // This is the first pass, so roll back for the retry pass.
        Helper.rollback();
        // Pad to the next bundle if the instruction sequence crossed a bundle
        // boundary.
        Helper.padToNextBundle();
        // Insert additional padding to make AlignToEnd work.
        Helper.padForAlignToEnd();
        // Prepare for the retry pass after padding is done.
        Retrying = true;
        I = Helper.getBundleLockStart();
      }
      continue;
    }

    // I points to a non-bundle_lock/bundle_unlock instruction.
    if (Helper.isInBundleLockRegion()) {
      I->emitIAS(Func);
      // Only update stats during the final pass.
      if (Retrying)
        updateStats(Func, iteratorToInst(I));
    } else {
      // Treat it as though there were an implicit bundle_lock and
      // bundle_unlock wrapping the instruction.
      Helper.enterBundleLock(I);
      I->emitIAS(Func);
      Helper.enterBundleUnlock();
      Helper.rollback();
      Helper.padToNextBundle();
      I->emitIAS(Func);
      updateStats(Func, iteratorToInst(I));
      Helper.leaveBundleLockRegion();
    }
  }

  // Don't allow bundle locking across basic blocks, to keep the backtracking
  // mechanism simple.
  assert(!Helper.isInBundleLockRegion());
  assert(!Retrying);
}

void CfgNode::dump(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->setCurrentNode(this);
  Ostream &Str = Func->getContext()->getStrDump();
  Liveness *Liveness = Func->getLiveness();
  if (Func->isVerbose(IceV_Instructions) || Func->isVerbose(IceV_Loop))
    Str << getName() << ":\n";
  // Dump the loop nest depth.
  if (Func->isVerbose(IceV_Loop))
    Str << " // LoopNestDepth = " << getLoopNestDepth() << "\n";
  // Dump list of predecessor nodes.
  if (Func->isVerbose(IceV_Preds) && !InEdges.empty()) {
    Str << " // preds = ";
    bool First = true;
    for (CfgNode *I : InEdges) {
      if (!First)
        Str << ", ";
      First = false;
      Str << "%" << I->getName();
    }
    Str << "\n";
  }
  // Dump the live-in variables.
  if (Func->isVerbose(IceV_Liveness)) {
    if (Liveness != nullptr && !Liveness->getLiveIn(this).empty()) {
      const LivenessBV &LiveIn = Liveness->getLiveIn(this);
      Str << " // LiveIn:";
      for (SizeT i = 0; i < LiveIn.size(); ++i) {
        if (LiveIn[i]) {
          Variable *Var = Liveness->getVariable(i, this);
          Str << " %" << Var->getName();
          if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
            Str << ":"
                << Func->getTarget()->getRegName(Var->getRegNum(),
                                                 Var->getType());
          }
        }
      }
      Str << "\n";
    }
  }
  // Dump each instruction.
  if (Func->isVerbose(IceV_Instructions)) {
    for (const Inst &I : Phis)
      I.dumpDecorated(Func);
    for (const Inst &I : Insts)
      I.dumpDecorated(Func);
  }
  // Dump the live-out variables.
  if (Func->isVerbose(IceV_Liveness)) {
    if (Liveness != nullptr && !Liveness->getLiveOut(this).empty()) {
      const LivenessBV &LiveOut = Liveness->getLiveOut(this);
      Str << " // LiveOut:";
      for (SizeT i = 0; i < LiveOut.size(); ++i) {
        if (LiveOut[i]) {
          Variable *Var = Liveness->getVariable(i, this);
          Str << " %" << Var->getName();
          if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
            Str << ":"
                << Func->getTarget()->getRegName(Var->getRegNum(),
                                                 Var->getType());
          }
        }
      }
      Str << "\n";
    }
  }
  // Dump list of successor nodes.
  if (Func->isVerbose(IceV_Succs)) {
    Str << " // succs = ";
    bool First = true;
    for (CfgNode *I : OutEdges) {
      if (!First)
        Str << ", ";
      First = false;
      Str << "%" << I->getName();
    }
    Str << "\n";
  }
}

void CfgNode::profileExecutionCount(VariableDeclaration *Var) {
  GlobalContext *Ctx = Func->getContext();
  GlobalString RMW_I64 = Ctx->getGlobalString("llvm.nacl.atomic.rmw.i64");

  bool BadIntrinsic = false;
  const Intrinsics::FullIntrinsicInfo *Info =
      Ctx->getIntrinsicsInfo().find(RMW_I64, BadIntrinsic);
  assert(!BadIntrinsic);
  assert(Info != nullptr);

  Operand *RMWI64Name = Ctx->getConstantExternSym(RMW_I64);
  constexpr RelocOffsetT Offset = 0;
  Constant *Counter = Ctx->getConstantSym(Offset, Var->getName());
  Constant *AtomicRMWOp = Ctx->getConstantInt32(Intrinsics::AtomicAdd);
  Constant *One = Ctx->getConstantInt64(1);
  Constant *OrderAcquireRelease =
      Ctx->getConstantInt32(Intrinsics::MemoryOrderAcquireRelease);

  auto *Instr = InstIntrinsicCall::create(
      Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info);
  Instr->addArg(AtomicRMWOp);
  Instr->addArg(Counter);
  Instr->addArg(One);
  Instr->addArg(OrderAcquireRelease);
  Insts.push_front(Instr);
}

void CfgNode::removeInEdge(CfgNode *In) {
  InEdges.erase(std::find(InEdges.begin(), InEdges.end(), In));
}
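
// Tries to turn the block's final conditional branch into explicit
// short-circuit control flow when the branch condition is produced by a
// single top-level And/Or instruction in this block. For "br (a | b), T, F",
// the instructions after the definition of "a" are split into a new node;
// this block then branches directly to T when "a" is true and to the new
// node (which evaluates "b") otherwise. The And case is symmetric, branching
// to F when "a" is false. Returns the new node, or nullptr if the
// transformation does not apply, e.g. because an intervening instruction has
// side effects.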
CfgNode *CfgNode::shortCircuit() {
  auto *Func = getCfg();
  auto *Last = &getInsts().back();
  Variable *Condition = nullptr;
  InstBr *Br = nullptr;
  if ((Br = llvm::dyn_cast<InstBr>(Last))) {
    if (!Br->isUnconditional()) {
      Condition = llvm::dyn_cast<Variable>(Br->getCondition());
    }
  }
  if (Condition == nullptr)
    return nullptr;

  auto *JumpOnTrue = Br->getTargetTrue();
  auto *JumpOnFalse = Br->getTargetFalse();

  bool FoundOr = false;
  bool FoundAnd = false;

  InstArithmetic *TopLevelBoolOp = nullptr;

  for (auto &Inst : reverse_range(getInsts())) {
    if (Inst.isDeleted())
      continue;
    if (Inst.getDest() == Condition) {
      if (auto *Arith = llvm::dyn_cast<InstArithmetic>(&Inst)) {
        FoundOr = (Arith->getOp() == InstArithmetic::OpKind::Or);
        FoundAnd = (Arith->getOp() == InstArithmetic::OpKind::And);

        if (FoundOr || FoundAnd) {
          TopLevelBoolOp = Arith;
          break;
        }
      }
    }
  }

  if (!TopLevelBoolOp)
    return nullptr;

  auto IsOperand = [](Inst *Instr, Operand *Opr) -> bool {
    for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
      if (Instr->getSrc(i) == Opr)
        return true;
    }
    return false;
  };
  Inst *FirstOperandDef = nullptr;
  for (auto &Inst : getInsts()) {
    if (IsOperand(TopLevelBoolOp, Inst.getDest())) {
      FirstOperandDef = &Inst;
      break;
    }
  }

  if (FirstOperandDef == nullptr) {
    return nullptr;
  }

  // Check for side effects.
  auto It = Ice::instToIterator(FirstOperandDef);
  while (It != getInsts().end()) {
    if (It->isDeleted()) {
      ++It;
      continue;
    }
    if (llvm::isa<InstBr>(It) || llvm::isa<InstRet>(It)) {
      break;
    }
    auto *Dest = It->getDest();
    if (It->getDest() == nullptr || It->hasSideEffects() ||
        !Func->getVMetadata()->isSingleBlock(Dest)) {
      // Relying on short-circuit evaluation here:
      // getVMetadata()->isSingleBlock(Dest) would segfault if
      // It->getDest() == nullptr.
      return nullptr;
    }
    It++;
  }

  auto *NewNode = Func->makeNode();
  NewNode->setLoopNestDepth(getLoopNestDepth());
  It = Ice::instToIterator(FirstOperandDef);
  It++; // Have to split after the def.

  NewNode->getInsts().splice(NewNode->getInsts().begin(), getInsts(), It,
                             getInsts().end());

  if (BuildDefs::dump()) {
    NewNode->setName(getName().append("_2"));
    setName(getName().append("_1"));
  }

  // Point the edges properly.
  NewNode->addInEdge(this);
  for (auto *Out : getOutEdges()) {
    NewNode->addOutEdge(Out);
    Out->addInEdge(NewNode);
  }
  removeAllOutEdges();
  addOutEdge(NewNode);

  // Manage the Phi instructions of the successors.
  for (auto *Succ : NewNode->getOutEdges()) {
    for (auto &Inst : Succ->getPhis()) {
      auto *Phi = llvm::cast<InstPhi>(&Inst);
      for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
        if (Phi->getLabel(i) == this) {
          Phi->addArgument(Phi->getSrc(i), NewNode);
        }
      }
    }
  }

  // Create the new Br instruction.
  InstBr *NewInst = nullptr;
  if (FoundOr) {
    addOutEdge(JumpOnTrue);
    JumpOnFalse->removeInEdge(this);
    NewInst =
        InstBr::create(Func, FirstOperandDef->getDest(), JumpOnTrue, NewNode);
  } else if (FoundAnd) {
    addOutEdge(JumpOnFalse);
    JumpOnTrue->removeInEdge(this);
    NewInst =
        InstBr::create(Func, FirstOperandDef->getDest(), NewNode, JumpOnFalse);
  } else {
    return nullptr;
  }

  assert(NewInst != nullptr);
  appendInst(NewInst);

  Operand *UnusedOperand = nullptr;
  assert(TopLevelBoolOp->getSrcSize() == 2);
  if (TopLevelBoolOp->getSrc(0) == FirstOperandDef->getDest())
    UnusedOperand = TopLevelBoolOp->getSrc(1);
  else if (TopLevelBoolOp->getSrc(1) == FirstOperandDef->getDest())
    UnusedOperand = TopLevelBoolOp->getSrc(0);
  assert(UnusedOperand);

  Br->replaceSource(0, UnusedOperand); // Index 0 has the condition of the Br.

  TopLevelBoolOp->setDeleted();
  return NewNode;
}

} // end of namespace Ice