Home | History | Annotate | Download | only in src
      1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===//
      2 //
      3 //                        The Subzero Code Generator
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 ///
     10 /// \file
     11 /// \brief Implements the CfgNode class, including the complexities of
     12 /// instruction insertion and in-edge calculation.
     13 ///
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "IceCfgNode.h"
     17 
     18 #include "IceAssembler.h"
     19 #include "IceCfg.h"
     20 #include "IceGlobalInits.h"
     21 #include "IceInst.h"
     22 #include "IceInstVarIter.h"
     23 #include "IceLiveness.h"
     24 #include "IceOperand.h"
     25 #include "IceTargetLowering.h"
     26 
     27 namespace Ice {
     28 
     29 // Adds an instruction to either the Phi list or the regular instruction list.
     30 // Validates that all Phis are added before all regular instructions.
     31 void CfgNode::appendInst(Inst *Instr) {
     32   ++InstCountEstimate;
     33 
     34   if (BuildDefs::wasm()) {
     35     if (llvm::isa<InstSwitch>(Instr) || llvm::isa<InstBr>(Instr)) {
     36       for (auto *N : Instr->getTerminatorEdges()) {
     37         N->addInEdge(this);
     38         addOutEdge(N);
     39       }
     40     }
     41   }
     42 
     43   if (auto *Phi = llvm::dyn_cast<InstPhi>(Instr)) {
     44     if (!Insts.empty()) {
     45       Func->setError("Phi instruction added to the middle of a block");
     46       return;
     47     }
     48     Phis.push_back(Phi);
     49   } else {
     50     Insts.push_back(Instr);
     51   }
     52 }
     53 
     54 void CfgNode::replaceInEdge(CfgNode *Old, CfgNode *New) {
     55   for (SizeT i = 0; i < InEdges.size(); ++i) {
     56     if (InEdges[i] == Old) {
     57       InEdges[i] = New;
     58     }
     59   }
     60   for (auto &Inst : getPhis()) {
     61     auto &Phi = llvm::cast<InstPhi>(Inst);
     62     for (SizeT i = 0; i < Phi.getSrcSize(); ++i) {
     63       if (Phi.getLabel(i) == Old) {
     64         Phi.setLabel(i, New);
     65       }
     66     }
     67   }
     68 }
     69 
     70 namespace {
     71 template <typename List> void removeDeletedAndRenumber(List *L, Cfg *Func) {
     72   const bool DoDelete =
     73       BuildDefs::minimal() || !getFlags().getKeepDeletedInsts();
     74   auto I = L->begin(), E = L->end(), Next = I;
     75   for (++Next; I != E; I = Next++) {
     76     if (DoDelete && I->isDeleted()) {
     77       L->remove(I);
     78     } else {
     79       I->renumber(Func);
     80     }
     81   }
     82 }
     83 } // end of anonymous namespace
     84 
     85 void CfgNode::renumberInstructions() {
     86   InstNumberT FirstNumber = Func->getNextInstNumber();
     87   removeDeletedAndRenumber(&Phis, Func);
     88   removeDeletedAndRenumber(&Insts, Func);
     89   InstCountEstimate = Func->getNextInstNumber() - FirstNumber;
     90 }
     91 
     92 // When a node is created, the OutEdges are immediately known, but the InEdges
     93 // have to be built up incrementally. After the CFG has been constructed, the
     94 // computePredecessors() pass finalizes it by creating the InEdges list.
     95 void CfgNode::computePredecessors() {
     96   for (CfgNode *Succ : OutEdges)
     97     Succ->InEdges.push_back(this);
     98 }
     99 
    100 void CfgNode::computeSuccessors() {
    101   OutEdges.clear();
    102   InEdges.clear();
    103   assert(!Insts.empty());
    104   OutEdges = Insts.rbegin()->getTerminatorEdges();
    105 }
    106 
    107 // Ensure each Phi instruction in the node is consistent with respect to control
    108 // flow.  For each predecessor, there must be a phi argument with that label.
    109 // If a phi argument's label doesn't appear in the predecessor list (which can
    110 // happen as a result of e.g. unreachable node elimination), its value is
    111 // modified to be zero, to maintain consistency in liveness analysis.  This
    112 // allows us to remove some dead control flow without a major rework of the phi
    113 // instructions.  We don't check that phi arguments with the same label have the
    114 // same value.
    115 void CfgNode::enforcePhiConsistency() {
    116   for (Inst &Instr : Phis) {
    117     auto *Phi = llvm::cast<InstPhi>(&Instr);
    118     // We do a simple O(N^2) algorithm to check for consistency. Even so, it
    119     // shows up as only about 0.2% of the total translation time. But if
    120     // necessary, we could improve the complexity by using a hash table to
    121     // count how many times each node is referenced in the Phi instruction, and
    122     // how many times each node is referenced in the incoming edge list, and
    123     // compare the two for equality.
    124     for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
    125       CfgNode *Label = Phi->getLabel(i);
    126       bool Found = false;
    127       for (CfgNode *InNode : getInEdges()) {
    128         if (InNode == Label) {
    129           Found = true;
    130           break;
    131         }
    132       }
    133       if (!Found) {
    134         // Predecessor was unreachable, so if (impossibly) the control flow
    135         // enters from that predecessor, the value should be zero.
    136         Phi->clearOperandForTarget(Label);
    137       }
    138     }
    139     for (CfgNode *InNode : getInEdges()) {
    140       bool Found = false;
    141       for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
    142         CfgNode *Label = Phi->getLabel(i);
    143         if (InNode == Label) {
    144           Found = true;
    145           break;
    146         }
    147       }
    148       if (!Found)
    149         llvm::report_fatal_error("Phi error: missing label for incoming edge");
    150     }
    151   }
    152 }
    153 
    154 // This does part 1 of Phi lowering, by creating a new dest variable for each
    155 // Phi instruction, replacing the Phi instruction's dest with that variable,
    156 // and adding an explicit assignment of the old dest to the new dest. For
    157 // example,
    158 //   a=phi(...)
    159 // changes to
    160 //   "a_phi=phi(...); a=a_phi".
    161 //
    162 // This is in preparation for part 2 which deletes the Phi instructions and
    163 // appends assignment instructions to predecessor blocks. Note that this
    164 // transformation preserves SSA form.
    165 void CfgNode::placePhiLoads() {
    166   for (Inst &I : Phis) {
    167     auto *Phi = llvm::dyn_cast<InstPhi>(&I);
    168     Insts.insert(Insts.begin(), Phi->lower(Func));
    169   }
    170 }
    171 
    172 // This does part 2 of Phi lowering. For each Phi instruction at each out-edge,
    173 // create a corresponding assignment instruction, and add all the assignments
    174 // near the end of this block. They need to be added before any branch
    175 // instruction, and also if the block ends with a compare instruction followed
    176 // by a branch instruction that we may want to fuse, it's better to insert the
    177 // new assignments before the compare instruction. The
    178 // tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions.
    179 //
    180 // Note that this transformation takes the Phi dest variables out of SSA form,
    181 // as there may be assignments to the dest variable in multiple blocks.
    182 void CfgNode::placePhiStores() {
    183   // Find the insertion point.
    184   InstList::iterator InsertionPoint = Insts.end();
    185   // Every block must end in a terminator instruction, and therefore must have
    186   // at least one instruction, so it's valid to decrement InsertionPoint (but
    187   // assert just in case).
    188   assert(InsertionPoint != Insts.begin());
    189   --InsertionPoint;
    190   // Confirm that InsertionPoint is a terminator instruction. Calling
    191   // getTerminatorEdges() on a non-terminator instruction will cause an
    192   // llvm_unreachable().
    193   (void)InsertionPoint->getTerminatorEdges();
    194   // SafeInsertionPoint is always immediately before the terminator
    195   // instruction. If the block ends in a compare and conditional branch, it's
    196   // better to place the Phi store before the compare so as not to interfere
    197   // with compare/branch fusing. However, if the compare instruction's dest
    198   // operand is the same as the new assignment statement's source operand, this
    199   // can't be done due to data dependences, so we need to fall back to the
    200   // SafeInsertionPoint. To illustrate:
    201   //   ; <label>:95
    202   //   %97 = load i8* %96, align 1
    203   //   %98 = icmp ne i8 %97, 0
    204   //   br i1 %98, label %99, label %2132
    205   //   ; <label>:99
    206   //   %100 = phi i8 [ %97, %95 ], [ %110, %108 ]
    207   //   %101 = phi i1 [ %98, %95 ], [ %111, %108 ]
    208   // would be Phi-lowered as:
    209   //   ; <label>:95
    210   //   %97 = load i8* %96, align 1
    211   //   %100_phi = %97 ; can be at InsertionPoint
    212   //   %98 = icmp ne i8 %97, 0
    213   //   %101_phi = %98 ; must be at SafeInsertionPoint
    214   //   br i1 %98, label %99, label %2132
    215   //   ; <label>:99
    216   //   %100 = %100_phi
    217   //   %101 = %101_phi
    218   //
    219   // TODO(stichnot): It may be possible to bypass this whole SafeInsertionPoint
    220   // mechanism. If a source basic block ends in a conditional branch:
    221   //   labelSource:
    222   //   ...
    223   //   br i1 %foo, label %labelTrue, label %labelFalse
    224   // and a branch target has a Phi involving the branch operand:
    225   //   labelTrue:
    226   //   %bar = phi i1 [ %foo, %labelSource ], ...
    227   // then we actually know the constant i1 value of the Phi operand:
    228   //   labelTrue:
    229   //   %bar = phi i1 [ true, %labelSource ], ...
    230   // It seems that this optimization should be done by clang or opt, but we
    231   // could also do it here.
    232   InstList::iterator SafeInsertionPoint = InsertionPoint;
    233   // Keep track of the dest variable of a compare instruction, so that we
    234   // insert the new instruction at the SafeInsertionPoint if the compare's dest
    235   // matches the Phi-lowered assignment's source.
    236   Variable *CmpInstDest = nullptr;
    237   // If the current insertion point is at a conditional branch instruction, and
    238   // the previous instruction is a compare instruction, then we move the
    239   // insertion point before the compare instruction so as not to interfere with
    240   // compare/branch fusing.
    241   if (auto *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) {
    242     if (!Branch->isUnconditional()) {
    243       if (InsertionPoint != Insts.begin()) {
    244         --InsertionPoint;
    245         if (llvm::isa<InstIcmp>(InsertionPoint) ||
    246             llvm::isa<InstFcmp>(InsertionPoint)) {
    247           CmpInstDest = InsertionPoint->getDest();
    248         } else {
    249           ++InsertionPoint;
    250         }
    251       }
    252     }
    253   }
    254 
    255   // Consider every out-edge.
    256   for (CfgNode *Succ : OutEdges) {
    257     // Consider every Phi instruction at the out-edge.
    258     for (Inst &I : Succ->Phis) {
    259       auto *Phi = llvm::dyn_cast<InstPhi>(&I);
    260       Operand *Operand = Phi->getOperandForTarget(this);
    261       assert(Operand);
    262       Variable *Dest = I.getDest();
    263       assert(Dest);
    264       auto *NewInst = InstAssign::create(Func, Dest, Operand);
    265       if (CmpInstDest == Operand)
    266         Insts.insert(SafeInsertionPoint, NewInst);
    267       else
    268         Insts.insert(InsertionPoint, NewInst);
    269     }
    270   }
    271 }
    272 
    273 // Deletes the phi instructions after the loads and stores are placed.
    274 void CfgNode::deletePhis() {
    275   for (Inst &I : Phis)
    276     I.setDeleted();
    277 }
    278 
    279 // Splits the edge from Pred to this node by creating a new node and hooking up
    280 // the in and out edges appropriately. (The EdgeIndex parameter is only used to
    281 // make the new node's name unique when there are multiple edges between the
    282 // same pair of nodes.) The new node's instruction list is initialized to the
    283 // empty list, with no terminator instruction. There must not be multiple edges
    284 // from Pred to this node so all Inst::getTerminatorEdges implementations must
    285 // not contain duplicates.
    286 CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
    287   CfgNode *NewNode = Func->makeNode();
    288   // Depth is the minimum as it works if both are the same, but if one is
    289   // outside the loop and the other is inside, the new node should be placed
    290   // outside and not be executed multiple times within the loop.
    291   NewNode->setLoopNestDepth(
    292       std::min(getLoopNestDepth(), Pred->getLoopNestDepth()));
    293   if (BuildDefs::dump())
    294     NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
    295                      std::to_string(EdgeIndex));
    296   // The new node is added to the end of the node list, and will later need to
    297   // be sorted into a reasonable topological order.
    298   NewNode->setNeedsPlacement(true);
    299   // Repoint Pred's out-edge.
    300   bool Found = false;
    301   for (CfgNode *&I : Pred->OutEdges) {
    302     if (I == this) {
    303       I = NewNode;
    304       NewNode->InEdges.push_back(Pred);
    305       Found = true;
    306       break;
    307     }
    308   }
    309   assert(Found);
    310   (void)Found;
    311   // Repoint this node's in-edge.
    312   Found = false;
    313   for (CfgNode *&I : InEdges) {
    314     if (I == Pred) {
    315       I = NewNode;
    316       NewNode->OutEdges.push_back(this);
    317       Found = true;
    318       break;
    319     }
    320   }
    321   assert(Found);
    322   (void)Found;
    323   // Repoint all suitable branch instructions' target and return.
    324   Found = false;
    325   for (Inst &I : Pred->getInsts())
    326     if (!I.isDeleted() && I.repointEdges(this, NewNode))
    327       Found = true;
    328   assert(Found);
    329   (void)Found;
    330   return NewNode;
    331 }
    332 
    333 namespace {
    334 
    335 // Helpers for advancedPhiLowering().
    336 
    337 class PhiDesc {
    338   PhiDesc() = delete;
    339   PhiDesc(const PhiDesc &) = delete;
    340   PhiDesc &operator=(const PhiDesc &) = delete;
    341 
    342 public:
    343   PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {}
    344   PhiDesc(PhiDesc &&) = default;
    345   InstPhi *Phi = nullptr;
    346   Variable *Dest = nullptr;
    347   Operand *Src = nullptr;
    348   bool Processed = false;
    349   size_t NumPred = 0; // number of entries whose Src is this Dest
    350   int32_t Weight = 0; // preference for topological order
    351 };
    352 using PhiDescList = llvm::SmallVector<PhiDesc, 32>;
    353 
    354 // Always pick NumPred=0 over NumPred>0.
    355 constexpr int32_t WeightNoPreds = 8;
    356 // Prefer Src as a register because the register might free up.
    357 constexpr int32_t WeightSrcIsReg = 4;
    358 // Prefer Dest not as a register because the register stays free longer.
    359 constexpr int32_t WeightDestNotReg = 2;
    360 // Prefer NumPred=1 over NumPred>1.  This is used as a tiebreaker when a
    361 // dependency cycle must be broken so that hopefully only one temporary
    362 // assignment has to be added to break the cycle.
    363 constexpr int32_t WeightOnePred = 1;
    364 
    365 bool sameVarOrReg(TargetLowering *Target, const Variable *Var1,
    366                   const Operand *Opnd) {
    367   if (Var1 == Opnd)
    368     return true;
    369   const auto *Var2 = llvm::dyn_cast<Variable>(Opnd);
    370   if (Var2 == nullptr)
    371     return false;
    372 
    373   // If either operand lacks a register, they cannot be the same.
    374   if (!Var1->hasReg())
    375     return false;
    376   if (!Var2->hasReg())
    377     return false;
    378 
    379   const auto RegNum1 = Var1->getRegNum();
    380   const auto RegNum2 = Var2->getRegNum();
    381   // Quick common-case check.
    382   if (RegNum1 == RegNum2)
    383     return true;
    384 
    385   assert(Target->getAliasesForRegister(RegNum1)[RegNum2] ==
    386          Target->getAliasesForRegister(RegNum2)[RegNum1]);
    387   return Target->getAliasesForRegister(RegNum1)[RegNum2];
    388 }
    389 
    390 // Update NumPred for all Phi assignments using Var as their Dest variable.
    391 // Also update Weight if NumPred dropped from 2 to 1, or 1 to 0.
    392 void updatePreds(PhiDescList &Desc, TargetLowering *Target, Variable *Var) {
    393   for (PhiDesc &Item : Desc) {
    394     if (!Item.Processed && sameVarOrReg(Target, Var, Item.Dest)) {
    395       --Item.NumPred;
    396       if (Item.NumPred == 1) {
    397         // If NumPred changed from 2 to 1, add in WeightOnePred.
    398         Item.Weight += WeightOnePred;
    399       } else if (Item.NumPred == 0) {
    400         // If NumPred changed from 1 to 0, subtract WeightOnePred and add in
    401         // WeightNoPreds.
    402         Item.Weight += (WeightNoPreds - WeightOnePred);
    403       }
    404     }
    405   }
    406 }
    407 
    408 } // end of anonymous namespace
    409 
    410 // This the "advanced" version of Phi lowering for a basic block, in contrast
    411 // to the simple version that lowers through assignments involving temporaries.
    412 //
    413 // All Phi instructions in a basic block are conceptually executed in parallel.
    414 // However, if we lower Phis early and commit to a sequential ordering, we may
    415 // end up creating unnecessary interferences which lead to worse register
    416 // allocation. Delaying Phi scheduling until after register allocation can help
    417 // unless there are no free registers for shuffling registers or stack slots
    418 // and spilling becomes necessary.
    419 //
    420 // The advanced Phi lowering starts by finding a topological sort of the Phi
    421 // instructions, where "A=B" comes before "B=C" due to the anti-dependence on
    422 // B. Preexisting register assignments are considered in the topological sort.
    423 // If a topological sort is not possible due to a cycle, the cycle is broken by
    424 // introducing a non-parallel temporary. For example, a cycle arising from a
    425 // permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being
    426 // equal, prefer to schedule assignments with register-allocated Src operands
    427 // earlier, in case that register becomes free afterwards, and prefer to
    428 // schedule assignments with register-allocated Dest variables later, to keep
    429 // that register free for longer.
    430 //
    431 // Once the ordering is determined, the Cfg edge is split and the assignment
    432 // list is lowered by the target lowering layer. Since the assignment lowering
    433 // may create new infinite-weight temporaries, a follow-on register allocation
    434 // pass will be needed. To prepare for this, liveness (including live range
    435 // calculation) of the split nodes needs to be calculated, and liveness of the
    436 // original node need to be updated to "undo" the effects of the phi
    437 // assignments.
    438 
    439 // The specific placement of the new node within the Cfg node list is deferred
    440 // until later, including after empty node contraction.
    441 //
    442 // After phi assignments are lowered across all blocks, another register
    443 // allocation pass is run, focusing only on pre-colored and infinite-weight
    444 // variables, similar to Om1 register allocation (except without the need to
    445 // specially compute these variables' live ranges, since they have already been
    446 // precisely calculated). The register allocator in this mode needs the ability
    447 // to forcibly spill and reload registers in case none are naturally available.
    448 void CfgNode::advancedPhiLowering() {
    449   if (getPhis().empty())
    450     return;
    451 
    452   PhiDescList Desc;
    453 
    454   for (Inst &I : Phis) {
    455     auto *Phi = llvm::dyn_cast<InstPhi>(&I);
    456     if (!Phi->isDeleted()) {
    457       Variable *Dest = Phi->getDest();
    458       Desc.emplace_back(Phi, Dest);
    459       // Undo the effect of the phi instruction on this node's live-in set by
    460       // marking the phi dest variable as live on entry.
    461       SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex());
    462       auto &LiveIn = Func->getLiveness()->getLiveIn(this);
    463       if (VarNum < LiveIn.size()) {
    464         assert(!LiveIn[VarNum]);
    465         LiveIn[VarNum] = true;
    466       }
    467       Phi->setDeleted();
    468     }
    469   }
    470   if (Desc.empty())
    471     return;
    472 
    473   TargetLowering *Target = Func->getTarget();
    474   SizeT InEdgeIndex = 0;
    475   for (CfgNode *Pred : InEdges) {
    476     CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++);
    477     SizeT Remaining = Desc.size();
    478 
    479     // First pass computes Src and initializes NumPred.
    480     for (PhiDesc &Item : Desc) {
    481       Variable *Dest = Item.Dest;
    482       Operand *Src = Item.Phi->getOperandForTarget(Pred);
    483       Item.Src = Src;
    484       Item.Processed = false;
    485       Item.NumPred = 0;
    486       // Cherry-pick any trivial assignments, so that they don't contribute to
    487       // the running complexity of the topological sort.
    488       if (sameVarOrReg(Target, Dest, Src)) {
    489         Item.Processed = true;
    490         --Remaining;
    491         if (Dest != Src)
    492           // If Dest and Src are syntactically the same, don't bother adding
    493           // the assignment, because in all respects it would be redundant, and
    494           // if Dest/Src are on the stack, the target lowering may naively
    495           // decide to lower it using a temporary register.
    496           Split->appendInst(InstAssign::create(Func, Dest, Src));
    497       }
    498     }
    499     // Second pass computes NumPred by comparing every pair of Phi instructions.
    500     for (PhiDesc &Item : Desc) {
    501       if (Item.Processed)
    502         continue;
    503       const Variable *Dest = Item.Dest;
    504       for (PhiDesc &Item2 : Desc) {
    505         if (Item2.Processed)
    506           continue;
    507         // There shouldn't be two different Phis with the same Dest variable or
    508         // register.
    509         assert((&Item == &Item2) || !sameVarOrReg(Target, Dest, Item2.Dest));
    510         if (sameVarOrReg(Target, Dest, Item2.Src))
    511           ++Item.NumPred;
    512       }
    513     }
    514 
    515     // Another pass to compute initial Weight values.
    516     for (PhiDesc &Item : Desc) {
    517       if (Item.Processed)
    518         continue;
    519       int32_t Weight = 0;
    520       if (Item.NumPred == 0)
    521         Weight += WeightNoPreds;
    522       if (Item.NumPred == 1)
    523         Weight += WeightOnePred;
    524       if (auto *Var = llvm::dyn_cast<Variable>(Item.Src))
    525         if (Var->hasReg())
    526           Weight += WeightSrcIsReg;
    527       if (!Item.Dest->hasReg())
    528         Weight += WeightDestNotReg;
    529       Item.Weight = Weight;
    530     }
    531 
    532     // Repeatedly choose and process the best candidate in the topological sort,
    533     // until no candidates remain. This implementation is O(N^2) where N is the
    534     // number of Phi instructions, but with a small constant factor compared to
    535     // a likely implementation of O(N) topological sort.
    536     for (; Remaining; --Remaining) {
    537       int32_t BestWeight = -1;
    538       PhiDesc *BestItem = nullptr;
    539       // Find the best candidate.
    540       for (PhiDesc &Item : Desc) {
    541         if (Item.Processed)
    542           continue;
    543         const int32_t Weight = Item.Weight;
    544         if (Weight > BestWeight) {
    545           BestItem = &Item;
    546           BestWeight = Weight;
    547         }
    548       }
    549       assert(BestWeight >= 0);
    550       Variable *Dest = BestItem->Dest;
    551       Operand *Src = BestItem->Src;
    552       assert(!sameVarOrReg(Target, Dest, Src));
    553       // Break a cycle by introducing a temporary.
    554       while (BestItem->NumPred > 0) {
    555         bool Found = false;
    556         // If the target instruction "A=B" is part of a cycle, find the "X=A"
    557         // assignment in the cycle because it will have to be rewritten as
    558         // "X=tmp".
    559         for (PhiDesc &Item : Desc) {
    560           if (Item.Processed)
    561             continue;
    562           Operand *OtherSrc = Item.Src;
    563           if (Item.NumPred && sameVarOrReg(Target, Dest, OtherSrc)) {
    564             SizeT VarNum = Func->getNumVariables();
    565             Variable *Tmp = Func->makeVariable(OtherSrc->getType());
    566             if (BuildDefs::dump())
    567               Tmp->setName(Func, "__split_" + std::to_string(VarNum));
    568             Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc));
    569             Item.Src = Tmp;
    570             updatePreds(Desc, Target, llvm::cast<Variable>(OtherSrc));
    571             Found = true;
    572             break;
    573           }
    574         }
    575         assert(Found);
    576         (void)Found;
    577       }
    578       // Now that a cycle (if any) has been broken, create the actual
    579       // assignment.
    580       Split->appendInst(InstAssign::create(Func, Dest, Src));
    581       if (auto *Var = llvm::dyn_cast<Variable>(Src))
    582         updatePreds(Desc, Target, Var);
    583       BestItem->Processed = true;
    584     }
    585     Split->appendInst(InstBr::create(Func, this));
    586 
    587     Split->genCode();
    588     Func->getVMetadata()->addNode(Split);
    589     // Validate to be safe.  All items should be marked as processed, and have
    590     // no predecessors.
    591     if (BuildDefs::asserts()) {
    592       for (PhiDesc &Item : Desc) {
    593         (void)Item;
    594         assert(Item.Processed);
    595         assert(Item.NumPred == 0);
    596       }
    597     }
    598   }
    599 }
    600 
    601 // Does address mode optimization. Pass each instruction to the TargetLowering
    602 // object. If it returns a new instruction (representing the optimized address
    603 // mode), then insert the new instruction and delete the old.
    604 void CfgNode::doAddressOpt() {
    605   TargetLowering *Target = Func->getTarget();
    606   LoweringContext &Context = Target->getContext();
    607   Context.init(this);
    608   while (!Context.atEnd()) {
    609     Target->doAddressOpt();
    610   }
    611 }
    612 
    613 void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) {
    614   TargetLowering *Target = Func->getTarget();
    615   LoweringContext &Context = Target->getContext();
    616   Context.init(this);
    617   Context.setInsertPoint(Context.getCur());
    618   // Do not insert nop in bundle locked instructions.
    619   bool PauseNopInsertion = false;
    620   while (!Context.atEnd()) {
    621     if (llvm::isa<InstBundleLock>(Context.getCur())) {
    622       PauseNopInsertion = true;
    623     } else if (llvm::isa<InstBundleUnlock>(Context.getCur())) {
    624       PauseNopInsertion = false;
    625     }
    626     if (!PauseNopInsertion)
    627       Target->doNopInsertion(RNG);
    628     // Ensure Cur=Next, so that the nops are inserted before the current
    629     // instruction rather than after.
    630     Context.advanceCur();
    631     Context.advanceNext();
    632   }
    633 }
    634 
    635 // Drives the target lowering. Passes the current instruction and the next
    636 // non-deleted instruction for target lowering.
    637 void CfgNode::genCode() {
    638   TargetLowering *Target = Func->getTarget();
    639   LoweringContext &Context = Target->getContext();
    640   // Lower the regular instructions.
    641   Context.init(this);
    642   Target->initNodeForLowering(this);
    643   while (!Context.atEnd()) {
    644     InstList::iterator Orig = Context.getCur();
    645     if (llvm::isa<InstRet>(*Orig))
    646       setHasReturn();
    647     Target->lower();
    648     // Ensure target lowering actually moved the cursor.
    649     assert(Context.getCur() != Orig);
    650   }
    651   Context.availabilityReset();
    652   // Do preliminary lowering of the Phi instructions.
    653   Target->prelowerPhis();
    654 }
    655 
    656 void CfgNode::livenessLightweight() {
    657   SizeT NumVars = Func->getNumVariables();
    658   LivenessBV Live(NumVars);
    659   // Process regular instructions in reverse order.
    660   for (Inst &I : reverse_range(Insts)) {
    661     if (I.isDeleted())
    662       continue;
    663     I.livenessLightweight(Func, Live);
    664   }
    665   for (Inst &I : Phis) {
    666     if (I.isDeleted())
    667       continue;
    668     I.livenessLightweight(Func, Live);
    669   }
    670 }
    671 
    672 // Performs liveness analysis on the block. Returns true if the incoming
    673 // liveness changed from before, false if it stayed the same. (If it changes,
    674 // the node's predecessors need to be processed again.)
    675 bool CfgNode::liveness(Liveness *Liveness) {
    676   const SizeT NumVars = Liveness->getNumVarsInNode(this);
    677   const SizeT NumGlobalVars = Liveness->getNumGlobalVars();
    678   LivenessBV &Live = Liveness->getScratchBV();
    679   Live.clear();
    680 
    681   LiveBeginEndMap *LiveBegin = nullptr;
    682   LiveBeginEndMap *LiveEnd = nullptr;
    683   // Mark the beginning and ending of each variable's live range with the
    684   // sentinel instruction number 0.
    685   if (Liveness->getMode() == Liveness_Intervals) {
    686     LiveBegin = Liveness->getLiveBegin(this);
    687     LiveEnd = Liveness->getLiveEnd(this);
    688     LiveBegin->clear();
    689     LiveEnd->clear();
    690     // Guess that the number of live ranges beginning is roughly the number of
    691     // instructions, and same for live ranges ending.
    692     LiveBegin->reserve(getInstCountEstimate());
    693     LiveEnd->reserve(getInstCountEstimate());
    694   }
    695 
    696   // Initialize Live to be the union of all successors' LiveIn.
    697   for (CfgNode *Succ : OutEdges) {
    698     const LivenessBV &LiveIn = Liveness->getLiveIn(Succ);
    699     assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
    700     Live |= LiveIn;
    701     // Mark corresponding argument of phis in successor as live.
    702     for (Inst &I : Succ->Phis) {
    703       if (I.isDeleted())
    704         continue;
    705       auto *Phi = llvm::cast<InstPhi>(&I);
    706       Phi->livenessPhiOperand(Live, this, Liveness);
    707     }
    708   }
    709   assert(Live.empty() || Live.size() == NumGlobalVars);
    710   Liveness->getLiveOut(this) = Live;
    711 
    712   // Expand Live so it can hold locals in addition to globals.
    713   Live.resize(NumVars);
    714   // Process regular instructions in reverse order.
    715   for (Inst &I : reverse_range(Insts)) {
    716     if (I.isDeleted())
    717       continue;
    718     I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
    719   }
    720   // Process phis in forward order so that we can override the instruction
    721   // number to be that of the earliest phi instruction in the block.
    722   SizeT NumNonDeadPhis = 0;
    723   InstNumberT FirstPhiNumber = Inst::NumberSentinel;
    724   for (Inst &I : Phis) {
    725     if (I.isDeleted())
    726       continue;
    727     if (FirstPhiNumber == Inst::NumberSentinel)
    728       FirstPhiNumber = I.getNumber();
    729     if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd))
    730       ++NumNonDeadPhis;
    731   }
    732 
    733   // When using the sparse representation, after traversing the instructions in
    734   // the block, the Live bitvector should only contain set bits for global
    735   // variables upon block entry.  We validate this by testing the upper bits of
    736   // the Live bitvector.
    737   if (Live.find_next(NumGlobalVars) != -1) {
    738     if (BuildDefs::dump()) {
    739       // This is a fatal liveness consistency error. Print some diagnostics and
    740       // abort.
    741       Ostream &Str = Func->getContext()->getStrDump();
    742       Func->resetCurrentNode();
    743       Str << "Invalid Live =";
    744       for (SizeT i = NumGlobalVars; i < Live.size(); ++i) {
    745         if (Live.test(i)) {
    746           Str << " ";
    747           Liveness->getVariable(i, this)->dump(Func);
    748         }
    749       }
    750       Str << "\n";
    751     }
    752     llvm::report_fatal_error("Fatal inconsistency in liveness analysis");
    753   }
    754   // Now truncate Live to prevent LiveIn from growing.
    755   Live.resize(NumGlobalVars);
    756 
    757   bool Changed = false;
    758   LivenessBV &LiveIn = Liveness->getLiveIn(this);
    759   assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
    760   // Add in current LiveIn
    761   Live |= LiveIn;
    762   // Check result, set LiveIn=Live
    763   SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this);
    764   bool LiveInChanged = (Live != LiveIn);
    765   Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged);
    766   if (LiveInChanged)
    767     LiveIn = Live;
    768   PrevNumNonDeadPhis = NumNonDeadPhis;
    769   return Changed;
    770 }
    771 
    772 // Validate the integrity of the live ranges in this block.  If there are any
    773 // errors, it prints details and returns false.  On success, it returns true.
    774 bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
    775   if (!BuildDefs::asserts())
    776     return true;
    777 
    778   // Verify there are no duplicates.
    779   auto ComparePair =
    780       [](const LiveBeginEndMapEntry &A, const LiveBeginEndMapEntry &B) {
    781         return A.first == B.first;
    782       };
    783   LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
    784   LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
    785   if (std::adjacent_find(MapBegin.begin(), MapBegin.end(), ComparePair) ==
    786           MapBegin.end() &&
    787       std::adjacent_find(MapEnd.begin(), MapEnd.end(), ComparePair) ==
    788           MapEnd.end())
    789     return true;
    790 
    791   // There is definitely a liveness error.  All paths from here return false.
    792   if (!BuildDefs::dump())
    793     return false;
    794 
    795   // Print all the errors.
    796   if (BuildDefs::dump()) {
    797     GlobalContext *Ctx = Func->getContext();
    798     OstreamLocker L(Ctx);
    799     Ostream &Str = Ctx->getStrDump();
    800     if (Func->isVerbose()) {
    801       Str << "Live range errors in the following block:\n";
    802       dump(Func);
    803     }
    804     for (auto Start = MapBegin.begin();
    805          (Start = std::adjacent_find(Start, MapBegin.end(), ComparePair)) !=
    806          MapBegin.end();
    807          ++Start) {
    808       auto Next = Start + 1;
    809       Str << "Duplicate LR begin, block " << getName() << ", instructions "
    810           << Start->second << " & " << Next->second << ", variable "
    811           << Liveness->getVariable(Start->first, this)->getName() << "\n";
    812     }
    813     for (auto Start = MapEnd.begin();
    814          (Start = std::adjacent_find(Start, MapEnd.end(), ComparePair)) !=
    815          MapEnd.end();
    816          ++Start) {
    817       auto Next = Start + 1;
    818       Str << "Duplicate LR end,   block " << getName() << ", instructions "
    819           << Start->second << " & " << Next->second << ", variable "
    820           << Liveness->getVariable(Start->first, this)->getName() << "\n";
    821     }
    822   }
    823 
    824   return false;
    825 }
    826 
    827 // Once basic liveness is complete, compute actual live ranges. It is assumed
    828 // that within a single basic block, a live range begins at most once and ends
    829 // at most once. This is certainly true for pure SSA form. It is also true once
    830 // phis are lowered, since each assignment to the phi-based temporary is in a
    831 // different basic block, and there is a single read that ends the live in the
    832 // basic block that contained the actual phi instruction.
    833 void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum,
    834                                    InstNumberT LastInstNum) {
    835   TimerMarker T1(TimerStack::TT_liveRange, Func);
    836 
    837   const SizeT NumVars = Liveness->getNumVarsInNode(this);
    838   const LivenessBV &LiveIn = Liveness->getLiveIn(this);
    839   const LivenessBV &LiveOut = Liveness->getLiveOut(this);
    840   LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
    841   LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
    842   std::sort(MapBegin.begin(), MapBegin.end());
    843   std::sort(MapEnd.begin(), MapEnd.end());
    844 
    845   if (!livenessValidateIntervals(Liveness)) {
    846     llvm::report_fatal_error("livenessAddIntervals: Liveness error");
    847     return;
    848   }
    849 
    850   LivenessBV &LiveInAndOut = Liveness->getScratchBV();
    851   LiveInAndOut = LiveIn;
    852   LiveInAndOut &= LiveOut;
    853 
    854   // Iterate in parallel across the sorted MapBegin[] and MapEnd[].
    855   auto IBB = MapBegin.begin(), IEB = MapEnd.begin();
    856   auto IBE = MapBegin.end(), IEE = MapEnd.end();
    857   while (IBB != IBE || IEB != IEE) {
    858     SizeT i1 = IBB == IBE ? NumVars : IBB->first;
    859     SizeT i2 = IEB == IEE ? NumVars : IEB->first;
    860     SizeT i = std::min(i1, i2);
    861     // i1 is the Variable number of the next MapBegin entry, and i2 is the
    862     // Variable number of the next MapEnd entry. If i1==i2, then the Variable's
    863     // live range begins and ends in this block. If i1<i2, then i1's live range
    864     // begins at instruction IBB->second and extends through the end of the
    865     // block. If i1>i2, then i2's live range begins at the first instruction of
    866     // the block and ends at IEB->second. In any case, we choose the lesser of
    867     // i1 and i2 and proceed accordingly.
    868     InstNumberT LB = i == i1 ? IBB->second : FirstInstNum;
    869     InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1;
    870 
    871     Variable *Var = Liveness->getVariable(i, this);
    872     if (LB > LE) {
    873       Var->addLiveRange(FirstInstNum, LE, this);
    874       Var->addLiveRange(LB, LastInstNum + 1, this);
    875       // Assert that Var is a global variable by checking that its liveness
    876       // index is less than the number of globals. This ensures that the
    877       // LiveInAndOut[] access is valid.
    878       assert(i < Liveness->getNumGlobalVars());
    879       LiveInAndOut[i] = false;
    880     } else {
    881       Var->addLiveRange(LB, LE, this);
    882     }
    883     if (i == i1)
    884       ++IBB;
    885     if (i == i2)
    886       ++IEB;
    887   }
    888   // Process the variables that are live across the entire block.
    889   for (int i = LiveInAndOut.find_first(); i != -1;
    890        i = LiveInAndOut.find_next(i)) {
    891     Variable *Var = Liveness->getVariable(i, this);
    892     if (Liveness->getRangeMask(Var->getIndex()))
    893       Var->addLiveRange(FirstInstNum, LastInstNum + 1, this);
    894   }
    895 }
    896 
    897 // If this node contains only deleted instructions, and ends in an
    898 // unconditional branch, contract the node by repointing all its in-edges to
    899 // its successor.
    900 void CfgNode::contractIfEmpty() {
    901   if (InEdges.empty())
    902     return;
    903   Inst *Branch = nullptr;
    904   for (Inst &I : Insts) {
    905     if (I.isDeleted())
    906       continue;
    907     if (I.isUnconditionalBranch())
    908       Branch = &I;
    909     else if (!I.isRedundantAssign())
    910       return;
    911   }
    912   // Make sure there is actually a successor to repoint in-edges to.
    913   if (OutEdges.empty())
    914     return;
    915   assert(hasSingleOutEdge());
    916   // Don't try to delete a self-loop.
    917   if (OutEdges[0] == this)
    918     return;
    919   // Make sure the node actually contains (ends with) an unconditional branch.
    920   if (Branch == nullptr)
    921     return;
    922 
    923   Branch->setDeleted();
    924   CfgNode *Successor = OutEdges.front();
    925   // Repoint all this node's in-edges to this node's successor, unless this
    926   // node's successor is actually itself (in which case the statement
    927   // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator
    928   // over this->InEdges).
    929   if (Successor != this) {
    930     for (CfgNode *Pred : InEdges) {
    931       for (CfgNode *&I : Pred->OutEdges) {
    932         if (I == this) {
    933           I = Successor;
    934           Successor->InEdges.push_back(Pred);
    935         }
    936       }
    937       for (Inst &I : Pred->getInsts()) {
    938         if (!I.isDeleted())
    939           I.repointEdges(this, Successor);
    940       }
    941     }
    942 
    943     // Remove the in-edge to the successor to allow node reordering to make
    944     // better decisions. For example it's more helpful to place a node after a
    945     // reachable predecessor than an unreachable one (like the one we just
    946     // contracted).
    947     Successor->InEdges.erase(
    948         std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this));
    949   }
    950   InEdges.clear();
    951 }
    952 
    953 void CfgNode::doBranchOpt(const CfgNode *NextNode) {
    954   TargetLowering *Target = Func->getTarget();
    955   // Find the first opportunity for branch optimization (which will be the last
    956   // instruction in the block) and stop. This is sufficient unless there is
    957   // some target lowering where we have the possibility of multiple
    958   // optimizations per block. Take care with switch lowering as there are
    959   // multiple unconditional branches and only the last can be deleted.
    960   for (Inst &I : reverse_range(Insts)) {
    961     if (!I.isDeleted()) {
    962       Target->doBranchOpt(&I, NextNode);
    963       return;
    964     }
    965   }
    966 }
    967 
    968 // ======================== Dump routines ======================== //
    969 
    970 namespace {
    971 
    972 // Helper functions for emit().
    973 
    974 void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
    975                        bool IsLiveIn, CfgVector<SizeT> &LiveRegCount) {
    976   if (!BuildDefs::dump())
    977     return;
    978   Liveness *Liveness = Func->getLiveness();
    979   const LivenessBV *Live;
    980   const auto StackReg = Func->getTarget()->getStackReg();
    981   const auto FrameOrStackReg = Func->getTarget()->getFrameOrStackReg();
    982   if (IsLiveIn) {
    983     Live = &Liveness->getLiveIn(Node);
    984     Str << "\t\t\t\t/* LiveIn=";
    985   } else {
    986     Live = &Liveness->getLiveOut(Node);
    987     Str << "\t\t\t\t/* LiveOut=";
    988   }
    989   if (!Live->empty()) {
    990     CfgVector<Variable *> LiveRegs;
    991     for (SizeT i = 0; i < Live->size(); ++i) {
    992       if (!(*Live)[i])
    993         continue;
    994       Variable *Var = Liveness->getVariable(i, Node);
    995       if (!Var->hasReg())
    996         continue;
    997       const auto RegNum = Var->getRegNum();
    998       if (RegNum == StackReg || RegNum == FrameOrStackReg)
    999         continue;
   1000       if (IsLiveIn)
   1001         ++LiveRegCount[RegNum];
   1002       LiveRegs.push_back(Var);
   1003     }
   1004     // Sort the variables by regnum so they are always printed in a familiar
   1005     // order.
   1006     std::sort(LiveRegs.begin(), LiveRegs.end(),
   1007               [](const Variable *V1, const Variable *V2) {
   1008                 return unsigned(V1->getRegNum()) < unsigned(V2->getRegNum());
   1009               });
   1010     bool First = true;
   1011     for (Variable *Var : LiveRegs) {
   1012       if (!First)
   1013         Str << ",";
   1014       First = false;
   1015       Var->emit(Func);
   1016     }
   1017   }
   1018   Str << " */\n";
   1019 }
   1020 
   1021 /// Returns true if some text was emitted - in which case the caller definitely
   1022 /// needs to emit a newline character.
   1023 bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
   1024                          CfgVector<SizeT> &LiveRegCount) {
   1025   bool Printed = false;
   1026   if (!BuildDefs::dump())
   1027     return Printed;
   1028   Variable *Dest = Instr->getDest();
   1029   // Normally we increment the live count for the dest register. But we
   1030   // shouldn't if the instruction's IsDestRedefined flag is set, because this
   1031   // means that the target lowering created this instruction as a non-SSA
   1032   // assignment; i.e., a different, previous instruction started the dest
   1033   // variable's live range.
   1034   if (!Instr->isDestRedefined() && Dest && Dest->hasReg())
   1035     ++LiveRegCount[Dest->getRegNum()];
   1036   FOREACH_VAR_IN_INST(Var, *Instr) {
   1037     bool ShouldReport = Instr->isLastUse(Var);
   1038     if (ShouldReport && Var->hasReg()) {
   1039       // Don't report end of live range until the live count reaches 0.
   1040       SizeT NewCount = --LiveRegCount[Var->getRegNum()];
   1041       if (NewCount)
   1042         ShouldReport = false;
   1043     }
   1044     if (ShouldReport) {
   1045       if (Printed)
   1046         Str << ",";
   1047       else
   1048         Str << " \t/* END=";
   1049       Var->emit(Func);
   1050       Printed = true;
   1051     }
   1052   }
   1053   if (Printed)
   1054     Str << " */";
   1055   return Printed;
   1056 }
   1057 
   1058 void updateStats(Cfg *Func, const Inst *I) {
   1059   if (!BuildDefs::dump())
   1060     return;
   1061   // Update emitted instruction count, plus fill/spill count for Variable
   1062   // operands without a physical register.
   1063   if (uint32_t Count = I->getEmitInstCount()) {
   1064     Func->getContext()->statsUpdateEmitted(Count);
   1065     if (Variable *Dest = I->getDest()) {
   1066       if (!Dest->hasReg())
   1067         Func->getContext()->statsUpdateFills();
   1068     }
   1069     for (SizeT S = 0; S < I->getSrcSize(); ++S) {
   1070       if (auto *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
   1071         if (!Src->hasReg())
   1072           Func->getContext()->statsUpdateSpills();
   1073       }
   1074     }
   1075   }
   1076 }
   1077 
   1078 } // end of anonymous namespace
   1079 
   1080 void CfgNode::emit(Cfg *Func) const {
   1081   if (!BuildDefs::dump())
   1082     return;
   1083   Func->setCurrentNode(this);
   1084   Ostream &Str = Func->getContext()->getStrEmit();
   1085   Liveness *Liveness = Func->getLiveness();
   1086   const bool DecorateAsm = Liveness && getFlags().getDecorateAsm();
   1087   Str << getAsmName() << ":\n";
   1088   // LiveRegCount keeps track of the number of currently live variables that
   1089   // each register is assigned to. Normally that would be only 0 or 1, but the
   1090   // register allocator's AllowOverlap inference allows it to be greater than 1
   1091   // for short periods.
   1092   CfgVector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
   1093   if (DecorateAsm) {
   1094     constexpr bool IsLiveIn = true;
   1095     emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
   1096     if (getInEdges().size()) {
   1097       Str << "\t\t\t\t/* preds=";
   1098       bool First = true;
   1099       for (CfgNode *I : getInEdges()) {
   1100         if (!First)
   1101           Str << ",";
   1102         First = false;
   1103         Str << "$" << I->getName();
   1104       }
   1105       Str << " */\n";
   1106     }
   1107     if (getLoopNestDepth()) {
   1108       Str << "\t\t\t\t/* loop depth=" << getLoopNestDepth() << " */\n";
   1109     }
   1110   }
   1111 
   1112   for (const Inst &I : Phis) {
   1113     if (I.isDeleted())
   1114       continue;
   1115     // Emitting a Phi instruction should cause an error.
   1116     I.emit(Func);
   1117   }
   1118   for (const Inst &I : Insts) {
   1119     if (I.isDeleted())
   1120       continue;
   1121     if (I.isRedundantAssign()) {
   1122       // Usually, redundant assignments end the live range of the src variable
   1123       // and begin the live range of the dest variable, with no net effect on
   1124       // the liveness of their register. However, if the register allocator
   1125       // infers the AllowOverlap condition, then this may be a redundant
   1126       // assignment that does not end the src variable's live range, in which
   1127       // case the active variable count for that register needs to be bumped.
   1128       // That normally would have happened as part of emitLiveRangesEnded(),
   1129       // but that isn't called for redundant assignments.
   1130       Variable *Dest = I.getDest();
   1131       if (DecorateAsm && Dest->hasReg()) {
   1132         ++LiveRegCount[Dest->getRegNum()];
   1133         if (I.isLastUse(I.getSrc(0)))
   1134           --LiveRegCount[llvm::cast<Variable>(I.getSrc(0))->getRegNum()];
   1135       }
   1136       continue;
   1137     }
   1138     I.emit(Func);
   1139     bool Printed = false;
   1140     if (DecorateAsm)
   1141       Printed = emitLiveRangesEnded(Str, Func, &I, LiveRegCount);
   1142     if (Printed || llvm::isa<InstTarget>(&I))
   1143       Str << "\n";
   1144     updateStats(Func, &I);
   1145   }
   1146   if (DecorateAsm) {
   1147     constexpr bool IsLiveIn = false;
   1148     emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
   1149   }
   1150 }
   1151 
   1152 // Helper class for emitIAS().
   1153 namespace {
   1154 class BundleEmitHelper {
   1155   BundleEmitHelper() = delete;
   1156   BundleEmitHelper(const BundleEmitHelper &) = delete;
   1157   BundleEmitHelper &operator=(const BundleEmitHelper &) = delete;
   1158 
   1159 public:
   1160   BundleEmitHelper(Assembler *Asm, const InstList &Insts)
   1161       : Asm(Asm), End(Insts.end()), BundleLockStart(End),
   1162         BundleSize(1 << Asm->getBundleAlignLog2Bytes()),
   1163         BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {}
   1164   // Check whether we're currently within a bundle_lock region.
   1165   bool isInBundleLockRegion() const { return BundleLockStart != End; }
   1166   // Check whether the current bundle_lock region has the align_to_end option.
   1167   bool isAlignToEnd() const {
   1168     assert(isInBundleLockRegion());
   1169     return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
   1170            InstBundleLock::Opt_AlignToEnd;
   1171   }
   1172   bool isPadToEnd() const {
   1173     assert(isInBundleLockRegion());
   1174     return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
   1175            InstBundleLock::Opt_PadToEnd;
   1176   }
   1177   // Check whether the entire bundle_lock region falls within the same bundle.
   1178   bool isSameBundle() const {
   1179     assert(isInBundleLockRegion());
   1180     return SizeSnapshotPre == SizeSnapshotPost ||
   1181            (SizeSnapshotPre & BundleMaskHi) ==
   1182                ((SizeSnapshotPost - 1) & BundleMaskHi);
   1183   }
   1184   // Get the bundle alignment of the first instruction of the bundle_lock
   1185   // region.
   1186   intptr_t getPreAlignment() const {
   1187     assert(isInBundleLockRegion());
   1188     return SizeSnapshotPre & BundleMaskLo;
   1189   }
   1190   // Get the bundle alignment of the first instruction past the bundle_lock
   1191   // region.
   1192   intptr_t getPostAlignment() const {
   1193     assert(isInBundleLockRegion());
   1194     return SizeSnapshotPost & BundleMaskLo;
   1195   }
   1196   // Get the iterator pointing to the bundle_lock instruction, e.g. to roll
   1197   // back the instruction iteration to that point.
   1198   InstList::const_iterator getBundleLockStart() const {
   1199     assert(isInBundleLockRegion());
   1200     return BundleLockStart;
   1201   }
   1202   // Set up bookkeeping when the bundle_lock instruction is first processed.
   1203   void enterBundleLock(InstList::const_iterator I) {
   1204     assert(!isInBundleLockRegion());
   1205     BundleLockStart = I;
   1206     SizeSnapshotPre = Asm->getBufferSize();
   1207     Asm->setPreliminary(true);
   1208     assert(isInBundleLockRegion());
   1209   }
   1210   // Update bookkeeping when the bundle_unlock instruction is processed.
   1211   void enterBundleUnlock() {
   1212     assert(isInBundleLockRegion());
   1213     SizeSnapshotPost = Asm->getBufferSize();
   1214   }
   1215   // Update bookkeeping when we are completely finished with the bundle_lock
   1216   // region.
   1217   void leaveBundleLockRegion() { BundleLockStart = End; }
   1218   // Check whether the instruction sequence fits within the current bundle, and
   1219   // if not, add nop padding to the end of the current bundle.
   1220   void padToNextBundle() {
   1221     assert(isInBundleLockRegion());
   1222     if (!isSameBundle()) {
   1223       intptr_t PadToNextBundle = BundleSize - getPreAlignment();
   1224       Asm->padWithNop(PadToNextBundle);
   1225       SizeSnapshotPre += PadToNextBundle;
   1226       SizeSnapshotPost += PadToNextBundle;
   1227       assert((Asm->getBufferSize() & BundleMaskLo) == 0);
   1228       assert(Asm->getBufferSize() == SizeSnapshotPre);
   1229     }
   1230   }
   1231   // If align_to_end is specified, add padding such that the instruction
   1232   // sequences ends precisely at a bundle boundary.
   1233   void padForAlignToEnd() {
   1234     assert(isInBundleLockRegion());
   1235     if (isAlignToEnd()) {
   1236       if (intptr_t Offset = getPostAlignment()) {
   1237         Asm->padWithNop(BundleSize - Offset);
   1238         SizeSnapshotPre = Asm->getBufferSize();
   1239       }
   1240     }
   1241   }
   1242   // If pad_to_end is specified, add padding such that the first instruction
   1243   // after the instruction sequence starts at a bundle boundary.
   1244   void padForPadToEnd() {
   1245     assert(isInBundleLockRegion());
   1246     if (isPadToEnd()) {
   1247       if (intptr_t Offset = getPostAlignment()) {
   1248         Asm->padWithNop(BundleSize - Offset);
   1249         SizeSnapshotPre = Asm->getBufferSize();
   1250       }
   1251     }
   1252   } // Update bookkeeping when rolling back for the second pass.
   1253   void rollback() {
   1254     assert(isInBundleLockRegion());
   1255     Asm->setBufferSize(SizeSnapshotPre);
   1256     Asm->setPreliminary(false);
   1257   }
   1258 
   1259 private:
   1260   Assembler *const Asm;
   1261   // End is a sentinel value such that BundleLockStart==End implies that we are
   1262   // not in a bundle_lock region.
   1263   const InstList::const_iterator End;
   1264   InstList::const_iterator BundleLockStart;
   1265   const intptr_t BundleSize;
   1266   // Masking with BundleMaskLo identifies an address's bundle offset.
   1267   const intptr_t BundleMaskLo;
   1268   // Masking with BundleMaskHi identifies an address's bundle.
   1269   const intptr_t BundleMaskHi;
   1270   intptr_t SizeSnapshotPre = 0;
   1271   intptr_t SizeSnapshotPost = 0;
   1272 };
   1273 
   1274 } // end of anonymous namespace
   1275 
   1276 void CfgNode::emitIAS(Cfg *Func) const {
   1277   Func->setCurrentNode(this);
   1278   Assembler *Asm = Func->getAssembler<>();
   1279   // TODO(stichnot): When sandboxing, defer binding the node label until just
   1280   // before the first instruction is emitted, to reduce the chance that a
   1281   // padding nop is a branch target.
   1282   Asm->bindCfgNodeLabel(this);
   1283   for (const Inst &I : Phis) {
   1284     if (I.isDeleted())
   1285       continue;
   1286     // Emitting a Phi instruction should cause an error.
   1287     I.emitIAS(Func);
   1288   }
   1289 
   1290   // Do the simple emission if not sandboxed.
   1291   if (!getFlags().getUseSandboxing()) {
   1292     for (const Inst &I : Insts) {
   1293       if (!I.isDeleted() && !I.isRedundantAssign()) {
   1294         I.emitIAS(Func);
   1295         updateStats(Func, &I);
   1296       }
   1297     }
   1298     return;
   1299   }
   1300 
   1301   // The remainder of the function handles emission with sandboxing. There are
   1302   // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock
   1303   // instructions. All other instructions are treated as an implicit
   1304   // one-instruction bundle_lock region. Emission is done twice for each
   1305   // bundle_lock region. The first pass is a preliminary pass, after which we
   1306   // can figure out what nop padding is needed, then roll back, and make the
   1307   // final pass.
   1308   //
   1309   // Ideally, the first pass would be speculative and the second pass would
   1310   // only be done if nop padding were needed, but the structure of the
   1311   // integrated assembler makes it hard to roll back the state of label
   1312   // bindings, label links, and relocation fixups. Instead, the first pass just
   1313   // disables all mutation of that state.
   1314 
   1315   BundleEmitHelper Helper(Asm, Insts);
   1316   InstList::const_iterator End = Insts.end();
   1317   // Retrying indicates that we had to roll back to the bundle_lock instruction
   1318   // to apply padding before the bundle_lock sequence.
   1319   bool Retrying = false;
   1320   for (InstList::const_iterator I = Insts.begin(); I != End; ++I) {
   1321     if (I->isDeleted() || I->isRedundantAssign())
   1322       continue;
   1323 
   1324     if (llvm::isa<InstBundleLock>(I)) {
   1325       // Set up the initial bundle_lock state. This should not happen while
   1326       // retrying, because the retry rolls back to the instruction following
   1327       // the bundle_lock instruction.
   1328       assert(!Retrying);
   1329       Helper.enterBundleLock(I);
   1330       continue;
   1331     }
   1332 
   1333     if (llvm::isa<InstBundleUnlock>(I)) {
   1334       Helper.enterBundleUnlock();
   1335       if (Retrying) {
   1336         // Make sure all instructions are in the same bundle.
   1337         assert(Helper.isSameBundle());
   1338         // If align_to_end is specified, make sure the next instruction begins
   1339         // the bundle.
   1340         assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0);
   1341         Helper.padForPadToEnd();
   1342         Helper.leaveBundleLockRegion();
   1343         Retrying = false;
   1344       } else {
   1345         // This is the first pass, so roll back for the retry pass.
   1346         Helper.rollback();
   1347         // Pad to the next bundle if the instruction sequence crossed a bundle
   1348         // boundary.
   1349         Helper.padToNextBundle();
   1350         // Insert additional padding to make AlignToEnd work.
   1351         Helper.padForAlignToEnd();
   1352         // Prepare for the retry pass after padding is done.
   1353         Retrying = true;
   1354         I = Helper.getBundleLockStart();
   1355       }
   1356       continue;
   1357     }
   1358 
   1359     // I points to a non bundle_lock/bundle_unlock instruction.
   1360     if (Helper.isInBundleLockRegion()) {
   1361       I->emitIAS(Func);
   1362       // Only update stats during the final pass.
   1363       if (Retrying)
   1364         updateStats(Func, iteratorToInst(I));
   1365     } else {
   1366       // Treat it as though there were an implicit bundle_lock and
   1367       // bundle_unlock wrapping the instruction.
   1368       Helper.enterBundleLock(I);
   1369       I->emitIAS(Func);
   1370       Helper.enterBundleUnlock();
   1371       Helper.rollback();
   1372       Helper.padToNextBundle();
   1373       I->emitIAS(Func);
   1374       updateStats(Func, iteratorToInst(I));
   1375       Helper.leaveBundleLockRegion();
   1376     }
   1377   }
   1378 
   1379   // Don't allow bundle locking across basic blocks, to keep the backtracking
   1380   // mechanism simple.
   1381   assert(!Helper.isInBundleLockRegion());
   1382   assert(!Retrying);
   1383 }
   1384 
   1385 void CfgNode::dump(Cfg *Func) const {
   1386   if (!BuildDefs::dump())
   1387     return;
   1388   Func->setCurrentNode(this);
   1389   Ostream &Str = Func->getContext()->getStrDump();
   1390   Liveness *Liveness = Func->getLiveness();
   1391   if (Func->isVerbose(IceV_Instructions) || Func->isVerbose(IceV_Loop))
   1392     Str << getName() << ":\n";
   1393   // Dump the loop nest depth
   1394   if (Func->isVerbose(IceV_Loop))
   1395     Str << "    // LoopNestDepth = " << getLoopNestDepth() << "\n";
   1396   // Dump list of predecessor nodes.
   1397   if (Func->isVerbose(IceV_Preds) && !InEdges.empty()) {
   1398     Str << "    // preds = ";
   1399     bool First = true;
   1400     for (CfgNode *I : InEdges) {
   1401       if (!First)
   1402         Str << ", ";
   1403       First = false;
   1404       Str << "%" << I->getName();
   1405     }
   1406     Str << "\n";
   1407   }
   1408   // Dump the live-in variables.
   1409   if (Func->isVerbose(IceV_Liveness)) {
   1410     if (Liveness != nullptr && !Liveness->getLiveIn(this).empty()) {
   1411       const LivenessBV &LiveIn = Liveness->getLiveIn(this);
   1412       Str << "    // LiveIn:";
   1413       for (SizeT i = 0; i < LiveIn.size(); ++i) {
   1414         if (LiveIn[i]) {
   1415           Variable *Var = Liveness->getVariable(i, this);
   1416           Str << " %" << Var->getName();
   1417           if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
   1418             Str << ":"
   1419                 << Func->getTarget()->getRegName(Var->getRegNum(),
   1420                                                  Var->getType());
   1421           }
   1422         }
   1423       }
   1424       Str << "\n";
   1425     }
   1426   }
   1427   // Dump each instruction.
   1428   if (Func->isVerbose(IceV_Instructions)) {
   1429     for (const Inst &I : Phis)
   1430       I.dumpDecorated(Func);
   1431     for (const Inst &I : Insts)
   1432       I.dumpDecorated(Func);
   1433   }
   1434   // Dump the live-out variables.
   1435   if (Func->isVerbose(IceV_Liveness)) {
   1436     if (Liveness != nullptr && !Liveness->getLiveOut(this).empty()) {
   1437       const LivenessBV &LiveOut = Liveness->getLiveOut(this);
   1438       Str << "    // LiveOut:";
   1439       for (SizeT i = 0; i < LiveOut.size(); ++i) {
   1440         if (LiveOut[i]) {
   1441           Variable *Var = Liveness->getVariable(i, this);
   1442           Str << " %" << Var->getName();
   1443           if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
   1444             Str << ":"
   1445                 << Func->getTarget()->getRegName(Var->getRegNum(),
   1446                                                  Var->getType());
   1447           }
   1448         }
   1449       }
   1450       Str << "\n";
   1451     }
   1452   }
   1453   // Dump list of successor nodes.
   1454   if (Func->isVerbose(IceV_Succs)) {
   1455     Str << "    // succs = ";
   1456     bool First = true;
   1457     for (CfgNode *I : OutEdges) {
   1458       if (!First)
   1459         Str << ", ";
   1460       First = false;
   1461       Str << "%" << I->getName();
   1462     }
   1463     Str << "\n";
   1464   }
   1465 }
   1466 
   1467 void CfgNode::profileExecutionCount(VariableDeclaration *Var) {
   1468   GlobalContext *Ctx = Func->getContext();
   1469   GlobalString RMW_I64 = Ctx->getGlobalString("llvm.nacl.atomic.rmw.i64");
   1470 
   1471   bool BadIntrinsic = false;
   1472   const Intrinsics::FullIntrinsicInfo *Info =
   1473       Ctx->getIntrinsicsInfo().find(RMW_I64, BadIntrinsic);
   1474   assert(!BadIntrinsic);
   1475   assert(Info != nullptr);
   1476 
   1477   Operand *RMWI64Name = Ctx->getConstantExternSym(RMW_I64);
   1478   constexpr RelocOffsetT Offset = 0;
   1479   Constant *Counter = Ctx->getConstantSym(Offset, Var->getName());
   1480   Constant *AtomicRMWOp = Ctx->getConstantInt32(Intrinsics::AtomicAdd);
   1481   Constant *One = Ctx->getConstantInt64(1);
   1482   Constant *OrderAcquireRelease =
   1483       Ctx->getConstantInt32(Intrinsics::MemoryOrderAcquireRelease);
   1484 
   1485   auto *Instr = InstIntrinsicCall::create(
   1486       Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info);
   1487   Instr->addArg(AtomicRMWOp);
   1488   Instr->addArg(Counter);
   1489   Instr->addArg(One);
   1490   Instr->addArg(OrderAcquireRelease);
   1491   Insts.push_front(Instr);
   1492 }
   1493 
   1494 void CfgNode::removeInEdge(CfgNode *In) {
   1495   InEdges.erase(std::find(InEdges.begin(), InEdges.end(), In));
   1496 }
   1497 
   1498 CfgNode *CfgNode::shortCircuit() {
   1499   auto *Func = getCfg();
   1500   auto *Last = &getInsts().back();
   1501   Variable *Condition = nullptr;
   1502   InstBr *Br = nullptr;
   1503   if ((Br = llvm::dyn_cast<InstBr>(Last))) {
   1504     if (!Br->isUnconditional()) {
   1505       Condition = llvm::dyn_cast<Variable>(Br->getCondition());
   1506     }
   1507   }
   1508   if (Condition == nullptr)
   1509     return nullptr;
   1510 
   1511   auto *JumpOnTrue = Br->getTargetTrue();
   1512   auto *JumpOnFalse = Br->getTargetFalse();
   1513 
   1514   bool FoundOr = false;
   1515   bool FoundAnd = false;
   1516 
   1517   InstArithmetic *TopLevelBoolOp = nullptr;
   1518 
   1519   for (auto &Inst : reverse_range(getInsts())) {
   1520     if (Inst.isDeleted())
   1521       continue;
   1522     if (Inst.getDest() == Condition) {
   1523       if (auto *Arith = llvm::dyn_cast<InstArithmetic>(&Inst)) {
   1524 
   1525         FoundOr = (Arith->getOp() == InstArithmetic::OpKind::Or);
   1526         FoundAnd = (Arith->getOp() == InstArithmetic::OpKind::And);
   1527 
   1528         if (FoundOr || FoundAnd) {
   1529           TopLevelBoolOp = Arith;
   1530           break;
   1531         }
   1532       }
   1533     }
   1534   }
   1535 
   1536   if (!TopLevelBoolOp)
   1537     return nullptr;
   1538 
   1539   auto IsOperand = [](Inst *Instr, Operand *Opr) -> bool {
   1540     for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
   1541       if (Instr->getSrc(i) == Opr)
   1542         return true;
   1543     }
   1544     return false;
   1545   };
   1546   Inst *FirstOperandDef = nullptr;
   1547   for (auto &Inst : getInsts()) {
   1548     if (IsOperand(TopLevelBoolOp, Inst.getDest())) {
   1549       FirstOperandDef = &Inst;
   1550       break;
   1551     }
   1552   }
   1553 
   1554   if (FirstOperandDef == nullptr) {
   1555     return nullptr;
   1556   }
   1557 
   1558   // Check for side effects
   1559   auto It = Ice::instToIterator(FirstOperandDef);
   1560   while (It != getInsts().end()) {
   1561     if (It->isDeleted()) {
   1562       ++It;
   1563       continue;
   1564     }
   1565     if (llvm::isa<InstBr>(It) || llvm::isa<InstRet>(It)) {
   1566       break;
   1567     }
   1568     auto *Dest = It->getDest();
   1569     if (It->getDest() == nullptr || It->hasSideEffects() ||
   1570         !Func->getVMetadata()->isSingleBlock(Dest)) {
   1571       // Relying on short cicuit eval here.
   1572       // getVMetadata()->isSingleBlock(Dest)
   1573       // will segfault if It->getDest() == nullptr
   1574       return nullptr;
   1575     }
   1576     It++;
   1577   }
   1578 
   1579   auto *NewNode = Func->makeNode();
   1580   NewNode->setLoopNestDepth(getLoopNestDepth());
   1581   It = Ice::instToIterator(FirstOperandDef);
   1582   It++; // Have to split after the def
   1583 
   1584   NewNode->getInsts().splice(NewNode->getInsts().begin(), getInsts(), It,
   1585                              getInsts().end());
   1586 
   1587   if (BuildDefs::dump()) {
   1588     NewNode->setName(getName().append("_2"));
   1589     setName(getName().append("_1"));
   1590   }
   1591 
   1592   // Point edges properly
   1593   NewNode->addInEdge(this);
   1594   for (auto *Out : getOutEdges()) {
   1595     NewNode->addOutEdge(Out);
   1596     Out->addInEdge(NewNode);
   1597   }
   1598   removeAllOutEdges();
   1599   addOutEdge(NewNode);
   1600 
   1601   // Manage Phi instructions of successors
   1602   for (auto *Succ : NewNode->getOutEdges()) {
   1603     for (auto &Inst : Succ->getPhis()) {
   1604       auto *Phi = llvm::cast<InstPhi>(&Inst);
   1605       for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
   1606         if (Phi->getLabel(i) == this) {
   1607           Phi->addArgument(Phi->getSrc(i), NewNode);
   1608         }
   1609       }
   1610     }
   1611   }
   1612 
   1613   // Create new Br instruction
   1614   InstBr *NewInst = nullptr;
   1615   if (FoundOr) {
   1616     addOutEdge(JumpOnTrue);
   1617     JumpOnFalse->removeInEdge(this);
   1618     NewInst =
   1619         InstBr::create(Func, FirstOperandDef->getDest(), JumpOnTrue, NewNode);
   1620   } else if (FoundAnd) {
   1621     addOutEdge(JumpOnFalse);
   1622     JumpOnTrue->removeInEdge(this);
   1623     NewInst =
   1624         InstBr::create(Func, FirstOperandDef->getDest(), NewNode, JumpOnFalse);
   1625   } else {
   1626     return nullptr;
   1627   }
   1628 
   1629   assert(NewInst != nullptr);
   1630   appendInst(NewInst);
   1631 
   1632   Operand *UnusedOperand = nullptr;
   1633   assert(TopLevelBoolOp->getSrcSize() == 2);
   1634   if (TopLevelBoolOp->getSrc(0) == FirstOperandDef->getDest())
   1635     UnusedOperand = TopLevelBoolOp->getSrc(1);
   1636   else if (TopLevelBoolOp->getSrc(1) == FirstOperandDef->getDest())
   1637     UnusedOperand = TopLevelBoolOp->getSrc(0);
   1638   assert(UnusedOperand);
   1639 
   1640   Br->replaceSource(0, UnusedOperand); // Index 0 has the condition of the Br
   1641 
   1642   TopLevelBoolOp->setDeleted();
   1643   return NewNode;
   1644 }
   1645 
   1646 } // end of namespace Ice
   1647