// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "source/cfa.h"
#include "source/opt/cfg.h"
#include "source/opt/ir_builder.h"
#include "source/opt/ir_context.h"
#include "source/opt/loop_descriptor.h"
#include "source/opt/loop_utils.h"

namespace spvtools {
namespace opt {

namespace {
// Return true if |bb| is dominated by at least one block in |exits|.
static inline bool DominatesAnExit(BasicBlock* bb,
                                   const std::unordered_set<BasicBlock*>& exits,
                                   const DominatorTree& dom_tree) {
  for (BasicBlock* e_bb : exits)
    if (dom_tree.Dominates(bb, e_bb)) return true;
  return false;
}

// Utility class to rewrite out-of-loop uses of an in-loop definition in terms
// of phi instructions to achieve LCSSA form.
// For a given definition, the class user registers phi instructions using that
// definition in all loop exit blocks by which the definition escapes.
// Then, when rewriting a use of the definition, the rewriter walks the
// paths from the use to the loop exits. At each step, it inserts a phi
// instruction to merge the incoming values according to the exit blocks'
// definitions.
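//
// As a purely illustrative sketch (the ids below are made up and not taken
// from any particular module), a definition %val produced inside the loop and
// consumed after it:
//   %val = OpIAdd %int %a %b          ; inside the loop
//   ...
//   %use = OpIMul %int %val %c        ; outside the loop
// ends up reading a phi created in the exit block instead:
//   %phi = OpPhi %int %val %from_loop_bb
//   ...
//   %use = OpIMul %int %phi %c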
class LCSSARewriter {
 public:
  LCSSARewriter(IRContext* context, const DominatorTree& dom_tree,
                const std::unordered_set<BasicBlock*>& exit_bb,
                BasicBlock* merge_block)
      : context_(context),
        cfg_(context_->cfg()),
        dom_tree_(dom_tree),
        exit_bb_(exit_bb),
        merge_block_id_(merge_block ? merge_block->id() : 0) {}

  struct UseRewriter {
    explicit UseRewriter(LCSSARewriter* base, const Instruction& def_insn)
        : base_(base), def_insn_(def_insn) {}
    // Rewrites the use of |def_insn_| by the instruction |user| at the index
    // |operand_index| in terms of phi instructions. This recursively builds
    // new phi instructions from |user| to the loop exit blocks' phis. The use
    // of |def_insn_| in |user| is replaced by the relevant phi instruction at
    // the end of the operation.
    // It is assumed that |user| does not dominate any of the loop exit basic
    // blocks. This operation does not update the def/use manager; instead it
    // records what needs to be updated. The actual update is performed by
    // UpdateManagers.
    void RewriteUse(BasicBlock* bb, Instruction* user, uint32_t operand_index) {
      assert(
          (user->opcode() != SpvOpPhi || bb != GetParent(user)) &&
          "The root basic block must be the incoming edge if |user| is a phi "
          "instruction");
      assert((user->opcode() == SpvOpPhi || bb == GetParent(user)) &&
             "The root basic block must be the instruction parent if |user| is "
             "not a phi instruction");

      Instruction* new_def = GetOrBuildIncoming(bb->id());

      user->SetOperand(operand_index, {new_def->result_id()});
      rewritten_.insert(user);
    }

    // In-place update of some managers (avoid full invalidation).
    inline void UpdateManagers() {
      analysis::DefUseManager* def_use_mgr = base_->context_->get_def_use_mgr();
      // Register all new definitions.
      for (Instruction* insn : rewritten_) {
        def_use_mgr->AnalyzeInstDef(insn);
      }
      // Register all new uses.
      for (Instruction* insn : rewritten_) {
        def_use_mgr->AnalyzeInstUse(insn);
      }
    }

   private:
    // Return the basic block that |instr| belongs to.
    BasicBlock* GetParent(Instruction* instr) {
      return base_->context_->get_instr_block(instr);
    }

    // Builds a phi instruction for the basic block |bb|. The function assumes
    // that |defining_blocks| contains the list of basic blocks that define the
    // usable value for each predecessor of |bb|.
    inline Instruction* CreatePhiInstruction(
        BasicBlock* bb, const std::vector<uint32_t>& defining_blocks) {
      std::vector<uint32_t> incomings;
      const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id());
      assert(bb_preds.size() == defining_blocks.size());
      for (size_t i = 0; i < bb_preds.size(); i++) {
        incomings.push_back(
            GetOrBuildIncoming(defining_blocks[i])->result_id());
        incomings.push_back(bb_preds[i]);
      }
      InstructionBuilder builder(base_->context_, &*bb->begin(),
                                 IRContext::kAnalysisInstrToBlockMapping);
      Instruction* incoming_phi =
          builder.AddPhi(def_insn_.type_id(), incomings);

      rewritten_.insert(incoming_phi);
      return incoming_phi;
    }

    // Builds a phi instruction for the basic block |bb|; all incoming values
    // will be |value|.
    inline Instruction* CreatePhiInstruction(BasicBlock* bb,
                                             const Instruction& value) {
      std::vector<uint32_t> incomings;
      const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id());
      for (size_t i = 0; i < bb_preds.size(); i++) {
        incomings.push_back(value.result_id());
        incomings.push_back(bb_preds[i]);
      }
      InstructionBuilder builder(base_->context_, &*bb->begin(),
                                 IRContext::kAnalysisInstrToBlockMapping);
      Instruction* incoming_phi =
          builder.AddPhi(def_insn_.type_id(), incomings);

      rewritten_.insert(incoming_phi);
      return incoming_phi;
    }

    // Return the new def to use for the basic block |bb_id|.
    // If |bb_id| does not have a suitable def to use then we:
    //   - return the common def used by all predecessors;
    //   - if there is no common def, then we build a new phi instr at the
    //     beginning of |bb_id| and return this new instruction.
    Instruction* GetOrBuildIncoming(uint32_t bb_id) {
      assert(base_->cfg_->block(bb_id) != nullptr && "Unknown basic block");

      Instruction*& incoming_phi = bb_to_phi_[bb_id];
      if (incoming_phi) {
        return incoming_phi;
      }

      BasicBlock* bb = &*base_->cfg_->block(bb_id);
      // If this is an exit basic block, check whether there already is an
      // eligible phi instruction. An eligible phi has |def_insn_| as all
      // incoming values.
      if (base_->exit_bb_.count(bb)) {
        // Check whether there is an eligible phi in this block.
        if (!bb->WhileEachPhiInst([&incoming_phi, this](Instruction* phi) {
              for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
                if (phi->GetSingleWordInOperand(i) != def_insn_.result_id())
                  return true;
              }
              incoming_phi = phi;
              rewritten_.insert(incoming_phi);
              return false;
            })) {
          return incoming_phi;
        }
        incoming_phi = CreatePhiInstruction(bb, def_insn_);
        return incoming_phi;
      }

      // Get the block that defines the value to use for each predecessor.
      // If the vector has a single value, then this block does not need to
      // build a phi instruction unless |bb_id| is the loop merge block.
      const std::vector<uint32_t>& defining_blocks =
          base_->GetDefiningBlocks(bb_id);

      // Special case for structured loops: the merge block might not be part
      // of the exit block set. To maintain the structured properties, later
      // transformations are easier if the merge block also holds a phi
      // instruction like the exit blocks do.
      if (defining_blocks.size() > 1 || bb_id == base_->merge_block_id_) {
        if (defining_blocks.size() > 1) {
          incoming_phi = CreatePhiInstruction(bb, defining_blocks);
        } else {
          assert(bb_id == base_->merge_block_id_);
          incoming_phi =
              CreatePhiInstruction(bb, *GetOrBuildIncoming(defining_blocks[0]));
        }
      } else {
        incoming_phi = GetOrBuildIncoming(defining_blocks[0]);
      }

      return incoming_phi;
    }

    LCSSARewriter* base_;
    const Instruction& def_insn_;
    std::unordered_map<uint32_t, Instruction*> bb_to_phi_;
    std::unordered_set<Instruction*> rewritten_;
  };

 private:
  // Return the set of blocks that hold the definitions to use for the basic
  // block |bb_id|:
  //   - if a single block provides the definition for every path into
  //     |bb_id|, the returned vector contains only that block;
  //   - otherwise, it contains one defining block per predecessor, in
  //     predecessor order, and a phi instruction must be built in |bb_id|.
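  //
  // Illustrative example (block ids are made up): with exit blocks {%e1, %e2}
  // and a block %m whose two predecessors are %e1 and %e2, this returns
  // {%e1, %e2} for %m. For a block %n whose only predecessor is %m, it
  // returns {%m}, since the phi built in %m is the single incoming
  // definition.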
  const std::vector<uint32_t>& GetDefiningBlocks(uint32_t bb_id) {
    assert(cfg_->block(bb_id) != nullptr && "Unknown basic block");
    std::vector<uint32_t>& defining_blocks = bb_to_defining_blocks_[bb_id];

    if (defining_blocks.size()) return defining_blocks;

    // Check if one of the loop exit basic blocks dominates |bb_id|.
    for (const BasicBlock* e_bb : exit_bb_) {
      if (dom_tree_.Dominates(e_bb->id(), bb_id)) {
        defining_blocks.push_back(e_bb->id());
        return defining_blocks;
      }
    }

    // Process the predecessors; they will return their suitable blocks.
    // If they are all the same, this basic block is dominated by a common
    // block, so we won't need to build a phi instruction.
    for (uint32_t pred_id : cfg_->preds(bb_id)) {
      const std::vector<uint32_t>& pred_blocks = GetDefiningBlocks(pred_id);
      if (pred_blocks.size() == 1)
        defining_blocks.push_back(pred_blocks[0]);
      else
        defining_blocks.push_back(pred_id);
    }
    assert(defining_blocks.size());
    if (std::all_of(defining_blocks.begin(), defining_blocks.end(),
                    [&defining_blocks](uint32_t id) {
                      return id == defining_blocks[0];
                    })) {
      // No need for a phi.
      defining_blocks.resize(1);
    }

    return defining_blocks;
  }

  IRContext* context_;
  CFG* cfg_;
  const DominatorTree& dom_tree_;
  const std::unordered_set<BasicBlock*>& exit_bb_;
  uint32_t merge_block_id_;
  // This map represents the set of known paths. For each key, the vector
  // represents the set of blocks holding the definition to be used to build
  // the phi instruction.
  // If the vector is empty, then the path is not known yet and must be built.
  // If the vector has a single value, then the value defined by that basic
  //   block should be used.
  // If the vector has more than one value, then a phi node must be created;
  //   the basic block ordering is the same as the predecessor ordering.
  std::unordered_map<uint32_t, std::vector<uint32_t>> bb_to_defining_blocks_;
};

// Make the set |blocks| closed SSA. The set is closed SSA if all the uses
// outside the set are phi instructions in the exit basic block set (held by
// |lcssa_rewriter|).
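// In practice: for every instruction defined in |blocks| that dominates an
// exit, each use outside of |blocks| is handed to LCSSARewriter::UseRewriter,
// which chains the required phi instructions from the use back to the exit
// blocks.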
inline void MakeSetClosedSSA(IRContext* context, Function* function,
                             const std::unordered_set<uint32_t>& blocks,
                             const std::unordered_set<BasicBlock*>& exit_bb,
                             LCSSARewriter* lcssa_rewriter) {
  CFG& cfg = *context->cfg();
  DominatorTree& dom_tree =
      context->GetDominatorAnalysis(function)->GetDomTree();
  analysis::DefUseManager* def_use_manager = context->get_def_use_mgr();

  for (uint32_t bb_id : blocks) {
    BasicBlock* bb = cfg.block(bb_id);
    // If bb does not dominate an exit block, then it cannot have escaping defs.
    if (!DominatesAnExit(bb, exit_bb, dom_tree)) continue;
    for (Instruction& inst : *bb) {
      LCSSARewriter::UseRewriter rewriter(lcssa_rewriter, inst);
      def_use_manager->ForEachUse(
          &inst, [&blocks, &rewriter, &exit_bb, context](
                     Instruction* use, uint32_t operand_index) {
            BasicBlock* use_parent = context->get_instr_block(use);
            assert(use_parent);
            if (blocks.count(use_parent->id())) return;

            if (use->opcode() == SpvOpPhi) {
              // If the use is a phi instruction in an exit block, then it is
              // already consistent with LCSSA form.
              if (exit_bb.count(use_parent)) {
                return;
              } else {
                // This is not an exit block, but the user is a phi
                // instruction: consider the incoming branch only.
                use_parent = context->get_instr_block(
                    use->GetSingleWordOperand(operand_index + 1));
              }
            }
            // Rewrite the use. Note that this call does not invalidate the
            // def/use manager. So this operation is safe.
            rewriter.RewriteUse(use_parent, use, operand_index);
          });
      rewriter.UpdateManagers();
    }
  }
}

}  // namespace

void LoopUtils::CreateLoopDedicatedExits() {
  Function* function = loop_->GetHeaderBlock()->GetParent();
  LoopDescriptor& loop_desc = *context_->GetLoopDescriptor(function);
  CFG& cfg = *context_->cfg();
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  const IRContext::Analysis PreservedAnalyses =
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping;

  // Gathers the set of basic blocks that are not in this loop and have at
  // least one predecessor in the loop and one not in the loop.
  std::unordered_set<uint32_t> exit_bb_set;
  loop_->GetExitBlocks(&exit_bb_set);

  std::unordered_set<BasicBlock*> new_loop_exits;
  bool made_change = false;
  // For each exit block, we create a new block that gathers all branches from
  // the loop and then falls through into the original block.
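  // Illustrative sketch (block names are made up): if %merge is reached from
  // %in_loop (inside the loop) and %outside (outside the loop), a new block
  // %exit is created, the in-loop edge is redirected to it, and %exit
  // branches to %merge:
  //   before:  %in_loop -> %merge <- %outside
  //   after:   %in_loop -> %exit -> %merge <- %outside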
  for (uint32_t non_dedicate_id : exit_bb_set) {
    BasicBlock* non_dedicate = cfg.block(non_dedicate_id);
    const std::vector<uint32_t>& bb_pred = cfg.preds(non_dedicate_id);
    // Ignore the block if all the predecessors are in the loop.
    if (std::all_of(bb_pred.begin(), bb_pred.end(),
                    [this](uint32_t id) { return loop_->IsInsideLoop(id); })) {
      new_loop_exits.insert(non_dedicate);
      continue;
    }

    made_change = true;
    Function::iterator insert_pt = function->begin();
    for (; insert_pt != function->end() && &*insert_pt != non_dedicate;
         ++insert_pt) {
    }
    assert(insert_pt != function->end() && "Basic Block not found");

    // Create the dedicated exit basic block.
    // TODO(1841): Handle id overflow.
    BasicBlock& exit = *insert_pt.InsertBefore(std::unique_ptr<BasicBlock>(
        new BasicBlock(std::unique_ptr<Instruction>(new Instruction(
            context_, SpvOpLabel, 0, context_->TakeNextId(), {})))));
    exit.SetParent(function);

    // Redirect in loop predecessors to |exit| block.
    for (uint32_t exit_pred_id : bb_pred) {
      if (loop_->IsInsideLoop(exit_pred_id)) {
        BasicBlock* pred_block = cfg.block(exit_pred_id);
        pred_block->ForEachSuccessorLabel([non_dedicate, &exit](uint32_t* id) {
          if (*id == non_dedicate->id()) *id = exit.id();
        });
        // Update the CFG.
        // |non_dedicate|'s predecessor list will be updated at the end of the
        // loop.
        cfg.RegisterBlock(pred_block);
      }
    }

    // Register the label to the def/use manager; required for the phi patching.
    def_use_mgr->AnalyzeInstDefUse(exit.GetLabelInst());
    context_->set_instr_block(exit.GetLabelInst(), &exit);

    InstructionBuilder builder(context_, &exit, PreservedAnalyses);
    // Now jump from our dedicated basic block to the old exit.
    // We also reset the insert point so all instructions are inserted before
    // the branch.
    builder.SetInsertPoint(builder.AddBranch(non_dedicate->id()));
    non_dedicate->ForEachPhiInst(
        [&builder, &exit, def_use_mgr, this](Instruction* phi) {
          // New phi operands for this instruction.
          std::vector<uint32_t> new_phi_op;
          // Phi operands for the dedicated exit block.
          std::vector<uint32_t> exit_phi_op;
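          // Illustrative example (made-up ids): a phi in |non_dedicate| like
          //   %p = OpPhi %int %a %in_loop_bb %b %outside_bb
          // is split into a phi in |exit| that keeps the in-loop edges,
          //   %p_exit = OpPhi %int %a %in_loop_bb
          // while %p keeps the out-of-loop edges plus one new incoming pair
          // (%p_exit, %exit).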
          for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
            uint32_t def_id = phi->GetSingleWordInOperand(i);
            uint32_t incoming_id = phi->GetSingleWordInOperand(i + 1);
            if (loop_->IsInsideLoop(incoming_id)) {
              exit_phi_op.push_back(def_id);
              exit_phi_op.push_back(incoming_id);
            } else {
              new_phi_op.push_back(def_id);
              new_phi_op.push_back(incoming_id);
            }
          }

          // Build the new phi instruction for the dedicated exit block.
          Instruction* exit_phi = builder.AddPhi(phi->type_id(), exit_phi_op);
          // Build the new incoming branch.
          new_phi_op.push_back(exit_phi->result_id());
          new_phi_op.push_back(exit.id());
          // Rewrite operands.
          uint32_t idx = 0;
          for (; idx < new_phi_op.size(); idx++)
            phi->SetInOperand(idx, {new_phi_op[idx]});
          // Remove extra operands, from last to first (more efficient).
          for (uint32_t j = phi->NumInOperands() - 1; j >= idx; j--)
            phi->RemoveInOperand(j);
          // Update the def/use manager for this |phi|.
          def_use_mgr->AnalyzeInstUse(phi);
        });
    // Update the CFG.
    cfg.RegisterBlock(&exit);
    cfg.RemoveNonExistingEdges(non_dedicate->id());
    new_loop_exits.insert(&exit);
    // If non_dedicate is in a loop, add the new dedicated exit in that loop.
    if (Loop* parent_loop = loop_desc[non_dedicate])
      parent_loop->AddBasicBlock(&exit);
  }

  if (new_loop_exits.size() == 1) {
    loop_->SetMergeBlock(*new_loop_exits.begin());
  }

  if (made_change) {
    context_->InvalidateAnalysesExceptFor(
        PreservedAnalyses | IRContext::kAnalysisCFG |
        IRContext::Analysis::kAnalysisLoopAnalysis);
  }
}

void LoopUtils::MakeLoopClosedSSA() {
  CreateLoopDedicatedExits();

  Function* function = loop_->GetHeaderBlock()->GetParent();
  CFG& cfg = *context_->cfg();
  DominatorTree& dom_tree =
      context_->GetDominatorAnalysis(function)->GetDomTree();

  std::unordered_set<BasicBlock*> exit_bb;
  {
    std::unordered_set<uint32_t> exit_bb_id;
    loop_->GetExitBlocks(&exit_bb_id);
    for (uint32_t bb_id : exit_bb_id) {
      exit_bb.insert(cfg.block(bb_id));
    }
  }

  LCSSARewriter lcssa_rewriter(context_, dom_tree, exit_bb,
                               loop_->GetMergeBlock());
  MakeSetClosedSSA(context_, function, loop_->GetBlocks(), exit_bb,
                   &lcssa_rewriter);

  // Make sure all defs post-dominated by the merge block have their last use no
  // further than the merge block.
  if (loop_->GetMergeBlock()) {
    std::unordered_set<uint32_t> merging_bb_id;
    loop_->GetMergingBlocks(&merging_bb_id);
    merging_bb_id.erase(loop_->GetMergeBlock()->id());
    // Reset the exit set; now only the merge block is the exit.
    exit_bb.clear();
    exit_bb.insert(loop_->GetMergeBlock());
    // LCSSARewriter is reusable here only because it forces the creation of a
    // phi instruction in the merge block.
    MakeSetClosedSSA(context_, function, merging_bb_id, exit_bb,
                     &lcssa_rewriter);
  }

  context_->InvalidateAnalysesExceptFor(
      IRContext::Analysis::kAnalysisCFG |
      IRContext::Analysis::kAnalysisDominatorAnalysis |
      IRContext::Analysis::kAnalysisLoopAnalysis);
}

Loop* LoopUtils::CloneLoop(LoopCloningResult* cloning_result) const {
  // Compute the structured order of the loop basic blocks and store it in the
  // vector ordered_loop_blocks.
  std::vector<BasicBlock*> ordered_loop_blocks;
  loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks);

  // Clone the loop.
  return CloneLoop(cloning_result, ordered_loop_blocks);
}

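// Clones |loop_| and chains the clone in front of the original loop: the
// clone receives a fresh merge block that branches to the original loop's
// header, out-of-loop uses of the original header are redirected to the
// cloned header, and the original loop's in-loop references to its
// pre-header are redirected to the new merge block.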
Loop* LoopUtils::CloneAndAttachLoopToHeader(LoopCloningResult* cloning_result) {
  // Clone the loop.
  Loop* new_loop = CloneLoop(cloning_result);

  // Create a new exit block/label for the new loop.
  // TODO(1841): Handle id overflow.
  std::unique_ptr<Instruction> new_label{new Instruction(
      context_, SpvOp::SpvOpLabel, 0, context_->TakeNextId(), {})};
  std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))};
  new_exit_bb->SetParent(loop_->GetMergeBlock()->GetParent());

  // Create an unconditional branch to the header block.
  InstructionBuilder builder{context_, new_exit_bb.get()};
  builder.AddBranch(loop_->GetHeaderBlock()->id());

  // Save the ids of the new and old merge block.
  const uint32_t old_merge_block = loop_->GetMergeBlock()->id();
  const uint32_t new_merge_block = new_exit_bb->id();

  // Replace the uses of the old merge block in the new loop with the new merge
  // block.
  for (std::unique_ptr<BasicBlock>& basic_block : cloning_result->cloned_bb_) {
    for (Instruction& inst : *basic_block) {
      // For each operand in each instruction check if it is using the old merge
      // block and change it to be the new merge block.
      auto replace_merge_use = [old_merge_block,
                                new_merge_block](uint32_t* id) {
        if (*id == old_merge_block) *id = new_merge_block;
      };
      inst.ForEachInOperand(replace_merge_use);
    }
  }

  const uint32_t old_header = loop_->GetHeaderBlock()->id();
  const uint32_t new_header = new_loop->GetHeaderBlock()->id();
  analysis::DefUseManager* def_use = context_->get_def_use_mgr();

  def_use->ForEachUse(old_header,
                      [new_header, this](Instruction* inst, uint32_t operand) {
                        if (!this->loop_->IsInsideLoop(inst))
                          inst->SetOperand(operand, {new_header});
                      });

  // TODO(1841): Handle failure to create pre-header.
  def_use->ForEachUse(
      loop_->GetOrCreatePreHeaderBlock()->id(),
      [new_merge_block, this](Instruction* inst, uint32_t operand) {
        if (this->loop_->IsInsideLoop(inst))
          inst->SetOperand(operand, {new_merge_block});

      });
  new_loop->SetMergeBlock(new_exit_bb.get());

  new_loop->SetPreHeaderBlock(loop_->GetPreHeaderBlock());

  // Add the new block to the cloned basic blocks.
  cloning_result->cloned_bb_.push_back(std::move(new_exit_bb));

  return new_loop;
}

Loop* LoopUtils::CloneLoop(
    LoopCloningResult* cloning_result,
    const std::vector<BasicBlock*>& ordered_loop_blocks) const {
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(context_);

  CFG& cfg = *context_->cfg();

  // Clone and place blocks in a SPIR-V compliant order (dominators first).
  for (BasicBlock* old_bb : ordered_loop_blocks) {
    // For each basic block in the loop, we clone it and register the mapping
    // between old and new ids.
    BasicBlock* new_bb = old_bb->Clone(context_);
    new_bb->SetParent(&function_);
    // TODO(1841): Handle id overflow.
    new_bb->GetLabelInst()->SetResultId(context_->TakeNextId());
    def_use_mgr->AnalyzeInstDef(new_bb->GetLabelInst());
    context_->set_instr_block(new_bb->GetLabelInst(), new_bb);
    cloning_result->cloned_bb_.emplace_back(new_bb);

    cloning_result->old_to_new_bb_[old_bb->id()] = new_bb;
    cloning_result->new_to_old_bb_[new_bb->id()] = old_bb;
    cloning_result->value_map_[old_bb->id()] = new_bb->id();

    if (loop_->IsInsideLoop(old_bb)) new_loop->AddBasicBlock(new_bb);

    for (auto new_inst = new_bb->begin(), old_inst = old_bb->begin();
         new_inst != new_bb->end(); ++new_inst, ++old_inst) {
      cloning_result->ptr_map_[&*new_inst] = &*old_inst;
      if (new_inst->HasResultId()) {
        // TODO(1841): Handle id overflow.
        new_inst->SetResultId(context_->TakeNextId());
        cloning_result->value_map_[old_inst->result_id()] =
            new_inst->result_id();

        // Only look at the defs for now, uses are not updated yet.
        def_use_mgr->AnalyzeInstDef(&*new_inst);
      }
    }
  }

  // All instructions (including all labels) have been cloned,
  // remap instruction operand ids to the new ones.
  for (std::unique_ptr<BasicBlock>& bb_ref : cloning_result->cloned_bb_) {
    BasicBlock* bb = bb_ref.get();

    for (Instruction& insn : *bb) {
      insn.ForEachInId([cloning_result](uint32_t* old_id) {
        // If the operand is defined in the loop, remap the id.
        auto id_it = cloning_result->value_map_.find(*old_id);
        if (id_it != cloning_result->value_map_.end()) {
          *old_id = id_it->second;
        }
      });
      // Only look at what the instruction uses. All defs are registered, so
      // all should be fine now.
      def_use_mgr->AnalyzeInstUse(&insn);
      context_->set_instr_block(&insn, bb);
    }
    cfg.RegisterBlock(bb);
  }

  PopulateLoopNest(new_loop.get(), *cloning_result);

  return new_loop.release();
}

void LoopUtils::PopulateLoopNest(
    Loop* new_loop, const LoopCloningResult& cloning_result) const {
  std::unordered_map<Loop*, Loop*> loop_mapping;
  loop_mapping[loop_] = new_loop;

  if (loop_->HasParent()) loop_->GetParent()->AddNestedLoop(new_loop);
  PopulateLoopDesc(new_loop, loop_, cloning_result);

  for (Loop& sub_loop :
       make_range(++TreeDFIterator<Loop>(loop_), TreeDFIterator<Loop>())) {
    Loop* cloned = new Loop(context_);
    if (Loop* parent = loop_mapping[sub_loop.GetParent()])
      parent->AddNestedLoop(cloned);
    loop_mapping[&sub_loop] = cloned;
    PopulateLoopDesc(cloned, &sub_loop, cloning_result);
  }

  loop_desc_->AddLoopNest(std::unique_ptr<Loop>(new_loop));
}

// Populates the |new_loop| descriptor according to |old_loop|'s.
void LoopUtils::PopulateLoopDesc(
    Loop* new_loop, Loop* old_loop,
    const LoopCloningResult& cloning_result) const {
  for (uint32_t bb_id : old_loop->GetBlocks()) {
    BasicBlock* bb = cloning_result.old_to_new_bb_.at(bb_id);
    new_loop->AddBasicBlock(bb);
  }
  new_loop->SetHeaderBlock(
      cloning_result.old_to_new_bb_.at(old_loop->GetHeaderBlock()->id()));
  if (old_loop->GetLatchBlock())
    new_loop->SetLatchBlock(
        cloning_result.old_to_new_bb_.at(old_loop->GetLatchBlock()->id()));
  if (old_loop->GetContinueBlock())
    new_loop->SetContinueBlock(
        cloning_result.old_to_new_bb_.at(old_loop->GetContinueBlock()->id()));
  if (old_loop->GetMergeBlock()) {
    auto it =
        cloning_result.old_to_new_bb_.find(old_loop->GetMergeBlock()->id());
    BasicBlock* bb = it != cloning_result.old_to_new_bb_.end()
                         ? it->second
                         : old_loop->GetMergeBlock();
    new_loop->SetMergeBlock(bb);
  }
  if (old_loop->GetPreHeaderBlock()) {
    auto it =
        cloning_result.old_to_new_bb_.find(old_loop->GetPreHeaderBlock()->id());
    if (it != cloning_result.old_to_new_bb_.end()) {
      new_loop->SetPreHeaderBlock(it->second);
    }
  }
}

// Class to gather some metrics about a region of interest.
void CodeMetrics::Analyze(const Loop& loop) {
  CFG& cfg = *loop.GetContext()->cfg();

  roi_size_ = 0;
  block_sizes_.clear();

  for (uint32_t id : loop.GetBlocks()) {
    const BasicBlock* bb = cfg.block(id);
    size_t bb_size = 0;
    bb->ForEachInst([&bb_size](const Instruction* insn) {
      if (insn->opcode() == SpvOpLabel) return;
      if (insn->IsNop()) return;
      if (insn->opcode() == SpvOpPhi) return;
      bb_size++;
    });
    block_sizes_[bb->id()] = bb_size;
    roi_size_ += bb_size;
  }
}

}  // namespace opt
}  // namespace spvtools
