Home | History | Annotate | Download | only in optimizing
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
     18 #define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
     19 
     20 #include "base/arena_bit_vector.h"
     21 #include "base/arena_containers.h"
     22 #include "base/bit_vector-inl.h"
     23 #include "nodes.h"
     24 
     25 namespace art {
     26 
     27 class InductionVarRange;
     28 
     29 static const bool kSuperblockClonerLogging = false;
     30 
     31 // Represents an edge between two HBasicBlocks.
     32 //
     33 // Note: objects of this class are small - pass them by value.
     34 class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> {
     35  public:
     36   HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) {
     37     DCHECK_NE(to_, kInvalidBlockId);
     38     DCHECK_NE(from_, kInvalidBlockId);
     39   }
     40   HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) {
     41     DCHECK_NE(to_, kInvalidBlockId);
     42     DCHECK_NE(from_, kInvalidBlockId);
     43   }
     44   HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {}
     45 
     46   uint32_t GetFrom() const { return from_; }
     47   uint32_t GetTo() const { return to_; }
     48 
     49   bool operator==(const HEdge& other) const {
     50     return this->from_ == other.from_ && this->to_ == other.to_;
     51   }
     52 
     53   bool operator!=(const HEdge& other) const { return !operator==(other); }
     54   void Dump(std::ostream& stream) const;
     55 
     56   // Returns whether an edge represents a valid edge in CF graph: whether the from_ block
     57   // has to_ block as a successor.
     58   bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; }
     59 
     60  private:
     61   // Predecessor block id.
     62   uint32_t from_;
     63   // Successor block id.
     64   uint32_t to_;
     65 };
     66 
     67 // Returns whether a HEdge edge corresponds to an existing edge in the graph.
     68 inline bool IsEdgeValid(HEdge edge, HGraph* graph) {
     69   if (!edge.IsValid()) {
     70     return false;
     71   }
     72   uint32_t from = edge.GetFrom();
     73   uint32_t to = edge.GetTo();
     74   if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) {
     75     return false;
     76   }
     77 
     78   HBasicBlock* block_from = graph->GetBlocks()[from];
     79   HBasicBlock* block_to = graph->GetBlocks()[to];
     80   if (block_from == nullptr || block_to == nullptr) {
     81     return false;
     82   }
     83 
     84   return block_from->HasSuccessor(block_to, 0);
     85 }
     86 
     87 // SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without
     88 // fine grain manipulation with IR; data flow and graph properties are resolved/adjusted
     89 // automatically. The clone transformation is defined by specifying a set of basic blocks to copy
     90 // and a set of rules how to treat edges, remap their successors. By using this approach such
     91 // optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented.
     92 //
     93 // The idea of the transformation is based on "Superblock cloning" technique described in the book
     94 // "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University
     95 // Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective
     96 // Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al.
     97 // J Supercomput (1993) 7: 229. doi:10.1007/BF01205185.
     98 //
     99 // There are two states of the IR graph: original graph (before the transformation) and
    100 // copy graph (after).
    101 //
    102 // Before the transformation:
    103 // Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original
    104 // graph into 4 categories/sets (use the following notation for edges: "(pred, succ)",
    105 // where pred, succ - basic blocks):
    106 //  - internal - pred, succ are members of orig_bb_set.
    107 //  - outside  - pred, succ are not members of orig_bb_set.
    108 //  - incoming - pred is not a member of orig_bb_set, succ is.
    109 //  - outgoing - pred is a member of orig_bb_set, succ is not.
    110 //
    111 // Transformation:
    112 //
    113 // 1. Initial cloning:
    114 //   1.1. For each orig_block in orig_bb_set create a copy copy_block; these new blocks
    115 //        form copy_bb_set.
    116 //   1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the
    117 //        copies of X, Y basic blocks correspondingly; these new edges form copy_internal edge
    118 //        set.
    119 //   1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the
    120 //        copies of X, Y basic blocks correspondingly; these new edges form copy_outgoing edge
    121 //        set.
    122 // 2. Successors remapping.
    123 //   2.1. 'remap_orig_internal - set of edges (X, Y) from orig_bb_set whose successors should
    124 //        be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)).
    125 //   2.2. remap_copy_internal - set of edges (X_1, Y_1) from copy_bb_set whose successors
    126 //        should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)).
    127 //   2.3. 'remap_incoming - set of edges (X, Y) from the incoming edge set in the original graph
    128 //        whose successors should be remapped to copies nodes: ((X, Y) will be transformed into
    129 //        (X, Y_1)).
    130 // 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc).
    131 // 4. Fix/resolve data flow.
    132 // 5. Do cleanups (DCE, critical edges splitting, etc).
    133 //
    134 class SuperblockCloner : public ValueObject {
    135  public:
    136   // TODO: Investigate optimal types for the containers.
    137   using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>;
    138   using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>;
    139   using HBasicBlockSet = ArenaBitVector;
    140   using HEdgeSet = ArenaHashSet<HEdge>;
    141 
    142   SuperblockCloner(HGraph* graph,
    143                    const HBasicBlockSet* orig_bb_set,
    144                    HBasicBlockMap* bb_map,
    145                    HInstructionMap* hir_map,
    146                    InductionVarRange* induction_range);
    147 
    148   // Sets edge successor remapping info specified by corresponding edge sets.
    149   void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
    150                                  const HEdgeSet* remap_copy_internal,
    151                                  const HEdgeSet* remap_incoming);
    152 
    153   // Returns whether the specified subgraph is copyable.
    154   // TODO: Start from small range of graph patterns then extend it.
    155   bool IsSubgraphClonable() const;
    156 
    157   // Returns whether selected subgraph satisfies the criteria for fast data flow resolution
    158   // when iterative DF algorithm is not required and dominators/instructions inputs can be
    159   // trivially adjusted.
    160   //
    161   // TODO: formally describe the criteria.
    162   //
    163   // Loop peeling and unrolling satisfy the criteria.
    164   bool IsFastCase() const;
    165 
    166   // Runs the copy algorithm according to the description.
    167   void Run();
    168 
    169   // Cleans up the graph after transformation: splits critical edges, recalculates control flow
    170   // information (back-edges, dominators, loop info, etc), eliminates redundant phis.
    171   void CleanUp();
    172 
    173   // Returns a clone of a basic block (orig_block).
    174   //
    175   //  - The copy block will have no successors/predecessors; they should be set up manually.
    176   //  - For each instruction in the orig_block a copy is created and inserted into the copy block;
    177   //    this correspondence is recorded in the map (old instruction, new instruction).
    178   //  - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the
    179   //    same, as in the original block, PHIs do not reflect a correct correspondence between the
    180   //    value and predecessors (as the copy block has no predecessors by now), etc.
    181   HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block);
    182 
    183   // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_
    184   // and hir_map_.
    185   void CloneBasicBlocks();
    186 
    187   HInstruction* GetInstrCopy(HInstruction* orig_instr) const {
    188     auto copy_input_iter = hir_map_->find(orig_instr);
    189     DCHECK(copy_input_iter != hir_map_->end());
    190     return copy_input_iter->second;
    191   }
    192 
    193   HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const {
    194     HBasicBlock* block = bb_map_->Get(orig_block);
    195     DCHECK(block != nullptr);
    196     return block;
    197   }
    198 
    199   HInstruction* GetInstrOrig(HInstruction* copy_instr) const {
    200     for (auto it : *hir_map_) {
    201       if (it.second == copy_instr) {
    202         return it.first;
    203       }
    204     }
    205     return nullptr;
    206   }
    207 
    208   bool IsInOrigBBSet(uint32_t block_id) const {
    209     return orig_bb_set_.IsBitSet(block_id);
    210   }
    211 
    212   bool IsInOrigBBSet(const HBasicBlock* block) const {
    213     return IsInOrigBBSet(block->GetBlockId());
    214   }
    215 
    216   // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops,
    217   // dominators) needs to be adjusted.
    218   HLoopInformation* GetRegionToBeAdjusted() const {
    219     return outer_loop_;
    220   }
    221 
    222  private:
    223   // Fills the 'exits' vector with the subgraph exits.
    224   void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const;
    225 
    226   // Finds and records information about the area in the graph for which control flow (back edges,
    227   // loops, dominators) needs to be adjusted.
    228   void FindAndSetLocalAreaForAdjustments();
    229 
    230   // Remaps edges' successors according to the info specified in the edges sets.
    231   //
    232   // Only edge successors/predecessors and phis' input records (to have a correspondence between
    233   // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither
    234   // phis' nor instructions' inputs values are resolved.
    235   void RemapEdgesSuccessors();
    236 
    237   // Adjusts control flow (back edges, loops, dominators) for the local area defined by
    238   // FindAndSetLocalAreaForAdjustments.
    239   void AdjustControlFlowInfo();
    240 
    241   // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in
    242   // the SSA form.
    243   void ResolveDataFlow();
    244 
    245   //
    246   // Helpers for live-outs processing and Subgraph-closed SSA.
    247   //
    248   //  - live-outs - values which are defined inside the subgraph and have uses outside.
    249   //  - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph
    250   //    have no outside uses except for the phi-nodes in the subgraph exits.
    251   //
    252   // Note: now if the subgraph has live-outs it is only clonable if it has a single exit; this
    253   // makes the subgraph-closed SSA form construction much easier.
    254   //
    255   // TODO: Support subgraphs with live-outs and multiple exits.
    256   //
    257 
    258   // For each live-out value 'val' in the region puts a record <val, val> into the map.
    259   // Returns whether all of the instructions in the subgraph are clonable.
    260   bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const;
    261 
    262   // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit.
    263   //
    264   // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates
    265   // the record in the map to <val, phi> and replaces all outside uses with this phi.
    266   void ConstructSubgraphClosedSSA();
    267 
    268   // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding
    269   // (<val, phi>) phi after the cloning is done.
    270   void FixSubgraphClosedSSAAfterCloning();
    271 
    272   //
    273   // Helpers for CloneBasicBlock.
    274   //
    275 
    276   // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the
    277   // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original.
    278   void ReplaceInputsWithCopies(HInstruction* copy_instr);
    279 
    280   // Recursively clones the environment for the copy instruction. If the input of the original
    281   // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise
    282   // leaves it the same as original.
    283   void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env);
    284 
    285   //
    286   // Helpers for RemapEdgesSuccessors.
    287   //
    288 
    289   // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and
    290   // copy_succ.
    291   void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
    292 
    293   // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ.
    294   void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
    295 
    296   // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ.
    297   void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
    298 
    299   //
    300   // Local versions of control flow calculation/adjustment routines.
    301   //
    302 
    303   void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set);
    304   void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set);
    305   GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set);
    306   void CleanUpControlFlow();
    307 
    308   //
    309   // Helpers for ResolveDataFlow
    310   //
    311 
    312   // Resolves the inputs of the phi.
    313   void ResolvePhi(HPhi* phi);
    314 
    315   // Update induction range after when fixing SSA.
    316   void UpdateInductionRangeInfoOf(
    317       HInstruction* user, HInstruction* old_instruction, HInstruction* replacement);
    318 
    319   //
    320   // Debug and logging methods.
    321   //
    322   void CheckInstructionInputsRemapping(HInstruction* orig_instr);
    323   bool CheckRemappingInfoIsValid();
    324   void VerifyGraph();
    325   void DumpInputSets();
    326 
    327   HBasicBlock* GetBlockById(uint32_t block_id) const {
    328     DCHECK(block_id < graph_->GetBlocks().size());
    329     HBasicBlock* block = graph_->GetBlocks()[block_id];
    330     DCHECK(block != nullptr);
    331     return block;
    332   }
    333 
    334   HGraph* const graph_;
    335   ArenaAllocator* const arena_;
    336 
    337   // Set of basic block in the original graph to be copied.
    338   HBasicBlockSet orig_bb_set_;
    339 
    340   // Sets of edges which require successors remapping.
    341   const HEdgeSet* remap_orig_internal_;
    342   const HEdgeSet* remap_copy_internal_;
    343   const HEdgeSet* remap_incoming_;
    344 
    345   // Correspondence map for blocks: (original block, copy block).
    346   HBasicBlockMap* bb_map_;
    347   // Correspondence map for instructions: (original HInstruction, copy HInstruction).
    348   HInstructionMap* hir_map_;
    349   // As a result of cloning, the induction range analysis information can be invalidated
    350   // and must be updated. If not null, the cloner updates it for changed instructions.
    351   InductionVarRange* induction_range_;
    352   // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted.
    353   HLoopInformation* outer_loop_;
    354   HBasicBlockSet outer_loop_bb_set_;
    355 
    356   HInstructionMap live_outs_;
    357 
    358   ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
    359   ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected);
    360 
    361   DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
    362 };
    363 
    364 // Helper class to perform loop peeling/unrolling.
    365 //
    366 // This helper should be used when correspondence map between original and copied
    367 // basic blocks/instructions are demanded.
    368 class PeelUnrollHelper : public ValueObject {
    369  public:
    370   PeelUnrollHelper(HLoopInformation* info,
    371                    SuperblockCloner::HBasicBlockMap* bb_map,
    372                    SuperblockCloner::HInstructionMap* hir_map,
    373                    InductionVarRange* induction_range) :
    374       loop_info_(info),
    375       cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map, induction_range) {
    376     // For now do peeling/unrolling only for natural loops.
    377     DCHECK(!info->IsIrreducible());
    378   }
    379 
    380   // Returns whether the loop can be peeled/unrolled (static function).
    381   static bool IsLoopClonable(HLoopInformation* loop_info);
    382 
    383   // Returns whether the loop can be peeled/unrolled.
    384   bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); }
    385 
    386   HBasicBlock* DoPeeling() { return DoPeelUnrollImpl(/* to_unroll= */ false); }
    387   HBasicBlock* DoUnrolling() { return DoPeelUnrollImpl(/* to_unroll= */ true); }
    388   HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); }
    389 
    390  protected:
    391   // Applies loop peeling/unrolling for the loop specified by 'loop_info'.
    392   //
    393   // Depending on 'do_unroll' either unrolls loop by 2 or peels one iteration from it.
    394   HBasicBlock* DoPeelUnrollImpl(bool to_unroll);
    395 
    396  private:
    397   HLoopInformation* loop_info_;
    398   SuperblockCloner cloner_;
    399 
    400   DISALLOW_COPY_AND_ASSIGN(PeelUnrollHelper);
    401 };
    402 
    403 // Helper class to perform loop peeling/unrolling.
    404 //
    405 // This helper should be used when there is no need to get correspondence information between
    406 // original and copied basic blocks/instructions.
    407 class PeelUnrollSimpleHelper : public ValueObject {
    408  public:
    409   PeelUnrollSimpleHelper(HLoopInformation* info, InductionVarRange* induction_range);
    410   bool IsLoopClonable() const { return helper_.IsLoopClonable(); }
    411   HBasicBlock* DoPeeling() { return helper_.DoPeeling(); }
    412   HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); }
    413   HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); }
    414 
    415   const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; }
    416   const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; }
    417 
    418  private:
    419   SuperblockCloner::HBasicBlockMap bb_map_;
    420   SuperblockCloner::HInstructionMap hir_map_;
    421   PeelUnrollHelper helper_;
    422 
    423   DISALLOW_COPY_AND_ASSIGN(PeelUnrollSimpleHelper);
    424 };
    425 
    426 // Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info.
    427 void CollectRemappingInfoForPeelUnroll(bool to_unroll,
    428                                        HLoopInformation* loop_info,
    429                                        SuperblockCloner::HEdgeSet* remap_orig_internal,
    430                                        SuperblockCloner::HEdgeSet* remap_copy_internal,
    431                                        SuperblockCloner::HEdgeSet* remap_incoming);
    432 
    433 // Returns whether blocks from 'work_set' are reachable from the rest of the graph.
    434 //
    435 // Returns whether such a set 'outer_entries' of basic blocks exists that:
    436 // - each block from 'outer_entries' is not from 'work_set'.
    437 // - each block from 'work_set' is reachable from at least one block from 'outer_entries'.
    438 //
    439 // After the function returns work_set contains only blocks from the original 'work_set'
    440 // which are unreachable from the rest of the graph.
    441 bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph);
    442 
    443 // Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
    444 // graph.
    445 HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2);
    446 }  // namespace art
    447 
    448 namespace std {
    449 
    450 template <>
    451 struct hash<art::HEdge> {
    452   size_t operator()(art::HEdge const& x) const noexcept  {
    453     // Use Cantor pairing function as the hash function.
    454     size_t a = x.GetFrom();
    455     size_t b = x.GetTo();
    456     return (a + b) * (a + b + 1) / 2 + b;
    457   }
    458 };
    459 ostream& operator<<(ostream& os, const art::HEdge& e);
    460 
    461 }  // namespace std
    462 
    463 #endif  // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
    464