Home | History | Annotate | Download | only in optimizing
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
     18 #define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
     19 
     20 #include "base/arena_bit_vector.h"
     21 #include "base/arena_containers.h"
     22 #include "base/bit_vector-inl.h"
     23 #include "nodes.h"
     24 
     25 namespace art {
     26 
     27 static const bool kSuperblockClonerLogging = false;
     28 static const bool kSuperblockClonerVerify = false;
     29 
     30 // Represents an edge between two HBasicBlocks.
     31 //
     32 // Note: objects of this class are small - pass them by value.
     33 class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> {
     34  public:
     35   HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) {
     36     DCHECK_NE(to_, kInvalidBlockId);
     37     DCHECK_NE(from_, kInvalidBlockId);
     38   }
     39   HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) {
     40     DCHECK_NE(to_, kInvalidBlockId);
     41     DCHECK_NE(from_, kInvalidBlockId);
     42   }
     43   HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {}
     44 
     45   uint32_t GetFrom() const { return from_; }
     46   uint32_t GetTo() const { return to_; }
     47 
     48   bool operator==(const HEdge& other) const {
     49     return this->from_ == other.from_ && this->to_ == other.to_;
     50   }
     51 
     52   bool operator!=(const HEdge& other) const { return !operator==(other); }
     53   void Dump(std::ostream& stream) const;
     54 
     55   // Returns whether an edge represents a valid edge in CF graph: whether the from_ block
     56   // has to_ block as a successor.
     57   bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; }
     58 
     59  private:
     60   // Predecessor block id.
     61   uint32_t from_;
     62   // Successor block id.
     63   uint32_t to_;
     64 };
     65 
     66 // Returns whether a HEdge edge corresponds to an existing edge in the graph.
     67 inline bool IsEdgeValid(HEdge edge, HGraph* graph) {
     68   if (!edge.IsValid()) {
     69     return false;
     70   }
     71   uint32_t from = edge.GetFrom();
     72   uint32_t to = edge.GetTo();
     73   if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) {
     74     return false;
     75   }
     76 
     77   HBasicBlock* block_from = graph->GetBlocks()[from];
     78   HBasicBlock* block_to = graph->GetBlocks()[to];
     79   if (block_from == nullptr || block_to == nullptr) {
     80     return false;
     81   }
     82 
     83   return block_from->HasSuccessor(block_to, 0);
     84 }
     85 
     86 // SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without
     87 // fine grain manipulation with IR; data flow and graph properties are resolved/adjusted
     88 // automatically. The clone transformation is defined by specifying a set of basic blocks to copy
     89 // and a set of rules how to treat edges, remap their successors. By using this approach such
     90 // optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented.
     91 //
     92 // The idea of the transformation is based on "Superblock cloning" technique described in the book
     93 // "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University
     94 // Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective
     95 // Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al.
     96 // J Supercomput (1993) 7: 229. doi:10.1007/BF01205185.
     97 //
     98 // There are two states of the IR graph: original graph (before the transformation) and
     99 // copy graph (after).
    100 //
    101 // Before the transformation:
    102 // Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original
    103 // graph into 4 categories/sets (use the following notation for edges: "(pred, succ)",
    104 // where pred, succ - basic blocks):
    105 //  - internal - pred, succ are members of orig_bb_set.
    106 //  - outside  - pred, succ are not members of orig_bb_set.
    107 //  - incoming - pred is not a member of orig_bb_set, succ is.
    108 //  - outgoing - pred is a member of orig_bb_set, succ is not.
    109 //
    110 // Transformation:
    111 //
    112 // 1. Initial cloning:
    113 //   1.1. For each orig_block in orig_bb_set create a copy copy_block; these new blocks
    114 //        form copy_bb_set.
    115 //   1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the
    116 //        copies of X, Y basic blocks correspondingly; these new edges form copy_internal edge
    117 //        set.
    118 //   1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the
    119 //        copies of X, Y basic blocks correspondingly; these new edges form copy_outgoing edge
    120 //        set.
    121 // 2. Successors remapping.
    122 //   2.1. 'remap_orig_internal - set of edges (X, Y) from orig_bb_set whose successors should
    123 //        be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)).
    124 //   2.2. remap_copy_internal - set of edges (X_1, Y_1) from copy_bb_set whose successors
    125 //        should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)).
    126 //   2.3. 'remap_incoming - set of edges (X, Y) from the incoming edge set in the original graph
    127 //        whose successors should be remapped to copies nodes: ((X, Y) will be transformed into
    128 //        (X, Y_1)).
    129 // 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc).
    130 // 4. Fix/resolve data flow.
    131 // 5. Do cleanups (DCE, critical edges splitting, etc).
    132 //
    133 class SuperblockCloner : public ValueObject {
    134  public:
    135   // TODO: Investigate optimal types for the containers.
    136   using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>;
    137   using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>;
    138   using HBasicBlockSet = ArenaBitVector;
    139   using HEdgeSet = ArenaHashSet<HEdge>;
    140 
    141   SuperblockCloner(HGraph* graph,
    142                    const HBasicBlockSet* orig_bb_set,
    143                    HBasicBlockMap* bb_map,
    144                    HInstructionMap* hir_map);
    145 
    146   // Sets edge successor remapping info specified by corresponding edge sets.
    147   void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
    148                                  const HEdgeSet* remap_copy_internal,
    149                                  const HEdgeSet* remap_incoming);
    150 
    151   // Returns whether the specified subgraph is copyable.
    152   // TODO: Start from small range of graph patterns then extend it.
    153   bool IsSubgraphClonable() const;
    154 
    155   // Runs the copy algorithm according to the description.
    156   void Run();
    157 
    158   // Cleans up the graph after transformation: splits critical edges, recalculates control flow
    159   // information (back-edges, dominators, loop info, etc), eliminates redundant phis.
    160   void CleanUp();
    161 
    162   // Returns a clone of a basic block (orig_block).
    163   //
    164   //  - The copy block will have no successors/predecessors; they should be set up manually.
    165   //  - For each instruction in the orig_block a copy is created and inserted into the copy block;
    166   //    this correspondence is recorded in the map (old instruction, new instruction).
    167   //  - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the
    168   //    same, as in the original block, PHIs do not reflect a correct correspondence between the
    169   //    value and predecessors (as the copy block has no predecessors by now), etc.
    170   HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block);
    171 
    172   // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_
    173   // and hir_map_.
    174   void CloneBasicBlocks();
    175 
    176   HInstruction* GetInstrCopy(HInstruction* orig_instr) const {
    177     auto copy_input_iter = hir_map_->find(orig_instr);
    178     DCHECK(copy_input_iter != hir_map_->end());
    179     return copy_input_iter->second;
    180   }
    181 
    182   HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const {
    183     HBasicBlock* block = bb_map_->Get(orig_block);
    184     DCHECK(block != nullptr);
    185     return block;
    186   }
    187 
    188   HInstruction* GetInstrOrig(HInstruction* copy_instr) const {
    189     for (auto it : *hir_map_) {
    190       if (it.second == copy_instr) {
    191         return it.first;
    192       }
    193     }
    194     return nullptr;
    195   }
    196 
    197   bool IsInOrigBBSet(uint32_t block_id) const {
    198     return orig_bb_set_.IsBitSet(block_id);
    199   }
    200 
    201   bool IsInOrigBBSet(const HBasicBlock* block) const {
    202     return IsInOrigBBSet(block->GetBlockId());
    203   }
    204 
    205  private:
    206   // Fills the 'exits' vector with the subgraph exits.
    207   void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
    208 
    209   // Finds and records information about the area in the graph for which control-flow (back edges,
    210   // loops, dominators) needs to be adjusted.
    211   void FindAndSetLocalAreaForAdjustments();
    212 
    213   // Remaps edges' successors according to the info specified in the edges sets.
    214   //
    215   // Only edge successors/predecessors and phis' input records (to have a correspondence between
    216   // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither
    217   // phis' nor instructions' inputs values are resolved.
    218   void RemapEdgesSuccessors();
    219 
    220   // Adjusts control-flow (back edges, loops, dominators) for the local area defined by
    221   // FindAndSetLocalAreaForAdjustments.
    222   void AdjustControlFlowInfo();
    223 
    224   // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in
    225   // the SSA form.
    226   void ResolveDataFlow();
    227 
    228   //
    229   // Helpers for CloneBasicBlock.
    230   //
    231 
    232   // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the
    233   // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original.
    234   void ReplaceInputsWithCopies(HInstruction* copy_instr);
    235 
    236   // Recursively clones the environment for the copy instruction. If the input of the original
    237   // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise
    238   // leaves it the same as original.
    239   void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env);
    240 
    241   //
    242   // Helpers for RemapEdgesSuccessors.
    243   //
    244 
    245   // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and
    246   // copy_succ.
    247   void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
    248 
    249   // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ.
    250   void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
    251 
    252   // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ.
    253   void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
    254 
    255   //
    256   // Local versions of control flow calculation/adjustment routines.
    257   //
    258 
    259   void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set);
    260   void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set);
    261   GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set);
    262   void CleanUpControlFlow();
    263 
    264   //
    265   // Helpers for ResolveDataFlow
    266   //
    267 
    268   // Resolves the inputs of the phi.
    269   void ResolvePhi(HPhi* phi);
    270 
    271   //
    272   // Debug and logging methods.
    273   //
    274   void CheckInstructionInputsRemapping(HInstruction* orig_instr);
    275 
    276   HBasicBlock* GetBlockById(uint32_t block_id) const {
    277     DCHECK(block_id < graph_->GetBlocks().size());
    278     HBasicBlock* block = graph_->GetBlocks()[block_id];
    279     DCHECK(block != nullptr);
    280     return block;
    281   }
    282 
    283   HGraph* const graph_;
    284   ArenaAllocator* const arena_;
    285 
    286   // Set of basic block in the original graph to be copied.
    287   HBasicBlockSet orig_bb_set_;
    288 
    289   // Sets of edges which require successors remapping.
    290   const HEdgeSet* remap_orig_internal_;
    291   const HEdgeSet* remap_copy_internal_;
    292   const HEdgeSet* remap_incoming_;
    293 
    294   // Correspondence map for blocks: (original block, copy block).
    295   HBasicBlockMap* bb_map_;
    296   // Correspondence map for instructions: (original HInstruction, copy HInstruction).
    297   HInstructionMap* hir_map_;
    298   // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted.
    299   HLoopInformation* outer_loop_;
    300   HBasicBlockSet outer_loop_bb_set_;
    301 
    302   ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
    303 
    304   DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
    305 };
    306 
    307 }  // namespace art
    308 
    309 namespace std {
    310 
    311 template <>
    312 struct hash<art::HEdge> {
    313   size_t operator()(art::HEdge const& x) const noexcept  {
    314     // Use Cantor pairing function as the hash function.
    315     uint32_t a = x.GetFrom();
    316     uint32_t b = x.GetTo();
    317     return (a + b) * (a + b + 1) / 2 + b;
    318   }
    319 };
    320 
    321 }  // namespace std
    322 
    323 #endif  // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
    324