1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ 18 #define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ 19 20 #include "base/arena_bit_vector.h" 21 #include "base/arena_containers.h" 22 #include "base/bit_vector-inl.h" 23 #include "nodes.h" 24 25 namespace art { 26 27 static const bool kSuperblockClonerLogging = false; 28 static const bool kSuperblockClonerVerify = false; 29 30 // Represents an edge between two HBasicBlocks. 31 // 32 // Note: objects of this class are small - pass them by value. 33 class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> { 34 public: 35 HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) { 36 DCHECK_NE(to_, kInvalidBlockId); 37 DCHECK_NE(from_, kInvalidBlockId); 38 } 39 HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) { 40 DCHECK_NE(to_, kInvalidBlockId); 41 DCHECK_NE(from_, kInvalidBlockId); 42 } 43 HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {} 44 45 uint32_t GetFrom() const { return from_; } 46 uint32_t GetTo() const { return to_; } 47 48 bool operator==(const HEdge& other) const { 49 return this->from_ == other.from_ && this->to_ == other.to_; 50 } 51 52 bool operator!=(const HEdge& other) const { return !operator==(other); } 53 void Dump(std::ostream& stream) const; 54 55 // Returns whether an edge represents a valid edge in CF graph: whether the from_ block 56 // has to_ block as a successor. 57 bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; } 58 59 private: 60 // Predecessor block id. 61 uint32_t from_; 62 // Successor block id. 63 uint32_t to_; 64 }; 65 66 // Returns whether a HEdge edge corresponds to an existing edge in the graph. 67 inline bool IsEdgeValid(HEdge edge, HGraph* graph) { 68 if (!edge.IsValid()) { 69 return false; 70 } 71 uint32_t from = edge.GetFrom(); 72 uint32_t to = edge.GetTo(); 73 if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) { 74 return false; 75 } 76 77 HBasicBlock* block_from = graph->GetBlocks()[from]; 78 HBasicBlock* block_to = graph->GetBlocks()[to]; 79 if (block_from == nullptr || block_to == nullptr) { 80 return false; 81 } 82 83 return block_from->HasSuccessor(block_to, 0); 84 } 85 86 // SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without 87 // fine grain manipulation with IR; data flow and graph properties are resolved/adjusted 88 // automatically. The clone transformation is defined by specifying a set of basic blocks to copy 89 // and a set of rules how to treat edges, remap their successors. By using this approach such 90 // optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented. 91 // 92 // The idea of the transformation is based on "Superblock cloning" technique described in the book 93 // "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University 94 // Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective 95 // Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al. 96 // J Supercomput (1993) 7: 229. doi:10.1007/BF01205185. 97 // 98 // There are two states of the IR graph: original graph (before the transformation) and 99 // copy graph (after). 100 // 101 // Before the transformation: 102 // Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original 103 // graph into 4 categories/sets (use the following notation for edges: "(pred, succ)", 104 // where pred, succ - basic blocks): 105 // - internal - pred, succ are members of orig_bb_set. 106 // - outside - pred, succ are not members of orig_bb_set. 107 // - incoming - pred is not a member of orig_bb_set, succ is. 108 // - outgoing - pred is a member of orig_bb_set, succ is not. 109 // 110 // Transformation: 111 // 112 // 1. Initial cloning: 113 // 1.1. For each orig_block in orig_bb_set create a copy copy_block; these new blocks 114 // form copy_bb_set. 115 // 1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the 116 // copies of X, Y basic blocks correspondingly; these new edges form copy_internal edge 117 // set. 118 // 1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the 119 // copies of X, Y basic blocks correspondingly; these new edges form copy_outgoing edge 120 // set. 121 // 2. Successors remapping. 122 // 2.1. 'remap_orig_internal - set of edges (X, Y) from orig_bb_set whose successors should 123 // be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)). 124 // 2.2. remap_copy_internal - set of edges (X_1, Y_1) from copy_bb_set whose successors 125 // should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)). 126 // 2.3. 'remap_incoming - set of edges (X, Y) from the incoming edge set in the original graph 127 // whose successors should be remapped to copies nodes: ((X, Y) will be transformed into 128 // (X, Y_1)). 129 // 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc). 130 // 4. Fix/resolve data flow. 131 // 5. Do cleanups (DCE, critical edges splitting, etc). 132 // 133 class SuperblockCloner : public ValueObject { 134 public: 135 // TODO: Investigate optimal types for the containers. 136 using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>; 137 using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>; 138 using HBasicBlockSet = ArenaBitVector; 139 using HEdgeSet = ArenaHashSet<HEdge>; 140 141 SuperblockCloner(HGraph* graph, 142 const HBasicBlockSet* orig_bb_set, 143 HBasicBlockMap* bb_map, 144 HInstructionMap* hir_map); 145 146 // Sets edge successor remapping info specified by corresponding edge sets. 147 void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal, 148 const HEdgeSet* remap_copy_internal, 149 const HEdgeSet* remap_incoming); 150 151 // Returns whether the specified subgraph is copyable. 152 // TODO: Start from small range of graph patterns then extend it. 153 bool IsSubgraphClonable() const; 154 155 // Runs the copy algorithm according to the description. 156 void Run(); 157 158 // Cleans up the graph after transformation: splits critical edges, recalculates control flow 159 // information (back-edges, dominators, loop info, etc), eliminates redundant phis. 160 void CleanUp(); 161 162 // Returns a clone of a basic block (orig_block). 163 // 164 // - The copy block will have no successors/predecessors; they should be set up manually. 165 // - For each instruction in the orig_block a copy is created and inserted into the copy block; 166 // this correspondence is recorded in the map (old instruction, new instruction). 167 // - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the 168 // same, as in the original block, PHIs do not reflect a correct correspondence between the 169 // value and predecessors (as the copy block has no predecessors by now), etc. 170 HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block); 171 172 // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_ 173 // and hir_map_. 174 void CloneBasicBlocks(); 175 176 HInstruction* GetInstrCopy(HInstruction* orig_instr) const { 177 auto copy_input_iter = hir_map_->find(orig_instr); 178 DCHECK(copy_input_iter != hir_map_->end()); 179 return copy_input_iter->second; 180 } 181 182 HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const { 183 HBasicBlock* block = bb_map_->Get(orig_block); 184 DCHECK(block != nullptr); 185 return block; 186 } 187 188 HInstruction* GetInstrOrig(HInstruction* copy_instr) const { 189 for (auto it : *hir_map_) { 190 if (it.second == copy_instr) { 191 return it.first; 192 } 193 } 194 return nullptr; 195 } 196 197 bool IsInOrigBBSet(uint32_t block_id) const { 198 return orig_bb_set_.IsBitSet(block_id); 199 } 200 201 bool IsInOrigBBSet(const HBasicBlock* block) const { 202 return IsInOrigBBSet(block->GetBlockId()); 203 } 204 205 private: 206 // Fills the 'exits' vector with the subgraph exits. 207 void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits); 208 209 // Finds and records information about the area in the graph for which control-flow (back edges, 210 // loops, dominators) needs to be adjusted. 211 void FindAndSetLocalAreaForAdjustments(); 212 213 // Remaps edges' successors according to the info specified in the edges sets. 214 // 215 // Only edge successors/predecessors and phis' input records (to have a correspondence between 216 // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither 217 // phis' nor instructions' inputs values are resolved. 218 void RemapEdgesSuccessors(); 219 220 // Adjusts control-flow (back edges, loops, dominators) for the local area defined by 221 // FindAndSetLocalAreaForAdjustments. 222 void AdjustControlFlowInfo(); 223 224 // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in 225 // the SSA form. 226 void ResolveDataFlow(); 227 228 // 229 // Helpers for CloneBasicBlock. 230 // 231 232 // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the 233 // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original. 234 void ReplaceInputsWithCopies(HInstruction* copy_instr); 235 236 // Recursively clones the environment for the copy instruction. If the input of the original 237 // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise 238 // leaves it the same as original. 239 void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env); 240 241 // 242 // Helpers for RemapEdgesSuccessors. 243 // 244 245 // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and 246 // copy_succ. 247 void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); 248 249 // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ. 250 void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); 251 252 // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ. 253 void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); 254 255 // 256 // Local versions of control flow calculation/adjustment routines. 257 // 258 259 void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set); 260 void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set); 261 GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set); 262 void CleanUpControlFlow(); 263 264 // 265 // Helpers for ResolveDataFlow 266 // 267 268 // Resolves the inputs of the phi. 269 void ResolvePhi(HPhi* phi); 270 271 // 272 // Debug and logging methods. 273 // 274 void CheckInstructionInputsRemapping(HInstruction* orig_instr); 275 276 HBasicBlock* GetBlockById(uint32_t block_id) const { 277 DCHECK(block_id < graph_->GetBlocks().size()); 278 HBasicBlock* block = graph_->GetBlocks()[block_id]; 279 DCHECK(block != nullptr); 280 return block; 281 } 282 283 HGraph* const graph_; 284 ArenaAllocator* const arena_; 285 286 // Set of basic block in the original graph to be copied. 287 HBasicBlockSet orig_bb_set_; 288 289 // Sets of edges which require successors remapping. 290 const HEdgeSet* remap_orig_internal_; 291 const HEdgeSet* remap_copy_internal_; 292 const HEdgeSet* remap_incoming_; 293 294 // Correspondence map for blocks: (original block, copy block). 295 HBasicBlockMap* bb_map_; 296 // Correspondence map for instructions: (original HInstruction, copy HInstruction). 297 HInstructionMap* hir_map_; 298 // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted. 299 HLoopInformation* outer_loop_; 300 HBasicBlockSet outer_loop_bb_set_; 301 302 ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo); 303 304 DISALLOW_COPY_AND_ASSIGN(SuperblockCloner); 305 }; 306 307 } // namespace art 308 309 namespace std { 310 311 template <> 312 struct hash<art::HEdge> { 313 size_t operator()(art::HEdge const& x) const noexcept { 314 // Use Cantor pairing function as the hash function. 315 uint32_t a = x.GetFrom(); 316 uint32_t b = x.GetTo(); 317 return (a + b) * (a + b + 1) / 2 + b; 318 } 319 }; 320 321 } // namespace std 322 323 #endif // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ 324