1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ 18 #define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ 19 20 #include "base/arena_bit_vector.h" 21 #include "base/arena_containers.h" 22 #include "base/bit_vector-inl.h" 23 #include "nodes.h" 24 25 namespace art { 26 27 class InductionVarRange; 28 29 static const bool kSuperblockClonerLogging = false; 30 31 // Represents an edge between two HBasicBlocks. 32 // 33 // Note: objects of this class are small - pass them by value. 34 class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> { 35 public: 36 HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) { 37 DCHECK_NE(to_, kInvalidBlockId); 38 DCHECK_NE(from_, kInvalidBlockId); 39 } 40 HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) { 41 DCHECK_NE(to_, kInvalidBlockId); 42 DCHECK_NE(from_, kInvalidBlockId); 43 } 44 HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {} 45 46 uint32_t GetFrom() const { return from_; } 47 uint32_t GetTo() const { return to_; } 48 49 bool operator==(const HEdge& other) const { 50 return this->from_ == other.from_ && this->to_ == other.to_; 51 } 52 53 bool operator!=(const HEdge& other) const { return !operator==(other); } 54 void Dump(std::ostream& stream) const; 55 56 // Returns whether an edge represents a valid edge in CF graph: whether the from_ block 57 // has to_ block as a successor. 58 bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; } 59 60 private: 61 // Predecessor block id. 62 uint32_t from_; 63 // Successor block id. 64 uint32_t to_; 65 }; 66 67 // Returns whether a HEdge edge corresponds to an existing edge in the graph. 68 inline bool IsEdgeValid(HEdge edge, HGraph* graph) { 69 if (!edge.IsValid()) { 70 return false; 71 } 72 uint32_t from = edge.GetFrom(); 73 uint32_t to = edge.GetTo(); 74 if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) { 75 return false; 76 } 77 78 HBasicBlock* block_from = graph->GetBlocks()[from]; 79 HBasicBlock* block_to = graph->GetBlocks()[to]; 80 if (block_from == nullptr || block_to == nullptr) { 81 return false; 82 } 83 84 return block_from->HasSuccessor(block_to, 0); 85 } 86 87 // SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without 88 // fine grain manipulation with IR; data flow and graph properties are resolved/adjusted 89 // automatically. The clone transformation is defined by specifying a set of basic blocks to copy 90 // and a set of rules how to treat edges, remap their successors. By using this approach such 91 // optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented. 92 // 93 // The idea of the transformation is based on "Superblock cloning" technique described in the book 94 // "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University 95 // Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective 96 // Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al. 97 // J Supercomput (1993) 7: 229. doi:10.1007/BF01205185. 98 // 99 // There are two states of the IR graph: original graph (before the transformation) and 100 // copy graph (after). 101 // 102 // Before the transformation: 103 // Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original 104 // graph into 4 categories/sets (use the following notation for edges: "(pred, succ)", 105 // where pred, succ - basic blocks): 106 // - internal - pred, succ are members of orig_bb_set. 107 // - outside - pred, succ are not members of orig_bb_set. 108 // - incoming - pred is not a member of orig_bb_set, succ is. 109 // - outgoing - pred is a member of orig_bb_set, succ is not. 110 // 111 // Transformation: 112 // 113 // 1. Initial cloning: 114 // 1.1. For each orig_block in orig_bb_set create a copy copy_block; these new blocks 115 // form copy_bb_set. 116 // 1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the 117 // copies of X, Y basic blocks correspondingly; these new edges form copy_internal edge 118 // set. 119 // 1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the 120 // copies of X, Y basic blocks correspondingly; these new edges form copy_outgoing edge 121 // set. 122 // 2. Successors remapping. 123 // 2.1. 'remap_orig_internal - set of edges (X, Y) from orig_bb_set whose successors should 124 // be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)). 125 // 2.2. remap_copy_internal - set of edges (X_1, Y_1) from copy_bb_set whose successors 126 // should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)). 127 // 2.3. 'remap_incoming - set of edges (X, Y) from the incoming edge set in the original graph 128 // whose successors should be remapped to copies nodes: ((X, Y) will be transformed into 129 // (X, Y_1)). 130 // 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc). 131 // 4. Fix/resolve data flow. 132 // 5. Do cleanups (DCE, critical edges splitting, etc). 133 // 134 class SuperblockCloner : public ValueObject { 135 public: 136 // TODO: Investigate optimal types for the containers. 137 using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>; 138 using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>; 139 using HBasicBlockSet = ArenaBitVector; 140 using HEdgeSet = ArenaHashSet<HEdge>; 141 142 SuperblockCloner(HGraph* graph, 143 const HBasicBlockSet* orig_bb_set, 144 HBasicBlockMap* bb_map, 145 HInstructionMap* hir_map, 146 InductionVarRange* induction_range); 147 148 // Sets edge successor remapping info specified by corresponding edge sets. 149 void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal, 150 const HEdgeSet* remap_copy_internal, 151 const HEdgeSet* remap_incoming); 152 153 // Returns whether the specified subgraph is copyable. 154 // TODO: Start from small range of graph patterns then extend it. 155 bool IsSubgraphClonable() const; 156 157 // Returns whether selected subgraph satisfies the criteria for fast data flow resolution 158 // when iterative DF algorithm is not required and dominators/instructions inputs can be 159 // trivially adjusted. 160 // 161 // TODO: formally describe the criteria. 162 // 163 // Loop peeling and unrolling satisfy the criteria. 164 bool IsFastCase() const; 165 166 // Runs the copy algorithm according to the description. 167 void Run(); 168 169 // Cleans up the graph after transformation: splits critical edges, recalculates control flow 170 // information (back-edges, dominators, loop info, etc), eliminates redundant phis. 171 void CleanUp(); 172 173 // Returns a clone of a basic block (orig_block). 174 // 175 // - The copy block will have no successors/predecessors; they should be set up manually. 176 // - For each instruction in the orig_block a copy is created and inserted into the copy block; 177 // this correspondence is recorded in the map (old instruction, new instruction). 178 // - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the 179 // same, as in the original block, PHIs do not reflect a correct correspondence between the 180 // value and predecessors (as the copy block has no predecessors by now), etc. 181 HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block); 182 183 // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_ 184 // and hir_map_. 185 void CloneBasicBlocks(); 186 187 HInstruction* GetInstrCopy(HInstruction* orig_instr) const { 188 auto copy_input_iter = hir_map_->find(orig_instr); 189 DCHECK(copy_input_iter != hir_map_->end()); 190 return copy_input_iter->second; 191 } 192 193 HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const { 194 HBasicBlock* block = bb_map_->Get(orig_block); 195 DCHECK(block != nullptr); 196 return block; 197 } 198 199 HInstruction* GetInstrOrig(HInstruction* copy_instr) const { 200 for (auto it : *hir_map_) { 201 if (it.second == copy_instr) { 202 return it.first; 203 } 204 } 205 return nullptr; 206 } 207 208 bool IsInOrigBBSet(uint32_t block_id) const { 209 return orig_bb_set_.IsBitSet(block_id); 210 } 211 212 bool IsInOrigBBSet(const HBasicBlock* block) const { 213 return IsInOrigBBSet(block->GetBlockId()); 214 } 215 216 // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops, 217 // dominators) needs to be adjusted. 218 HLoopInformation* GetRegionToBeAdjusted() const { 219 return outer_loop_; 220 } 221 222 private: 223 // Fills the 'exits' vector with the subgraph exits. 224 void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const; 225 226 // Finds and records information about the area in the graph for which control flow (back edges, 227 // loops, dominators) needs to be adjusted. 228 void FindAndSetLocalAreaForAdjustments(); 229 230 // Remaps edges' successors according to the info specified in the edges sets. 231 // 232 // Only edge successors/predecessors and phis' input records (to have a correspondence between 233 // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither 234 // phis' nor instructions' inputs values are resolved. 235 void RemapEdgesSuccessors(); 236 237 // Adjusts control flow (back edges, loops, dominators) for the local area defined by 238 // FindAndSetLocalAreaForAdjustments. 239 void AdjustControlFlowInfo(); 240 241 // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in 242 // the SSA form. 243 void ResolveDataFlow(); 244 245 // 246 // Helpers for live-outs processing and Subgraph-closed SSA. 247 // 248 // - live-outs - values which are defined inside the subgraph and have uses outside. 249 // - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph 250 // have no outside uses except for the phi-nodes in the subgraph exits. 251 // 252 // Note: now if the subgraph has live-outs it is only clonable if it has a single exit; this 253 // makes the subgraph-closed SSA form construction much easier. 254 // 255 // TODO: Support subgraphs with live-outs and multiple exits. 256 // 257 258 // For each live-out value 'val' in the region puts a record <val, val> into the map. 259 // Returns whether all of the instructions in the subgraph are clonable. 260 bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const; 261 262 // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit. 263 // 264 // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates 265 // the record in the map to <val, phi> and replaces all outside uses with this phi. 266 void ConstructSubgraphClosedSSA(); 267 268 // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding 269 // (<val, phi>) phi after the cloning is done. 270 void FixSubgraphClosedSSAAfterCloning(); 271 272 // 273 // Helpers for CloneBasicBlock. 274 // 275 276 // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the 277 // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original. 278 void ReplaceInputsWithCopies(HInstruction* copy_instr); 279 280 // Recursively clones the environment for the copy instruction. If the input of the original 281 // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise 282 // leaves it the same as original. 283 void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env); 284 285 // 286 // Helpers for RemapEdgesSuccessors. 287 // 288 289 // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and 290 // copy_succ. 291 void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); 292 293 // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ. 294 void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); 295 296 // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ. 297 void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); 298 299 // 300 // Local versions of control flow calculation/adjustment routines. 301 // 302 303 void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set); 304 void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set); 305 GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set); 306 void CleanUpControlFlow(); 307 308 // 309 // Helpers for ResolveDataFlow 310 // 311 312 // Resolves the inputs of the phi. 313 void ResolvePhi(HPhi* phi); 314 315 // Update induction range after when fixing SSA. 316 void UpdateInductionRangeInfoOf( 317 HInstruction* user, HInstruction* old_instruction, HInstruction* replacement); 318 319 // 320 // Debug and logging methods. 321 // 322 void CheckInstructionInputsRemapping(HInstruction* orig_instr); 323 bool CheckRemappingInfoIsValid(); 324 void VerifyGraph(); 325 void DumpInputSets(); 326 327 HBasicBlock* GetBlockById(uint32_t block_id) const { 328 DCHECK(block_id < graph_->GetBlocks().size()); 329 HBasicBlock* block = graph_->GetBlocks()[block_id]; 330 DCHECK(block != nullptr); 331 return block; 332 } 333 334 HGraph* const graph_; 335 ArenaAllocator* const arena_; 336 337 // Set of basic block in the original graph to be copied. 338 HBasicBlockSet orig_bb_set_; 339 340 // Sets of edges which require successors remapping. 341 const HEdgeSet* remap_orig_internal_; 342 const HEdgeSet* remap_copy_internal_; 343 const HEdgeSet* remap_incoming_; 344 345 // Correspondence map for blocks: (original block, copy block). 346 HBasicBlockMap* bb_map_; 347 // Correspondence map for instructions: (original HInstruction, copy HInstruction). 348 HInstructionMap* hir_map_; 349 // As a result of cloning, the induction range analysis information can be invalidated 350 // and must be updated. If not null, the cloner updates it for changed instructions. 351 InductionVarRange* induction_range_; 352 // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted. 353 HLoopInformation* outer_loop_; 354 HBasicBlockSet outer_loop_bb_set_; 355 356 HInstructionMap live_outs_; 357 358 ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo); 359 ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected); 360 361 DISALLOW_COPY_AND_ASSIGN(SuperblockCloner); 362 }; 363 364 // Helper class to perform loop peeling/unrolling. 365 // 366 // This helper should be used when correspondence map between original and copied 367 // basic blocks/instructions are demanded. 368 class PeelUnrollHelper : public ValueObject { 369 public: 370 PeelUnrollHelper(HLoopInformation* info, 371 SuperblockCloner::HBasicBlockMap* bb_map, 372 SuperblockCloner::HInstructionMap* hir_map, 373 InductionVarRange* induction_range) : 374 loop_info_(info), 375 cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map, induction_range) { 376 // For now do peeling/unrolling only for natural loops. 377 DCHECK(!info->IsIrreducible()); 378 } 379 380 // Returns whether the loop can be peeled/unrolled (static function). 381 static bool IsLoopClonable(HLoopInformation* loop_info); 382 383 // Returns whether the loop can be peeled/unrolled. 384 bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); } 385 386 HBasicBlock* DoPeeling() { return DoPeelUnrollImpl(/* to_unroll= */ false); } 387 HBasicBlock* DoUnrolling() { return DoPeelUnrollImpl(/* to_unroll= */ true); } 388 HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); } 389 390 protected: 391 // Applies loop peeling/unrolling for the loop specified by 'loop_info'. 392 // 393 // Depending on 'do_unroll' either unrolls loop by 2 or peels one iteration from it. 394 HBasicBlock* DoPeelUnrollImpl(bool to_unroll); 395 396 private: 397 HLoopInformation* loop_info_; 398 SuperblockCloner cloner_; 399 400 DISALLOW_COPY_AND_ASSIGN(PeelUnrollHelper); 401 }; 402 403 // Helper class to perform loop peeling/unrolling. 404 // 405 // This helper should be used when there is no need to get correspondence information between 406 // original and copied basic blocks/instructions. 407 class PeelUnrollSimpleHelper : public ValueObject { 408 public: 409 PeelUnrollSimpleHelper(HLoopInformation* info, InductionVarRange* induction_range); 410 bool IsLoopClonable() const { return helper_.IsLoopClonable(); } 411 HBasicBlock* DoPeeling() { return helper_.DoPeeling(); } 412 HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); } 413 HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); } 414 415 const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; } 416 const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; } 417 418 private: 419 SuperblockCloner::HBasicBlockMap bb_map_; 420 SuperblockCloner::HInstructionMap hir_map_; 421 PeelUnrollHelper helper_; 422 423 DISALLOW_COPY_AND_ASSIGN(PeelUnrollSimpleHelper); 424 }; 425 426 // Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info. 427 void CollectRemappingInfoForPeelUnroll(bool to_unroll, 428 HLoopInformation* loop_info, 429 SuperblockCloner::HEdgeSet* remap_orig_internal, 430 SuperblockCloner::HEdgeSet* remap_copy_internal, 431 SuperblockCloner::HEdgeSet* remap_incoming); 432 433 // Returns whether blocks from 'work_set' are reachable from the rest of the graph. 434 // 435 // Returns whether such a set 'outer_entries' of basic blocks exists that: 436 // - each block from 'outer_entries' is not from 'work_set'. 437 // - each block from 'work_set' is reachable from at least one block from 'outer_entries'. 438 // 439 // After the function returns work_set contains only blocks from the original 'work_set' 440 // which are unreachable from the rest of the graph. 441 bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph); 442 443 // Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole 444 // graph. 445 HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2); 446 } // namespace art 447 448 namespace std { 449 450 template <> 451 struct hash<art::HEdge> { 452 size_t operator()(art::HEdge const& x) const noexcept { 453 // Use Cantor pairing function as the hash function. 454 size_t a = x.GetFrom(); 455 size_t b = x.GetTo(); 456 return (a + b) * (a + b + 1) / 2 + b; 457 } 458 }; 459 ostream& operator<<(ostream& os, const art::HEdge& e); 460 461 } // namespace std 462 463 #endif // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ 464