/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_sinking.h"

#include "common_dominator.h"
#include "nodes.h"

namespace art {

void CodeSinking::Run() {
  HBasicBlock* exit = graph_->GetExitBlock();
  if (exit == nullptr) {
    // Infinite loop, just bail.
    return;
  }
  // TODO(ngeoffray): we do not profile branches yet, so use throw instructions
  // as an indicator of an uncommon branch.
  for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) {
    if (exit_predecessor->GetLastInstruction()->IsThrow()) {
      SinkCodeToUncommonBranch(exit_predecessor);
    }
  }
}

static bool IsInterestingInstruction(HInstruction* instruction) {
  // Instructions from the entry block (for example constants) are never interesting to move.
  if (instruction->GetBlock() == instruction->GetBlock()->GetGraph()->GetEntryBlock()) {
    return false;
  }
  // We want to move moveable instructions that cannot throw, as well as
  // heap stores and allocations.

  // Volatile stores cannot be moved.
  if (instruction->IsInstanceFieldSet()) {
    if (instruction->AsInstanceFieldSet()->IsVolatile()) {
      return false;
    }
  }

  // Check allocations first, as they can throw, but it is safe to move them.
  if (instruction->IsNewInstance() || instruction->IsNewArray()) {
    return true;
  }

  // All other instructions that can throw cannot be moved.
  if (instruction->CanThrow()) {
    return false;
  }

  // We can only sink stores to local allocations. Other heap references may
  // be escaping. Note that allocations can escape too, but we only move
  // allocations if their users can move too, or are in the list of
  // post-dominated blocks.
  if (instruction->IsInstanceFieldSet()) {
    if (!instruction->InputAt(0)->IsNewInstance()) {
      return false;
    }
  }

  if (instruction->IsArraySet()) {
    if (!instruction->InputAt(0)->IsNewArray()) {
      return false;
    }
  }

  // Heap accesses cannot go past instructions that have memory side effects, which
  // we are not tracking here. Note that the load/store elimination optimization
  // runs before this optimization, and should have removed interesting ones.
  // In theory, we could handle loads of local allocations, but this is currently
  // hard to test, as LSE removes them.
  if (instruction->IsStaticFieldGet() ||
      instruction->IsInstanceFieldGet() ||
      instruction->IsArrayGet()) {
    return false;
  }

  return instruction->IsInstanceFieldSet() ||
         instruction->IsArraySet() ||
         instruction->CanBeMoved();
}
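
// As an illustration of IsInterestingInstruction, in Java-like source
// (assuming each line maps to the named HInstruction):
//
//   Foo f = new Foo();    // NewInstance: interesting, safe to move even
//                         // though it can throw.
//   f.intField = 42;      // InstanceFieldSet on a local allocation: interesting.
//   param.intField = 42;  // InstanceFieldSet on a possibly escaping reference:
//                         // not interesting.
//   int x = f.intField;   // InstanceFieldGet: not interesting, heap loads are
//                         // not tracked against memory side effects here.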

static void AddInstruction(HInstruction* instruction,
                           const ArenaBitVector& processed_instructions,
                           const ArenaBitVector& discard_blocks,
                           ArenaVector<HInstruction*>* worklist) {
  // Add the instruction to the work list if it is not in a block to be
  // discarded, has not already been processed, and is of interest.
  if (!discard_blocks.IsBitSet(instruction->GetBlock()->GetBlockId()) &&
      !processed_instructions.IsBitSet(instruction->GetId()) &&
      IsInterestingInstruction(instruction)) {
    worklist->push_back(instruction);
  }
}

static void AddInputs(HInstruction* instruction,
                      const ArenaBitVector& processed_instructions,
                      const ArenaBitVector& discard_blocks,
                      ArenaVector<HInstruction*>* worklist) {
  for (HInstruction* input : instruction->GetInputs()) {
    AddInstruction(input, processed_instructions, discard_blocks, worklist);
  }
}

static void AddInputs(HBasicBlock* block,
                      const ArenaBitVector& processed_instructions,
                      const ArenaBitVector& discard_blocks,
                      ArenaVector<HInstruction*>* worklist) {
  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
    AddInputs(it.Current(), processed_instructions, discard_blocks, worklist);
  }
  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    AddInputs(it.Current(), processed_instructions, discard_blocks, worklist);
  }
}

static bool ShouldFilterUse(HInstruction* instruction,
                            HInstruction* user,
                            const ArenaBitVector& post_dominated) {
  if (instruction->IsNewInstance()) {
    return user->IsInstanceFieldSet() &&
           (user->InputAt(0) == instruction) &&
           !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
  } else if (instruction->IsNewArray()) {
    return user->IsArraySet() &&
           (user->InputAt(0) == instruction) &&
           !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
  }
  return false;
}
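
// Note: step (3) of the sinking algorithm processes stores into an allocation
// before the allocation itself, and positions the allocation by looking at its
// remaining uses. Those stores must therefore be ignored when computing the
// allocation's position, which is what ShouldFilterUse identifies. For example:
//
//   Foo f = new Foo();
//   f.intField = 42;
//   throw e;          // uncommon branch
//
// The store `f.intField = 42` is a use of `new Foo()` but, being itself a
// sinking candidate, it should not pin the allocation in place.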


// Find the ideal position for moving `instruction`. If `filter` is true,
// we filter out store instructions to that instruction, which are processed
// first in step (3) of the sinking algorithm.
// This method is tailored to the sinking algorithm, unlike
// the generic HInstruction::MoveBeforeFirstUserAndOutOfLoops.
static HInstruction* FindIdealPosition(HInstruction* instruction,
                                       const ArenaBitVector& post_dominated,
                                       bool filter = false) {
  DCHECK(!instruction->IsPhi());  // Makes no sense for phis.

  // Find the target block.
  CommonDominator finder(/* start_block */ nullptr);
  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
    HInstruction* user = use.GetUser();
    if (!(filter && ShouldFilterUse(instruction, user, post_dominated))) {
      HBasicBlock* block = user->GetBlock();
      if (user->IsPhi()) {
        // Special case phis by taking the incoming block for regular ones,
        // or the dominator for catch phis.
        block = user->AsPhi()->IsCatchPhi()
            ? block->GetDominator()
            : block->GetPredecessors()[use.GetIndex()];
      }
      finder.Update(block);
    }
  }
  for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
    DCHECK(!use.GetUser()->GetHolder()->IsPhi());
    DCHECK(!filter || !ShouldFilterUse(instruction, use.GetUser()->GetHolder(), post_dominated));
    finder.Update(use.GetUser()->GetHolder()->GetBlock());
  }
  HBasicBlock* target_block = finder.Get();
  if (target_block == nullptr) {
    // No user we can move next to? Likely an LSE or DCE limitation.
    return nullptr;
  }

  // Move to the first dominator not in a loop, if we can.
  while (target_block->IsInLoop()) {
    if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) {
      break;
    }
    target_block = target_block->GetDominator();
    DCHECK(target_block != nullptr);
  }

  // Find the insertion position. No need to filter anymore, as we have found a
  // target block.
  HInstruction* insert_pos = nullptr;
  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
    if (use.GetUser()->GetBlock() == target_block &&
        (insert_pos == nullptr || use.GetUser()->StrictlyDominates(insert_pos))) {
      insert_pos = use.GetUser();
    }
  }
  for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
    HInstruction* user = use.GetUser()->GetHolder();
    if (user->GetBlock() == target_block &&
        (insert_pos == nullptr || user->StrictlyDominates(insert_pos))) {
      insert_pos = user;
    }
  }
  if (insert_pos == nullptr) {
    // No user in `target_block`, insert before the control flow instruction.
    insert_pos = target_block->GetLastInstruction();
    DCHECK(insert_pos->IsControlFlow());
    // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
    if (insert_pos->IsIf()) {
      HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
      if (if_input == insert_pos->GetPrevious()) {
        insert_pos = if_input;
      }
    }
  }
  DCHECK(!insert_pos->IsPhi());
  return insert_pos;
}


void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
  // Local allocator to discard data structures created below at the end of
  // this optimization.
  ArenaAllocator allocator(graph_->GetArena()->GetArenaPool());

  size_t number_of_instructions = graph_->GetCurrentInstructionId();
  ArenaVector<HInstruction*> worklist(allocator.Adapter(kArenaAllocMisc));
  ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable */ false);
  ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable */ false);
  ArenaBitVector instructions_that_can_move(
      &allocator, number_of_instructions, /* expandable */ false);
  ArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc));

  // Step (1): Visit post order to get a subset of blocks post-dominated by `end_block`.
  // TODO(ngeoffray): Getting the full set of post-dominated blocks should be done by
  // computing the post dominator tree, but that could be too time consuming. Also,
  // we should start the analysis from blocks dominated by an uncommon branch, but we
  // don't profile branches yet.
  bool found_block = false;
  for (HBasicBlock* block : graph_->GetPostOrder()) {
    if (block == end_block) {
      found_block = true;
      post_dominated.SetBit(block->GetBlockId());
    } else if (found_block) {
      bool is_post_dominated = true;
      if (block->GetSuccessors().empty()) {
        // We currently bail for loops.
        is_post_dominated = false;
      } else {
        for (HBasicBlock* successor : block->GetSuccessors()) {
          if (!post_dominated.IsBitSet(successor->GetBlockId())) {
            is_post_dominated = false;
            break;
          }
        }
      }
      if (is_post_dominated) {
        post_dominated.SetBit(block->GetBlockId());
      }
    }
  }
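
  // As an illustration, assume `end_block` is B4 below, ending with a throw:
  //
  //       B1
  //      /  \
  //     B2   B5
  //     |
  //     B3
  //     |
  //     B4
  //
  // Walking the post order once B4 has been found: B3 and B2 are added to the
  // set because all of their successors are already in it; B1 is not, because
  // its successor B5 is outside the set.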

  // Now that we have found a subset of post-dominated blocks, add to the worklist all
  // inputs of instructions in these blocks that are not themselves in these blocks.
  // Also find the common dominator of the found post-dominated blocks, to help filter
  // out unmovable uses in step (2).
  CommonDominator finder(end_block);
  for (size_t i = 0, e = graph_->GetBlocks().size(); i < e; ++i) {
    if (post_dominated.IsBitSet(i)) {
      finder.Update(graph_->GetBlocks()[i]);
      AddInputs(graph_->GetBlocks()[i], processed_instructions, post_dominated, &worklist);
    }
  }
  HBasicBlock* common_dominator = finder.Get();

  // Step (2): Iterate over the worklist to find sinking candidates.
  while (!worklist.empty()) {
    HInstruction* instruction = worklist.back();
    if (processed_instructions.IsBitSet(instruction->GetId())) {
      // The instruction has already been processed, continue. This happens
      // when the instruction is the input/user of multiple instructions.
      worklist.pop_back();
      continue;
    }
    bool all_users_in_post_dominated_blocks = true;
    bool can_move = true;
    // Check users of the instruction.
    for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
      HInstruction* user = use.GetUser();
      if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId()) &&
          !instructions_that_can_move.IsBitSet(user->GetId())) {
        all_users_in_post_dominated_blocks = false;
        // If we've already processed this user, or the user cannot be moved, or
        // its block does not dominate the post-dominated blocks, bail.
        // TODO(ngeoffray): The domination check is an approximation. We should
        // instead check if the dominated blocks post dominate the user's block,
        // but we do not have post dominance information here.
        if (processed_instructions.IsBitSet(user->GetId()) ||
            !IsInterestingInstruction(user) ||
            !user->GetBlock()->Dominates(common_dominator)) {
          can_move = false;
          break;
        }
      }
    }

    // Check environment users of the instruction. Some of these users require
    // the instruction not to move.
    if (all_users_in_post_dominated_blocks) {
      for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
        HEnvironment* environment = use.GetUser();
        HInstruction* user = environment->GetHolder();
        if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) {
          if (graph_->IsDebuggable() ||
              user->IsDeoptimize() ||
              user->CanThrowIntoCatchBlock() ||
              (user->IsSuspendCheck() && graph_->IsCompilingOsr())) {
            can_move = false;
            break;
          }
        }
      }
    }
    if (!can_move) {
      // Instruction cannot be moved, mark it as processed and remove it from the work
      // list.
      processed_instructions.SetBit(instruction->GetId());
      worklist.pop_back();
    } else if (all_users_in_post_dominated_blocks) {
      // Instruction is a candidate for being sunk. Mark it as such, remove it from the
      // work list, and add its inputs to the work list.
      instructions_that_can_move.SetBit(instruction->GetId());
      move_in_order.push_back(instruction);
      processed_instructions.SetBit(instruction->GetId());
      worklist.pop_back();
      AddInputs(instruction, processed_instructions, post_dominated, &worklist);
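      // At this point, the environment checks above guarantee that an
      // environment user outside the post-dominated blocks needs the value
      // neither for debugging nor for deoptimization, so its reference to
      // the instruction can safely be cleared.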
      // Drop the environment uses that are not in the list of post-dominated
      // blocks. This is to help step (3) of this optimization, when we start
      // moving instructions closer to their uses.
      for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
        HEnvironment* environment = use.GetUser();
        HInstruction* user = environment->GetHolder();
        if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) {
          environment->RemoveAsUserOfInput(use.GetIndex());
          environment->SetRawEnvAt(use.GetIndex(), nullptr);
        }
      }
    } else {
      // The information we have on the users was not enough to decide whether the
      // instruction could be moved.
      // Add the users to the work list, and keep the instruction in the work list
      // to process it again once all users have been processed.
      for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
        AddInstruction(use.GetUser(), processed_instructions, post_dominated, &worklist);
      }
    }
  }

  // Make sure we process instructions in dominated order: the comparator puts
  // dominated instructions first, so stores into an allocation are processed
  // before the allocation itself. This is required for heap stores.
  std::sort(move_in_order.begin(), move_in_order.end(), [](HInstruction* a, HInstruction* b) {
    return b->StrictlyDominates(a);
  });

  // Step (3): Try to move sinking candidates.
  for (HInstruction* instruction : move_in_order) {
    HInstruction* position = nullptr;
    if (instruction->IsArraySet() || instruction->IsInstanceFieldSet()) {
      if (!instructions_that_can_move.IsBitSet(instruction->InputAt(0)->GetId())) {
        // A store can trivially move, but it can safely do so only if the heap
        // location it stores to can also move.
        // TODO(ngeoffray): Handle allocation/store cycles by pruning these instructions
        // from the set and all their inputs.
        continue;
      }
      // Find the position of the instruction we're storing into, filtering out this
      // store and all other stores to that instruction.
      position = FindIdealPosition(instruction->InputAt(0), post_dominated, /* filter */ true);

      // The position needs to be dominated by the store, in order for the store to move there.
      if (position == nullptr || !instruction->GetBlock()->Dominates(position->GetBlock())) {
        continue;
      }
    } else {
      // Find the ideal position within the post-dominated blocks.
      position = FindIdealPosition(instruction, post_dominated);
      if (position == nullptr) {
        continue;
      }
    }
    // Bail if we could not find a position in the post-dominated blocks (for example,
    // if there are multiple users whose common dominator is not in the list of
    // post-dominated blocks).
    if (!post_dominated.IsBitSet(position->GetBlock()->GetBlockId())) {
      continue;
    }
    MaybeRecordStat(MethodCompilationStat::kInstructionSunk);
    instruction->MoveBefore(position, /* ensure_safety */ false);
  }
}

}  // namespace art