/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_sinking.h"

#include "base/arena_bit_vector.h"
#include "base/bit_vector-inl.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "common_dominator.h"
#include "nodes.h"

namespace art {

void CodeSinking::Run() {
  HBasicBlock* exit = graph_->GetExitBlock();
  if (exit == nullptr) {
    // Infinite loop, just bail.
    return;
  }
  // TODO(ngeoffray): we do not profile branches yet, so use throw instructions
  // as an indicator of an uncommon branch.
  for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) {
    HInstruction* last = exit_predecessor->GetLastInstruction();
    // Any predecessor of the exit that does not return throws an exception.
    if (!last->IsReturn() && !last->IsReturnVoid()) {
      SinkCodeToUncommonBranch(exit_predecessor);
    }
  }
}

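// Returns whether `instruction` is a candidate for sinking: allocations, non-volatile
// stores into local allocations, constructor fences associated with an allocation, and
// movable instructions that cannot throw.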
static bool IsInterestingInstruction(HInstruction* instruction) {
  // Instructions from the entry block (for example constants) are never interesting to move.
  if (instruction->GetBlock() == instruction->GetBlock()->GetGraph()->GetEntryBlock()) {
    return false;
  }
  // We want to move moveable instructions that cannot throw, as well as
  // heap stores and allocations.

  // Volatile stores cannot be moved.
  if (instruction->IsInstanceFieldSet()) {
    if (instruction->AsInstanceFieldSet()->IsVolatile()) {
      return false;
    }
  }

  // Check allocations first, as they can throw, but it is safe to move them.
  if (instruction->IsNewInstance() || instruction->IsNewArray()) {
    return true;
  }

  // Check that it is safe to move the ConstructorFence.
  // (It is safe to move a fence that only protects the new-instance, but not one for finals.)
  if (instruction->IsConstructorFence()) {
    HConstructorFence* ctor_fence = instruction->AsConstructorFence();

    // A fence with "0" inputs is dead and should've been removed in a prior pass.
    DCHECK_NE(0u, ctor_fence->InputCount());

    // TODO: this should be simplified to 'return true' since it's
    // potentially pessimizing any code sinking for inlined constructors with final fields.
    // TODO: double check that if the final field assignments are not moved,
    // then the fence is not moved either.

    return ctor_fence->GetAssociatedAllocation() != nullptr;
  }

  // All other instructions that can throw cannot be moved.
  if (instruction->CanThrow()) {
    return false;
  }

  // We can only sink stores into local allocations. Other heap references can
  // be escaping. Note that allocations can escape too, but we only move
  // allocations if their users can move too, or are in the list of
  // post dominated blocks.
  if (instruction->IsInstanceFieldSet()) {
    if (!instruction->InputAt(0)->IsNewInstance()) {
      return false;
    }
  }

  if (instruction->IsArraySet()) {
    if (!instruction->InputAt(0)->IsNewArray()) {
      return false;
    }
  }

  // Heap accesses cannot go past instructions that have memory side effects, which
  // we are not tracking here. Note that the load/store elimination optimization
  // runs before this optimization, and should have removed the interesting ones.
  // In theory, we could handle loads of local allocations, but this is currently
  // hard to test, as LSE removes them.
  if (instruction->IsStaticFieldGet() ||
      instruction->IsInstanceFieldGet() ||
      instruction->IsArrayGet()) {
    return false;
  }

  if (instruction->IsInstanceFieldSet() ||
      instruction->IsArraySet() ||
      instruction->CanBeMoved()) {
    return true;
  }
  return false;
}

static void AddInstruction(HInstruction* instruction,
                           const ArenaBitVector& processed_instructions,
                           const ArenaBitVector& discard_blocks,
                           ScopedArenaVector<HInstruction*>* worklist) {
  // Add to the work list if the instruction is not in the list of blocks
  // to discard, hasn't already been processed and is of interest.
  if (!discard_blocks.IsBitSet(instruction->GetBlock()->GetBlockId()) &&
      !processed_instructions.IsBitSet(instruction->GetId()) &&
      IsInterestingInstruction(instruction)) {
    worklist->push_back(instruction);
  }
}

static void AddInputs(HInstruction* instruction,
                      const ArenaBitVector& processed_instructions,
                      const ArenaBitVector& discard_blocks,
                      ScopedArenaVector<HInstruction*>* worklist) {
  for (HInstruction* input : instruction->GetInputs()) {
    AddInstruction(input, processed_instructions, discard_blocks, worklist);
  }
}

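// Add the inputs of all phis and instructions of `block` to the worklist, subject to
// the filtering done in AddInstruction above.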
static void AddInputs(HBasicBlock* block,
                      const ArenaBitVector& processed_instructions,
                      const ArenaBitVector& discard_blocks,
                      ScopedArenaVector<HInstruction*>* worklist) {
  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
    AddInputs(it.Current(), processed_instructions, discard_blocks, worklist);
  }
  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    AddInputs(it.Current(), processed_instructions, discard_blocks, worklist);
  }
}

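// Returns whether the use of `instruction` by `user` should be ignored when looking for
// an ideal sinking position: this is the case for stores into (and constructor fences of)
// a new-instance or new-array `instruction` when the user is outside of the post
// dominated blocks. See FindIdealPosition below.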
static bool ShouldFilterUse(HInstruction* instruction,
                            HInstruction* user,
                            const ArenaBitVector& post_dominated) {
  if (instruction->IsNewInstance()) {
    return (user->IsInstanceFieldSet() || user->IsConstructorFence()) &&
        (user->InputAt(0) == instruction) &&
        !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
  } else if (instruction->IsNewArray()) {
    return (user->IsArraySet() || user->IsConstructorFence()) &&
        (user->InputAt(0) == instruction) &&
        !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
  }
  return false;
}


// Find the ideal position for moving `instruction`. If `filter` is true,
// we filter out stores into `instruction`, which are processed
// first in step (3) of the sinking algorithm.
// This method is tailored to the sinking algorithm, unlike
// the generic HInstruction::MoveBeforeFirstUserAndOutOfLoops.
static HInstruction* FindIdealPosition(HInstruction* instruction,
                                       const ArenaBitVector& post_dominated,
                                       bool filter = false) {
  DCHECK(!instruction->IsPhi());  // Makes no sense for Phi.

  // Find the target block.
  CommonDominator finder(/* start_block */ nullptr);
  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
    HInstruction* user = use.GetUser();
    if (!(filter && ShouldFilterUse(instruction, user, post_dominated))) {
      HBasicBlock* block = user->GetBlock();
      if (user->IsPhi()) {
        // Special case phis by taking the incoming block for regular ones,
        // or the dominator for catch phis.
        block = user->AsPhi()->IsCatchPhi()
            ? block->GetDominator()
            : block->GetPredecessors()[use.GetIndex()];
      }
      finder.Update(block);
    }
  }
  for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
    DCHECK(!use.GetUser()->GetHolder()->IsPhi());
    DCHECK(!filter || !ShouldFilterUse(instruction, use.GetUser()->GetHolder(), post_dominated));
    finder.Update(use.GetUser()->GetHolder()->GetBlock());
  }
  HBasicBlock* target_block = finder.Get();
  if (target_block == nullptr) {
    // No user we can go next to? Likely an LSE or DCE limitation.
    return nullptr;
  }

  // Move to the first dominator not in a loop, if we can.
  while (target_block->IsInLoop()) {
    if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) {
      break;
    }
    target_block = target_block->GetDominator();
    DCHECK(target_block != nullptr);
  }

  // Bail if the instruction can throw and we are about to move into a catch block.
  if (instruction->CanThrow() && target_block->GetTryCatchInformation() != nullptr) {
    return nullptr;
  }

  // Find insertion position. No need to filter anymore, as we have found a
  // target block.
  HInstruction* insert_pos = nullptr;
  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
    if (use.GetUser()->GetBlock() == target_block &&
        (insert_pos == nullptr || use.GetUser()->StrictlyDominates(insert_pos))) {
      insert_pos = use.GetUser();
    }
  }
  for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
    HInstruction* user = use.GetUser()->GetHolder();
    if (user->GetBlock() == target_block &&
        (insert_pos == nullptr || user->StrictlyDominates(insert_pos))) {
      insert_pos = user;
    }
  }
  if (insert_pos == nullptr) {
    // No user in `target_block`, insert before the control flow instruction.
    insert_pos = target_block->GetLastInstruction();
    DCHECK(insert_pos->IsControlFlow());
    // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
    if (insert_pos->IsIf()) {
      HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
      if (if_input == insert_pos->GetPrevious()) {
        insert_pos = if_input;
      }
    }
  }
  DCHECK(!insert_pos->IsPhi());
  return insert_pos;
}


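// Try to sink interesting instructions closer to `end_block`, a block ending in a throw
// (used as an indicator of an uncommon branch):
//   (1) find a subset of blocks post dominated by `end_block`,
//   (2) use a worklist to collect instructions whose uses are all in those blocks
//       (or are themselves sinking candidates),
//   (3) move each candidate to an ideal position within the post dominated blocks.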
void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
  // Local allocator to discard data structures created below at the end of this optimization.
  ScopedArenaAllocator allocator(graph_->GetArenaStack());

  size_t number_of_instructions = graph_->GetCurrentInstructionId();
  ScopedArenaVector<HInstruction*> worklist(allocator.Adapter(kArenaAllocMisc));
  ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable */ false);
  processed_instructions.ClearAllBits();
  ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable */ false);
  post_dominated.ClearAllBits();
  ArenaBitVector instructions_that_can_move(
      &allocator, number_of_instructions, /* expandable */ false);
  instructions_that_can_move.ClearAllBits();
  ScopedArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc));

  // Step (1): Visit post order to get a subset of blocks post dominated by `end_block`.
  // TODO(ngeoffray): Getting the full set of post-dominated blocks should be done by
  // computing the post dominator tree, but that could be too time consuming. Also,
  // we should start the analysis from blocks dominated by an uncommon branch, but we
  // don't profile branches yet.
  bool found_block = false;
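  // Walk in post order: a block's successors (except along back edges) have already been
  // visited, so their `post_dominated` bits are final when the block itself is examined.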
  for (HBasicBlock* block : graph_->GetPostOrder()) {
    if (block == end_block) {
      found_block = true;
      post_dominated.SetBit(block->GetBlockId());
    } else if (found_block) {
      bool is_post_dominated = true;
      if (block->GetSuccessors().empty()) {
        // We currently bail for loops.
        is_post_dominated = false;
      } else {
        for (HBasicBlock* successor : block->GetSuccessors()) {
          if (!post_dominated.IsBitSet(successor->GetBlockId())) {
            is_post_dominated = false;
            break;
          }
        }
      }
      if (is_post_dominated) {
        post_dominated.SetBit(block->GetBlockId());
      }
    }
  }

  // Now that we have found a subset of post-dominated blocks, add to the worklist all inputs
  // of instructions in these blocks that are not themselves in these blocks.
  // Also find the common dominator of the found post dominated blocks, to help filter
  // out un-movable uses in step (2).
  CommonDominator finder(end_block);
  for (size_t i = 0, e = graph_->GetBlocks().size(); i < e; ++i) {
    if (post_dominated.IsBitSet(i)) {
      finder.Update(graph_->GetBlocks()[i]);
      AddInputs(graph_->GetBlocks()[i], processed_instructions, post_dominated, &worklist);
    }
  }
  HBasicBlock* common_dominator = finder.Get();

  // Step (2): iterate over the worklist to find sinking candidates.
  while (!worklist.empty()) {
    HInstruction* instruction = worklist.back();
    if (processed_instructions.IsBitSet(instruction->GetId())) {
      // The instruction has already been processed, continue. This happens
      // when the instruction is the input/user of multiple instructions.
      worklist.pop_back();
      continue;
    }
    bool all_users_in_post_dominated_blocks = true;
    bool can_move = true;
    // Check users of the instruction.
    for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
      HInstruction* user = use.GetUser();
      if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId()) &&
          !instructions_that_can_move.IsBitSet(user->GetId())) {
        all_users_in_post_dominated_blocks = false;
        // If we've already processed this user, or the user cannot be moved, or
        // does not dominate the post dominated blocks, bail.
        // TODO(ngeoffray): The domination check is an approximation. We should
        // instead check if the dominated blocks post dominate the user's block,
        // but we do not have post dominance information here.
        if (processed_instructions.IsBitSet(user->GetId()) ||
            !IsInterestingInstruction(user) ||
            !user->GetBlock()->Dominates(common_dominator)) {
          can_move = false;
          break;
        }
      }
    }

    // Check environment users of the instruction. Some of these users require
    // the instruction not to move.
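    // (When the graph is debuggable, or when the holder is a deoptimization, can throw
    // into a catch block, or is an OSR suspend check, its environment must still see the
    // value, so the instruction cannot be sunk past it.)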
    if (all_users_in_post_dominated_blocks) {
      for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
        HEnvironment* environment = use.GetUser();
        HInstruction* user = environment->GetHolder();
        if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) {
          if (graph_->IsDebuggable() ||
              user->IsDeoptimize() ||
              user->CanThrowIntoCatchBlock() ||
              (user->IsSuspendCheck() && graph_->IsCompilingOsr())) {
            can_move = false;
            break;
          }
        }
      }
    }
    if (!can_move) {
      // Instruction cannot be moved, mark it as processed and remove it from the work
      // list.
      processed_instructions.SetBit(instruction->GetId());
      worklist.pop_back();
    } else if (all_users_in_post_dominated_blocks) {
      // Instruction is a candidate for being sunk. Mark it as such, remove it from the
      // work list, and add its inputs to the work list.
      instructions_that_can_move.SetBit(instruction->GetId());
      move_in_order.push_back(instruction);
      processed_instructions.SetBit(instruction->GetId());
      worklist.pop_back();
      AddInputs(instruction, processed_instructions, post_dominated, &worklist);
      // Drop the environment uses that are not in the list of post-dominated blocks. This is
      // to help step (3) of this optimization, when we start moving instructions
      // closer to their use.
      for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
        HEnvironment* environment = use.GetUser();
        HInstruction* user = environment->GetHolder();
        if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) {
          environment->RemoveAsUserOfInput(use.GetIndex());
          environment->SetRawEnvAt(use.GetIndex(), nullptr);
        }
      }
    } else {
      // The information we have on the users was not enough to decide whether the
      // instruction could be moved.
      // Add the users to the work list, and keep the instruction in the work list
      // to process it again once all users have been processed.
      for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
        AddInstruction(use.GetUser(), processed_instructions, post_dominated, &worklist);
      }
    }
  }

  // Make sure we process instructions in dominated order. This is required for heap
  // stores.
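  // The comparator orders an instruction before any instruction that strictly dominates
  // it, so stores are processed in step (3) before the allocations they write into.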
  std::sort(move_in_order.begin(), move_in_order.end(), [](HInstruction* a, HInstruction* b) {
    return b->StrictlyDominates(a);
  });

  // Step (3): Try to move sinking candidates.
  for (HInstruction* instruction : move_in_order) {
    HInstruction* position = nullptr;
    if (instruction->IsArraySet()
            || instruction->IsInstanceFieldSet()
            || instruction->IsConstructorFence()) {
      if (!instructions_that_can_move.IsBitSet(instruction->InputAt(0)->GetId())) {
        // A store can trivially move, but it can safely do so only if the heap
        // location it stores to can also move.
        // TODO(ngeoffray): Handle allocation/store cycles by pruning these instructions
        // from the set and all their inputs.
        continue;
      }
      // Find the position of the instruction we're storing into, filtering out this
      // store and all other stores to that instruction.
      position = FindIdealPosition(instruction->InputAt(0), post_dominated, /* filter */ true);

      // The position needs to be dominated by the store, in order for the store to move there.
      if (position == nullptr || !instruction->GetBlock()->Dominates(position->GetBlock())) {
        continue;
      }
    } else {
      // Find the ideal position within the post dominated blocks.
      position = FindIdealPosition(instruction, post_dominated);
      if (position == nullptr) {
        continue;
      }
    }
    // Bail if we could not find a position in the post dominated blocks (for example,
    // if there are multiple users whose common dominator is not in the list of
    // post dominated blocks).
    if (!post_dominated.IsBitSet(position->GetBlock()->GetBlockId())) {
      continue;
    }
    MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSunk);
    instruction->MoveBefore(position, /* ensure_safety */ false);
  }
}

}  // namespace art