// art/compiler/optimizing/parallel_move_resolver.cc
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "parallel_move_resolver.h"
     18 
     19 #include "base/stl_util.h"
     20 #include "nodes.h"
     21 
     22 namespace art {
     23 
     24 void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
     25   // Perform a linear sweep of the moves to add them to the initial list of
     26   // moves to perform, ignoring any move that is redundant (the source is
     27   // the same as the destination, the destination is ignored and
     28   // unallocated, or the move was already eliminated).
     29   for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
     30     MoveOperands* move = parallel_move->MoveOperandsAt(i);
     31     if (!move->IsRedundant()) {
     32       moves_.push_back(move);
     33     }
     34   }
     35 }
     36 
     37 void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
     38   DCHECK(moves_.empty());
     39   // Build up a worklist of moves.
     40   BuildInitialMoveList(parallel_move);
     41 
     42   // Move stack/stack slot to take advantage of a free register on constrained machines.
     43   for (size_t i = 0; i < moves_.size(); ++i) {
     44     const MoveOperands& move = *moves_[i];
     45     // Ignore constants and moves already eliminated.
     46     if (move.IsEliminated() || move.GetSource().IsConstant()) {
     47       continue;
     48     }
     49 
     50     if ((move.GetSource().IsStackSlot() || move.GetSource().IsDoubleStackSlot()) &&
     51         (move.GetDestination().IsStackSlot() || move.GetDestination().IsDoubleStackSlot())) {
     52       PerformMove(i);
     53     }
     54   }
     55 
     56   for (size_t i = 0; i < moves_.size(); ++i) {
     57     const MoveOperands& move = *moves_[i];
     58     // Skip constants to perform them last.  They don't block other moves
     59     // and skipping such moves with register destinations keeps those
     60     // registers free for the whole algorithm.
     61     if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
     62       PerformMove(i);
     63     }
     64   }
     65 
     66   // Perform the moves with constant sources.
     67   for (size_t i = 0; i < moves_.size(); ++i) {
     68     MoveOperands* move = moves_[i];
     69     if (!move->IsEliminated()) {
     70       DCHECK(move->GetSource().IsConstant());
     71       EmitMove(i);
     72       // Eliminate the move, in case following moves need a scratch register.
     73       move->Eliminate();
     74     }
     75   }
     76 
     77   moves_.clear();
     78 }
     79 
     80 Location LowOf(Location location) {
     81   if (location.IsRegisterPair()) {
     82     return Location::RegisterLocation(location.low());
     83   } else if (location.IsFpuRegisterPair()) {
     84     return Location::FpuRegisterLocation(location.low());
     85   } else if (location.IsDoubleStackSlot()) {
     86     return Location::StackSlot(location.GetStackIndex());
     87   } else {
     88     return Location::NoLocation();
     89   }
     90 }
     91 
     92 Location HighOf(Location location) {
     93   if (location.IsRegisterPair()) {
     94     return Location::RegisterLocation(location.high());
     95   } else if (location.IsFpuRegisterPair()) {
     96     return Location::FpuRegisterLocation(location.high());
     97   } else if (location.IsDoubleStackSlot()) {
     98     return Location::StackSlot(location.GetHighStackIndex(4));
     99   } else {
    100     return Location::NoLocation();
    101   }
    102 }
    103 
    104 // Update the source of `move`, knowing that `updated_location` has been swapped
    105 // with `new_source`. Note that `updated_location` can be a pair, therefore if
    106 // `move` is non-pair, we need to extract which register to use.
    107 static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) {
    108   Location source = move->GetSource();
    109   if (LowOf(updated_location).Equals(source)) {
    110     move->SetSource(LowOf(new_source));
    111   } else if (HighOf(updated_location).Equals(source)) {
    112     move->SetSource(HighOf(new_source));
    113   } else {
    114     DCHECK(updated_location.Equals(source)) << updated_location << " " << source;
    115     move->SetSource(new_source);
    116   }
    117 }
    118 
MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
  // Each call to this function performs a move and deletes it from the move
  // graph.  We first recursively perform any move blocking this one.  We
  // mark a move as "pending" on entry to PerformMove in order to detect
  // cycles in the move graph.  We use operand swaps to resolve cycles,
  // which means that a call to PerformMove could change any source operand
  // in the move graph.
  //
  // Returns nullptr on normal completion.  When a 64-bit move inside a cycle
  // must be swapped before this (32-bit) one, returns the move that has to be
  // swapped first so that callers can unwind the recursion back to it.

  MoveOperands* move = moves_[index];
  DCHECK(!move->IsPending());
  if (move->IsRedundant()) {
    // Because we swap register pairs first, following, un-pending
    // moves may become redundant.
    move->Eliminate();
    return nullptr;
  }

  // Clear this move's destination to indicate a pending move.  The actual
  // destination is saved in a stack-allocated local.  Recursion may allow
  // multiple moves to be pending.
  DCHECK(!move->GetSource().IsInvalid());
  Location destination = move->MarkPending();

  // Perform a depth-first traversal of the move graph to resolve
  // dependencies.  Any unperformed, unpending move with a source the same
  // as this one's destination blocks this one so recursively perform all
  // such moves.
  MoveOperands* required_swap = nullptr;
  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& other_move = *moves_[i];
    if (other_move.Blocks(destination) && !other_move.IsPending()) {
      // Though PerformMove can change any source operand in the move graph,
      // calling `PerformMove` cannot create a blocking move via a swap
      // (this loop does not miss any).
      // For example, assume there is a non-blocking move with source A
      // and this move is blocked on source B and there is a swap of A and
      // B.  Then A and B must be involved in the same cycle (or they would
      // not be swapped).  Since this move's destination is B and there is
      // only a single incoming edge to an operand, this move must also be
      // involved in the same cycle.  In that case, the blocking move will
      // be created but will be "pending" when we return from PerformMove.
      required_swap = PerformMove(i);

      if (required_swap == move) {
        // If this move is required to swap, we do so without looking
        // at the next moves. Swapping is not blocked by anything, it just
        // updates other moves' sources.
        break;
      } else if (required_swap == moves_[i]) {
        // If `other_move` was swapped, we iterate again to find a new
        // potential cycle.
        required_swap = nullptr;
        i = -1;  // Restart the scan: the loop's `++i` wraps this back to 0.
      } else if (required_swap != nullptr) {
        // A move is required to swap. We walk back the cycle to find the
        // move by just returning from this `PerformMove`.
        moves_[index]->ClearPending(destination);
        return required_swap;
      }
    }
  }

  // We are about to resolve this move and don't need it marked as
  // pending, so restore its destination.
  move->ClearPending(destination);

  // This move's source may have changed due to swaps to resolve cycles and
  // so it may now be the last move in the cycle.  If so remove it.
  if (move->GetSource().Equals(destination)) {
    move->Eliminate();
    DCHECK(required_swap == nullptr);
    return nullptr;
  }

  // The move may be blocked on a (at most one) pending move, in which case
  // we have a cycle.  Search for such a blocking move and perform a swap to
  // resolve it.
  bool do_swap = false;
  if (required_swap != nullptr) {
    DCHECK_EQ(required_swap, move);
    do_swap = true;
  } else {
    for (MoveOperands* other_move : moves_) {
      if (other_move->Blocks(destination)) {
        DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move;
        if (!move->Is64BitMove() && other_move->Is64BitMove()) {
          // We swap 64bits moves before swapping 32bits moves. Go back from the
          // cycle by returning the move that must be swapped.
          return other_move;
        }
        do_swap = true;
        break;
      }
    }
  }

  if (do_swap) {
    EmitSwap(index);
    // Any unperformed (including pending) move with a source of either
    // this move's source or destination needs to have their source
    // changed to reflect the state of affairs after the swap.
    Location source = move->GetSource();
    Location swap_destination = move->GetDestination();
    move->Eliminate();
    for (MoveOperands* other_move : moves_) {
      if (other_move->Blocks(source)) {
        UpdateSourceOf(other_move, source, swap_destination);
      } else if (other_move->Blocks(swap_destination)) {
        UpdateSourceOf(other_move, swap_destination, source);
      }
    }
    // If the swap was required because of a 64bits move in the middle of a cycle,
    // we return the swapped move, so that the caller knows it needs to re-iterate
    // its dependency loop.
    return required_swap;
  } else {
    // This move is not blocked.
    EmitMove(index);
    move->Eliminate();
    DCHECK(required_swap == nullptr);
    return nullptr;
  }
}
    242 
    243 bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
    244   for (MoveOperands* move : moves_) {
    245     if (move->Blocks(loc)) {
    246       return false;
    247     }
    248   }
    249 
    250   for (MoveOperands* move : moves_) {
    251     if (move->GetDestination().Equals(loc)) {
    252       return true;
    253     }
    254   }
    255 
    256   return false;
    257 }
    258 
    259 int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked,
    260                                                           int register_count,
    261                                                           int if_scratch,
    262                                                           bool* spilled) {
    263   DCHECK_NE(blocked, if_scratch);
    264   int scratch = -1;
    265   for (int reg = 0; reg < register_count; ++reg) {
    266     if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
    267       scratch = reg;
    268       break;
    269     }
    270   }
    271 
    272   if (scratch == -1) {
    273     *spilled = true;
    274     scratch = if_scratch;
    275   } else {
    276     *spilled = false;
    277   }
    278 
    279   return scratch;
    280 }
    281 
    282 
    283 ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope(
    284     ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers)
    285     : resolver_(resolver),
    286       reg_(kNoRegister),
    287       spilled_(false) {
    288   reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, if_scratch, &spilled_);
    289 
    290   if (spilled_) {
    291     resolver->SpillScratch(reg_);
    292   }
    293 }
    294 
    295 
    296 ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() {
    297   if (spilled_) {
    298     resolver_->RestoreScratch(reg_);
    299   }
    300 }
    301 
    302 void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
    303   DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
    304   DCHECK(moves_.empty());
    305   DCHECK(scratches_.empty());
    306 
    307   // Backend dependent initialization.
    308   PrepareForEmitNativeCode();
    309 
    310   // Build up a worklist of moves.
    311   BuildInitialMoveList(parallel_move);
    312 
    313   for (size_t i = 0; i < moves_.size(); ++i) {
    314     const MoveOperands& move = *moves_[i];
    315     // Skip constants to perform them last. They don't block other moves and
    316     // skipping such moves with register destinations keeps those registers
    317     // free for the whole algorithm.
    318     if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
    319       PerformMove(i);
    320     }
    321   }
    322 
    323   // Perform the moves with constant sources and register destinations with UpdateMoveSource()
    324   // to reduce the number of literal loads. Stack destinations are skipped since we won't be benefit
    325   // from changing the constant sources to stack locations.
    326   for (size_t i = 0; i < moves_.size(); ++i) {
    327     MoveOperands* move = moves_[i];
    328     Location destination = move->GetDestination();
    329     if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
    330       Location source = move->GetSource();
    331       EmitMove(i);
    332       move->Eliminate();
    333       // This may introduce additional instruction dependency, but reduce number
    334       // of moves and possible literal loads. For example,
    335       // Original moves:
    336       //   1234.5678 -> D0
    337       //   1234.5678 -> D1
    338       // Updated moves:
    339       //   1234.5678 -> D0
    340       //   D0 -> D1
    341       UpdateMoveSource(source, destination);
    342     }
    343   }
    344 
    345   // Perform the rest of the moves.
    346   for (size_t i = 0; i < moves_.size(); ++i) {
    347     MoveOperands* move = moves_[i];
    348     if (!move->IsEliminated()) {
    349       EmitMove(i);
    350       move->Eliminate();
    351     }
    352   }
    353 
    354   // All pending moves that we have added for resolve cycles should be performed.
    355   DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
    356 
    357   // Backend dependent cleanup.
    358   FinishEmitNativeCode();
    359 
    360   moves_.clear();
    361   scratches_.clear();
    362 }
    363 
    364 Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
    365   for (Location loc : scratches_) {
    366     if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
    367       return loc;
    368     }
    369   }
    370   for (MoveOperands* move : moves_) {
    371     Location loc = move->GetDestination();
    372     if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
    373       return loc;
    374     }
    375   }
    376   return Location::NoLocation();
    377 }
    378 
    379 void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
    380   if (kIsDebugBuild) {
    381     for (Location scratch : scratches_) {
    382       CHECK(!loc.Equals(scratch));
    383     }
    384   }
    385   scratches_.push_back(loc);
    386 }
    387 
    388 void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
    389   DCHECK(!IsBlockedByMoves(loc));
    390   for (auto it = scratches_.begin(), end = scratches_.end(); it != end; ++it) {
    391     if (loc.Equals(*it)) {
    392       scratches_.erase(it);
    393       break;
    394     }
    395   }
    396 }
    397 
void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
  // Each call to this function performs a move and deletes it from the move
  // graph. We first recursively perform any move blocking this one. We mark
  // a move as "pending" on entry to PerformMove in order to detect cycles
  // in the move graph. We use scratch location to resolve cycles, also
  // additional pending moves might be added. After move has been performed,
  // we will update source operand in the move graph to reduce dependencies in
  // the graph.

  MoveOperands* move = moves_[index];
  DCHECK(!move->IsPending());
  DCHECK(!move->IsEliminated());
  if (move->IsRedundant()) {
    // Previous operations on the list of moves have caused this particular move
    // to become a no-op, so we can safely eliminate it. Consider for example
    // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), that we will
    // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is
    // used as the scratch location, the move (1 -> 2) will occur while resolving
    // the cycle. When that move is emitted, the code will update moves with a '1'
    // as their source to use '2' instead (see `UpdateMoveSource()`). In our example
    // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be
    // eliminated here.
    move->Eliminate();
    return;
  }

  // Clear this move's destination to indicate a pending move. The actual
  // destination is saved in a stack-allocated local. Recursion may allow
  // multiple moves to be pending.
  DCHECK(!move->GetSource().IsInvalid());
  Location destination = move->MarkPending();

  // Perform a depth-first traversal of the move graph to resolve
  // dependencies. Any unperformed, unpending move with a source the same
  // as this one's destination blocks this one so recursively perform all
  // such moves.
  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& other_move = *moves_[i];
    if (other_move.Blocks(destination) && !other_move.IsPending()) {
      PerformMove(i);
    }
  }

  // We are about to resolve this move and don't need it marked as
  // pending, so restore its destination.
  move->ClearPending(destination);

  // No one else should write to the move destination when it is pending.
  DCHECK(!move->IsRedundant());

  Location source = move->GetSource();
  // The move may be blocked on several pending moves, in case we have a cycle.
  if (IsBlockedByMoves(destination)) {
    // For a cycle like: (A -> B) (B -> C) (C -> A), we change it to following
    // sequence:
    // (C -> scratch)     # Emit right now.
    // (A -> B) (B -> C)  # Unblocked.
    // (scratch -> A)     # Add to pending_moves_, blocked by (A -> B).
    Location::Kind kind = source.GetKind();
    DCHECK_NE(kind, Location::kConstant);
    Location scratch = AllocateScratchLocationFor(kind);
    // We only care about the move size.
    DataType::Type type = move->Is64BitMove() ? DataType::Type::kInt64 : DataType::Type::kInt32;
    // Perform (C -> scratch)
    move->SetDestination(scratch);
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(source, scratch);
    // Add (scratch -> A).
    AddPendingMove(scratch, destination, type);
  } else {
    // This move is not blocked.
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(source, destination);
  }

  // Moves in the pending list should not block any other moves. But performing
  // unblocked moves in the pending list can free scratch registers, so we do this
  // as early as possible.
  MoveOperands* pending_move;
  while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) {
    Location pending_source = pending_move->GetSource();
    Location pending_destination = pending_move->GetDestination();
    // We do not depend on the pending move index. So just delete the move instead
    // of eliminating it to make the pending list cleaner.
    DeletePendingMove(pending_move);
    // Reuse this move's (already eliminated) slot to emit the pending move.
    move->SetSource(pending_source);
    move->SetDestination(pending_destination);
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(pending_source, pending_destination);
    // Free any unblocked locations in the scratch location list.
    // Note: Fetch size() on each iteration because scratches_ can be modified inside the loop.
    // FIXME: If FreeScratchLocation() removes the location from scratches_,
    // we skip the next location. This happens for arm64.
    for (size_t i = 0; i < scratches_.size(); ++i) {
      Location scratch = scratches_[i];
      // Only scratch overlapping with performed move source can be unblocked.
      if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
        FreeScratchLocation(pending_source);
      }
    }
  }
}
    503 
    504 void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
    505   // This function is used to reduce the dependencies in the graph after
    506   // (from -> to) has been performed. Since we ensure there is no move with the same
    507   // destination, (to -> X) cannot be blocked while (from -> X) might still be
    508   // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
    509   // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
    510   // a dependency between the two. If we update the source location from 1 to 2, we
    511   // will get (0 -> 1) and (2 -> 3). There is no dependency between the two.
    512   //
    513   // This is not something we must do, but we can use fewer scratch locations with
    514   // this trick. For example, we can avoid using additional scratch locations for
    515   // moves (0 -> 1), (1 -> 2), (1 -> 0).
    516   for (MoveOperands* move : moves_) {
    517     if (move->GetSource().Equals(from)) {
    518       move->SetSource(to);
    519     }
    520   }
    521 }
    522 
void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
                                                Location destination,
                                                DataType::Type type) {
  // Pending moves are arena-allocated from `allocator_` and referenced only
  // through `pending_moves_`; they carry no associated instruction (nullptr).
  pending_moves_.push_back(new (allocator_) MoveOperands(source, destination, type, nullptr));
}
    528 
void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
  // Drop `move` from the pending list. The MoveOperands object itself is
  // arena-allocated, so no explicit deallocation is needed.
  RemoveElement(pending_moves_, move);
}
    532 
    533 MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
    534   for (MoveOperands* move : pending_moves_) {
    535     Location destination = move->GetDestination();
    536     // Only moves with destination overlapping with input loc can be unblocked.
    537     if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
    538       return move;
    539     }
    540   }
    541   return nullptr;
    542 }
    543 
    544 bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
    545   for (MoveOperands* move : pending_moves_) {
    546     if (move->Blocks(loc)) {
    547       return true;
    548     }
    549   }
    550   for (MoveOperands* move : moves_) {
    551     if (move->Blocks(loc)) {
    552       return true;
    553     }
    554   }
    555   return false;
    556 }
    557 
// So far it is only used for debugging purposes to make sure all pending moves
// have been performed.
size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
  // Count of moves added by AddPendingMove() and not yet deleted.
  return pending_moves_.size();
}
    563 
    564 }  // namespace art
    565