/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "parallel_move_resolver.h"

#include "base/stl_util.h"
#include "nodes.h"

namespace art {

// Collects the non-redundant moves of `parallel_move` into the `moves_`
// worklist that the resolvers below operate on.
void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
  // Perform a linear sweep of the moves to add them to the initial list of
  // moves to perform, ignoring any move that is redundant (the source is
  // the same as the destination, the destination is ignored and
  // unallocated, or the move was already eliminated).
  for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
    MoveOperands* move = parallel_move->MoveOperandsAt(i);
    if (!move->IsRedundant()) {
      moves_.push_back(move);
    }
  }
}

// Resolves `parallel_move` into a sequence of individual moves, using swaps
// (see PerformMove) to break dependency cycles.
void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
  DCHECK(moves_.empty());
  // Build up a worklist of moves.
  BuildInitialMoveList(parallel_move);

  // Move stack/stack slot to take advantage of a free register on constrained machines.
  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& move = *moves_[i];
    // Ignore constants and moves already eliminated.
    if (move.IsEliminated() || move.GetSource().IsConstant()) {
      continue;
    }

    if ((move.GetSource().IsStackSlot() || move.GetSource().IsDoubleStackSlot()) &&
        (move.GetDestination().IsStackSlot() || move.GetDestination().IsDoubleStackSlot())) {
      PerformMove(i);
    }
  }

  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& move = *moves_[i];
    // Skip constants to perform them last. They don't block other moves
    // and skipping such moves with register destinations keeps those
    // registers free for the whole algorithm.
    if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
      PerformMove(i);
    }
  }

  // Perform the moves with constant sources.
  for (size_t i = 0; i < moves_.size(); ++i) {
    MoveOperands* move = moves_[i];
    if (!move->IsEliminated()) {
      DCHECK(move->GetSource().IsConstant());
      EmitMove(i);
      // Eliminate the move, in case following moves need a scratch register.
      move->Eliminate();
    }
  }

  moves_.clear();
}

// Returns the low half of `location` when it is a pair (register pair,
// FPU register pair, or double stack slot); NoLocation() otherwise.
Location LowOf(Location location) {
  if (location.IsRegisterPair()) {
    return Location::RegisterLocation(location.low());
  } else if (location.IsFpuRegisterPair()) {
    return Location::FpuRegisterLocation(location.low());
  } else if (location.IsDoubleStackSlot()) {
    return Location::StackSlot(location.GetStackIndex());
  } else {
    return Location::NoLocation();
  }
}

// Returns the high half of `location` when it is a pair (register pair,
// FPU register pair, or double stack slot); NoLocation() otherwise.
Location HighOf(Location location) {
  if (location.IsRegisterPair()) {
    return Location::RegisterLocation(location.high());
  } else if (location.IsFpuRegisterPair()) {
    return Location::FpuRegisterLocation(location.high());
  } else if (location.IsDoubleStackSlot()) {
    return Location::StackSlot(location.GetHighStackIndex(4));
  } else {
    return Location::NoLocation();
  }
}

// Update the source of `move`, knowing that `updated_location` has been swapped
// with `new_source`.
// Note that `updated_location` can be a pair, therefore if
// `move` is non-pair, we need to extract which register to use.
static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) {
  Location source = move->GetSource();
  if (LowOf(updated_location).Equals(source)) {
    move->SetSource(LowOf(new_source));
  } else if (HighOf(updated_location).Equals(source)) {
    move->SetSource(HighOf(new_source));
  } else {
    DCHECK(updated_location.Equals(source)) << updated_location << " " << source;
    move->SetSource(new_source);
  }
}

// Performs the move at `index` (recursively performing its blockers first).
// Returns nullptr when the move was emitted (or eliminated) directly;
// otherwise returns the move that a caller further up the recursion must
// swap — see the in-line comments on the cycle-breaking protocol below.
MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
  // Each call to this function performs a move and deletes it from the move
  // graph. We first recursively perform any move blocking this one. We
  // mark a move as "pending" on entry to PerformMove in order to detect
  // cycles in the move graph. We use operand swaps to resolve cycles,
  // which means that a call to PerformMove could change any source operand
  // in the move graph.

  MoveOperands* move = moves_[index];
  DCHECK(!move->IsPending());
  if (move->IsRedundant()) {
    // Because we swap register pairs first, following, un-pending
    // moves may become redundant.
    move->Eliminate();
    return nullptr;
  }

  // Clear this move's destination to indicate a pending move. The actual
  // destination is saved in a stack-allocated local. Recursion may allow
  // multiple moves to be pending.
  DCHECK(!move->GetSource().IsInvalid());
  Location destination = move->MarkPending();

  // Perform a depth-first traversal of the move graph to resolve
  // dependencies. Any unperformed, unpending move with a source the same
  // as this one's destination blocks this one so recursively perform all
  // such moves.
  MoveOperands* required_swap = nullptr;
  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& other_move = *moves_[i];
    if (other_move.Blocks(destination) && !other_move.IsPending()) {
      // Though PerformMove can change any source operand in the move graph,
      // calling `PerformMove` cannot create a blocking move via a swap
      // (this loop does not miss any).
      // For example, assume there is a non-blocking move with source A
      // and this move is blocked on source B and there is a swap of A and
      // B. Then A and B must be involved in the same cycle (or they would
      // not be swapped). Since this move's destination is B and there is
      // only a single incoming edge to an operand, this move must also be
      // involved in the same cycle. In that case, the blocking move will
      // be created but will be "pending" when we return from PerformMove.
      required_swap = PerformMove(i);

      if (required_swap == move) {
        // If this move is required to swap, we do so without looking
        // at the next moves. Swapping is not blocked by anything, it just
        // updates other moves' source.
        break;
      } else if (required_swap == moves_[i]) {
        // If `other_move` was swapped, we iterate again to find a new
        // potential cycle.
        required_swap = nullptr;
        // Restart the scan from the beginning (the ++i makes this 0).
        i = -1;
      } else if (required_swap != nullptr) {
        // A move is required to swap. We walk back the cycle to find the
        // move by just returning from this `PerformMove`.
        moves_[index]->ClearPending(destination);
        return required_swap;
      }
    }
  }

  // We are about to resolve this move and don't need it marked as
  // pending, so restore its destination.
  move->ClearPending(destination);

  // This move's source may have changed due to swaps to resolve cycles and
  // so it may now be the last move in the cycle. If so remove it.
  if (move->GetSource().Equals(destination)) {
    move->Eliminate();
    DCHECK(required_swap == nullptr);
    return nullptr;
  }

  // The move may be blocked on a (at most one) pending move, in which case
  // we have a cycle. Search for such a blocking move and perform a swap to
  // resolve it.
  bool do_swap = false;
  if (required_swap != nullptr) {
    DCHECK_EQ(required_swap, move);
    do_swap = true;
  } else {
    for (MoveOperands* other_move : moves_) {
      if (other_move->Blocks(destination)) {
        DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move;
        if (!move->Is64BitMove() && other_move->Is64BitMove()) {
          // We swap 64bits moves before swapping 32bits moves. Go back from the
          // cycle by returning the move that must be swapped.
          return other_move;
        }
        do_swap = true;
        break;
      }
    }
  }

  if (do_swap) {
    EmitSwap(index);
    // Any unperformed (including pending) move with a source of either
    // this move's source or destination needs to have their source
    // changed to reflect the state of affairs after the swap.
    Location source = move->GetSource();
    Location swap_destination = move->GetDestination();
    move->Eliminate();
    for (MoveOperands* other_move : moves_) {
      if (other_move->Blocks(source)) {
        UpdateSourceOf(other_move, source, swap_destination);
      } else if (other_move->Blocks(swap_destination)) {
        UpdateSourceOf(other_move, swap_destination, source);
      }
    }
    // If the swap was required because of a 64bits move in the middle of a cycle,
    // we return the swapped move, so that the caller knows it needs to re-iterate
    // its dependency loop.
    return required_swap;
  } else {
    // This move is not blocked.
    EmitMove(index);
    move->Eliminate();
    DCHECK(required_swap == nullptr);
    return nullptr;
  }
}

// A location is usable as a scratch if no remaining move reads from it
// (it blocks nothing) and some remaining move is going to overwrite it
// anyway (it is a destination), so clobbering it is safe.
bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
  for (MoveOperands* move : moves_) {
    if (move->Blocks(loc)) {
      return false;
    }
  }

  for (MoveOperands* move : moves_) {
    if (move->GetDestination().Equals(loc)) {
      return true;
    }
  }

  return false;
}

// Picks a scratch register out of the first `register_count` registers,
// preferring one that is free per IsScratchLocation(). Falls back to
// `if_scratch` (setting `*spilled`, so the caller must spill/restore it).
// `blocked` is never chosen.
int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked,
                                                          int register_count,
                                                          int if_scratch,
                                                          bool* spilled) {
  DCHECK_NE(blocked, if_scratch);
  int scratch = -1;
  for (int reg = 0; reg < register_count; ++reg) {
    if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
      scratch = reg;
      break;
    }
  }

  if (scratch == -1) {
    *spilled = true;
    scratch = if_scratch;
  } else {
    *spilled = false;
  }

  return scratch;
}


// RAII scope: allocates a scratch register on entry, spilling one if no
// register is free; the destructor restores the spilled value.
ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope(
    ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers)
    : resolver_(resolver),
      reg_(kNoRegister),
      spilled_(false) {
  reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, if_scratch, &spilled_);

  if (spilled_) {
    resolver->SpillScratch(reg_);
  }
}


ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() {
  if (spilled_) {
    resolver_->RestoreScratch(reg_);
  }
}

// Resolves `parallel_move` without using swaps: cycles are broken by
// copying through scratch locations instead (see PerformMove below).
void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
  DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
  DCHECK(moves_.empty());
  DCHECK(scratches_.empty());

  // Backend dependent initialization.
  PrepareForEmitNativeCode();

  // Build up a worklist of moves.
  BuildInitialMoveList(parallel_move);

  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& move = *moves_[i];
    // Skip constants to perform them last. They don't block other moves and
    // skipping such moves with register destinations keeps those registers
    // free for the whole algorithm.
    if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
      PerformMove(i);
    }
  }

  // Perform the moves with constant sources and register destinations with UpdateMoveSource()
  // to reduce the number of literal loads. Stack destinations are skipped since we won't benefit
  // from changing the constant sources to stack locations.
  for (size_t i = 0; i < moves_.size(); ++i) {
    MoveOperands* move = moves_[i];
    Location destination = move->GetDestination();
    if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
      Location source = move->GetSource();
      EmitMove(i);
      move->Eliminate();
      // This may introduce additional instruction dependency, but reduce number
      // of moves and possible literal loads. For example,
      // Original moves:
      //   1234.5678 -> D0
      //   1234.5678 -> D1
      // Updated moves:
      //   1234.5678 -> D0
      //   D0 -> D1
      UpdateMoveSource(source, destination);
    }
  }

  // Perform the rest of the moves.
  for (size_t i = 0; i < moves_.size(); ++i) {
    MoveOperands* move = moves_[i];
    if (!move->IsEliminated()) {
      EmitMove(i);
      move->Eliminate();
    }
  }

  // All pending moves that we have added for resolving cycles should be performed.
  DCHECK_EQ(GetNumberOfPendingMoves(), 0u);

  // Backend dependent cleanup.
  FinishEmitNativeCode();

  moves_.clear();
  scratches_.clear();
}

// Returns an unblocked location of `kind` that may be used as a scratch:
// first a previously allocated scratch, then any remaining move's
// destination (it will be overwritten anyway). NoLocation() if none exists.
Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
  for (Location loc : scratches_) {
    if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
      return loc;
    }
  }
  for (MoveOperands* move : moves_) {
    Location loc = move->GetDestination();
    if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
      return loc;
    }
  }
  return Location::NoLocation();
}

// Registers `loc` as an in-use scratch location. Must not already be present.
void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
  if (kIsDebugBuild) {
    for (Location scratch : scratches_) {
      CHECK(!loc.Equals(scratch));
    }
  }
  scratches_.push_back(loc);
}

// Removes `loc` from the scratch list; it must no longer be blocked by any move.
void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
  DCHECK(!IsBlockedByMoves(loc));
  for (auto it = scratches_.begin(), end = scratches_.end(); it != end; ++it) {
    if (loc.Equals(*it)) {
      scratches_.erase(it);
      break;
    }
  }
}

void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
  // Each call to this function performs a move and deletes it from the move
  // graph. We first recursively perform any move blocking this one. We mark
  // a move as "pending" on entry to PerformMove in order to detect cycles
  // in the move graph. We use scratch location to resolve cycles, also
  // additional pending moves might be added. After move has been performed,
  // we will update source operand in the move graph to reduce dependencies in
  // the graph.

  MoveOperands* move = moves_[index];
  DCHECK(!move->IsPending());
  DCHECK(!move->IsEliminated());
  if (move->IsRedundant()) {
    // Previous operations on the list of moves have caused this particular move
    // to become a no-op, so we can safely eliminate it. Consider for example
    // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), that we will
    // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is
    // used as the scratch location, the move (1 -> 2) will occur while resolving
    // the cycle. When that move is emitted, the code will update moves with a '1'
    // as their source to use '2' instead (see `UpdateMoveSource()`). In our example
    // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be
    // eliminated here.
    move->Eliminate();
    return;
  }

  // Clear this move's destination to indicate a pending move. The actual
  // destination is saved in a stack-allocated local. Recursion may allow
  // multiple moves to be pending.
  DCHECK(!move->GetSource().IsInvalid());
  Location destination = move->MarkPending();

  // Perform a depth-first traversal of the move graph to resolve
  // dependencies. Any unperformed, unpending move with a source the same
  // as this one's destination blocks this one so recursively perform all
  // such moves.
  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& other_move = *moves_[i];
    if (other_move.Blocks(destination) && !other_move.IsPending()) {
      PerformMove(i);
    }
  }

  // We are about to resolve this move and don't need it marked as
  // pending, so restore its destination.
  move->ClearPending(destination);

  // No one else should write to the move destination when it is pending.
  DCHECK(!move->IsRedundant());

  Location source = move->GetSource();
  // The move may be blocked on several pending moves, in case we have a cycle.
  if (IsBlockedByMoves(destination)) {
    // For a cycle like: (A -> B) (B -> C) (C -> A), we change it to following
    // sequence:
    // (C -> scratch)     # Emit right now.
    // (A -> B) (B -> C)  # Unblocked.
    // (scratch -> A)     # Add to pending_moves_, blocked by (A -> B).
    Location::Kind kind = source.GetKind();
    DCHECK_NE(kind, Location::kConstant);
    Location scratch = AllocateScratchLocationFor(kind);
    // We only care about the move size.
    Primitive::Type type = move->Is64BitMove() ? Primitive::kPrimLong : Primitive::kPrimInt;
    // Perform (C -> scratch)
    move->SetDestination(scratch);
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(source, scratch);
    // Add (scratch -> A).
    AddPendingMove(scratch, destination, type);
  } else {
    // This move is not blocked.
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(source, destination);
  }

  // Moves in the pending list should not block any other moves. But performing
  // unblocked moves in the pending list can free scratch registers, so we do this
  // as early as possible.
  MoveOperands* pending_move;
  while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) {
    Location pending_source = pending_move->GetSource();
    Location pending_destination = pending_move->GetDestination();
    // We do not depend on the pending move index. So just delete the move instead
    // of eliminating it to make the pending list cleaner.
    DeletePendingMove(pending_move);
    move->SetSource(pending_source);
    move->SetDestination(pending_destination);
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(pending_source, pending_destination);
    // Free any unblocked locations in the scratch location list.
    // Note: Fetch size() on each iteration because scratches_ can be modified inside the loop.
    // FIXME: If FreeScratchLocation() removes the location from scratches_,
    // we skip the next location. This happens for arm64.
    for (size_t i = 0; i < scratches_.size(); ++i) {
      Location scratch = scratches_[i];
      // Only scratch overlapping with performed move source can be unblocked.
      if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
        FreeScratchLocation(pending_source);
      }
    }
  }
}

void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
  // This function is used to reduce the dependencies in the graph after
  // (from -> to) has been performed. Since we ensure there is no move with the same
  // destination, (to -> X) cannot be blocked while (from -> X) might still be
  // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
  // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
  // a dependency between the two. If we update the source location from 1 to 2, we
  // will get (0 -> 1) and (2 -> 3). There is no dependency between the two.
  //
  // This is not something we must do, but we can use fewer scratch locations with
  // this trick. For example, we can avoid using additional scratch locations for
  // moves (0 -> 1), (1 -> 2), (1 -> 0).
  for (MoveOperands* move : moves_) {
    if (move->GetSource().Equals(from)) {
      move->SetSource(to);
    }
  }
}

// Appends a (source -> destination) move of `type` to the pending list.
// The MoveOperands is allocated in `allocator_` with no associated instruction.
void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
    Location destination, Primitive::Type type) {
  pending_moves_.push_back(new (allocator_) MoveOperands(source, destination, type, nullptr));
}

// Removes `move` from the pending list (the arena-allocated object itself
// is not freed; the arena owns it).
void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
  RemoveElement(pending_moves_, move);
}

// Returns a pending move whose destination overlaps `loc` and is no longer
// blocked by any move, or nullptr if there is none.
MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
  for (MoveOperands* move : pending_moves_) {
    Location destination = move->GetDestination();
    // Only moves with destination overlapping with input loc can be unblocked.
    if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
      return move;
    }
  }
  return nullptr;
}

// Returns true if any pending or remaining move still reads from `loc`.
bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
  for (MoveOperands* move : pending_moves_) {
    if (move->Blocks(loc)) {
      return true;
    }
  }
  for (MoveOperands* move : moves_) {
    if (move->Blocks(loc)) {
      return true;
    }
  }
  return false;
}

// So far it is only used for debugging purposes to make sure all pending moves
// have been performed.
size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
  return pending_moves_.size();
}

}  // namespace art