/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "loop_optimization.h"

#include "arch/instruction_set.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "arch/mips/instruction_set_features_mips.h"
#include "arch/mips64/instruction_set_features_mips64.h"
#include "arch/x86/instruction_set_features_x86.h"
#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "driver/compiler_driver.h"
#include "linear_order.h"

namespace art {

// Enables vectorization (SIMDization) in the loop optimizer.
static constexpr bool kEnableVectorization = true;

// All current SIMD targets want 16-byte alignment.
static constexpr size_t kAlignedBase = 16;

// Remove the instruction from the graph. A bit more elaborate than the usual
// instruction removal, since there may be a cycle in the use structure.
static void RemoveFromCycle(HInstruction* instruction) {
  instruction->RemoveAsUserOfAllInputs();
  instruction->RemoveEnvironmentUsers();
  instruction->GetBlock()->RemoveInstructionOrPhi(instruction, /*ensure_safety=*/ false);
}

// Detect a goto block and set succ to the single successor.
static bool IsGotoBlock(HBasicBlock* block, /*out*/ HBasicBlock** succ) {
  if (block->GetPredecessors().size() == 1 &&
      block->GetSuccessors().size() == 1 &&
      block->IsSingleGoto()) {
    *succ = block->GetSingleSuccessor();
    return true;
  }
  return false;
}

// Detect an early exit loop.
static bool IsEarlyExit(HLoopInformation* loop_info) {
  HBlocksInLoopReversePostOrderIterator it_loop(*loop_info);
  for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) {
    for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) {
      if (!loop_info->Contains(*successor)) {
        return true;
      }
    }
  }
  return false;
}

// Detect a sign extension from the given type. Returns true on success
// and sets the promoted operand.
static bool IsSignExtensionAndGet(HInstruction* instruction,
                                  Primitive::Type type,
                                  /*out*/ HInstruction** operand) {
  // Accept any already wider constant that would be handled properly by sign
  // extension when represented in the *width* of the given narrower data type
  // (the fact that char normally zero extends does not matter here).
  int64_t value = 0;
  if (IsInt64AndGet(instruction, /*out*/ &value)) {
    switch (type) {
      case Primitive::kPrimByte:
        if (std::numeric_limits<int8_t>::min() <= value &&
            std::numeric_limits<int8_t>::max() >= value) {
          *operand = instruction;
          return true;
        }
        return false;
      case Primitive::kPrimChar:
      case Primitive::kPrimShort:
        if (std::numeric_limits<int16_t>::min() <= value &&
            std::numeric_limits<int16_t>::max() >= value) {
          *operand = instruction;
          return true;
        }
        return false;
      default:
        return false;
    }
  }
  // An implicit widening conversion of a signed integer to an integral type sign-extends
  // the two's-complement representation of the integer value to fill the wider format.
  if (instruction->GetType() == type && (instruction->IsArrayGet() ||
                                         instruction->IsStaticFieldGet() ||
                                         instruction->IsInstanceFieldGet())) {
    switch (type) {
      case Primitive::kPrimByte:
      case Primitive::kPrimShort:
        *operand = instruction;
        return true;
      default:
        return false;
    }
  }
  // TODO: perhaps explicit conversions later too?
  //       (this may return something different from instruction)
  return false;
}

// Detect a zero extension from the given type. Returns true on success
// and sets the promoted operand.
static bool IsZeroExtensionAndGet(HInstruction* instruction,
                                  Primitive::Type type,
                                  /*out*/ HInstruction** operand) {
  // Accept any already wider constant that would be handled properly by zero
  // extension when represented in the *width* of the given narrower data type
  // (the fact that byte/short normally sign extend does not matter here).
  int64_t value = 0;
  if (IsInt64AndGet(instruction, /*out*/ &value)) {
    switch (type) {
      case Primitive::kPrimByte:
        if (std::numeric_limits<uint8_t>::min() <= value &&
            std::numeric_limits<uint8_t>::max() >= value) {
          *operand = instruction;
          return true;
        }
        return false;
      case Primitive::kPrimChar:
      case Primitive::kPrimShort:
        if (std::numeric_limits<uint16_t>::min() <= value &&
            std::numeric_limits<uint16_t>::max() >= value) {
          *operand = instruction;
          return true;
        }
        return false;
      default:
        return false;
    }
  }
  // An implicit widening conversion of a char to an integral type zero-extends
  // the representation of the char value to fill the wider format.
  if (instruction->GetType() == type && (instruction->IsArrayGet() ||
                                         instruction->IsStaticFieldGet() ||
                                         instruction->IsInstanceFieldGet())) {
    if (type == Primitive::kPrimChar) {
      *operand = instruction;
      return true;
    }
  }
  // A sign (or zero) extension followed by an explicit removal of just the
  // higher sign bits is equivalent to a zero extension of the underlying operand.
  if (instruction->IsAnd()) {
    int64_t mask = 0;
    HInstruction* a = instruction->InputAt(0);
    HInstruction* b = instruction->InputAt(1);
    // In (a & b) find (mask & b) or (a & mask) with sign or zero extension on the non-mask.
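    // For example, (short_operand & 0xffff) qualifies: masking off the higher
    // sign bits makes a sign-extended short behave exactly like a
    // zero-extended 16-bit operand.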
    if ((IsInt64AndGet(a, /*out*/ &mask) && (IsSignExtensionAndGet(b, type, /*out*/ operand) ||
                                             IsZeroExtensionAndGet(b, type, /*out*/ operand))) ||
        (IsInt64AndGet(b, /*out*/ &mask) && (IsSignExtensionAndGet(a, type, /*out*/ operand) ||
                                             IsZeroExtensionAndGet(a, type, /*out*/ operand)))) {
      switch ((*operand)->GetType()) {
        case Primitive::kPrimByte:  return mask == std::numeric_limits<uint8_t>::max();
        case Primitive::kPrimChar:
        case Primitive::kPrimShort: return mask == std::numeric_limits<uint16_t>::max();
        default: return false;
      }
    }
  }
  // TODO: perhaps explicit conversions later too?
  return false;
}

// Detect situations with same-extension narrower operands.
// Returns true on success and sets is_unsigned accordingly.
static bool IsNarrowerOperands(HInstruction* a,
                               HInstruction* b,
                               Primitive::Type type,
                               /*out*/ HInstruction** r,
                               /*out*/ HInstruction** s,
                               /*out*/ bool* is_unsigned) {
  if (IsSignExtensionAndGet(a, type, r) && IsSignExtensionAndGet(b, type, s)) {
    *is_unsigned = false;
    return true;
  } else if (IsZeroExtensionAndGet(a, type, r) && IsZeroExtensionAndGet(b, type, s)) {
    *is_unsigned = true;
    return true;
  }
  return false;
}

// As above, single operand.
static bool IsNarrowerOperand(HInstruction* a,
                              Primitive::Type type,
                              /*out*/ HInstruction** r,
                              /*out*/ bool* is_unsigned) {
  if (IsSignExtensionAndGet(a, type, r)) {
    *is_unsigned = false;
    return true;
  } else if (IsZeroExtensionAndGet(a, type, r)) {
    *is_unsigned = true;
    return true;
  }
  return false;
}

// Detect up to two instructions a and b, and an accumulated constant c.
static bool IsAddConstHelper(HInstruction* instruction,
                             /*out*/ HInstruction** a,
                             /*out*/ HInstruction** b,
                             /*out*/ int64_t* c,
                             int32_t depth) {
  static constexpr int32_t kMaxDepth = 8;  // don't search too deep
  int64_t value = 0;
  if (IsInt64AndGet(instruction, &value)) {
    *c += value;
    return true;
  } else if (instruction->IsAdd() && depth <= kMaxDepth) {
    return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) &&
           IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1);
  } else if (*a == nullptr) {
    *a = instruction;
    return true;
  } else if (*b == nullptr) {
    *b = instruction;
    return true;
  }
  return false;  // too many non-const operands
}

// Detect a + b + c with an optional constant c.
static bool IsAddConst(HInstruction* instruction,
                       /*out*/ HInstruction** a,
                       /*out*/ HInstruction** b,
                       /*out*/ int64_t* c) {
  if (instruction->IsAdd()) {
    // Try to find a + b and accumulated c.
    if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) &&
        IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) &&
        *b != nullptr) {
      return true;
    }
    // Found a + b.
    *a = instruction->InputAt(0);
    *b = instruction->InputAt(1);
    *c = 0;
    return true;
  }
  return false;
}

// Test vector restrictions.
static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
  return (restrictions & tested) != 0;
}
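
// For example, a target that lacks packed multiply and divide support sets
// (kNoMul | kNoDiv) in restrictions, after which
// HasVectorRestrictions(restrictions, kNoMul) reports true when a candidate
// multiplication is tested.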

// Insert an instruction.
static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
  DCHECK(block != nullptr);
  DCHECK(instruction != nullptr);
  block->InsertInstructionBefore(instruction, block->GetLastInstruction());
  return instruction;
}

//
// Class methods.
//

HLoopOptimization::HLoopOptimization(HGraph* graph,
                                     CompilerDriver* compiler_driver,
                                     HInductionVarAnalysis* induction_analysis)
    : HOptimization(graph, kLoopOptimizationPassName),
      compiler_driver_(compiler_driver),
      induction_range_(induction_analysis),
      loop_allocator_(nullptr),
      global_allocator_(graph_->GetArena()),
      top_loop_(nullptr),
      last_loop_(nullptr),
      iset_(nullptr),
      induction_simplication_count_(0),
      simplified_(false),
      vector_length_(0),
      vector_refs_(nullptr),
      vector_peeling_candidate_(nullptr),
      vector_runtime_test_a_(nullptr),
      vector_runtime_test_b_(nullptr),
      vector_map_(nullptr) {
}

void HLoopOptimization::Run() {
  // Skip if there is no loop or the graph has try-catch/irreducible loops.
  // TODO: make this less of a sledgehammer.
  if (!graph_->HasLoops() || graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) {
    return;
  }

  // Phase-local allocator that draws from the global pool. Since the allocator
  // itself resides on the stack, it is destructed on exiting Run(), which
  // implies its underlying memory is released immediately.
  ArenaAllocator allocator(global_allocator_->GetArenaPool());
  loop_allocator_ = &allocator;

  // Perform loop optimizations.
  LocalRun();
  if (top_loop_ == nullptr) {
    graph_->SetHasLoops(false);  // no more loops
  }

  // Detach.
  loop_allocator_ = nullptr;
  last_loop_ = top_loop_ = nullptr;
}

void HLoopOptimization::LocalRun() {
  // Build the linear order using the phase-local allocator. This step enables building
  // a loop hierarchy that properly reflects the outer-inner and previous-next relation.
  ArenaVector<HBasicBlock*> linear_order(loop_allocator_->Adapter(kArenaAllocLinearOrder));
  LinearizeGraph(graph_, loop_allocator_, &linear_order);

  // Build the loop hierarchy.
  for (HBasicBlock* block : linear_order) {
    if (block->IsLoopHeader()) {
      AddLoop(block->GetLoopInformation());
    }
  }

  // Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use
  // temporary data structures using the phase-local allocator. All new HIR
  // should use the global allocator.
  if (top_loop_ != nullptr) {
    ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
    ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
    ArenaSafeMap<HInstruction*, HInstruction*> map(
        std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
    // Attach.
    iset_ = &iset;
    vector_refs_ = &refs;
    vector_map_ = &map;
    // Traverse.
    TraverseLoopsInnerToOuter(top_loop_);
    // Detach.
    iset_ = nullptr;
    vector_refs_ = nullptr;
    vector_map_ = nullptr;
  }
}
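
// For example, a method with consecutive loops A { B } and C builds the
// hierarchy top_loop_ = A -> C via the next/previous links, with B registered
// as the inner loop of A.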

void HLoopOptimization::AddLoop(HLoopInformation* loop_info) {
  DCHECK(loop_info != nullptr);
  LoopNode* node = new (loop_allocator_) LoopNode(loop_info);
  if (last_loop_ == nullptr) {
    // First loop.
    DCHECK(top_loop_ == nullptr);
    last_loop_ = top_loop_ = node;
  } else if (loop_info->IsIn(*last_loop_->loop_info)) {
    // Inner loop.
    node->outer = last_loop_;
    DCHECK(last_loop_->inner == nullptr);
    last_loop_ = last_loop_->inner = node;
  } else {
    // Subsequent loop.
    while (last_loop_->outer != nullptr && !loop_info->IsIn(*last_loop_->outer->loop_info)) {
      last_loop_ = last_loop_->outer;
    }
    node->outer = last_loop_->outer;
    node->previous = last_loop_;
    DCHECK(last_loop_->next == nullptr);
    last_loop_ = last_loop_->next = node;
  }
}

void HLoopOptimization::RemoveLoop(LoopNode* node) {
  DCHECK(node != nullptr);
  DCHECK(node->inner == nullptr);
  if (node->previous != nullptr) {
    // Within sequence.
    node->previous->next = node->next;
    if (node->next != nullptr) {
      node->next->previous = node->previous;
    }
  } else {
    // First of sequence.
    if (node->outer != nullptr) {
      node->outer->inner = node->next;
    } else {
      top_loop_ = node->next;
    }
    if (node->next != nullptr) {
      node->next->outer = node->outer;
      node->next->previous = nullptr;
    }
  }
}

void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
  for ( ; node != nullptr; node = node->next) {
    // Visit inner loops first.
    uint32_t current_induction_simplification_count = induction_simplication_count_;
    if (node->inner != nullptr) {
      TraverseLoopsInnerToOuter(node->inner);
    }
    // Recompute induction information of this loop if the induction
    // of any inner loop has been simplified.
    if (current_induction_simplification_count != induction_simplication_count_) {
      induction_range_.ReVisit(node->loop_info);
    }
    // Repeat simplifications in the loop-body until no more changes occur.
    // Note that since each simplification consists of eliminating code (without
    // introducing new code), this process is always finite.
    do {
      simplified_ = false;
      SimplifyInduction(node);
      SimplifyBlocks(node);
    } while (simplified_);
    // Optimize inner loop.
    if (node->inner == nullptr) {
      OptimizeInnerLoop(node);
    }
  }
}

//
// Optimization.
//

void HLoopOptimization::SimplifyInduction(LoopNode* node) {
  HBasicBlock* header = node->loop_info->GetHeader();
  HBasicBlock* preheader = node->loop_info->GetPreHeader();
  // Scan the phis in the header to find opportunities to simplify an induction
  // cycle that is only used outside the loop. Replace these uses, if any, with
  // the last value and remove the induction cycle.
  // Examples: for (int i = 0; x != null; i++) { .... no i .... }
  //           for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
  for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) {
    HPhi* phi = it.Current()->AsPhi();
    iset_->clear();  // prepare phi induction
    if (TrySetPhiInduction(phi, /*restrict_uses*/ true) &&
        TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ false)) {
      // Note that it's ok to have replaced uses after the loop with the last value, without
      // being able to remove the cycle. Environment uses (which are the reason we may not be
      // able to remove the cycle) within the loop will still hold the right value.
      if (CanRemoveCycle()) {
        for (HInstruction* i : *iset_) {
          RemoveFromCycle(i);
        }
        simplified_ = true;
      }
    }
  }
}

void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
  // Iterate over all basic blocks in the loop-body.
  for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
    HBasicBlock* block = it.Current();
    // Remove dead instructions from the loop-body.
    RemoveDeadInstructions(block->GetPhis());
    RemoveDeadInstructions(block->GetInstructions());
    // Remove trivial control flow blocks from the loop-body.
    if (block->GetPredecessors().size() == 1 &&
        block->GetSuccessors().size() == 1 &&
        block->GetSingleSuccessor()->GetPredecessors().size() == 1) {
      simplified_ = true;
      block->MergeWith(block->GetSingleSuccessor());
    } else if (block->GetSuccessors().size() == 2) {
      // Trivial if block can be bypassed to either branch.
      HBasicBlock* succ0 = block->GetSuccessors()[0];
      HBasicBlock* succ1 = block->GetSuccessors()[1];
      HBasicBlock* meet0 = nullptr;
      HBasicBlock* meet1 = nullptr;
      if (succ0 != succ1 &&
          IsGotoBlock(succ0, &meet0) &&
          IsGotoBlock(succ1, &meet1) &&
          meet0 == meet1 &&  // meets again
          meet0 != block &&  // no self-loop
          meet0->GetPhis().IsEmpty()) {  // not used for merging
        simplified_ = true;
        succ0->DisconnectAndDelete();
        if (block->Dominates(meet0)) {
          block->RemoveDominatedBlock(meet0);
          succ1->AddDominatedBlock(meet0);
          meet0->SetDominator(succ1);
        }
      }
    }
  }
}

void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
  HBasicBlock* header = node->loop_info->GetHeader();
  HBasicBlock* preheader = node->loop_info->GetPreHeader();
  // Ensure loop header logic is finite.
  int64_t trip_count = 0;
  if (!induction_range_.IsFinite(node->loop_info, &trip_count)) {
    return;
  }

  // Ensure there is only a single loop-body (besides the header).
  HBasicBlock* body = nullptr;
  for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
    if (it.Current() != header) {
      if (body != nullptr) {
        return;
      }
      body = it.Current();
    }
  }
  CHECK(body != nullptr);
  // Ensure there is only a single exit point.
  if (header->GetSuccessors().size() != 2) {
    return;
  }
  HBasicBlock* exit = (header->GetSuccessors()[0] == body)
      ? header->GetSuccessors()[1]
      : header->GetSuccessors()[0];
  // Ensure exit can only be reached by exiting loop.
  if (exit->GetPredecessors().size() != 1) {
    return;
  }
  // Detect either an empty loop (no side effects other than plain iteration) or
  // a trivial loop (just iterating once). Replace subsequent index uses, if any,
  // with the last value and remove the loop, possibly after unrolling its body.
  HInstruction* phi = header->GetFirstPhi();
  iset_->clear();  // prepare phi induction
  if (TrySetSimpleLoopHeader(header)) {
    bool is_empty = IsEmptyBody(body);
    if ((is_empty || trip_count == 1) &&
        TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) {
      if (!is_empty) {
        // Unroll the loop-body, which sees initial value of the index.
        phi->ReplaceWith(phi->InputAt(0));
        preheader->MergeInstructionsWith(body);
      }
      body->DisconnectAndDelete();
      exit->RemovePredecessor(header);
      header->RemoveSuccessor(exit);
      header->RemoveDominatedBlock(exit);
      header->DisconnectAndDelete();
      preheader->AddSuccessor(exit);
      preheader->AddInstruction(new (global_allocator_) HGoto());
      preheader->AddDominatedBlock(exit);
      exit->SetDominator(preheader);
      RemoveLoop(node);  // update hierarchy
      return;
    }
  }

  // Vectorize loop, if possible and valid.
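  // For example, a countable loop such as
  //   for (int i = 0; i < n; i++) a[i] = b[i] + 1;
  // with a simple header and a single body block is a typical candidate.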
  if (kEnableVectorization) {
    iset_->clear();  // prepare phi induction
    if (TrySetSimpleLoopHeader(header) &&
        ShouldVectorize(node, body, trip_count) &&
        TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) {
      Vectorize(node, body, exit, trip_count);
      graph_->SetHasSIMD(true);  // flag SIMD usage
      return;
    }
  }
}

//
// Loop vectorization. The implementation is based on the book by Aart J.C. Bik:
// "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance."
// Intel Press, June, 2004 (http://www.aartbik.com/).
//

bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count) {
  // Reset vector bookkeeping.
  vector_length_ = 0;
  vector_refs_->clear();
  vector_peeling_candidate_ = nullptr;
  vector_runtime_test_a_ = vector_runtime_test_b_ = nullptr;

  // Phis in the loop-body prevent vectorization.
  if (!block->GetPhis().IsEmpty()) {
    return false;
  }

  // Scan the loop-body, starting a right-hand-side tree traversal at each left-hand-side
  // occurrence, which allows passing attributes down the use tree.
  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) {
      return false;  // failure to vectorize a left-hand-side
    }
  }

  // Does vectorization seem profitable?
  if (!IsVectorizationProfitable(trip_count)) {
    return false;
  }

  // Data dependence analysis. Find each pair of references with same type, where
  // at least one is a write. Each such pair denotes a possible data dependence.
  // This analysis exploits the property that differently typed arrays cannot be
  // aliased, as well as the property that references either point to the same
  // array or to two completely disjoint arrays, i.e., no partial aliasing.
  // Other than a few simple heuristics, no detailed subscript analysis is done.
  for (auto i = vector_refs_->begin(); i != vector_refs_->end(); ++i) {
    for (auto j = i; ++j != vector_refs_->end(); ) {
      if (i->type == j->type && (i->lhs || j->lhs)) {
        // Found same-typed a[i+x] vs. b[i+y], where at least one is a write.
        HInstruction* a = i->base;
        HInstruction* b = j->base;
        HInstruction* x = i->offset;
        HInstruction* y = j->offset;
        if (a == b) {
          // Found a[i+x] vs. a[i+y]. Accept if x == y (loop-independent data dependence).
          // Conservatively assume a loop-carried data dependence otherwise, and reject.
          if (x != y) {
            return false;
          }
        } else {
          // Found a[i+x] vs. b[i+y]. Accept if x == y (at worst loop-independent data dependence).
          // Conservatively assume a potential loop-carried data dependence otherwise, avoided by
          // generating an explicit a != b disambiguation runtime test on the two references.
          if (x != y) {
            // To avoid excessive overhead, we only accept one a != b test.
            if (vector_runtime_test_a_ == nullptr) {
              // First test found.
              vector_runtime_test_a_ = a;
              vector_runtime_test_b_ = b;
            } else if ((vector_runtime_test_a_ != a || vector_runtime_test_b_ != b) &&
                       (vector_runtime_test_a_ != b || vector_runtime_test_b_ != a)) {
              return false;  // second test would be needed
            }
          }
        }
      }
    }
  }

  // Consider dynamic loop peeling for alignment.
  SetPeelingCandidate(trip_count);

  // Success!
  return true;
}

void HLoopOptimization::Vectorize(LoopNode* node,
                                  HBasicBlock* block,
                                  HBasicBlock* exit,
                                  int64_t trip_count) {
  Primitive::Type induc_type = Primitive::kPrimInt;
  HBasicBlock* header = node->loop_info->GetHeader();
  HBasicBlock* preheader = node->loop_info->GetPreHeader();

  // Pick a loop unrolling factor for the vector loop.
  uint32_t unroll = GetUnrollingFactor(block, trip_count);
  uint32_t chunk = vector_length_ * unroll;

  // A cleanup loop is needed, at least, for any unknown trip count or
  // for a known trip count with remainder iterations after vectorization.
  bool needs_cleanup = trip_count == 0 || (trip_count % chunk) != 0;

  // Adjust vector bookkeeping.
  iset_->clear();  // prepare phi induction
  bool is_simple_loop_header = TrySetSimpleLoopHeader(header);  // fills iset_
  DCHECK(is_simple_loop_header);
  vector_header_ = header;
  vector_body_ = block;

  // Generate dynamic loop peeling trip count, if needed:
  //   ptc = <peeling-needed-for-candidate>
  HInstruction* ptc = nullptr;
  if (vector_peeling_candidate_ != nullptr) {
    DCHECK_LT(vector_length_, trip_count) << "dynamic peeling currently requires known trip count";
    //
    // TODO: Implement this. Compute address of first access memory location and
    //       compute peeling factor to obtain kAlignedBase alignment.
    //
    needs_cleanup = true;
  }

  // Generate loop control:
  //   stc = <trip-count>;
  //   vtc = stc - (stc - ptc) % chunk;
  //   i = 0;
  HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader);
  HInstruction* vtc = stc;
  if (needs_cleanup) {
    DCHECK(IsPowerOfTwo(chunk));
    HInstruction* diff = stc;
    if (ptc != nullptr) {
      diff = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, ptc));
    }
    HInstruction* rem = Insert(
        preheader,
        new (global_allocator_) HAnd(induc_type,
                                     diff,
                                     graph_->GetIntConstant(chunk - 1)));
    vtc = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, rem));
  }
  vector_index_ = graph_->GetIntConstant(0);

  // Generate runtime disambiguation test:
  //   vtc = a != b ? vtc : 0;
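  // If the references turn out to alias at runtime, the select folds the
  // vector trip count to zero, so the vector loop is skipped entirely and the
  // sequential cleanup loop handles all iterations.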
  if (vector_runtime_test_a_ != nullptr) {
    HInstruction* rt = Insert(
        preheader,
        new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_));
    vtc = Insert(preheader,
                 new (global_allocator_) HSelect(rt, vtc, graph_->GetIntConstant(0), kNoDexPc));
    needs_cleanup = true;
  }

  // Generate dynamic peeling loop for alignment, if needed:
  //   for ( ; i < ptc; i += 1)
  //     <loop-body>
  if (ptc != nullptr) {
    vector_mode_ = kSequential;
    GenerateNewLoop(node,
                    block,
                    graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
                    vector_index_,
                    ptc,
                    graph_->GetIntConstant(1),
                    /*unroll*/ 1);
  }

  // Generate vector loop, possibly further unrolled:
  //   for ( ; i < vtc; i += chunk)
  //     <vectorized-loop-body>
  vector_mode_ = kVector;
  GenerateNewLoop(node,
                  block,
                  graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
                  vector_index_,
                  vtc,
                  graph_->GetIntConstant(vector_length_),  // increment per unroll
                  unroll);
  HLoopInformation* vloop = vector_header_->GetLoopInformation();

  // Generate cleanup loop, if needed:
  //   for ( ; i < stc; i += 1)
  //     <loop-body>
  if (needs_cleanup) {
    vector_mode_ = kSequential;
    GenerateNewLoop(node,
                    block,
                    graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
                    vector_index_,
                    stc,
                    graph_->GetIntConstant(1),
                    /*unroll*/ 1);
  }

  // Remove the original loop by disconnecting the body block
  // and removing all instructions from the header.
  block->DisconnectAndDelete();
  while (!header->GetFirstInstruction()->IsGoto()) {
    header->RemoveInstruction(header->GetFirstInstruction());
  }
  // Update loop hierarchy: the old header now resides in the same outer loop
  // as the old preheader. Note that we don't bother putting sequential
  // loops back in the hierarchy at this point.
  header->SetLoopInformation(preheader->GetLoopInformation());  // outward
  node->loop_info = vloop;
}

void HLoopOptimization::GenerateNewLoop(LoopNode* node,
                                        HBasicBlock* block,
                                        HBasicBlock* new_preheader,
                                        HInstruction* lo,
                                        HInstruction* hi,
                                        HInstruction* step,
                                        uint32_t unroll) {
  DCHECK(unroll == 1 || vector_mode_ == kVector);
  Primitive::Type induc_type = Primitive::kPrimInt;
  // Prepare new loop.
  vector_preheader_ = new_preheader;
  vector_header_ = vector_preheader_->GetSingleSuccessor();
  vector_body_ = vector_header_->GetSuccessors()[1];
  HPhi* phi = new (global_allocator_) HPhi(global_allocator_,
                                           kNoRegNumber,
                                           0,
                                           HPhi::ToPhiType(induc_type));
  // Generate header and prepare body.
  //   for (i = lo; i < hi; i += step)
  //     <loop-body>
  HInstruction* cond = new (global_allocator_) HAboveOrEqual(phi, hi);
  vector_header_->AddPhi(phi);
  vector_header_->AddInstruction(cond);
  vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
  vector_index_ = phi;
  for (uint32_t u = 0; u < unroll; u++) {
    // Clear map, leaving loop invariants set up during unrolling.
    if (u == 0) {
      vector_map_->clear();
    } else {
      for (auto i = vector_map_->begin(); i != vector_map_->end(); ) {
        if (i->second->IsVecReplicateScalar()) {
          DCHECK(node->loop_info->IsDefinedOutOfTheLoop(i->first));
          ++i;
        } else {
          i = vector_map_->erase(i);
        }
      }
    }
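    // Only HVecReplicateScalar nodes (hoisted loop invariants) survive between
    // unroll copies; everything else is regenerated for each copy of the body.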
    // Generate instruction map.
    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
      bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true);
      DCHECK(vectorized_def);
    }
    // Generate body from the instruction map, but in original program order.
    HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment();
    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
      auto i = vector_map_->find(it.Current());
      if (i != vector_map_->end() && !i->second->IsInBlock()) {
        Insert(vector_body_, i->second);
        // Deal with instructions that need an environment, such as the scalar intrinsics.
        if (i->second->NeedsEnvironment()) {
          i->second->CopyEnvironmentFromWithLoopPhiAdjustment(env, vector_header_);
        }
      }
    }
    vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step);
    Insert(vector_body_, vector_index_);
  }
  // Finalize phi for the loop index.
  phi->AddInput(lo);
  phi->AddInput(vector_index_);
  vector_index_ = phi;
}

// TODO: accept reductions at left-hand-side, mixed-type store idioms, etc.
bool HLoopOptimization::VectorizeDef(LoopNode* node,
                                     HInstruction* instruction,
                                     bool generate_code) {
  // Accept a left-hand-side array base[index] for
  // (1) supported vector type,
  // (2) loop-invariant base,
  // (3) unit stride index,
  // (4) vectorizable right-hand-side value.
  uint64_t restrictions = kNone;
  if (instruction->IsArraySet()) {
    Primitive::Type type = instruction->AsArraySet()->GetComponentType();
    HInstruction* base = instruction->InputAt(0);
    HInstruction* index = instruction->InputAt(1);
    HInstruction* value = instruction->InputAt(2);
    HInstruction* offset = nullptr;
    if (TrySetVectorType(type, &restrictions) &&
        node->loop_info->IsDefinedOutOfTheLoop(base) &&
        induction_range_.IsUnitStride(instruction, index, graph_, &offset) &&
        VectorizeUse(node, value, generate_code, type, restrictions)) {
      if (generate_code) {
        GenerateVecSub(index, offset);
        GenerateVecMem(instruction, vector_map_->Get(index), vector_map_->Get(value), offset, type);
      } else {
        vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ true));
      }
      return true;
    }
    return false;
  }
  // Branch back okay.
  if (instruction->IsGoto()) {
    return true;
  }
  // Otherwise accept only expressions with no effects outside the immediate loop-body.
  // Note that actual uses are inspected during right-hand-side tree traversal.
  return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite();
}

// TODO: saturation arithmetic.
bool HLoopOptimization::VectorizeUse(LoopNode* node,
                                     HInstruction* instruction,
                                     bool generate_code,
                                     Primitive::Type type,
                                     uint64_t restrictions) {
  // Accept anything for which code has already been generated.
  if (generate_code) {
    if (vector_map_->find(instruction) != vector_map_->end()) {
      return true;
    }
  }
  // Continue the right-hand-side tree traversal, passing in proper
  // types and vector restrictions along the way. During code generation,
  // all new nodes are drawn from the global allocator.
  if (node->loop_info->IsDefinedOutOfTheLoop(instruction)) {
    // Accept invariant use, using scalar expansion.
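    // For example, in a[i] = b[i] + n, the loop-invariant n is broadcast once
    // across all vector lanes in the preheader (HVecReplicateScalar).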
    if (generate_code) {
      GenerateVecInv(instruction, type);
    }
    return true;
  } else if (instruction->IsArrayGet()) {
    // Deal with vector restrictions.
    if (instruction->AsArrayGet()->IsStringCharAt() &&
        HasVectorRestrictions(restrictions, kNoStringCharAt)) {
      return false;
    }
    // Accept a right-hand-side array base[index] for
    // (1) exact matching vector type,
    // (2) loop-invariant base,
    // (3) unit stride index.
    HInstruction* base = instruction->InputAt(0);
    HInstruction* index = instruction->InputAt(1);
    HInstruction* offset = nullptr;
    if (type == instruction->GetType() &&
        node->loop_info->IsDefinedOutOfTheLoop(base) &&
        induction_range_.IsUnitStride(instruction, index, graph_, &offset)) {
      if (generate_code) {
        GenerateVecSub(index, offset);
        GenerateVecMem(instruction, vector_map_->Get(index), nullptr, offset, type);
      } else {
        vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ false));
      }
      return true;
    }
  } else if (instruction->IsTypeConversion()) {
    // Accept particular type conversions.
    HTypeConversion* conversion = instruction->AsTypeConversion();
    HInstruction* opa = conversion->InputAt(0);
    Primitive::Type from = conversion->GetInputType();
    Primitive::Type to = conversion->GetResultType();
    if ((to == Primitive::kPrimByte ||
         to == Primitive::kPrimChar ||
         to == Primitive::kPrimShort) && from == Primitive::kPrimInt) {
      // Accept a "narrowing" type conversion from a "wider" computation for
      // (1) conversion into final required type,
      // (2) vectorizable operand,
      // (3) "wider" operations cannot bring in higher order bits.
      if (to == type && VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) {
        if (generate_code) {
          if (vector_mode_ == kVector) {
            vector_map_->Put(instruction, vector_map_->Get(opa));  // operand pass-through
          } else {
            GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
          }
        }
        return true;
      }
    } else if (to == Primitive::kPrimFloat && from == Primitive::kPrimInt) {
      DCHECK_EQ(to, type);
      // Accept int to float conversion for
      // (1) supported int,
      // (2) vectorizable operand.
      if (TrySetVectorType(from, &restrictions) &&
          VectorizeUse(node, opa, generate_code, from, restrictions)) {
        if (generate_code) {
          GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
        }
        return true;
      }
    }
    return false;
  } else if (instruction->IsNeg() || instruction->IsNot() || instruction->IsBooleanNot()) {
    // Accept unary operator for vectorizable operand.
    HInstruction* opa = instruction->InputAt(0);
    if (VectorizeUse(node, opa, generate_code, type, restrictions)) {
      if (generate_code) {
        GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
      }
      return true;
    }
  } else if (instruction->IsAdd() || instruction->IsSub() ||
             instruction->IsMul() || instruction->IsDiv() ||
             instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) {
    // Deal with vector restrictions.
    if ((instruction->IsMul() && HasVectorRestrictions(restrictions, kNoMul)) ||
        (instruction->IsDiv() && HasVectorRestrictions(restrictions, kNoDiv))) {
      return false;
    }
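    // For example, a byte-typed a[i] * b[i] is rejected here on x86, where
    // kNoMul is set for byte data (SSE4.1 has no packed 8-bit multiply).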
    // Accept binary operator for vectorizable operands.
    HInstruction* opa = instruction->InputAt(0);
    HInstruction* opb = instruction->InputAt(1);
    if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
        VectorizeUse(node, opb, generate_code, type, restrictions)) {
      if (generate_code) {
        GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type);
      }
      return true;
    }
  } else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) {
    // Recognize vectorization idioms.
    if (VectorizeHalvingAddIdiom(node, instruction, generate_code, type, restrictions)) {
      return true;
    }
    // Deal with vector restrictions.
    HInstruction* opa = instruction->InputAt(0);
    HInstruction* opb = instruction->InputAt(1);
    HInstruction* r = opa;
    bool is_unsigned = false;
    if ((HasVectorRestrictions(restrictions, kNoShift)) ||
        (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
      return false;  // unsupported instruction
    } else if (HasVectorRestrictions(restrictions, kNoHiBits)) {
      // Shifts right need extra care to account for higher order bits.
      // TODO: less likely shr/unsigned and ushr/signed can be handled by flipping signedness.
      if (instruction->IsShr() &&
          (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
        return false;  // reject, unless all operands are sign-extension narrower
      } else if (instruction->IsUShr() &&
                 (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || !is_unsigned)) {
        return false;  // reject, unless all operands are zero-extension narrower
      }
    }
    // Accept shift operator for vectorizable/invariant operands.
    // TODO: accept symbolic, albeit loop invariant shift factors.
    DCHECK(r != nullptr);
    if (generate_code && vector_mode_ != kVector) {  // de-idiom
      r = opa;
    }
    int64_t distance = 0;
    if (VectorizeUse(node, r, generate_code, type, restrictions) &&
        IsInt64AndGet(opb, /*out*/ &distance)) {
      // Restrict shift distance to packed data type width.
      int64_t max_distance = Primitive::ComponentSize(type) * 8;
      if (0 <= distance && distance < max_distance) {
        if (generate_code) {
          GenerateVecOp(instruction, vector_map_->Get(r), opb, type);
        }
        return true;
      }
    }
  } else if (instruction->IsInvokeStaticOrDirect()) {
    // Accept particular intrinsics.
    HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect();
    switch (invoke->GetIntrinsic()) {
      case Intrinsics::kMathAbsInt:
      case Intrinsics::kMathAbsLong:
      case Intrinsics::kMathAbsFloat:
      case Intrinsics::kMathAbsDouble: {
        // Deal with vector restrictions.
        HInstruction* opa = instruction->InputAt(0);
        HInstruction* r = opa;
        bool is_unsigned = false;
        if (HasVectorRestrictions(restrictions, kNoAbs)) {
          return false;
        } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
                   (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
          return false;  // reject, unless operand is sign-extension narrower
        }
        // Accept ABS(x) for vectorizable operand.
        DCHECK(r != nullptr);
        if (generate_code && vector_mode_ != kVector) {  // de-idiom
          r = opa;
        }
        if (VectorizeUse(node, r, generate_code, type, restrictions)) {
          if (generate_code) {
            GenerateVecOp(instruction, vector_map_->Get(r), nullptr, type);
          }
          return true;
        }
        return false;
      }
      case Intrinsics::kMathMinIntInt:
      case Intrinsics::kMathMinLongLong:
      case Intrinsics::kMathMinFloatFloat:
      case Intrinsics::kMathMinDoubleDouble:
      case Intrinsics::kMathMaxIntInt:
      case Intrinsics::kMathMaxLongLong:
      case Intrinsics::kMathMaxFloatFloat:
      case Intrinsics::kMathMaxDoubleDouble: {
        // Deal with vector restrictions.
        HInstruction* opa = instruction->InputAt(0);
        HInstruction* opb = instruction->InputAt(1);
        HInstruction* r = opa;
        HInstruction* s = opb;
        bool is_unsigned = false;
        if (HasVectorRestrictions(restrictions, kNoMinMax)) {
          return false;
        } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
                   !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
          return false;  // reject, unless all operands are same-extension narrower
        }
        // Accept MIN/MAX(x, y) for vectorizable operands.
        DCHECK(r != nullptr && s != nullptr);
        if (generate_code && vector_mode_ != kVector) {  // de-idiom
          r = opa;
          s = opb;
        }
        if (VectorizeUse(node, r, generate_code, type, restrictions) &&
            VectorizeUse(node, s, generate_code, type, restrictions)) {
          if (generate_code) {
            GenerateVecOp(
                instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
          }
          return true;
        }
        return false;
      }
      default:
        return false;
    }  // switch
  }
  return false;
}

bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restrictions) {
  const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
  switch (compiler_driver_->GetInstructionSet()) {
    case kArm:
    case kThumb2:
      // Allow vectorization for all ARM devices, because Android assumes that
      // ARM 32-bit always supports advanced SIMD.
      switch (type) {
        case Primitive::kPrimBoolean:
        case Primitive::kPrimByte:
          *restrictions |= kNoDiv;
          return TrySetVectorLength(8);
        case Primitive::kPrimChar:
        case Primitive::kPrimShort:
          *restrictions |= kNoDiv | kNoStringCharAt;
          return TrySetVectorLength(4);
        case Primitive::kPrimInt:
          *restrictions |= kNoDiv;
          return TrySetVectorLength(2);
        default:
          break;
      }
      return false;
    case kArm64:
      // Allow vectorization for all ARM devices, because Android assumes that
      // ARMv8 AArch64 always supports advanced SIMD.
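      // Advanced SIMD registers are 128 bits wide on AArch64, so the lengths
      // below give 16/8/4/2 lanes for byte/short/int/long data, respectively.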
      switch (type) {
        case Primitive::kPrimBoolean:
        case Primitive::kPrimByte:
          *restrictions |= kNoDiv;
          return TrySetVectorLength(16);
        case Primitive::kPrimChar:
        case Primitive::kPrimShort:
          *restrictions |= kNoDiv;
          return TrySetVectorLength(8);
        case Primitive::kPrimInt:
          *restrictions |= kNoDiv;
          return TrySetVectorLength(4);
        case Primitive::kPrimLong:
          *restrictions |= kNoDiv | kNoMul | kNoMinMax;
          return TrySetVectorLength(2);
        case Primitive::kPrimFloat:
          return TrySetVectorLength(4);
        case Primitive::kPrimDouble:
          return TrySetVectorLength(2);
        default:
          return false;
      }
    case kX86:
    case kX86_64:
      // Allow vectorization for SSE4.1-enabled X86 devices only (128-bit vectors).
      if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) {
        switch (type) {
          case Primitive::kPrimBoolean:
          case Primitive::kPrimByte:
            *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
            return TrySetVectorLength(16);
          case Primitive::kPrimChar:
          case Primitive::kPrimShort:
            *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
            return TrySetVectorLength(8);
          case Primitive::kPrimInt:
            *restrictions |= kNoDiv;
            return TrySetVectorLength(4);
          case Primitive::kPrimLong:
            *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax;
            return TrySetVectorLength(2);
          case Primitive::kPrimFloat:
            *restrictions |= kNoMinMax;  // -0.0 vs +0.0
            return TrySetVectorLength(4);
          case Primitive::kPrimDouble:
            *restrictions |= kNoMinMax;  // -0.0 vs +0.0
            return TrySetVectorLength(2);
          default:
            break;
        }  // switch type
      }
      return false;
    case kMips:
      if (features->AsMipsInstructionSetFeatures()->HasMsa()) {
        switch (type) {
          case Primitive::kPrimBoolean:
          case Primitive::kPrimByte:
            *restrictions |= kNoDiv;
            return TrySetVectorLength(16);
          case Primitive::kPrimChar:
          case Primitive::kPrimShort:
            *restrictions |= kNoDiv | kNoStringCharAt;
            return TrySetVectorLength(8);
          case Primitive::kPrimInt:
            *restrictions |= kNoDiv;
            return TrySetVectorLength(4);
          case Primitive::kPrimLong:
            *restrictions |= kNoDiv;
            return TrySetVectorLength(2);
          case Primitive::kPrimFloat:
            *restrictions |= kNoMinMax;  // min/max(x, NaN)
            return TrySetVectorLength(4);
          case Primitive::kPrimDouble:
            *restrictions |= kNoMinMax;  // min/max(x, NaN)
            return TrySetVectorLength(2);
          default:
            break;
        }  // switch type
      }
      return false;
    case kMips64:
      if (features->AsMips64InstructionSetFeatures()->HasMsa()) {
        switch (type) {
          case Primitive::kPrimBoolean:
          case Primitive::kPrimByte:
            *restrictions |= kNoDiv;
            return TrySetVectorLength(16);
          case Primitive::kPrimChar:
          case Primitive::kPrimShort:
            *restrictions |= kNoDiv | kNoStringCharAt;
            return TrySetVectorLength(8);
          case Primitive::kPrimInt:
            *restrictions |= kNoDiv;
            return TrySetVectorLength(4);
          case Primitive::kPrimLong:
            *restrictions |= kNoDiv;
            return TrySetVectorLength(2);
          case Primitive::kPrimFloat:
            *restrictions |= kNoMinMax;  // min/max(x, NaN)
            return TrySetVectorLength(4);
          case Primitive::kPrimDouble:
            *restrictions |= kNoMinMax;  // min/max(x, NaN)
            return TrySetVectorLength(2);
          default:
            break;
        }  // switch type
      }
      return false;
    default:
      return false;
  }  // switch instruction set
}

bool HLoopOptimization::TrySetVectorLength(uint32_t length) {
  DCHECK(IsPowerOfTwo(length) && length >= 2u);
  // First time set?
  if (vector_length_ == 0) {
    vector_length_ = length;
  }
  // Different types are acceptable within a loop-body, as long as all the corresponding vector
  // lengths match exactly to obtain a uniform traversal through the vector iteration space
  // (idiomatic exceptions to this rule can be handled by further unrolling sub-expressions).
  return vector_length_ == length;
}

void HLoopOptimization::GenerateVecInv(HInstruction* org, Primitive::Type type) {
  if (vector_map_->find(org) == vector_map_->end()) {
    // In scalar code, just use a self pass-through for scalar invariants
    // (viz. expression remains itself).
    if (vector_mode_ == kSequential) {
      vector_map_->Put(org, org);
      return;
    }
    // In vector code, explicit scalar expansion is needed.
    HInstruction* vector = new (global_allocator_) HVecReplicateScalar(
        global_allocator_, org, type, vector_length_);
    vector_map_->Put(org, Insert(vector_preheader_, vector));
  }
}

void HLoopOptimization::GenerateVecSub(HInstruction* org, HInstruction* offset) {
  if (vector_map_->find(org) == vector_map_->end()) {
    HInstruction* subscript = vector_index_;
    int64_t value = 0;
    if (!IsInt64AndGet(offset, &value) || value != 0) {
      subscript = new (global_allocator_) HAdd(Primitive::kPrimInt, subscript, offset);
      if (org->IsPhi()) {
        Insert(vector_body_, subscript);  // lacks layout placeholder
      }
    }
    vector_map_->Put(org, subscript);
  }
}
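
// For example, for a source subscript a[i + 4], the generated subscript is the
// current vector index plus the constant offset 4, while a zero offset simply
// reuses the vector index directly.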

void HLoopOptimization::GenerateVecMem(HInstruction* org,
                                       HInstruction* opa,
                                       HInstruction* opb,
                                       HInstruction* offset,
                                       Primitive::Type type) {
  HInstruction* vector = nullptr;
  if (vector_mode_ == kVector) {
    // Vector store or load.
    HInstruction* base = org->InputAt(0);
    if (opb != nullptr) {
      vector = new (global_allocator_) HVecStore(
          global_allocator_, base, opa, opb, type, vector_length_);
    } else {
      bool is_string_char_at = org->AsArrayGet()->IsStringCharAt();
      vector = new (global_allocator_) HVecLoad(
          global_allocator_, base, opa, type, vector_length_, is_string_char_at);
    }
    // Known dynamically enforced alignment?
    // TODO: detect offset + constant differences.
    // TODO: long run, static alignment analysis?
    if (vector_peeling_candidate_ != nullptr &&
        vector_peeling_candidate_->base == base &&
        vector_peeling_candidate_->offset == offset) {
      vector->AsVecMemoryOperation()->SetAlignment(Alignment(kAlignedBase, 0));
    }
  } else {
    // Scalar store or load.
    DCHECK(vector_mode_ == kSequential);
    if (opb != nullptr) {
      vector = new (global_allocator_) HArraySet(org->InputAt(0), opa, opb, type, kNoDexPc);
    } else {
      bool is_string_char_at = org->AsArrayGet()->IsStringCharAt();
      vector = new (global_allocator_) HArrayGet(
          org->InputAt(0), opa, type, kNoDexPc, is_string_char_at);
    }
  }
  vector_map_->Put(org, vector);
}

#define GENERATE_VEC(x, y) \
  if (vector_mode_ == kVector) { \
    vector = (x); \
  } else { \
    DCHECK(vector_mode_ == kSequential); \
    vector = (y); \
  } \
  break;
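
// For example, for HInstruction::kAdd the macro selects a packed HVecAdd in
// vector mode and a scalar HAdd in sequential mode (the peeling and cleanup
// loops), then breaks out of the dispatch switch below.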

void HLoopOptimization::GenerateVecOp(HInstruction* org,
                                      HInstruction* opa,
                                      HInstruction* opb,
                                      Primitive::Type type,
                                      bool is_unsigned) {
  if (vector_mode_ == kSequential) {
    // Non-converting scalar code follows implicit integral promotion.
    if (!org->IsTypeConversion() && (type == Primitive::kPrimBoolean ||
                                     type == Primitive::kPrimByte ||
                                     type == Primitive::kPrimChar ||
                                     type == Primitive::kPrimShort)) {
      type = Primitive::kPrimInt;
    }
  }
  HInstruction* vector = nullptr;
  switch (org->GetKind()) {
    case HInstruction::kNeg:
      DCHECK(opb == nullptr);
      GENERATE_VEC(
          new (global_allocator_) HVecNeg(global_allocator_, opa, type, vector_length_),
          new (global_allocator_) HNeg(type, opa));
    case HInstruction::kNot:
      DCHECK(opb == nullptr);
      GENERATE_VEC(
          new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_),
          new (global_allocator_) HNot(type, opa));
    case HInstruction::kBooleanNot:
      DCHECK(opb == nullptr);
      GENERATE_VEC(
          new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_),
          new (global_allocator_) HBooleanNot(opa));
    case HInstruction::kTypeConversion:
      DCHECK(opb == nullptr);
      GENERATE_VEC(
          new (global_allocator_) HVecCnv(global_allocator_, opa, type, vector_length_),
          new (global_allocator_) HTypeConversion(type, opa, kNoDexPc));
    case HInstruction::kAdd:
      GENERATE_VEC(
          new (global_allocator_) HVecAdd(global_allocator_, opa, opb, type, vector_length_),
          new (global_allocator_) HAdd(type, opa, opb));
    case HInstruction::kSub:
      GENERATE_VEC(
          new (global_allocator_) HVecSub(global_allocator_, opa, opb, type, vector_length_),
          new (global_allocator_) HSub(type, opa, opb));
    case HInstruction::kMul:
      GENERATE_VEC(
          new (global_allocator_) HVecMul(global_allocator_, opa, opb, type, vector_length_),
          new (global_allocator_) HMul(type, opa, opb));
    case HInstruction::kDiv:
      GENERATE_VEC(
          new (global_allocator_) HVecDiv(global_allocator_, opa, opb, type, vector_length_),
          new (global_allocator_) HDiv(type, opa, opb, kNoDexPc));
    case HInstruction::kAnd:
      GENERATE_VEC(
          new (global_allocator_) HVecAnd(global_allocator_, opa, opb, type, vector_length_),
          new (global_allocator_) HAnd(type, opa, opb));
    case HInstruction::kOr:
      GENERATE_VEC(
          new (global_allocator_) HVecOr(global_allocator_, opa, opb, type, vector_length_),
          new (global_allocator_) HOr(type, opa, opb));
    case HInstruction::kXor:
      GENERATE_VEC(
          new (global_allocator_) HVecXor(global_allocator_, opa, opb, type, vector_length_),
          new (global_allocator_) HXor(type, opa, opb));
    case HInstruction::kShl:
      GENERATE_VEC(
          new (global_allocator_) HVecShl(global_allocator_, opa, opb, type, vector_length_),
          new (global_allocator_) HShl(type, opa, opb));
    case HInstruction::kShr:
      GENERATE_VEC(
          new (global_allocator_) HVecShr(global_allocator_, opa, opb, type, vector_length_),
          new (global_allocator_) HShr(type, opa, opb));
    case HInstruction::kUShr:
      GENERATE_VEC(
          new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_),
          new (global_allocator_) HUShr(type, opa, opb));
    case HInstruction::kInvokeStaticOrDirect: {
      HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect();
      if (vector_mode_ == kVector) {
        switch (invoke->GetIntrinsic()) {
          case Intrinsics::kMathAbsInt:
          case Intrinsics::kMathAbsLong:
          case Intrinsics::kMathAbsFloat:
          case Intrinsics::kMathAbsDouble:
            DCHECK(opb == nullptr);
            vector = new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_);
            break;
          case Intrinsics::kMathMinIntInt:
          case Intrinsics::kMathMinLongLong:
          case Intrinsics::kMathMinFloatFloat:
          case Intrinsics::kMathMinDoubleDouble: {
            vector = new (global_allocator_)
                HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
            break;
          }
          case Intrinsics::kMathMaxIntInt:
          case Intrinsics::kMathMaxLongLong:
          case Intrinsics::kMathMaxFloatFloat:
          case Intrinsics::kMathMaxDoubleDouble: {
            vector = new (global_allocator_)
                HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
            break;
          }
          default:
            LOG(FATAL) << "Unsupported SIMD intrinsic";
            UNREACHABLE();
        }  // switch invoke
      } else {
        // In scalar code, simply clone the method invoke, and replace its operands with the
        // corresponding new scalar instructions in the loop. The instruction will get an
        // environment while being inserted from the instruction map in original program order.
        DCHECK(vector_mode_ == kSequential);
        size_t num_args = invoke->GetNumberOfArguments();
        HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect(
            global_allocator_,
            num_args,
            invoke->GetType(),
            invoke->GetDexPc(),
            invoke->GetDexMethodIndex(),
            invoke->GetResolvedMethod(),
            invoke->GetDispatchInfo(),
            invoke->GetInvokeType(),
            invoke->GetTargetMethod(),
            invoke->GetClinitCheckRequirement());
        HInputsRef inputs = invoke->GetInputs();
        size_t num_inputs = inputs.size();
        DCHECK_LE(num_args, num_inputs);
        DCHECK_EQ(num_inputs, new_invoke->GetInputs().size());  // both invokes agree
        for (size_t index = 0; index < num_inputs; ++index) {
          HInstruction* new_input = index < num_args
              ? vector_map_->Get(inputs[index])
              : inputs[index];  // beyond arguments: just pass through
          new_invoke->SetArgumentAt(index, new_input);
        }
        new_invoke->SetIntrinsic(invoke->GetIntrinsic(),
                                 kNeedsEnvironmentOrCache,
                                 kNoSideEffects,
                                 kNoThrow);
        vector = new_invoke;
      }
      break;
    }
    default:
      break;
  }  // switch
  CHECK(vector != nullptr) << "Unsupported SIMD operator";
  vector_map_->Put(org, vector);
}

#undef GENERATE_VEC

//
// Vectorization idioms.
//

// Method recognizes the following idioms:
//   rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b
//   regular  halving add (a + b)     >> 1 for unsigned/signed operands a, b
// Provided that the operands are promoted to a wider form to do the arithmetic and
// then cast back to narrower form, the idioms can be mapped into an efficient SIMD
// implementation that operates directly in narrower form (plus one extra bit).
// TODO: current version recognizes implicit byte/short/char widening only;
//       explicit widening from int to long could be added later.
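// For example, assuming byte operands that Java promotes to int for the
// arithmetic, a loop body such as
//   c[i] = (byte) ((a[i] + b[i] + 1) >> 1);
// is a rounding halving add, mapped to a single HVecHalvingAdd in vector form.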

//
// Vectorization heuristics.
//

bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) {
  // Current heuristic: non-empty body with sufficient number
  // of iterations (if known).
  // TODO: refine by looking at e.g. operation count, alignment, etc.
  if (vector_length_ == 0) {
    return false;  // nothing found
  } else if (0 < trip_count && trip_count < vector_length_) {
    return false;  // insufficient iterations
  }
  return true;
}

void HLoopOptimization::SetPeelingCandidate(int64_t trip_count ATTRIBUTE_UNUSED) {
  // Current heuristic: none.
  // TODO: implement
}
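
// Worked example for the heuristic below (an editor's illustration; the concrete
// lane count is an assumption): with a 128-bit SIMD unit over int elements we would
// have vector_length_ == 4, so a loop whose body holds at most 10 HIR instructions
// and whose known trip count is at least 4 * 4 == 16 is unrolled by a factor of 2
// on ARM64/X86; any other combination keeps the unrolling factor at 1.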

uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) {
  // Current heuristic: unroll by 2 on ARM64/X86 for large known trip
  // counts and small loop bodies.
  // TODO: refine with operation count, remaining iterations, etc.
  switch (compiler_driver_->GetInstructionSet()) {
    case kArm64:
    case kX86:
    case kX86_64: {
      size_t num_instructions = block->GetInstructions().CountSize();
      if (num_instructions <= 10 && trip_count >= 4 * vector_length_) {
        return 2;
      }
      return 1;
    }
    default:
      return 1;
  }
}

//
// Helpers.
//

bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) {
  // Special case Phis that have an equivalent in a debuggable setup. Our graph checker isn't
  // smart enough to follow strongly connected components (and it's probably not worth
  // it to make it so). See b/33775412.
  if (graph_->IsDebuggable() && phi->HasEquivalentPhi()) {
    return false;
  }
  DCHECK(iset_->empty());
  ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
  if (set != nullptr) {
    for (HInstruction* i : *set) {
      // Check that, apart from instructions that are no longer in the graph (removed earlier),
      // each instruction is removable and, when restricting uses is requested, that all uses
      // other than the phi's are contained within the cycle.
      if (!i->IsInBlock()) {
        continue;
      } else if (!i->IsRemovable()) {
        return false;
      } else if (i != phi && restrict_uses) {
        for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
          if (set->find(use.GetUser()) == set->end()) {
            return false;
          }
        }
      }
      iset_->insert(i);  // copy
    }
    return true;
  }
  return false;
}

// Find: phi: Phi(init, addsub)
//       s:   SuspendCheck
//       c:   Condition(phi, bound)
//       i:   If(c)
// TODO: Find a less pattern matching approach?
bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) {
  DCHECK(iset_->empty());
  HInstruction* phi = block->GetFirstPhi();
  if (phi != nullptr &&
      phi->GetNext() == nullptr &&
      TrySetPhiInduction(phi->AsPhi(), /*restrict_uses=*/ false)) {
    HInstruction* s = block->GetFirstInstruction();
    if (s != nullptr && s->IsSuspendCheck()) {
      HInstruction* c = s->GetNext();
      if (c != nullptr &&
          c->IsCondition() &&
          c->GetUses().HasExactlyOneElement() &&  // only used for termination
          !c->HasEnvironmentUses()) {  // unlikely, but not impossible
        HInstruction* i = c->GetNext();
        if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
          iset_->insert(c);
          iset_->insert(s);
          return true;
        }
      }
    }
  }
  return false;
}

bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) {
  if (!block->GetPhis().IsEmpty()) {
    return false;
  }
  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    HInstruction* instruction = it.Current();
    if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) {
      return false;
    }
  }
  return true;
}

bool HLoopOptimization::IsUsedOutsideLoop(HLoopInformation* loop_info,
                                          HInstruction* instruction) {
  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
    if (use.GetUser()->GetBlock()->GetLoopInformation() != loop_info) {
      return true;
    }
  }
  return false;
}

bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
                                            HInstruction* instruction,
                                            bool collect_loop_uses,
                                            /*out*/ int32_t* use_count) {
  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
    HInstruction* user = use.GetUser();
    if (iset_->find(user) == iset_->end()) {  // not excluded?
      HLoopInformation* other_loop_info = user->GetBlock()->GetLoopInformation();
      if (other_loop_info != nullptr && other_loop_info->IsIn(*loop_info)) {
        // If collect_loop_uses is set, simply keep adding those uses to the set.
        // Otherwise, reject uses inside the loop that were not already in the set.
        if (collect_loop_uses) {
          iset_->insert(user);
          continue;
        }
        return false;
      }
      ++*use_count;
    }
  }
  return true;
}
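
// For illustration only (an editor's sketch with hypothetical names): given
//
//   for (int i = 0; i < 10; i++) { ... }
//   use(i);
//
// the induction analysis can compute the last value of i as the constant 10, so
// the method below rewrites the outside use(i) into use(10), after which the
// induction cycle that produced i may become dead and removable.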

bool HLoopOptimization::TryReplaceWithLastValue(HLoopInformation* loop_info,
                                                HInstruction* instruction,
                                                HBasicBlock* block) {
  // Try to replace outside uses with the last value.
  if (induction_range_.CanGenerateLastValue(instruction)) {
    HInstruction* replacement = induction_range_.GenerateLastValue(instruction, graph_, block);
    const HUseList<HInstruction*>& uses = instruction->GetUses();
    for (auto it = uses.begin(), end = uses.end(); it != end;) {
      HInstruction* user = it->GetUser();
      size_t index = it->GetIndex();
      ++it;  // increment before replacing
      if (iset_->find(user) == iset_->end()) {  // not excluded?
        if (kIsDebugBuild) {
          // We have checked earlier in 'IsOnlyUsedAfterLoop' that the use is after the loop.
          HLoopInformation* other_loop_info = user->GetBlock()->GetLoopInformation();
          CHECK(other_loop_info == nullptr || !other_loop_info->IsIn(*loop_info));
        }
        user->ReplaceInput(replacement, index);
        induction_range_.Replace(user, instruction, replacement);  // update induction
      }
    }
    const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses();
    for (auto it = env_uses.begin(), end = env_uses.end(); it != end;) {
      HEnvironment* user = it->GetUser();
      size_t index = it->GetIndex();
      ++it;  // increment before replacing
      if (iset_->find(user->GetHolder()) == iset_->end()) {  // not excluded?
        // Only update environment uses after the loop.
        HLoopInformation* other_loop_info = user->GetHolder()->GetBlock()->GetLoopInformation();
        if (other_loop_info == nullptr || !other_loop_info->IsIn(*loop_info)) {
          user->RemoveAsUserOfInput(index);
          user->SetRawEnvAt(index, replacement);
          replacement->AddEnvUseAt(user, index);
        }
      }
    }
    induction_simplication_count_++;
    return true;
  }
  return false;
}

bool HLoopOptimization::TryAssignLastValue(HLoopInformation* loop_info,
                                           HInstruction* instruction,
                                           HBasicBlock* block,
                                           bool collect_loop_uses) {
  // Assigning the last value is always successful if there are no uses.
  // Otherwise, it succeeds in a loop with no early exits by generating the
  // proper last value assignment.
  int32_t use_count = 0;
  return IsOnlyUsedAfterLoop(loop_info, instruction, collect_loop_uses, &use_count) &&
      (use_count == 0 ||
       (!IsEarlyExit(loop_info) && TryReplaceWithLastValue(loop_info, instruction, block)));
}

void HLoopOptimization::RemoveDeadInstructions(const HInstructionList& list) {
  for (HBackwardInstructionIterator i(list); !i.Done(); i.Advance()) {
    HInstruction* instruction = i.Current();
    if (instruction->IsDeadAndRemovable()) {
      simplified_ = true;
      instruction->GetBlock()->RemoveInstructionOrPhi(instruction);
    }
  }
}

bool HLoopOptimization::CanRemoveCycle() {
  for (HInstruction* i : *iset_) {
    // We can never remove instructions that have environment
    // uses when we compile 'debuggable'.
    if (i->HasEnvironmentUses() && graph_->IsDebuggable()) {
      return false;
    }
    // A deoptimization should never have an environment input removed.
    for (const HUseListNode<HEnvironment*>& use : i->GetEnvUses()) {
      if (use.GetUser()->GetHolder()->IsDeoptimize()) {
        return false;
      }
    }
  }
  return true;
}

}  // namespace art