/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm_vixl.h"

#include "arch/arm/asm_support_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "common_arm.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "heap_poisoning.h"
#include "intrinsics_arm_vixl.h"
#include "linker/arm/relative_patcher_thumb2.h"
#include "linker/linker_patch.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "thread.h"
#include "utils/arm/assembler_arm_vixl.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"

namespace art {
namespace arm {

namespace vixl32 = vixl::aarch32;
using namespace vixl32;  // NOLINT(build/namespaces)

using helpers::DRegisterFrom;
using helpers::DWARFReg;
using helpers::HighDRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegister;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::InputVRegister;
using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::Int64ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::OperandFrom;
using helpers::OutputRegister;
using helpers::OutputSRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::Uint64ConstantFrom;

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

using RegisterList = vixl32::RegisterList;

static bool ExpectedPairLayout(Location location) {
  // We expect this for both core and fpu register pairs.
  return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
}
// Use a local definition to prevent copying mistakes.
static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;

// Reference loads (except object array loads) use LDR Rt, [Rn, #offset], which can encode
// offsets below 4KiB. Offsets of 4KiB or more must be emitted as two or more instructions.
// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
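
// Illustrative example (offsets assumed for the sketch, not taken from the
// runtime): a reference field at offset 0x1008 cannot be loaded with a single
// `ldr rt, [rn, #0x1008]` recognizable by the link-time thunk, so the codegen
// splits it along the lines of:
//   add ip, rn, #0x1000
//   ldr rt, [ip, #0x8]
// keeping the final LDR offset below 4KiB.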

// Flags controlling the use of link-time generated thunks for Baker read barriers.
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;

// The reserved entrypoint register for link-time generated thunks.
const vixl32::Register kBakerCcEntrypointRegister = r4;

// Using a base helps identify when we hit Marking Register check breakpoints.
constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;

#ifdef __
#error "ARM Codegen VIXL macro-assembler macro already defined."
#endif

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()

// Marker that code is yet to be, and must be, implemented.
#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "

static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps,
                                                         HInstruction* instruction) {
  DCHECK(temps->IsAvailable(ip));
  temps->Exclude(ip);
  DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister));
  DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(),
            linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
  DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
  DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp(
      instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister));
}

static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) {
  ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes);
  __ bind(patch_label);
  vixl32::Label placeholder_label;
  __ b(ne, EncodingSize(Wide), &placeholder_label);  // Placeholder, patched at link-time.
  __ bind(&placeholder_label);
}

static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
  return rt.IsLow() && rn.IsLow() && offset < 32u;
}

class EmitAdrCode {
 public:
  EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
      : assembler_(assembler), rd_(rd), label_(label) {
    ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes);
    adr_location_ = assembler->GetCursorOffset();
    assembler->adr(EncodingSize(Wide), rd, label);
  }

  ~EmitAdrCode() {
    DCHECK(label_->IsBound());
    // The ADR emitted by the assembler does not set the Thumb mode bit we need.
    // TODO: Maybe extend VIXL to allow ADR for return address?
    uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
    // Expecting ADR encoding T3 with `(offset & 1) == 0`.
    DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u);          // Check bits 24-31, except 26.
    DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu);          // Check bits 16-23.
    DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode());  // Check bits 8-11 and 15.
    DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u);          // Check bit 0, i.e. the `offset & 1`.
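    // For reference (derived from the ARMv7-A encoding tables, stated here as
    // an aide): ADR encoding T3 consists of two little-endian halfwords,
    // `11110 i 10000 0 1111` and `0 imm3 Rd imm8`, so raw_adr[0..1] hold the
    // first halfword and raw_adr[2..3] the second; raw_adr[2] is the low byte
    // of imm8, whose bit 0 is the low bit of the computed offset.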
    // Add the Thumb mode bit.
    raw_adr[2] |= 0x01u;
  }

 private:
  ArmVIXLMacroAssembler* const assembler_;
  vixl32::Register rd_;
  vixl32::Label* const label_;
  int32_t adr_location_;
};

// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers;
// for each live D register they treat the two corresponding S registers as live ones.
//
// The two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList) build
// a list of contiguous D registers from a list of contiguous S registers (handling the first/last
// S register corner cases) and save/restore this new list treating them as D registers, thereby
//  - decreasing code size, and
//  - avoiding hazards on Cortex-A57, when a pair of S registers for an actual live D register is
//    restored and then used in regular non-slow-path code as a D register.
//
// For the following example (v means the S register is live):
//   D names: |    D0   |    D1   |    D2   |    D3   | ...
//   S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ...
//   Live?    |    |  v |  v |  v |  v |  v |  v |    | ...
//
// S1 and S6 will be saved/restored independently; the D registers list (D1, D2) will be processed
// as D registers.
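//
// For that example the save sequence emitted by SaveContiguousSRegisterList(1, 6, ...)
// would thus be (illustrative; the exact base/offset computation is omitted):
//   vstr s1, [sp, #offset]        // Lone leading S register.
//   vstm base, {d1, d2}           // The contiguous D register list.
//   vstr s6, [sp, #offset + 20]   // Lone trailing S register.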
//
// TODO(VIXL): All this code should be unnecessary once the VIXL AArch32 backend provides helpers
// for lists of floating-point registers.
static size_t SaveContiguousSRegisterList(size_t first,
                                          size_t last,
                                          CodeGenerator* codegen,
                                          size_t stack_offset) {
  static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
  static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
  DCHECK_LE(first, last);
  if ((first == last) && (first == 0)) {
    __ Vstr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
    return stack_offset + kSRegSizeInBytes;
  }
  if (first % 2 == 1) {
    __ Vstr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  bool save_last = false;
  if (last % 2 == 0) {
    save_last = true;
    --last;
  }

  if (first < last) {
    vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
    DCHECK_EQ((last - first + 1) % 2, 0u);
    size_t number_of_d_regs = (last - first + 1) / 2;

    if (number_of_d_regs == 1) {
      __ Vstr(d_reg, MemOperand(sp, stack_offset));
    } else if (number_of_d_regs > 1) {
      UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        __ Add(base, sp, Operand::From(stack_offset));
      }
      __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
    }
    stack_offset += number_of_d_regs * kDRegSizeInBytes;
  }

  if (save_last) {
    __ Vstr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  return stack_offset;
}

static size_t RestoreContiguousSRegisterList(size_t first,
                                             size_t last,
                                             CodeGenerator* codegen,
                                             size_t stack_offset) {
  static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
  static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
  DCHECK_LE(first, last);
  if ((first == last) && (first == 0)) {
    __ Vldr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
    return stack_offset + kSRegSizeInBytes;
  }
  if (first % 2 == 1) {
    __ Vldr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  bool restore_last = false;
  if (last % 2 == 0) {
    restore_last = true;
    --last;
  }

  if (first < last) {
    vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
    DCHECK_EQ((last - first + 1) % 2, 0u);
    size_t number_of_d_regs = (last - first + 1) / 2;
    if (number_of_d_regs == 1) {
      __ Vldr(d_reg, MemOperand(sp, stack_offset));
    } else if (number_of_d_regs > 1) {
      UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        __ Add(base, sp, Operand::From(stack_offset));
      }
      __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
    }
    stack_offset += number_of_d_regs * kDRegSizeInBytes;
  }

  if (restore_last) {
    __ Vldr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  return stack_offset;
}

static LoadOperandType GetLoadOperandType(DataType::Type type) {
  switch (type) {
    case DataType::Type::kReference:
      return kLoadWord;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
      return kLoadUnsignedByte;
    case DataType::Type::kInt8:
      return kLoadSignedByte;
    case DataType::Type::kUint16:
      return kLoadUnsignedHalfword;
    case DataType::Type::kInt16:
      return kLoadSignedHalfword;
    case DataType::Type::kInt32:
      return kLoadWord;
    case DataType::Type::kInt64:
      return kLoadWordPair;
    case DataType::Type::kFloat32:
      return kLoadSWord;
    case DataType::Type::kFloat64:
      return kLoadDWord;
    default:
      LOG(FATAL) << "Unreachable type " << type;
      UNREACHABLE();
  }
}

static StoreOperandType GetStoreOperandType(DataType::Type type) {
  switch (type) {
    case DataType::Type::kReference:
      return kStoreWord;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      return kStoreByte;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      return kStoreHalfword;
    case DataType::Type::kInt32:
      return kStoreWord;
    case DataType::Type::kInt64:
      return kStoreWordPair;
    case DataType::Type::kFloat32:
      return kStoreSWord;
    case DataType::Type::kFloat64:
      return kStoreDWord;
    default:
      LOG(FATAL) << "Unreachable type " << type;
      UNREACHABLE();
  }
}

void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  size_t orig_offset = stack_offset;

  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
  for (uint32_t i : LowToHighBits(core_spills)) {
    // If the register holds an object, update the stack mask.
    if (locations->RegisterContainsObject(i)) {
      locations->SetStackBit(stack_offset / kVRegSize);
    }
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_core_stack_offsets_[i] = stack_offset;
    stack_offset += kArmWordSize;
  }

  CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
  arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset);

  uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
  orig_offset = stack_offset;
  for (uint32_t i : LowToHighBits(fp_spills)) {
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_fpu_stack_offsets_[i] = stack_offset;
    stack_offset += kArmWordSize;
  }

  stack_offset = orig_offset;
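  // Illustrative walk-through of the bit trick in the loop below (values
  // assumed): for fp_spills == 0b0110, begin == CTZ(0b0110) == 1 and
  // tmp == 0b0110 + 0b0010 == 0b1000, since adding 1u << begin makes the
  // contiguous run of 1s carry away; fp_spills &= tmp then clears that run,
  // end == CTZ(0b1000) == 3, and S1..S2 are saved as one contiguous list.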
  while (fp_spills != 0u) {
    uint32_t begin = CTZ(fp_spills);
    uint32_t tmp = fp_spills + (1u << begin);
    fp_spills &= tmp;  // Clear the contiguous range of 1s.
    uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
    stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
  }
  DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
}

void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  size_t orig_offset = stack_offset;

  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
  for (uint32_t i : LowToHighBits(core_spills)) {
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    stack_offset += kArmWordSize;
  }

  // TODO(VIXL): Check the coherency of stack_offset after this with a test.
  CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
  arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset);

  uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
  while (fp_spills != 0u) {
    uint32_t begin = CTZ(fp_spills);
    uint32_t tmp = fp_spills + (1u << begin);
    fp_spills &= tmp;  // Clear the contiguous range of 1s.
    uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
    stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
  }
  DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
}

class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    arm_codegen->InvokeRuntime(kQuickThrowNullPointer,
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL);
};

class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL);
};

class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCodeARMVIXL(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    if (successor_ == nullptr) {
      __ B(GetReturnLabel());
    } else {
      __ B(arm_codegen->GetLabelOf(successor_));
    }
  }

  vixl32::Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARMVIXL"; }

 private:
  // If not null, the block to branch to after the suspend check.
  HBasicBlock* const successor_;

  // If `successor_` is null, the label to branch to after the suspend check.
  vixl32::Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL);
};

class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();

    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    codegen->EmitParallelMoves(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        DataType::Type::kInt32,
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    arm_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL);
};

class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit)
      : SlowPathCodeARMVIXL(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    dex::TypeIndex type_index = cls_->GetTypeIndex();
    __ Mov(calling_convention.GetRegisterAt(0), type_index.index_);
    QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                : kQuickInitializeType;
    arm_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
    if (do_clinit_) {
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    } else {
      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      arm_codegen->Move32(locations->Out(), LocationFrom(r0));
    }
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARMVIXL"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  // The dex PC of `at_`.
  const uint32_t dex_pc_;

  // Whether to initialize the class.
  const bool do_clinit_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
};

class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit LoadStringSlowPathARMVIXL(HLoadString* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    DCHECK(instruction_->IsLoadString());
    DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0), string_index.index_);
    arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();

    arm_codegen->Move32(locations->Out(), LocationFrom(r0));
    RestoreLiveRegisters(codegen, locations);

    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL);
};

class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
      : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;

    codegen->EmitParallelMoves(locations->InAt(0),
                               LocationFrom(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               LocationFrom(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
      arm_codegen->Move32(locations->Out(), LocationFrom(r0));
    } else {
      DCHECK(instruction_->IsCheckCast());
      arm_codegen->InvokeRuntime(kQuickCheckInstanceOf,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      RestoreLiveRegisters(codegen, locations);
      __ B(GetExitLabel());
    }
  }

  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARMVIXL"; }

  bool IsFatal() const OVERRIDE { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARMVIXL);
};

class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0),
           static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));

    arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL);
};

class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        LocationFrom(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
};

// Abstract base class for read barrier slow paths marking a reference
// `ref`.
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL {
 protected:
  ReadBarrierMarkSlowPathBaseARMVIXL(HInstruction* instruction, Location ref, Location entrypoint)
      : SlowPathCodeARMVIXL(instruction), ref_(ref), entrypoint_(entrypoint) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARMVIXL"; }

  // Generate assembly code calling the read barrier marking runtime
  // entry point (ReadBarrierMarkRegX).
  void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
    vixl32::Register ref_reg = RegisterFrom(ref_);

    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    DCHECK(!ref_reg.Is(sp));
    DCHECK(!ref_reg.Is(lr));
    DCHECK(!ref_reg.Is(pc));
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary, it cannot be the entry point's input/output.
    DCHECK(!ref_reg.Is(ip));
    DCHECK(ref_reg.IsRegister()) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in R0):
    //
    //   R0 <- ref
    //   R0 <- ReadBarrierMark(R0)
    //   ref <- R0
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    if (entrypoint_.IsValid()) {
      arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
      __ Blx(RegisterFrom(entrypoint_));
    } else {
      // Entrypoint is not already loaded, load from the thread.
      int32_t entry_point_offset =
          Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
      // This runtime call does not require a stack map.
      arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    }
  }

  // The location (register) of the marked object reference.
  const Location ref_;

  // The location of the entrypoint if already loaded.
  const Location entrypoint_;

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARMVIXL);
};
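
// Note (spelling out an assumption about the runtime layout, not taken from
// the original comments): Thread::ReadBarrierMarkEntryPointsOffset() above
// relies on the per-register ReadBarrierMarkRegX entry points being stored at
// consecutive pointer-sized offsets in the Thread, indexed by register code;
// e.g. a reference held in r5 would resolve to the entry point slot for
// register 5 (pReadBarrierMarkReg05).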

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking.
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
class ReadBarrierMarkSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
 public:
  ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction,
                                 Location ref,
                                 Location entrypoint = Location::NoLocation())
      : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARMVIXL"; }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(ref_.IsRegister()) << ref_;
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    GenerateReadBarrierMarkRuntimeCall(codegen);
    __ B(GetExitLabel());
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARMVIXL);
};

// Slow path loading `obj`'s lock word, loading a reference from
// object `*(obj + offset + (index << scale_factor))` into `ref`, and
// marking `ref` if `obj` is gray according to the lock word (Baker
// read barrier). The field `obj.field` in the object `obj` holding
// this reference does not get updated by this slow path after marking
// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
// below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
 public:
  LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(HInstruction* instruction,
                                                   Location ref,
                                                   vixl32::Register obj,
                                                   uint32_t offset,
                                                   Location index,
                                                   ScaleFactor scale_factor,
                                                   bool needs_null_check,
                                                   vixl32::Register temp,
                                                   Location entrypoint = Location::NoLocation())
      : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
        obj_(obj),
        offset_(offset),
        index_(index),
        scale_factor_(scale_factor),
        needs_null_check_(needs_null_check),
        temp_(temp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE {
    return "LoadReferenceWithBakerReadBarrierSlowPathARMVIXL";
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    vixl32::Register ref_reg = RegisterFrom(ref_);
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation of object ArrayGet
    // instructions does not support the HIntermediateAddress
    // instruction.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));

    // Temporary register `temp_`, used to store the lock word, must
    // not be IP, as we may use it to emit the reference load (in the
    // call to GenerateRawReferenceLoad below), and we need the lock
    // word to still be in `temp_` after the reference load.
    DCHECK(!temp_.Is(ip));

    __ Bind(GetEntryLabel());

    // When using MaybeGenerateReadBarrierSlow, the read barrier call is
    // inserted after the original load. However, in fast path based
    // Baker's read barriers, we need to perform the load of
    // mirror::Object::monitor_ *before* the original reference load.
    // This load-load ordering is required by the read barrier.
    // The slow path (for Baker's algorithm) should look like:
    //
    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
    //   if (is_gray) {
    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
    //   }
    //
    // Note: the original implementation in ReadBarrier::Barrier is
    // slightly more complex as it performs additional checks that we do
    // not do here for performance reasons.

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);

    // /* int32_t */ monitor = obj->monitor_
    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
    arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset);
    if (needs_null_check_) {
      codegen->MaybeRecordImplicitNullCheck(instruction_);
    }
    // /* LockWord */ lock_word = LockWord(monitor)
    static_assert(sizeof(LockWord) == sizeof(int32_t),
                  "art::LockWord and int32_t have different sizes.");

    // Introduce a dependency on the lock_word including the rb_state,
    // which shall prevent load-load reordering without using
    // a memory barrier (which would be more expensive).
    // `obj` is unchanged by this operation, but its value now depends
    // on `temp_`.
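    // (A note on the trick below, stated here as an aide and not present in
    // the original comments: an immediate LSR by 32 on ARM produces 0, so the
    // ADD leaves `obj_` unchanged while making the address of the subsequent
    // reference load data-dependent on the lock word loaded above.)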
    __ Add(obj_, obj_, Operand(temp_, ShiftType::LSR, 32));

    // The actual reference load.
    // A possible implicit null check has already been handled above.
    arm_codegen->GenerateRawReferenceLoad(
        instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);

    // Mark the object `ref` when `obj` is gray.
    //
    //   if (rb_state == ReadBarrier::GrayState())
    //     ref = ReadBarrier::Mark(ref);
    //
    // Given the numeric representation, it's enough to check the low bit of the
    // rb_state. We do that by shifting the bit out of the lock word with LSRS
    // which can be a 16-bit instruction unlike the TST immediate.
    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1);
    __ B(cc, GetExitLabel());  // Carry flag is the last bit shifted out by LSRS.
    GenerateReadBarrierMarkRuntimeCall(codegen);

    __ B(GetExitLabel());
  }

 private:
  // The register containing the object holding the marked object reference field.
  vixl32::Register obj_;
  // The offset, index and scale factor to access the reference in `obj_`.
  uint32_t offset_;
  Location index_;
  ScaleFactor scale_factor_;
  // Is a null check required?
  bool needs_null_check_;
  // A temporary register used to hold the lock word of `obj_`.
  vixl32::Register temp_;

  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARMVIXL);
};

// Slow path loading `obj`'s lock word, loading a reference from
// object `*(obj + offset + (index << scale_factor))` into `ref`, and
// marking `ref` if `obj` is gray according to the lock word (Baker
// read barrier). If needed, this slow path also atomically updates
// the field `obj.field` in the object `obj` holding this reference
// after marking (contrary to
// LoadReferenceWithBakerReadBarrierSlowPathARMVIXL above, which never
// tries to update `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
    : public ReadBarrierMarkSlowPathBaseARMVIXL {
 public:
  LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
      HInstruction* instruction,
      Location ref,
      vixl32::Register obj,
      uint32_t offset,
      Location index,
      ScaleFactor scale_factor,
      bool needs_null_check,
      vixl32::Register temp1,
      vixl32::Register temp2,
      Location entrypoint = Location::NoLocation())
      : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
        obj_(obj),
        offset_(offset),
        index_(index),
        scale_factor_(scale_factor),
        needs_null_check_(needs_null_check),
        temp1_(temp1),
        temp2_(temp2) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE {
    return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL";
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    vixl32::Register ref_reg = RegisterFrom(ref_);
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
    DCHECK_NE(ref_.reg(), LocationFrom(temp1_).reg());

    // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
    DCHECK_EQ(offset_, 0u);
    DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1);
    Location field_offset = index_;
    DCHECK(field_offset.IsRegisterPair()) << field_offset;

    // Temporary register `temp1_`, used to store the lock word, must
    // not be IP, as we may use it to emit the reference load (in the
    // call to GenerateRawReferenceLoad below), and we need the lock
    // word to still be in `temp1_` after the reference load.
    DCHECK(!temp1_.Is(ip));

    __ Bind(GetEntryLabel());

    // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARMVIXL's:
    //
    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
    //   if (is_gray) {
    //     old_ref = ref;
    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
    //     compareAndSwapObject(obj, field_offset, old_ref, ref);
    //   }

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);

    // /* int32_t */ monitor = obj->monitor_
    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
    arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
    if (needs_null_check_) {
      codegen->MaybeRecordImplicitNullCheck(instruction_);
    }
    // /* LockWord */ lock_word = LockWord(monitor)
    static_assert(sizeof(LockWord) == sizeof(int32_t),
                  "art::LockWord and int32_t have different sizes.");

    // Introduce a dependency on the lock_word including the rb_state,
    // which shall prevent load-load reordering without using
    // a memory barrier (which would be more expensive).
    // `obj` is unchanged by this operation, but its value now depends
    // on `temp1_`.
    __ Add(obj_, obj_, Operand(temp1_, ShiftType::LSR, 32));

    // The actual reference load.
    // A possible implicit null check has already been handled above.
    arm_codegen->GenerateRawReferenceLoad(
        instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);

    // Mark the object `ref` when `obj` is gray.
    //
    //   if (rb_state == ReadBarrier::GrayState())
    //     ref = ReadBarrier::Mark(ref);
    //
    // Given the numeric representation, it's enough to check the low bit of the
    // rb_state. We do that by shifting the bit out of the lock word with LSRS
    // which can be a 16-bit instruction unlike the TST immediate.
    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1);
    __ B(cc, GetExitLabel());  // Carry flag is the last bit shifted out by LSRS.

    // Save the old value of the reference before marking it.
    // Note that we cannot use IP to save the old reference, as IP is
    // used internally by the ReadBarrierMarkRegX entry point, and we
    // need the old reference after the call to that entry point.
    DCHECK(!temp1_.Is(ip));
    __ Mov(temp1_, ref_reg);

    GenerateReadBarrierMarkRuntimeCall(codegen);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*(obj_ + field_offset)`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LDREX/CMP/BNE sequence of instructions in the compare-and-set
    // (CAS) operation below would abort the CAS, leaving the field
    // as-is.
    __ Cmp(temp1_, ref_reg);
    __ B(eq, GetExitLabel());

    // Update the holder's field atomically. This may fail if the
    // mutator updates it before us, but that is OK. This is achieved
    // using a strong compare-and-set (CAS) operation with relaxed
    // memory synchronization ordering, where the expected value is
    // the old reference and the desired value is the new reference.

    UseScratchRegisterScope temps(arm_codegen->GetVIXLAssembler());
    // Convenience aliases.
    vixl32::Register base = obj_;
    // The UnsafeCASObject intrinsic uses a register pair as field
    // offset ("long offset"), of which only the low part contains
    // data.
    vixl32::Register offset = LowRegisterFrom(field_offset);
    vixl32::Register expected = temp1_;
    vixl32::Register value = ref_reg;
    vixl32::Register tmp_ptr = temps.Acquire();  // Pointer to actual memory.
    vixl32::Register tmp = temp2_;  // Value in memory.

    __ Add(tmp_ptr, base, offset);

    if (kPoisonHeapReferences) {
      arm_codegen->GetAssembler()->PoisonHeapReference(expected);
      if (value.Is(expected)) {
        // Do not poison `value`, as it is the same register as
        // `expected`, which has just been poisoned.
      } else {
        arm_codegen->GetAssembler()->PoisonHeapReference(value);
      }
    }

    // do {
    //   tmp = [r_ptr] - expected;
    // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
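    //
    // (Reminder, an addition to the original comments: STREX writes 0 to the
    // status register `tmp` when the store succeeds and 1 when the exclusive
    // monitor was lost, so the loop below retries until
    // CompareAndBranchIfZero observes a zero status.)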

    vixl32::Label loop_head, comparison_failed, exit_loop;
    __ Bind(&loop_head);
    __ Ldrex(tmp, MemOperand(tmp_ptr));
    __ Cmp(tmp, expected);
    __ B(ne, &comparison_failed, /* far_target */ false);
    __ Strex(tmp, value, MemOperand(tmp_ptr));
    __ CompareAndBranchIfZero(tmp, &exit_loop, /* far_target */ false);
    __ B(&loop_head);
    __ Bind(&comparison_failed);
    __ Clrex();
    __ Bind(&exit_loop);

    if (kPoisonHeapReferences) {
      arm_codegen->GetAssembler()->UnpoisonHeapReference(expected);
      if (value.Is(expected)) {
        // Do not unpoison `value`, as it is the same register as
        // `expected`, which has just been unpoisoned.
      } else {
        arm_codegen->GetAssembler()->UnpoisonHeapReference(value);
      }
    }

    __ B(GetExitLabel());
  }

 private:
  // The register containing the object holding the marked object reference field.
  const vixl32::Register obj_;
  // The offset, index and scale factor to access the reference in `obj_`.
  uint32_t offset_;
  Location index_;
  ScaleFactor scale_factor_;
  // Is a null check required?
  bool needs_null_check_;
  // A temporary register used to hold the lock word of `obj_`; and
  // also to hold the original reference value, when the reference is
  // marked.
  const vixl32::Register temp1_;
  // A temporary register used in the implementation of the CAS, to
  // update the object's reference field.
  const vixl32::Register temp2_;

  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  ReadBarrierForHeapReferenceSlowPathARMVIXL(HInstruction* instruction,
                                             Location out,
                                             Location ref,
                                             Location obj,
                                             uint32_t offset,
                                             Location index)
      : SlowPathCodeARMVIXL(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ LoadFromOffset(kLoadWord, out, out, offset);
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    vixl32::Register reg_out = RegisterFrom(out_);
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation of object ArrayGet
    // instructions does not support the HIntermediateAddress
    // instruction.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        vixl32::Register index_reg = RegisterFrom(index_);
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg.GetCode()));
        if (codegen->IsCoreCalleeSaveRegister(index_reg.GetCode())) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::arm::ArmVIXLMacroAssembler::Lsl and
          // art::arm::ArmVIXLMacroAssembler::Add below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          vixl32::Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ Mov(free_reg, index_reg);
          index_reg = free_reg;
          index = LocationFrom(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ Lsl(index_reg, index_reg, TIMES_4);
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ Add(index_reg, index_reg, offset_);
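        // `index_reg` now holds `offset_ + index * 4`, the byte offset of the
        // element; e.g. an index of 3 with a data offset of 12 would give 24
        // (illustrative values only).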
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegisterPair());
        // UnsafeGet's offset location is a register pair, the low
        // part contains the correct offset.
        index = index_.ToLow();
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          LocationFrom(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          LocationFrom(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            LocationFrom(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ Mov(calling_convention.GetRegisterAt(2), offset_);
    }
    arm_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    arm_codegen->Move32(out_, LocationFrom(r0));

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierForHeapReferenceSlowPathARMVIXL";
  }

 private:
  vixl32::Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    uint32_t ref = RegisterFrom(ref_).GetCode();
    uint32_t obj = RegisterFrom(obj_).GetCode();
    for (uint32_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return vixl32::Register(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on ARM
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
1386 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); 1387 LOG(FATAL) << "Could not find a free caller-save register"; 1388 UNREACHABLE(); 1389 } 1390 1391 const Location out_; 1392 const Location ref_; 1393 const Location obj_; 1394 const uint32_t offset_; 1395 // An additional location containing an index to an array. 1396 // Only used for HArrayGet and the UnsafeGetObject & 1397 // UnsafeGetObjectVolatile intrinsics. 1398 const Location index_; 1399 1400 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARMVIXL); 1401 }; 1402 1403 // Slow path generating a read barrier for a GC root. 1404 class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL { 1405 public: 1406 ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root) 1407 : SlowPathCodeARMVIXL(instruction), out_(out), root_(root) { 1408 DCHECK(kEmitCompilerReadBarrier); 1409 } 1410 1411 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 1412 LocationSummary* locations = instruction_->GetLocations(); 1413 vixl32::Register reg_out = RegisterFrom(out_); 1414 DCHECK(locations->CanCall()); 1415 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode())); 1416 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) 1417 << "Unexpected instruction in read barrier for GC root slow path: " 1418 << instruction_->DebugName(); 1419 1420 __ Bind(GetEntryLabel()); 1421 SaveLiveRegisters(codegen, locations); 1422 1423 InvokeRuntimeCallingConventionARMVIXL calling_convention; 1424 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); 1425 arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), root_); 1426 arm_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, 1427 instruction_, 1428 instruction_->GetDexPc(), 1429 this); 1430 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); 1431 arm_codegen->Move32(out_, LocationFrom(r0)); 1432 1433 RestoreLiveRegisters(codegen, locations); 1434 __ B(GetExitLabel()); 1435 } 1436 1437 const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARMVIXL"; } 1438 1439 private: 1440 const Location out_; 1441 const Location root_; 1442 1443 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARMVIXL); 1444 }; 1445 1446 inline vixl32::Condition ARMCondition(IfCondition cond) { 1447 switch (cond) { 1448 case kCondEQ: return eq; 1449 case kCondNE: return ne; 1450 case kCondLT: return lt; 1451 case kCondLE: return le; 1452 case kCondGT: return gt; 1453 case kCondGE: return ge; 1454 case kCondB: return lo; 1455 case kCondBE: return ls; 1456 case kCondA: return hi; 1457 case kCondAE: return hs; 1458 } 1459 LOG(FATAL) << "Unreachable"; 1460 UNREACHABLE(); 1461 } 1462 1463 // Maps signed condition to unsigned condition. 1464 inline vixl32::Condition ARMUnsignedCondition(IfCondition cond) { 1465 switch (cond) { 1466 case kCondEQ: return eq; 1467 case kCondNE: return ne; 1468 // Signed to unsigned. 1469 case kCondLT: return lo; 1470 case kCondLE: return ls; 1471 case kCondGT: return hi; 1472 case kCondGE: return hs; 1473 // Unsigned remain unchanged. 
1474 case kCondB: return lo; 1475 case kCondBE: return ls; 1476 case kCondA: return hi; 1477 case kCondAE: return hs; 1478 } 1479 LOG(FATAL) << "Unreachable"; 1480 UNREACHABLE(); 1481 } 1482 1483 inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) { 1484 // The ARM condition codes can express all the necessary branches, see the 1485 // "Meaning (floating-point)" column in the table A8-1 of the ARMv7 reference manual. 1486 // There is no dex instruction or HIR that would need the missing conditions 1487 // "equal or unordered" or "not equal". 1488 switch (cond) { 1489 case kCondEQ: return eq; 1490 case kCondNE: return ne /* unordered */; 1491 case kCondLT: return gt_bias ? cc : lt /* unordered */; 1492 case kCondLE: return gt_bias ? ls : le /* unordered */; 1493 case kCondGT: return gt_bias ? hi /* unordered */ : gt; 1494 case kCondGE: return gt_bias ? cs /* unordered */ : ge; 1495 default: 1496 LOG(FATAL) << "UNREACHABLE"; 1497 UNREACHABLE(); 1498 } 1499 } 1500 1501 inline ShiftType ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) { 1502 switch (op_kind) { 1503 case HDataProcWithShifterOp::kASR: return ShiftType::ASR; 1504 case HDataProcWithShifterOp::kLSL: return ShiftType::LSL; 1505 case HDataProcWithShifterOp::kLSR: return ShiftType::LSR; 1506 default: 1507 LOG(FATAL) << "Unexpected op kind " << op_kind; 1508 UNREACHABLE(); 1509 } 1510 } 1511 1512 void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const { 1513 stream << vixl32::Register(reg); 1514 } 1515 1516 void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int reg) const { 1517 stream << vixl32::SRegister(reg); 1518 } 1519 1520 static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) { 1521 uint32_t mask = 0; 1522 for (uint32_t i = regs.GetFirstSRegister().GetCode(); 1523 i <= regs.GetLastSRegister().GetCode(); 1524 ++i) { 1525 mask |= (1 << i); 1526 } 1527 return mask; 1528 } 1529 1530 // Saves the register in the stack. Returns the size taken on stack. 1531 size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, 1532 uint32_t reg_id ATTRIBUTE_UNUSED) { 1533 TODO_VIXL32(FATAL); 1534 return 0; 1535 } 1536 1537 // Restores the register from the stack. Returns the size taken on stack. 1538 size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, 1539 uint32_t reg_id ATTRIBUTE_UNUSED) { 1540 TODO_VIXL32(FATAL); 1541 return 0; 1542 } 1543 1544 size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, 1545 uint32_t reg_id ATTRIBUTE_UNUSED) { 1546 TODO_VIXL32(FATAL); 1547 return 0; 1548 } 1549 1550 size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, 1551 uint32_t reg_id ATTRIBUTE_UNUSED) { 1552 TODO_VIXL32(FATAL); 1553 return 0; 1554 } 1555 1556 static void GenerateDataProcInstruction(HInstruction::InstructionKind kind, 1557 vixl32::Register out, 1558 vixl32::Register first, 1559 const Operand& second, 1560 CodeGeneratorARMVIXL* codegen) { 1561 if (second.IsImmediate() && second.GetImmediate() == 0) { 1562 const Operand in = kind == HInstruction::kAnd 1563 ? 
Operand(0) 1564 : Operand(first); 1565 1566 __ Mov(out, in); 1567 } else { 1568 switch (kind) { 1569 case HInstruction::kAdd: 1570 __ Add(out, first, second); 1571 break; 1572 case HInstruction::kAnd: 1573 __ And(out, first, second); 1574 break; 1575 case HInstruction::kOr: 1576 __ Orr(out, first, second); 1577 break; 1578 case HInstruction::kSub: 1579 __ Sub(out, first, second); 1580 break; 1581 case HInstruction::kXor: 1582 __ Eor(out, first, second); 1583 break; 1584 default: 1585 LOG(FATAL) << "Unexpected instruction kind: " << kind; 1586 UNREACHABLE(); 1587 } 1588 } 1589 } 1590 1591 static void GenerateDataProc(HInstruction::InstructionKind kind, 1592 const Location& out, 1593 const Location& first, 1594 const Operand& second_lo, 1595 const Operand& second_hi, 1596 CodeGeneratorARMVIXL* codegen) { 1597 const vixl32::Register first_hi = HighRegisterFrom(first); 1598 const vixl32::Register first_lo = LowRegisterFrom(first); 1599 const vixl32::Register out_hi = HighRegisterFrom(out); 1600 const vixl32::Register out_lo = LowRegisterFrom(out); 1601 1602 if (kind == HInstruction::kAdd) { 1603 __ Adds(out_lo, first_lo, second_lo); 1604 __ Adc(out_hi, first_hi, second_hi); 1605 } else if (kind == HInstruction::kSub) { 1606 __ Subs(out_lo, first_lo, second_lo); 1607 __ Sbc(out_hi, first_hi, second_hi); 1608 } else { 1609 GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen); 1610 GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen); 1611 } 1612 } 1613 1614 static Operand GetShifterOperand(vixl32::Register rm, ShiftType shift, uint32_t shift_imm) { 1615 return shift_imm == 0 ? Operand(rm) : Operand(rm, shift, shift_imm); 1616 } 1617 1618 static void GenerateLongDataProc(HDataProcWithShifterOp* instruction, 1619 CodeGeneratorARMVIXL* codegen) { 1620 DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64); 1621 DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())); 1622 1623 const LocationSummary* const locations = instruction->GetLocations(); 1624 const uint32_t shift_value = instruction->GetShiftAmount(); 1625 const HInstruction::InstructionKind kind = instruction->GetInstrKind(); 1626 const Location first = locations->InAt(0); 1627 const Location second = locations->InAt(1); 1628 const Location out = locations->Out(); 1629 const vixl32::Register first_hi = HighRegisterFrom(first); 1630 const vixl32::Register first_lo = LowRegisterFrom(first); 1631 const vixl32::Register out_hi = HighRegisterFrom(out); 1632 const vixl32::Register out_lo = LowRegisterFrom(out); 1633 const vixl32::Register second_hi = HighRegisterFrom(second); 1634 const vixl32::Register second_lo = LowRegisterFrom(second); 1635 const ShiftType shift = ShiftFromOpKind(instruction->GetOpKind()); 1636 1637 if (shift_value >= 32) { 1638 if (shift == ShiftType::LSL) { 1639 GenerateDataProcInstruction(kind, 1640 out_hi, 1641 first_hi, 1642 Operand(second_lo, ShiftType::LSL, shift_value - 32), 1643 codegen); 1644 GenerateDataProcInstruction(kind, out_lo, first_lo, 0, codegen); 1645 } else if (shift == ShiftType::ASR) { 1646 GenerateDataProc(kind, 1647 out, 1648 first, 1649 GetShifterOperand(second_hi, ShiftType::ASR, shift_value - 32), 1650 Operand(second_hi, ShiftType::ASR, 31), 1651 codegen); 1652 } else { 1653 DCHECK_EQ(shift, ShiftType::LSR); 1654 GenerateDataProc(kind, 1655 out, 1656 first, 1657 GetShifterOperand(second_hi, ShiftType::LSR, shift_value - 32), 1658 0, 1659 codegen); 1660 } 1661 } else { 1662 DCHECK_GT(shift_value, 1U); 1663 DCHECK_LT(shift_value, 32U); 1664 1665 
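    // A long shift by 1 < n < 32 is assembled from two 32-bit halves; as a
    // sketch for LSL (the LSR/ASR cases mirror it, with the spilled bits
    // flowing the other way):
    //   out_hi = (second_hi << n) | (second_lo >> (32 - n));
    //   out_lo =  second_lo << n;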
    UseScratchRegisterScope temps(codegen->GetVIXLAssembler());

    if (shift == ShiftType::LSL) {
      // We are not doing this for HInstruction::kAdd because the output will require
      // Location::kOutputOverlap; not applicable to other cases.
      if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
        GenerateDataProcInstruction(kind,
                                    out_hi,
                                    first_hi,
                                    Operand(second_hi, ShiftType::LSL, shift_value),
                                    codegen);
        GenerateDataProcInstruction(kind,
                                    out_hi,
                                    out_hi,
                                    Operand(second_lo, ShiftType::LSR, 32 - shift_value),
                                    codegen);
        GenerateDataProcInstruction(kind,
                                    out_lo,
                                    first_lo,
                                    Operand(second_lo, ShiftType::LSL, shift_value),
                                    codegen);
      } else {
        const vixl32::Register temp = temps.Acquire();

        __ Lsl(temp, second_hi, shift_value);
        __ Orr(temp, temp, Operand(second_lo, ShiftType::LSR, 32 - shift_value));
        GenerateDataProc(kind,
                         out,
                         first,
                         Operand(second_lo, ShiftType::LSL, shift_value),
                         temp,
                         codegen);
      }
    } else {
      DCHECK(shift == ShiftType::ASR || shift == ShiftType::LSR);

      // We are not doing this for HInstruction::kAdd because the output will require
      // Location::kOutputOverlap; not applicable to other cases.
      if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
        GenerateDataProcInstruction(kind,
                                    out_lo,
                                    first_lo,
                                    Operand(second_lo, ShiftType::LSR, shift_value),
                                    codegen);
        GenerateDataProcInstruction(kind,
                                    out_lo,
                                    out_lo,
                                    Operand(second_hi, ShiftType::LSL, 32 - shift_value),
                                    codegen);
        GenerateDataProcInstruction(kind,
                                    out_hi,
                                    first_hi,
                                    Operand(second_hi, shift, shift_value),
                                    codegen);
      } else {
        const vixl32::Register temp = temps.Acquire();

        __ Lsr(temp, second_lo, shift_value);
        __ Orr(temp, temp, Operand(second_hi, ShiftType::LSL, 32 - shift_value));
        GenerateDataProc(kind,
                         out,
                         first,
                         temp,
                         Operand(second_hi, shift, shift_value),
                         codegen);
      }
    }
  }
}

static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codegen) {
  const Location rhs_loc = instruction->GetLocations()->InAt(1);
  if (rhs_loc.IsConstant()) {
    // 0.0 is the only immediate that can be encoded directly in
    // a VCMP instruction.
    //
    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
    // specify that in a floating-point comparison, positive zero
    // and negative zero are considered equal, so we can use the
    // literal 0.0 for both cases here.
    //
    // Note however that some methods (Float.equals, Float.compare,
    // Float.compareTo, Double.equals, Double.compare,
    // Double.compareTo, Math.max, Math.min, StrictMath.max,
    // StrictMath.min) consider 0.0 to be (strictly) greater than
    // -0.0. So if we ever translate calls to these methods into a
    // HCompare instruction, we must handle the -0.0 case with
    // care here.
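    // As an illustration: a float comparison against the constant 0.0 can
    // thus be emitted as a single `vcmp.f32 sN, #0.0`; any other constant
    // would first have to be materialized in a register, which is why only
    // arithmetic zero is allowed here (checked below).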
1753 DCHECK(rhs_loc.GetConstant()->IsArithmeticZero()); 1754 1755 const DataType::Type type = instruction->InputAt(0)->GetType(); 1756 1757 if (type == DataType::Type::kFloat32) { 1758 __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0); 1759 } else { 1760 DCHECK_EQ(type, DataType::Type::kFloat64); 1761 __ Vcmp(F64, InputDRegisterAt(instruction, 0), 0.0); 1762 } 1763 } else { 1764 __ Vcmp(InputVRegisterAt(instruction, 0), InputVRegisterAt(instruction, 1)); 1765 } 1766 } 1767 1768 static int64_t AdjustConstantForCondition(int64_t value, 1769 IfCondition* condition, 1770 IfCondition* opposite) { 1771 if (value == 1) { 1772 if (*condition == kCondB) { 1773 value = 0; 1774 *condition = kCondEQ; 1775 *opposite = kCondNE; 1776 } else if (*condition == kCondAE) { 1777 value = 0; 1778 *condition = kCondNE; 1779 *opposite = kCondEQ; 1780 } 1781 } else if (value == -1) { 1782 if (*condition == kCondGT) { 1783 value = 0; 1784 *condition = kCondGE; 1785 *opposite = kCondLT; 1786 } else if (*condition == kCondLE) { 1787 value = 0; 1788 *condition = kCondLT; 1789 *opposite = kCondGE; 1790 } 1791 } 1792 1793 return value; 1794 } 1795 1796 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( 1797 HCondition* condition, 1798 bool invert, 1799 CodeGeneratorARMVIXL* codegen) { 1800 DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64); 1801 1802 const LocationSummary* const locations = condition->GetLocations(); 1803 IfCondition cond = condition->GetCondition(); 1804 IfCondition opposite = condition->GetOppositeCondition(); 1805 1806 if (invert) { 1807 std::swap(cond, opposite); 1808 } 1809 1810 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne); 1811 const Location left = locations->InAt(0); 1812 const Location right = locations->InAt(1); 1813 1814 DCHECK(right.IsConstant()); 1815 1816 const vixl32::Register left_high = HighRegisterFrom(left); 1817 const vixl32::Register left_low = LowRegisterFrom(left); 1818 int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite); 1819 UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); 1820 1821 // Comparisons against 0 are common enough to deserve special attention. 1822 if (value == 0) { 1823 switch (cond) { 1824 case kCondNE: 1825 // x > 0 iff x != 0 when the comparison is unsigned. 1826 case kCondA: 1827 ret = std::make_pair(ne, eq); 1828 FALLTHROUGH_INTENDED; 1829 case kCondEQ: 1830 // x <= 0 iff x == 0 when the comparison is unsigned. 1831 case kCondBE: 1832 __ Orrs(temps.Acquire(), left_low, left_high); 1833 return ret; 1834 case kCondLT: 1835 case kCondGE: 1836 __ Cmp(left_high, 0); 1837 return std::make_pair(ARMCondition(cond), ARMCondition(opposite)); 1838 // Trivially true or false. 1839 case kCondB: 1840 ret = std::make_pair(ne, eq); 1841 FALLTHROUGH_INTENDED; 1842 case kCondAE: 1843 __ Cmp(left_low, left_low); 1844 return ret; 1845 default: 1846 break; 1847 } 1848 } 1849 1850 switch (cond) { 1851 case kCondEQ: 1852 case kCondNE: 1853 case kCondB: 1854 case kCondBE: 1855 case kCondA: 1856 case kCondAE: { 1857 const uint32_t value_low = Low32Bits(value); 1858 Operand operand_low(value_low); 1859 1860 __ Cmp(left_high, High32Bits(value)); 1861 1862 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, 1863 // we must ensure that the operands corresponding to the least significant 1864 // halves of the inputs fit into a 16-bit CMP encoding. 
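      // The 16-bit immediate form of CMP needs a low register and an 8-bit
      // immediate; otherwise the constant is first moved into a scratch
      // register, since CMP (register) has a 16-bit encoding even for high
      // registers. Illustrative sequence (sketch, not emitted verbatim):
      //   cmp left_high, #High32Bits(value)
      //   it  eq
      //   cmp left_low, <operand_low>   ; executed only if high halves match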
1865 if (!left_low.IsLow() || !IsUint<8>(value_low)) { 1866 operand_low = Operand(temps.Acquire()); 1867 __ Mov(LeaveFlags, operand_low.GetBaseRegister(), value_low); 1868 } 1869 1870 // We use the scope because of the IT block that follows. 1871 ExactAssemblyScope guard(codegen->GetVIXLAssembler(), 1872 2 * vixl32::k16BitT32InstructionSizeInBytes, 1873 CodeBufferCheckScope::kExactSize); 1874 1875 __ it(eq); 1876 __ cmp(eq, left_low, operand_low); 1877 ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite)); 1878 break; 1879 } 1880 case kCondLE: 1881 case kCondGT: 1882 // Trivially true or false. 1883 if (value == std::numeric_limits<int64_t>::max()) { 1884 __ Cmp(left_low, left_low); 1885 ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq); 1886 break; 1887 } 1888 1889 if (cond == kCondLE) { 1890 DCHECK_EQ(opposite, kCondGT); 1891 cond = kCondLT; 1892 opposite = kCondGE; 1893 } else { 1894 DCHECK_EQ(cond, kCondGT); 1895 DCHECK_EQ(opposite, kCondLE); 1896 cond = kCondGE; 1897 opposite = kCondLT; 1898 } 1899 1900 value++; 1901 FALLTHROUGH_INTENDED; 1902 case kCondGE: 1903 case kCondLT: { 1904 __ Cmp(left_low, Low32Bits(value)); 1905 __ Sbcs(temps.Acquire(), left_high, High32Bits(value)); 1906 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); 1907 break; 1908 } 1909 default: 1910 LOG(FATAL) << "Unreachable"; 1911 UNREACHABLE(); 1912 } 1913 1914 return ret; 1915 } 1916 1917 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest( 1918 HCondition* condition, 1919 bool invert, 1920 CodeGeneratorARMVIXL* codegen) { 1921 DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64); 1922 1923 const LocationSummary* const locations = condition->GetLocations(); 1924 IfCondition cond = condition->GetCondition(); 1925 IfCondition opposite = condition->GetOppositeCondition(); 1926 1927 if (invert) { 1928 std::swap(cond, opposite); 1929 } 1930 1931 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne); 1932 Location left = locations->InAt(0); 1933 Location right = locations->InAt(1); 1934 1935 DCHECK(right.IsRegisterPair()); 1936 1937 switch (cond) { 1938 case kCondEQ: 1939 case kCondNE: 1940 case kCondB: 1941 case kCondBE: 1942 case kCondA: 1943 case kCondAE: { 1944 __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); 1945 1946 // We use the scope because of the IT block that follows. 
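      // Note: the exact-size scope both documents the expected encodings and
      // keeps VIXL from emitting anything else (e.g. a literal pool) between
      // the `it` and the `cmp` it predicates.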
1947 ExactAssemblyScope guard(codegen->GetVIXLAssembler(), 1948 2 * vixl32::k16BitT32InstructionSizeInBytes, 1949 CodeBufferCheckScope::kExactSize); 1950 1951 __ it(eq); 1952 __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right)); 1953 ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite)); 1954 break; 1955 } 1956 case kCondLE: 1957 case kCondGT: 1958 if (cond == kCondLE) { 1959 DCHECK_EQ(opposite, kCondGT); 1960 cond = kCondGE; 1961 opposite = kCondLT; 1962 } else { 1963 DCHECK_EQ(cond, kCondGT); 1964 DCHECK_EQ(opposite, kCondLE); 1965 cond = kCondLT; 1966 opposite = kCondGE; 1967 } 1968 1969 std::swap(left, right); 1970 FALLTHROUGH_INTENDED; 1971 case kCondGE: 1972 case kCondLT: { 1973 UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); 1974 1975 __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); 1976 __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right)); 1977 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); 1978 break; 1979 } 1980 default: 1981 LOG(FATAL) << "Unreachable"; 1982 UNREACHABLE(); 1983 } 1984 1985 return ret; 1986 } 1987 1988 static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition, 1989 bool invert, 1990 CodeGeneratorARMVIXL* codegen) { 1991 const DataType::Type type = condition->GetLeft()->GetType(); 1992 IfCondition cond = condition->GetCondition(); 1993 IfCondition opposite = condition->GetOppositeCondition(); 1994 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne); 1995 1996 if (invert) { 1997 std::swap(cond, opposite); 1998 } 1999 2000 if (type == DataType::Type::kInt64) { 2001 ret = condition->GetLocations()->InAt(1).IsConstant() 2002 ? GenerateLongTestConstant(condition, invert, codegen) 2003 : GenerateLongTest(condition, invert, codegen); 2004 } else if (DataType::IsFloatingPointType(type)) { 2005 GenerateVcmp(condition, codegen); 2006 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); 2007 ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()), 2008 ARMFPCondition(opposite, condition->IsGtBias())); 2009 } else { 2010 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type; 2011 __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1)); 2012 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); 2013 } 2014 2015 return ret; 2016 } 2017 2018 static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) { 2019 const vixl32::Register out = OutputRegister(cond); 2020 const auto condition = GenerateTest(cond, false, codegen); 2021 2022 __ Mov(LeaveFlags, out, 0); 2023 2024 if (out.IsLow()) { 2025 // We use the scope because of the IT block that follows. 
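    // Illustrative sequence for a low output register (sketch):
    //   mov out, #0
    //   it  <cond>
    //   mov<cond> out, #1
    // High registers have no 16-bit conditional MOV, hence the branch-based
    // sequence in the else-branch below.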
2026 ExactAssemblyScope guard(codegen->GetVIXLAssembler(), 2027 2 * vixl32::k16BitT32InstructionSizeInBytes, 2028 CodeBufferCheckScope::kExactSize); 2029 2030 __ it(condition.first); 2031 __ mov(condition.first, out, 1); 2032 } else { 2033 vixl32::Label done_label; 2034 vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label); 2035 2036 __ B(condition.second, final_label, /* far_target */ false); 2037 __ Mov(out, 1); 2038 2039 if (done_label.IsReferenced()) { 2040 __ Bind(&done_label); 2041 } 2042 } 2043 } 2044 2045 static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { 2046 DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64); 2047 2048 const LocationSummary* const locations = cond->GetLocations(); 2049 IfCondition condition = cond->GetCondition(); 2050 const vixl32::Register out = OutputRegister(cond); 2051 const Location left = locations->InAt(0); 2052 const Location right = locations->InAt(1); 2053 vixl32::Register left_high = HighRegisterFrom(left); 2054 vixl32::Register left_low = LowRegisterFrom(left); 2055 vixl32::Register temp; 2056 UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); 2057 2058 if (right.IsConstant()) { 2059 IfCondition opposite = cond->GetOppositeCondition(); 2060 const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), 2061 &condition, 2062 &opposite); 2063 Operand right_high = High32Bits(value); 2064 Operand right_low = Low32Bits(value); 2065 2066 // The output uses Location::kNoOutputOverlap. 2067 if (out.Is(left_high)) { 2068 std::swap(left_low, left_high); 2069 std::swap(right_low, right_high); 2070 } 2071 2072 __ Sub(out, left_low, right_low); 2073 temp = temps.Acquire(); 2074 __ Sub(temp, left_high, right_high); 2075 } else { 2076 DCHECK(right.IsRegisterPair()); 2077 temp = temps.Acquire(); 2078 __ Sub(temp, left_high, HighRegisterFrom(right)); 2079 __ Sub(out, left_low, LowRegisterFrom(right)); 2080 } 2081 2082 // Need to check after calling AdjustConstantForCondition(). 2083 DCHECK(condition == kCondEQ || condition == kCondNE) << condition; 2084 2085 if (condition == kCondNE && out.IsLow()) { 2086 __ Orrs(out, out, temp); 2087 2088 // We use the scope because of the IT block that follows. 2089 ExactAssemblyScope guard(codegen->GetVIXLAssembler(), 2090 2 * vixl32::k16BitT32InstructionSizeInBytes, 2091 CodeBufferCheckScope::kExactSize); 2092 2093 __ it(ne); 2094 __ mov(ne, out, 1); 2095 } else { 2096 __ Orr(out, out, temp); 2097 codegen->GenerateConditionWithZero(condition, out, out, temp); 2098 } 2099 } 2100 2101 static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { 2102 DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64); 2103 2104 const LocationSummary* const locations = cond->GetLocations(); 2105 IfCondition condition = cond->GetCondition(); 2106 const vixl32::Register out = OutputRegister(cond); 2107 const Location left = locations->InAt(0); 2108 const Location right = locations->InAt(1); 2109 2110 if (right.IsConstant()) { 2111 IfCondition opposite = cond->GetOppositeCondition(); 2112 2113 // Comparisons against 0 are common enough to deserve special attention. 2114 if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) { 2115 switch (condition) { 2116 case kCondNE: 2117 case kCondA: 2118 if (out.IsLow()) { 2119 // We only care if both input registers are 0 or not. 2120 __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left)); 2121 2122 // We use the scope because of the IT block that follows. 
2123 ExactAssemblyScope guard(codegen->GetVIXLAssembler(), 2124 2 * vixl32::k16BitT32InstructionSizeInBytes, 2125 CodeBufferCheckScope::kExactSize); 2126 2127 __ it(ne); 2128 __ mov(ne, out, 1); 2129 return; 2130 } 2131 2132 FALLTHROUGH_INTENDED; 2133 case kCondEQ: 2134 case kCondBE: 2135 // We only care if both input registers are 0 or not. 2136 __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left)); 2137 codegen->GenerateConditionWithZero(condition, out, out); 2138 return; 2139 case kCondLT: 2140 case kCondGE: 2141 // We only care about the sign bit. 2142 FALLTHROUGH_INTENDED; 2143 case kCondAE: 2144 case kCondB: 2145 codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left)); 2146 return; 2147 case kCondLE: 2148 case kCondGT: 2149 default: 2150 break; 2151 } 2152 } 2153 } 2154 2155 // If `out` is a low register, then the GenerateConditionGeneric() 2156 // function generates a shorter code sequence that is still branchless. 2157 if ((condition == kCondEQ || condition == kCondNE) && !out.IsLow()) { 2158 GenerateEqualLong(cond, codegen); 2159 return; 2160 } 2161 2162 GenerateConditionGeneric(cond, codegen); 2163 } 2164 2165 static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, 2166 CodeGeneratorARMVIXL* codegen) { 2167 const DataType::Type type = cond->GetLeft()->GetType(); 2168 2169 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type; 2170 2171 if (type == DataType::Type::kInt64) { 2172 GenerateConditionLong(cond, codegen); 2173 return; 2174 } 2175 2176 IfCondition condition = cond->GetCondition(); 2177 vixl32::Register in = InputRegisterAt(cond, 0); 2178 const vixl32::Register out = OutputRegister(cond); 2179 const Location right = cond->GetLocations()->InAt(1); 2180 int64_t value; 2181 2182 if (right.IsConstant()) { 2183 IfCondition opposite = cond->GetOppositeCondition(); 2184 2185 value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite); 2186 2187 // Comparisons against 0 are common enough to deserve special attention. 2188 if (value == 0) { 2189 switch (condition) { 2190 case kCondNE: 2191 case kCondA: 2192 if (out.IsLow() && out.Is(in)) { 2193 __ Cmp(out, 0); 2194 2195 // We use the scope because of the IT block that follows. 2196 ExactAssemblyScope guard(codegen->GetVIXLAssembler(), 2197 2 * vixl32::k16BitT32InstructionSizeInBytes, 2198 CodeBufferCheckScope::kExactSize); 2199 2200 __ it(ne); 2201 __ mov(ne, out, 1); 2202 return; 2203 } 2204 2205 FALLTHROUGH_INTENDED; 2206 case kCondEQ: 2207 case kCondBE: 2208 case kCondLT: 2209 case kCondGE: 2210 case kCondAE: 2211 case kCondB: 2212 codegen->GenerateConditionWithZero(condition, out, in); 2213 return; 2214 case kCondLE: 2215 case kCondGT: 2216 default: 2217 break; 2218 } 2219 } 2220 } 2221 2222 if (condition == kCondEQ || condition == kCondNE) { 2223 Operand operand(0); 2224 2225 if (right.IsConstant()) { 2226 operand = Operand::From(value); 2227 } else if (out.Is(RegisterFrom(right))) { 2228 // Avoid 32-bit instructions if possible. 2229 operand = InputOperandAt(cond, 0); 2230 in = RegisterFrom(right); 2231 } else { 2232 operand = InputOperandAt(cond, 1); 2233 } 2234 2235 if (condition == kCondNE && out.IsLow()) { 2236 __ Subs(out, in, operand); 2237 2238 // We use the scope because of the IT block that follows. 
2239 ExactAssemblyScope guard(codegen->GetVIXLAssembler(), 2240 2 * vixl32::k16BitT32InstructionSizeInBytes, 2241 CodeBufferCheckScope::kExactSize); 2242 2243 __ it(ne); 2244 __ mov(ne, out, 1); 2245 } else { 2246 __ Sub(out, in, operand); 2247 codegen->GenerateConditionWithZero(condition, out, out); 2248 } 2249 2250 return; 2251 } 2252 2253 GenerateConditionGeneric(cond, codegen); 2254 } 2255 2256 static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) { 2257 const DataType::Type type = constant->GetType(); 2258 bool ret = false; 2259 2260 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type; 2261 2262 if (type == DataType::Type::kInt64) { 2263 const uint64_t value = Uint64ConstantFrom(constant); 2264 2265 ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value)); 2266 } else { 2267 ret = IsUint<8>(Int32ConstantFrom(constant)); 2268 } 2269 2270 return ret; 2271 } 2272 2273 static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) { 2274 DCHECK(!DataType::IsFloatingPointType(constant->GetType())); 2275 2276 if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) { 2277 return Location::ConstantLocation(constant->AsConstant()); 2278 } 2279 2280 return Location::RequiresRegister(); 2281 } 2282 2283 static bool CanGenerateConditionalMove(const Location& out, const Location& src) { 2284 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, 2285 // we check that we are not dealing with floating-point output (there is no 2286 // 16-bit VMOV encoding). 2287 if (!out.IsRegister() && !out.IsRegisterPair()) { 2288 return false; 2289 } 2290 2291 // For constants, we also check that the output is in one or two low registers, 2292 // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit 2293 // MOV encoding can be used. 2294 if (src.IsConstant()) { 2295 if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) { 2296 return false; 2297 } 2298 2299 if (out.IsRegister()) { 2300 if (!RegisterFrom(out).IsLow()) { 2301 return false; 2302 } 2303 } else { 2304 DCHECK(out.IsRegisterPair()); 2305 2306 if (!HighRegisterFrom(out).IsLow()) { 2307 return false; 2308 } 2309 } 2310 } 2311 2312 return true; 2313 } 2314 2315 #undef __ 2316 2317 vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction, 2318 vixl32::Label* final_label) { 2319 DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck()); 2320 DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall()); 2321 2322 const HBasicBlock* const block = instruction->GetBlock(); 2323 const HLoopInformation* const info = block->GetLoopInformation(); 2324 HInstruction* const next = instruction->GetNext(); 2325 2326 // Avoid a branch to a branch. 
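  // That is, if the next instruction is an HGoto, branch directly to that
  // goto's successor, unless the goto is a loop back edge with a pending
  // suspend check, whose code must still execute.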
  if (next->IsGoto() && (info == nullptr ||
                         !info->IsBackEdge(*block) ||
                         !info->HasSuspendCheck())) {
    final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
  }

  return final_label;
}

CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
                                           const ArmInstructionSetFeatures& isa_features,
                                           const CompilerOptions& compiler_options,
                                           OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfCoreRegisters,
                    kNumberOfSRegisters,
                    kNumberOfRegisterPairs,
                    kCoreCalleeSaves.GetList(),
                    ComputeSRegisterListMask(kFpuCalleeSaves),
                    compiler_options,
                    stats),
      block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetAllocator(), this),
      assembler_(graph->GetAllocator()),
      isa_features_(isa_features),
      uint32_literals_(std::less<uint32_t>(),
                       graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_string_patches_(StringReferenceValueComparator(),
                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(TypeReferenceValueComparator(),
                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
  // Always save the LR register to mimic Quick.
  AddAllocatedRegister(Location::RegisterLocation(LR));
  // Give D30 and D31 as scratch registers to VIXL. The register allocator only works on
  // S0-S31, which alias to D0-D15.
  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
}

void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
  uint32_t num_entries = switch_instr_->GetNumEntries();
  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);

  // We are about to use the assembler to place literals directly. Make sure we have enough
  // underlying code buffer and that we have generated a jump table of the right size.
  codegen->GetVIXLAssembler()->GetBuffer()->Align();
  ExactAssemblyScope aas(codegen->GetVIXLAssembler(),
                         num_entries * sizeof(int32_t),
                         CodeBufferCheckScope::kMaximumSize);
  // TODO(VIXL): Check that using lower case bind is fine here.
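  // Sketch of the emitted table for a 3-entry packed switch:
  //   table_start_: .word <offset of successor #0>
  //                 .word <offset of successor #1>
  //                 .word <offset of successor #2>
  // The offsets are filled in later by FixTable(), relative to table_start_
  // (+1 in T32 so a BX to the computed address keeps the Thumb bit set).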
2387 codegen->GetVIXLAssembler()->bind(&table_start_); 2388 for (uint32_t i = 0; i < num_entries; i++) { 2389 codegen->GetVIXLAssembler()->place(bb_addresses_[i].get()); 2390 } 2391 } 2392 2393 void JumpTableARMVIXL::FixTable(CodeGeneratorARMVIXL* codegen) { 2394 uint32_t num_entries = switch_instr_->GetNumEntries(); 2395 DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold); 2396 2397 const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors(); 2398 for (uint32_t i = 0; i < num_entries; i++) { 2399 vixl32::Label* target_label = codegen->GetLabelOf(successors[i]); 2400 DCHECK(target_label->IsBound()); 2401 int32_t jump_offset = target_label->GetLocation() - table_start_.GetLocation(); 2402 // When doing BX to address we need to have lower bit set to 1 in T32. 2403 if (codegen->GetVIXLAssembler()->IsUsingT32()) { 2404 jump_offset++; 2405 } 2406 DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min()); 2407 DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max()); 2408 2409 bb_addresses_[i].get()->UpdateValue(jump_offset, codegen->GetVIXLAssembler()->GetBuffer()); 2410 } 2411 } 2412 2413 void CodeGeneratorARMVIXL::FixJumpTables() { 2414 for (auto&& jump_table : jump_tables_) { 2415 jump_table->FixTable(this); 2416 } 2417 } 2418 2419 #define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT 2420 2421 void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) { 2422 FixJumpTables(); 2423 GetAssembler()->FinalizeCode(); 2424 CodeGenerator::Finalize(allocator); 2425 } 2426 2427 void CodeGeneratorARMVIXL::SetupBlockedRegisters() const { 2428 // Stack register, LR and PC are always reserved. 2429 blocked_core_registers_[SP] = true; 2430 blocked_core_registers_[LR] = true; 2431 blocked_core_registers_[PC] = true; 2432 2433 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2434 // Reserve marking register. 2435 blocked_core_registers_[MR] = true; 2436 } 2437 2438 // Reserve thread register. 2439 blocked_core_registers_[TR] = true; 2440 2441 // Reserve temp register. 2442 blocked_core_registers_[IP] = true; 2443 2444 if (GetGraph()->IsDebuggable()) { 2445 // Stubs do not save callee-save floating point registers. If the graph 2446 // is debuggable, we need to deal with these registers differently. For 2447 // now, just block them. 2448 for (uint32_t i = kFpuCalleeSaves.GetFirstSRegister().GetCode(); 2449 i <= kFpuCalleeSaves.GetLastSRegister().GetCode(); 2450 ++i) { 2451 blocked_fpu_registers_[i] = true; 2452 } 2453 } 2454 } 2455 2456 InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph, 2457 CodeGeneratorARMVIXL* codegen) 2458 : InstructionCodeGenerator(graph, codegen), 2459 assembler_(codegen->GetAssembler()), 2460 codegen_(codegen) {} 2461 2462 void CodeGeneratorARMVIXL::ComputeSpillMask() { 2463 core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; 2464 DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; 2465 // There is no easy instruction to restore just the PC on thumb2. We spill and 2466 // restore another arbitrary register. 2467 core_spill_mask_ |= (1 << kCoreAlwaysSpillRegister.GetCode()); 2468 fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; 2469 // We use vpush and vpop for saving and restoring floating point registers, which take 2470 // a SRegister and the number of registers to save/restore after that SRegister. 
We 2471 // therefore update the `fpu_spill_mask_` to also contain those registers not allocated, 2472 // but in the range. 2473 if (fpu_spill_mask_ != 0) { 2474 uint32_t least_significant_bit = LeastSignificantBit(fpu_spill_mask_); 2475 uint32_t most_significant_bit = MostSignificantBit(fpu_spill_mask_); 2476 for (uint32_t i = least_significant_bit + 1 ; i < most_significant_bit; ++i) { 2477 fpu_spill_mask_ |= (1 << i); 2478 } 2479 } 2480 } 2481 2482 void CodeGeneratorARMVIXL::GenerateFrameEntry() { 2483 bool skip_overflow_check = 2484 IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); 2485 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); 2486 __ Bind(&frame_entry_label_); 2487 2488 if (GetCompilerOptions().CountHotnessInCompiledCode()) { 2489 UseScratchRegisterScope temps(GetVIXLAssembler()); 2490 vixl32::Register temp = temps.Acquire(); 2491 __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); 2492 __ Add(temp, temp, 1); 2493 __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); 2494 } 2495 2496 if (HasEmptyFrame()) { 2497 return; 2498 } 2499 2500 if (!skip_overflow_check) { 2501 // Using r4 instead of IP saves 2 bytes. 2502 UseScratchRegisterScope temps(GetVIXLAssembler()); 2503 vixl32::Register temp; 2504 // TODO: Remove this check when R4 is made a callee-save register 2505 // in ART compiled code (b/72801708). Currently we need to make 2506 // sure r4 is not blocked, e.g. in special purpose 2507 // TestCodeGeneratorARMVIXL; also asserting that r4 is available 2508 // here. 2509 if (!blocked_core_registers_[R4]) { 2510 for (vixl32::Register reg : kParameterCoreRegistersVIXL) { 2511 DCHECK(!reg.Is(r4)); 2512 } 2513 DCHECK(!kCoreCalleeSaves.Includes(r4)); 2514 temp = r4; 2515 } else { 2516 temp = temps.Acquire(); 2517 } 2518 __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm))); 2519 // The load must immediately precede RecordPcInfo. 2520 ExactAssemblyScope aas(GetVIXLAssembler(), 2521 vixl32::kMaxInstructionSizeInBytes, 2522 CodeBufferCheckScope::kMaximumSize); 2523 __ ldr(temp, MemOperand(temp)); 2524 RecordPcInfo(nullptr, 0); 2525 } 2526 2527 __ Push(RegisterList(core_spill_mask_)); 2528 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_)); 2529 GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister), 2530 0, 2531 core_spill_mask_, 2532 kArmWordSize); 2533 if (fpu_spill_mask_ != 0) { 2534 uint32_t first = LeastSignificantBit(fpu_spill_mask_); 2535 2536 // Check that list is contiguous. 2537 DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_))); 2538 2539 __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_))); 2540 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_)); 2541 GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0), 0, fpu_spill_mask_, kArmWordSize); 2542 } 2543 2544 int adjust = GetFrameSize() - FrameEntrySpillSize(); 2545 __ Sub(sp, sp, adjust); 2546 GetAssembler()->cfi().AdjustCFAOffset(adjust); 2547 2548 // Save the current method if we need it. Note that we do not 2549 // do this in HCurrentMethod, as the instruction might have been removed 2550 // in the SSA graph. 
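  // At this point the frame looks like (sketch, highest address first):
  //   core callee-saves (core_spill_mask_)   <- Push() above
  //   FP callee-saves (fpu_spill_mask_)      <- Vpush() above
  //   locals / spill area (`adjust` bytes)   <- sp now points at the bottom
  // and the store below puts the ArtMethod* at [sp, #0].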
2551 if (RequiresCurrentMethod()) { 2552 GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0); 2553 } 2554 2555 if (GetGraph()->HasShouldDeoptimizeFlag()) { 2556 UseScratchRegisterScope temps(GetVIXLAssembler()); 2557 vixl32::Register temp = temps.Acquire(); 2558 // Initialize should_deoptimize flag to 0. 2559 __ Mov(temp, 0); 2560 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag()); 2561 } 2562 2563 MaybeGenerateMarkingRegisterCheck(/* code */ 1); 2564 } 2565 2566 void CodeGeneratorARMVIXL::GenerateFrameExit() { 2567 if (HasEmptyFrame()) { 2568 __ Bx(lr); 2569 return; 2570 } 2571 GetAssembler()->cfi().RememberState(); 2572 int adjust = GetFrameSize() - FrameEntrySpillSize(); 2573 __ Add(sp, sp, adjust); 2574 GetAssembler()->cfi().AdjustCFAOffset(-adjust); 2575 if (fpu_spill_mask_ != 0) { 2576 uint32_t first = LeastSignificantBit(fpu_spill_mask_); 2577 2578 // Check that list is contiguous. 2579 DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_))); 2580 2581 __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_))); 2582 GetAssembler()->cfi().AdjustCFAOffset( 2583 -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_)); 2584 GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_); 2585 } 2586 // Pop LR into PC to return. 2587 DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U); 2588 uint32_t pop_mask = (core_spill_mask_ & (~(1 << kLrCode))) | 1 << kPcCode; 2589 __ Pop(RegisterList(pop_mask)); 2590 GetAssembler()->cfi().RestoreState(); 2591 GetAssembler()->cfi().DefCFAOffset(GetFrameSize()); 2592 } 2593 2594 void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) { 2595 __ Bind(GetLabelOf(block)); 2596 } 2597 2598 Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) { 2599 switch (type) { 2600 case DataType::Type::kReference: 2601 case DataType::Type::kBool: 2602 case DataType::Type::kUint8: 2603 case DataType::Type::kInt8: 2604 case DataType::Type::kUint16: 2605 case DataType::Type::kInt16: 2606 case DataType::Type::kInt32: { 2607 uint32_t index = gp_index_++; 2608 uint32_t stack_index = stack_index_++; 2609 if (index < calling_convention.GetNumberOfRegisters()) { 2610 return LocationFrom(calling_convention.GetRegisterAt(index)); 2611 } else { 2612 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index)); 2613 } 2614 } 2615 2616 case DataType::Type::kInt64: { 2617 uint32_t index = gp_index_; 2618 uint32_t stack_index = stack_index_; 2619 gp_index_ += 2; 2620 stack_index_ += 2; 2621 if (index + 1 < calling_convention.GetNumberOfRegisters()) { 2622 if (calling_convention.GetRegisterAt(index).Is(r1)) { 2623 // Skip R1, and use R2_R3 instead. 
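          // (A 64-bit argument must occupy an even/odd register pair such as
          // r2/r3, cf. ExpectedPairLayout(), so it can never start in r1.)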
2624 gp_index_++; 2625 index++; 2626 } 2627 } 2628 if (index + 1 < calling_convention.GetNumberOfRegisters()) { 2629 DCHECK_EQ(calling_convention.GetRegisterAt(index).GetCode() + 1, 2630 calling_convention.GetRegisterAt(index + 1).GetCode()); 2631 2632 return LocationFrom(calling_convention.GetRegisterAt(index), 2633 calling_convention.GetRegisterAt(index + 1)); 2634 } else { 2635 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index)); 2636 } 2637 } 2638 2639 case DataType::Type::kFloat32: { 2640 uint32_t stack_index = stack_index_++; 2641 if (float_index_ % 2 == 0) { 2642 float_index_ = std::max(double_index_, float_index_); 2643 } 2644 if (float_index_ < calling_convention.GetNumberOfFpuRegisters()) { 2645 return LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++)); 2646 } else { 2647 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index)); 2648 } 2649 } 2650 2651 case DataType::Type::kFloat64: { 2652 double_index_ = std::max(double_index_, RoundUp(float_index_, 2)); 2653 uint32_t stack_index = stack_index_; 2654 stack_index_ += 2; 2655 if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) { 2656 uint32_t index = double_index_; 2657 double_index_ += 2; 2658 Location result = LocationFrom( 2659 calling_convention.GetFpuRegisterAt(index), 2660 calling_convention.GetFpuRegisterAt(index + 1)); 2661 DCHECK(ExpectedPairLayout(result)); 2662 return result; 2663 } else { 2664 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index)); 2665 } 2666 } 2667 2668 case DataType::Type::kUint32: 2669 case DataType::Type::kUint64: 2670 case DataType::Type::kVoid: 2671 LOG(FATAL) << "Unexpected parameter type " << type; 2672 break; 2673 } 2674 return Location::NoLocation(); 2675 } 2676 2677 Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type) const { 2678 switch (type) { 2679 case DataType::Type::kReference: 2680 case DataType::Type::kBool: 2681 case DataType::Type::kUint8: 2682 case DataType::Type::kInt8: 2683 case DataType::Type::kUint16: 2684 case DataType::Type::kInt16: 2685 case DataType::Type::kUint32: 2686 case DataType::Type::kInt32: { 2687 return LocationFrom(r0); 2688 } 2689 2690 case DataType::Type::kFloat32: { 2691 return LocationFrom(s0); 2692 } 2693 2694 case DataType::Type::kUint64: 2695 case DataType::Type::kInt64: { 2696 return LocationFrom(r0, r1); 2697 } 2698 2699 case DataType::Type::kFloat64: { 2700 return LocationFrom(s0, s1); 2701 } 2702 2703 case DataType::Type::kVoid: 2704 return Location::NoLocation(); 2705 } 2706 2707 UNREACHABLE(); 2708 } 2709 2710 Location InvokeDexCallingConventionVisitorARMVIXL::GetMethodLocation() const { 2711 return LocationFrom(kMethodRegister); 2712 } 2713 2714 void CodeGeneratorARMVIXL::Move32(Location destination, Location source) { 2715 if (source.Equals(destination)) { 2716 return; 2717 } 2718 if (destination.IsRegister()) { 2719 if (source.IsRegister()) { 2720 __ Mov(RegisterFrom(destination), RegisterFrom(source)); 2721 } else if (source.IsFpuRegister()) { 2722 __ Vmov(RegisterFrom(destination), SRegisterFrom(source)); 2723 } else { 2724 GetAssembler()->LoadFromOffset(kLoadWord, 2725 RegisterFrom(destination), 2726 sp, 2727 source.GetStackIndex()); 2728 } 2729 } else if (destination.IsFpuRegister()) { 2730 if (source.IsRegister()) { 2731 __ Vmov(SRegisterFrom(destination), RegisterFrom(source)); 2732 } else if (source.IsFpuRegister()) { 2733 __ Vmov(SRegisterFrom(destination), SRegisterFrom(source)); 2734 } else 
{
      GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
    }
  } else {
    DCHECK(destination.IsStackSlot()) << destination;
    if (source.IsRegister()) {
      GetAssembler()->StoreToOffset(kStoreWord,
                                    RegisterFrom(source),
                                    sp,
                                    destination.GetStackIndex());
    } else if (source.IsFpuRegister()) {
      GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
    } else {
      DCHECK(source.IsStackSlot()) << source;
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register temp = temps.Acquire();
      GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
      GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
    }
  }
}

void CodeGeneratorARMVIXL::MoveConstant(Location location, int32_t value) {
  DCHECK(location.IsRegister());
  __ Mov(RegisterFrom(location), value);
}

void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
  // TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in
  // `ParallelMoveResolverARMVIXL::EmitMove`, as is done in the `arm64` backend.
  HParallelMove move(GetGraph()->GetAllocator());
  move.AddMove(src, dst, dst_type, nullptr);
  GetMoveResolver()->EmitNativeCode(&move);
}

void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location, LocationSummary* locations) {
  if (location.IsRegister()) {
    locations->AddTemp(location);
  } else if (location.IsRegisterPair()) {
    locations->AddTemp(LocationFrom(LowRegisterFrom(location)));
    locations->AddTemp(LocationFrom(HighRegisterFrom(location)));
  } else {
    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
  }
}

void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                         HInstruction* instruction,
                                         uint32_t dex_pc,
                                         SlowPathCode* slow_path) {
  ValidateInvokeRuntime(entrypoint, instruction, slow_path);
  __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value()));
  // Ensure the pc position is recorded immediately after the `blx` instruction.
  // The T32 `blx` instruction has only a 16-bit encoding, which is why a
  // stricter, exact-size check is used for the scope below.
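  // Illustrative emitted sequence (sketch):
  //   ldr lr, [tr, #<entrypoint offset>]  ; load entrypoint from Thread*
  //   blx lr                              ; 16-bit T32 encoding
  // The exact-size scope guarantees the pc recorded by RecordPcInfo() points
  // right after the `blx`.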
2788 ExactAssemblyScope aas(GetVIXLAssembler(), 2789 vixl32::k16BitT32InstructionSizeInBytes, 2790 CodeBufferCheckScope::kExactSize); 2791 __ blx(lr); 2792 if (EntrypointRequiresStackMap(entrypoint)) { 2793 RecordPcInfo(instruction, dex_pc, slow_path); 2794 } 2795 } 2796 2797 void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, 2798 HInstruction* instruction, 2799 SlowPathCode* slow_path) { 2800 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); 2801 __ Ldr(lr, MemOperand(tr, entry_point_offset)); 2802 __ Blx(lr); 2803 } 2804 2805 void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) { 2806 if (successor->IsExitBlock()) { 2807 DCHECK(got->GetPrevious()->AlwaysThrows()); 2808 return; // no code needed 2809 } 2810 2811 HBasicBlock* block = got->GetBlock(); 2812 HInstruction* previous = got->GetPrevious(); 2813 HLoopInformation* info = block->GetLoopInformation(); 2814 2815 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { 2816 if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { 2817 UseScratchRegisterScope temps(GetVIXLAssembler()); 2818 vixl32::Register temp = temps.Acquire(); 2819 __ Push(vixl32::Register(kMethodRegister)); 2820 GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize); 2821 __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); 2822 __ Add(temp, temp, 1); 2823 __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); 2824 __ Pop(vixl32::Register(kMethodRegister)); 2825 } 2826 GenerateSuspendCheck(info->GetSuspendCheck(), successor); 2827 return; 2828 } 2829 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { 2830 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); 2831 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 2); 2832 } 2833 if (!codegen_->GoesToNextBlock(block, successor)) { 2834 __ B(codegen_->GetLabelOf(successor)); 2835 } 2836 } 2837 2838 void LocationsBuilderARMVIXL::VisitGoto(HGoto* got) { 2839 got->SetLocations(nullptr); 2840 } 2841 2842 void InstructionCodeGeneratorARMVIXL::VisitGoto(HGoto* got) { 2843 HandleGoto(got, got->GetSuccessor()); 2844 } 2845 2846 void LocationsBuilderARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) { 2847 try_boundary->SetLocations(nullptr); 2848 } 2849 2850 void InstructionCodeGeneratorARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) { 2851 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor(); 2852 if (!successor->IsExitBlock()) { 2853 HandleGoto(try_boundary, successor); 2854 } 2855 } 2856 2857 void LocationsBuilderARMVIXL::VisitExit(HExit* exit) { 2858 exit->SetLocations(nullptr); 2859 } 2860 2861 void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { 2862 } 2863 2864 void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition, 2865 vixl32::Label* true_target, 2866 vixl32::Label* false_target, 2867 bool is_far_target) { 2868 if (true_target == false_target) { 2869 DCHECK(true_target != nullptr); 2870 __ B(true_target); 2871 return; 2872 } 2873 2874 vixl32::Label* non_fallthrough_target; 2875 bool invert; 2876 bool emit_both_branches; 2877 2878 if (true_target == nullptr) { 2879 // The true target is fallthrough. 
2880 DCHECK(false_target != nullptr); 2881 non_fallthrough_target = false_target; 2882 invert = true; 2883 emit_both_branches = false; 2884 } else { 2885 non_fallthrough_target = true_target; 2886 invert = false; 2887 // Either the false target is fallthrough, or there is no fallthrough 2888 // and both branches must be emitted. 2889 emit_both_branches = (false_target != nullptr); 2890 } 2891 2892 const auto cond = GenerateTest(condition, invert, codegen_); 2893 2894 __ B(cond.first, non_fallthrough_target, is_far_target); 2895 2896 if (emit_both_branches) { 2897 // No target falls through, we need to branch. 2898 __ B(false_target); 2899 } 2900 } 2901 2902 void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction, 2903 size_t condition_input_index, 2904 vixl32::Label* true_target, 2905 vixl32::Label* false_target, 2906 bool far_target) { 2907 HInstruction* cond = instruction->InputAt(condition_input_index); 2908 2909 if (true_target == nullptr && false_target == nullptr) { 2910 // Nothing to do. The code always falls through. 2911 return; 2912 } else if (cond->IsIntConstant()) { 2913 // Constant condition, statically compared against "true" (integer value 1). 2914 if (cond->AsIntConstant()->IsTrue()) { 2915 if (true_target != nullptr) { 2916 __ B(true_target); 2917 } 2918 } else { 2919 DCHECK(cond->AsIntConstant()->IsFalse()) << Int32ConstantFrom(cond); 2920 if (false_target != nullptr) { 2921 __ B(false_target); 2922 } 2923 } 2924 return; 2925 } 2926 2927 // The following code generates these patterns: 2928 // (1) true_target == nullptr && false_target != nullptr 2929 // - opposite condition true => branch to false_target 2930 // (2) true_target != nullptr && false_target == nullptr 2931 // - condition true => branch to true_target 2932 // (3) true_target != nullptr && false_target != nullptr 2933 // - condition true => branch to true_target 2934 // - branch to false_target 2935 if (IsBooleanValueOrMaterializedCondition(cond)) { 2936 // Condition has been materialized, compare the output to 0. 2937 if (kIsDebugBuild) { 2938 Location cond_val = instruction->GetLocations()->InAt(condition_input_index); 2939 DCHECK(cond_val.IsRegister()); 2940 } 2941 if (true_target == nullptr) { 2942 __ CompareAndBranchIfZero(InputRegisterAt(instruction, condition_input_index), 2943 false_target, 2944 far_target); 2945 } else { 2946 __ CompareAndBranchIfNonZero(InputRegisterAt(instruction, condition_input_index), 2947 true_target, 2948 far_target); 2949 } 2950 } else { 2951 // Condition has not been materialized. Use its inputs as the comparison and 2952 // its condition as the branch condition. 2953 HCondition* condition = cond->AsCondition(); 2954 2955 // If this is a long or FP comparison that has been folded into 2956 // the HCondition, generate the comparison directly. 
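    // (Such comparisons need multi-instruction sequences, e.g. CMP + SBCS
    // for int64 or VCMP + VMRS for floating point, so they cannot reuse the
    // single CMP emitted below.)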
2957 DataType::Type type = condition->InputAt(0)->GetType(); 2958 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) { 2959 GenerateCompareTestAndBranch(condition, true_target, false_target, far_target); 2960 return; 2961 } 2962 2963 vixl32::Label* non_fallthrough_target; 2964 vixl32::Condition arm_cond = vixl32::Condition::None(); 2965 const vixl32::Register left = InputRegisterAt(cond, 0); 2966 const Operand right = InputOperandAt(cond, 1); 2967 2968 if (true_target == nullptr) { 2969 arm_cond = ARMCondition(condition->GetOppositeCondition()); 2970 non_fallthrough_target = false_target; 2971 } else { 2972 arm_cond = ARMCondition(condition->GetCondition()); 2973 non_fallthrough_target = true_target; 2974 } 2975 2976 if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) { 2977 if (arm_cond.Is(eq)) { 2978 __ CompareAndBranchIfZero(left, non_fallthrough_target, far_target); 2979 } else { 2980 DCHECK(arm_cond.Is(ne)); 2981 __ CompareAndBranchIfNonZero(left, non_fallthrough_target, far_target); 2982 } 2983 } else { 2984 __ Cmp(left, right); 2985 __ B(arm_cond, non_fallthrough_target, far_target); 2986 } 2987 } 2988 2989 // If neither branch falls through (case 3), the conditional branch to `true_target` 2990 // was already emitted (case 2) and we need to emit a jump to `false_target`. 2991 if (true_target != nullptr && false_target != nullptr) { 2992 __ B(false_target); 2993 } 2994 } 2995 2996 void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) { 2997 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); 2998 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { 2999 locations->SetInAt(0, Location::RequiresRegister()); 3000 } 3001 } 3002 3003 void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) { 3004 HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); 3005 HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); 3006 vixl32::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? 3007 nullptr : codegen_->GetLabelOf(true_successor); 3008 vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
3009 nullptr : codegen_->GetLabelOf(false_successor); 3010 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); 3011 } 3012 3013 void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) { 3014 LocationSummary* locations = new (GetGraph()->GetAllocator()) 3015 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); 3016 InvokeRuntimeCallingConventionARMVIXL calling_convention; 3017 RegisterSet caller_saves = RegisterSet::Empty(); 3018 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); 3019 locations->SetCustomSlowPathCallerSaves(caller_saves); 3020 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { 3021 locations->SetInAt(0, Location::RequiresRegister()); 3022 } 3023 } 3024 3025 void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) { 3026 SlowPathCodeARMVIXL* slow_path = 3027 deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize); 3028 GenerateTestAndBranch(deoptimize, 3029 /* condition_input_index */ 0, 3030 slow_path->GetEntryLabel(), 3031 /* false_target */ nullptr); 3032 } 3033 3034 void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 3035 LocationSummary* locations = new (GetGraph()->GetAllocator()) 3036 LocationSummary(flag, LocationSummary::kNoCall); 3037 locations->SetOut(Location::RequiresRegister()); 3038 } 3039 3040 void InstructionCodeGeneratorARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 3041 GetAssembler()->LoadFromOffset(kLoadWord, 3042 OutputRegister(flag), 3043 sp, 3044 codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); 3045 } 3046 3047 void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) { 3048 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); 3049 const bool is_floating_point = DataType::IsFloatingPointType(select->GetType()); 3050 3051 if (is_floating_point) { 3052 locations->SetInAt(0, Location::RequiresFpuRegister()); 3053 locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue())); 3054 } else { 3055 locations->SetInAt(0, Location::RequiresRegister()); 3056 locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue())); 3057 } 3058 3059 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { 3060 locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition())); 3061 // The code generator handles overlap with the values, but not with the condition. 
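    // Illustrative sketch (commentary only, simplified): with a materialized
    // condition the generated select behaves roughly like
    //   out = input0;                   // out is constrained to alias input 0
    //   if (cond != 0) { out = input1; }
    // so the output may share a register with the values, but must not share
    // one with the condition, which is still live when the move is predicated.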
3062 locations->SetOut(Location::SameAsFirstInput()); 3063 } else if (is_floating_point) { 3064 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 3065 } else { 3066 if (!locations->InAt(1).IsConstant()) { 3067 locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue())); 3068 } 3069 3070 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3071 } 3072 } 3073 3074 void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { 3075 HInstruction* const condition = select->GetCondition(); 3076 const LocationSummary* const locations = select->GetLocations(); 3077 const DataType::Type type = select->GetType(); 3078 const Location first = locations->InAt(0); 3079 const Location out = locations->Out(); 3080 const Location second = locations->InAt(1); 3081 Location src; 3082 3083 if (condition->IsIntConstant()) { 3084 if (condition->AsIntConstant()->IsFalse()) { 3085 src = first; 3086 } else { 3087 src = second; 3088 } 3089 3090 codegen_->MoveLocation(out, src, type); 3091 return; 3092 } 3093 3094 if (!DataType::IsFloatingPointType(type)) { 3095 bool invert = false; 3096 3097 if (out.Equals(second)) { 3098 src = first; 3099 invert = true; 3100 } else if (out.Equals(first)) { 3101 src = second; 3102 } else if (second.IsConstant()) { 3103 DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant())); 3104 src = second; 3105 } else if (first.IsConstant()) { 3106 DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant())); 3107 src = first; 3108 invert = true; 3109 } else { 3110 src = second; 3111 } 3112 3113 if (CanGenerateConditionalMove(out, src)) { 3114 if (!out.Equals(first) && !out.Equals(second)) { 3115 codegen_->MoveLocation(out, src.Equals(first) ? second : first, type); 3116 } 3117 3118 std::pair<vixl32::Condition, vixl32::Condition> cond(eq, ne); 3119 3120 if (IsBooleanValueOrMaterializedCondition(condition)) { 3121 __ Cmp(InputRegisterAt(select, 2), 0); 3122 cond = invert ? std::make_pair(eq, ne) : std::make_pair(ne, eq); 3123 } else { 3124 cond = GenerateTest(condition->AsCondition(), invert, codegen_); 3125 } 3126 3127 const size_t instr_count = out.IsRegisterPair() ? 4 : 2; 3128 // We use the scope because of the IT block that follows. 
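      // Illustrative shape of the emitted code (a sketch; actual registers and
      // condition vary):
      //   it  <cond>
      //   mov<cond> out_lo, src_lo
      //   it  <cond>
      //   mov<cond> out_hi, src_hi
      // Each it/mov is a 16-bit T32 encoding, which is why the scope requests
      // exactly instr_count * k16BitT32InstructionSizeInBytes bytes.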
3129 ExactAssemblyScope guard(GetVIXLAssembler(), 3130 instr_count * vixl32::k16BitT32InstructionSizeInBytes, 3131 CodeBufferCheckScope::kExactSize); 3132 3133 if (out.IsRegister()) { 3134 __ it(cond.first); 3135 __ mov(cond.first, RegisterFrom(out), OperandFrom(src, type)); 3136 } else { 3137 DCHECK(out.IsRegisterPair()); 3138 3139 Operand operand_high(0); 3140 Operand operand_low(0); 3141 3142 if (src.IsConstant()) { 3143 const int64_t value = Int64ConstantFrom(src); 3144 3145 operand_high = High32Bits(value); 3146 operand_low = Low32Bits(value); 3147 } else { 3148 DCHECK(src.IsRegisterPair()); 3149 operand_high = HighRegisterFrom(src); 3150 operand_low = LowRegisterFrom(src); 3151 } 3152 3153 __ it(cond.first); 3154 __ mov(cond.first, LowRegisterFrom(out), operand_low); 3155 __ it(cond.first); 3156 __ mov(cond.first, HighRegisterFrom(out), operand_high); 3157 } 3158 3159 return; 3160 } 3161 } 3162 3163 vixl32::Label* false_target = nullptr; 3164 vixl32::Label* true_target = nullptr; 3165 vixl32::Label select_end; 3166 vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end); 3167 3168 if (out.Equals(second)) { 3169 true_target = target; 3170 src = first; 3171 } else { 3172 false_target = target; 3173 src = second; 3174 3175 if (!out.Equals(first)) { 3176 codegen_->MoveLocation(out, first, type); 3177 } 3178 } 3179 3180 GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target */ false); 3181 codegen_->MoveLocation(out, src, type); 3182 3183 if (select_end.IsReferenced()) { 3184 __ Bind(&select_end); 3185 } 3186 } 3187 3188 void LocationsBuilderARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo* info) { 3189 new (GetGraph()->GetAllocator()) LocationSummary(info); 3190 } 3191 3192 void InstructionCodeGeneratorARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo*) { 3193 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. 3194 } 3195 3196 void CodeGeneratorARMVIXL::GenerateNop() { 3197 __ Nop(); 3198 } 3199 3200 // `temp` is an extra temporary register that is used for some conditions; 3201 // callers may not specify it, in which case the method will use a scratch 3202 // register instead. 3203 void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition, 3204 vixl32::Register out, 3205 vixl32::Register in, 3206 vixl32::Register temp) { 3207 switch (condition) { 3208 case kCondEQ: 3209 // x <= 0 iff x == 0 when the comparison is unsigned. 3210 case kCondBE: 3211 if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) { 3212 temp = out; 3213 } 3214 3215 // Avoid 32-bit instructions if possible; note that `in` and `temp` must be 3216 // different as well. 3217 if (in.IsLow() && temp.IsLow() && !in.Is(temp)) { 3218 // temp = - in; only 0 sets the carry flag. 3219 __ Rsbs(temp, in, 0); 3220 3221 if (out.Is(in)) { 3222 std::swap(in, temp); 3223 } 3224 3225 // out = - in + in + carry = carry 3226 __ Adc(out, temp, in); 3227 } else { 3228 // If `in` is 0, then it has 32 leading zeros, and less than that otherwise. 3229 __ Clz(out, in); 3230 // Any number less than 32 logically shifted right by 5 bits results in 0; 3231 // the same operation on 32 yields 1. 3232 __ Lsr(out, out, 5); 3233 } 3234 3235 break; 3236 case kCondNE: 3237 // x > 0 iff x != 0 when the comparison is unsigned. 
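    // Illustrative note (a sketch, not authoritative): the SUBS/SBC pair in
    // the shared body below computes the carry of (in - 1). E.g. for in = 5,
    // SUBS leaves C = 1 (no borrow) and SBC yields 5 - 4 - 0 = 1; for in = 0,
    // SUBS leaves C = 0 and SBC yields 0 - 0xffffffff - 1 == 0 (mod 2^32).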
3238 case kCondA: { 3239 UseScratchRegisterScope temps(GetVIXLAssembler()); 3240 3241 if (out.Is(in)) { 3242 if (!temp.IsValid() || in.Is(temp)) { 3243 temp = temps.Acquire(); 3244 } 3245 } else if (!temp.IsValid() || !temp.IsLow()) { 3246 temp = out; 3247 } 3248 3249 // temp = in - 1; only 0 does not set the carry flag. 3250 __ Subs(temp, in, 1); 3251 // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry 3252 __ Sbc(out, in, temp); 3253 break; 3254 } 3255 case kCondGE: 3256 __ Mvn(out, in); 3257 in = out; 3258 FALLTHROUGH_INTENDED; 3259 case kCondLT: 3260 // We only care about the sign bit. 3261 __ Lsr(out, in, 31); 3262 break; 3263 case kCondAE: 3264 // Trivially true. 3265 __ Mov(out, 1); 3266 break; 3267 case kCondB: 3268 // Trivially false. 3269 __ Mov(out, 0); 3270 break; 3271 default: 3272 LOG(FATAL) << "Unexpected condition " << condition; 3273 UNREACHABLE(); 3274 } 3275 } 3276 3277 void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) { 3278 LocationSummary* locations = 3279 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall); 3280 // Handle the long/FP comparisons made in instruction simplification. 3281 switch (cond->InputAt(0)->GetType()) { 3282 case DataType::Type::kInt64: 3283 locations->SetInAt(0, Location::RequiresRegister()); 3284 locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); 3285 if (!cond->IsEmittedAtUseSite()) { 3286 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3287 } 3288 break; 3289 3290 case DataType::Type::kFloat32: 3291 case DataType::Type::kFloat64: 3292 locations->SetInAt(0, Location::RequiresFpuRegister()); 3293 locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1))); 3294 if (!cond->IsEmittedAtUseSite()) { 3295 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3296 } 3297 break; 3298 3299 default: 3300 locations->SetInAt(0, Location::RequiresRegister()); 3301 locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); 3302 if (!cond->IsEmittedAtUseSite()) { 3303 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3304 } 3305 } 3306 } 3307 3308 void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) { 3309 if (cond->IsEmittedAtUseSite()) { 3310 return; 3311 } 3312 3313 const DataType::Type type = cond->GetLeft()->GetType(); 3314 3315 if (DataType::IsFloatingPointType(type)) { 3316 GenerateConditionGeneric(cond, codegen_); 3317 return; 3318 } 3319 3320 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type; 3321 3322 const IfCondition condition = cond->GetCondition(); 3323 3324 // A condition with only one boolean input, or two boolean inputs without being equality or 3325 // inequality results from transformations done by the instruction simplifier, and is handled 3326 // as a regular condition with integral inputs. 3327 if (type == DataType::Type::kBool && 3328 cond->GetRight()->GetType() == DataType::Type::kBool && 3329 (condition == kCondEQ || condition == kCondNE)) { 3330 vixl32::Register left = InputRegisterAt(cond, 0); 3331 const vixl32::Register out = OutputRegister(cond); 3332 const Location right_loc = cond->GetLocations()->InAt(1); 3333 3334 // The constant case is handled by the instruction simplifier. 3335 DCHECK(!right_loc.IsConstant()); 3336 3337 vixl32::Register right = RegisterFrom(right_loc); 3338 3339 // Avoid 32-bit instructions if possible. 
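    // Illustrative sketch (commentary only; booleans are 0 or 1 here):
    //   out = left ^ right;   // 1 exactly when the booleans differ (kCondNE)
    //   out = out ^ 1;        // extra flip for kCondEQ
    // Swapping the operands when `out` aliases `right` merely reorders the
    // commutative EOR so that a narrow two-register encoding can be selected.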
3340 if (out.Is(right)) { 3341 std::swap(left, right); 3342 } 3343 3344 __ Eor(out, left, right); 3345 3346 if (condition == kCondEQ) { 3347 __ Eor(out, out, 1); 3348 } 3349 3350 return; 3351 } 3352 3353 GenerateConditionIntegralOrNonPrimitive(cond, codegen_); 3354 } 3355 3356 void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) { 3357 HandleCondition(comp); 3358 } 3359 3360 void InstructionCodeGeneratorARMVIXL::VisitEqual(HEqual* comp) { 3361 HandleCondition(comp); 3362 } 3363 3364 void LocationsBuilderARMVIXL::VisitNotEqual(HNotEqual* comp) { 3365 HandleCondition(comp); 3366 } 3367 3368 void InstructionCodeGeneratorARMVIXL::VisitNotEqual(HNotEqual* comp) { 3369 HandleCondition(comp); 3370 } 3371 3372 void LocationsBuilderARMVIXL::VisitLessThan(HLessThan* comp) { 3373 HandleCondition(comp); 3374 } 3375 3376 void InstructionCodeGeneratorARMVIXL::VisitLessThan(HLessThan* comp) { 3377 HandleCondition(comp); 3378 } 3379 3380 void LocationsBuilderARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) { 3381 HandleCondition(comp); 3382 } 3383 3384 void InstructionCodeGeneratorARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) { 3385 HandleCondition(comp); 3386 } 3387 3388 void LocationsBuilderARMVIXL::VisitGreaterThan(HGreaterThan* comp) { 3389 HandleCondition(comp); 3390 } 3391 3392 void InstructionCodeGeneratorARMVIXL::VisitGreaterThan(HGreaterThan* comp) { 3393 HandleCondition(comp); 3394 } 3395 3396 void LocationsBuilderARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { 3397 HandleCondition(comp); 3398 } 3399 3400 void InstructionCodeGeneratorARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { 3401 HandleCondition(comp); 3402 } 3403 3404 void LocationsBuilderARMVIXL::VisitBelow(HBelow* comp) { 3405 HandleCondition(comp); 3406 } 3407 3408 void InstructionCodeGeneratorARMVIXL::VisitBelow(HBelow* comp) { 3409 HandleCondition(comp); 3410 } 3411 3412 void LocationsBuilderARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) { 3413 HandleCondition(comp); 3414 } 3415 3416 void InstructionCodeGeneratorARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) { 3417 HandleCondition(comp); 3418 } 3419 3420 void LocationsBuilderARMVIXL::VisitAbove(HAbove* comp) { 3421 HandleCondition(comp); 3422 } 3423 3424 void InstructionCodeGeneratorARMVIXL::VisitAbove(HAbove* comp) { 3425 HandleCondition(comp); 3426 } 3427 3428 void LocationsBuilderARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) { 3429 HandleCondition(comp); 3430 } 3431 3432 void InstructionCodeGeneratorARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) { 3433 HandleCondition(comp); 3434 } 3435 3436 void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) { 3437 LocationSummary* locations = 3438 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 3439 locations->SetOut(Location::ConstantLocation(constant)); 3440 } 3441 3442 void InstructionCodeGeneratorARMVIXL::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { 3443 // Will be generated at use site. 3444 } 3445 3446 void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) { 3447 LocationSummary* locations = 3448 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 3449 locations->SetOut(Location::ConstantLocation(constant)); 3450 } 3451 3452 void InstructionCodeGeneratorARMVIXL::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { 3453 // Will be generated at use site. 
3454 } 3455 3456 void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) { 3457 LocationSummary* locations = 3458 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 3459 locations->SetOut(Location::ConstantLocation(constant)); 3460 } 3461 3462 void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { 3463 // Will be generated at use site. 3464 } 3465 3466 void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) { 3467 LocationSummary* locations = 3468 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 3469 locations->SetOut(Location::ConstantLocation(constant)); 3470 } 3471 3472 void InstructionCodeGeneratorARMVIXL::VisitFloatConstant( 3473 HFloatConstant* constant ATTRIBUTE_UNUSED) { 3474 // Will be generated at use site. 3475 } 3476 3477 void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) { 3478 LocationSummary* locations = 3479 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 3480 locations->SetOut(Location::ConstantLocation(constant)); 3481 } 3482 3483 void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant( 3484 HDoubleConstant* constant ATTRIBUTE_UNUSED) { 3485 // Will be generated at use site. 3486 } 3487 3488 void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) { 3489 constructor_fence->SetLocations(nullptr); 3490 } 3491 3492 void InstructionCodeGeneratorARMVIXL::VisitConstructorFence( 3493 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { 3494 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); 3495 } 3496 3497 void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 3498 memory_barrier->SetLocations(nullptr); 3499 } 3500 3501 void InstructionCodeGeneratorARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 3502 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); 3503 } 3504 3505 void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) { 3506 ret->SetLocations(nullptr); 3507 } 3508 3509 void InstructionCodeGeneratorARMVIXL::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { 3510 codegen_->GenerateFrameExit(); 3511 } 3512 3513 void LocationsBuilderARMVIXL::VisitReturn(HReturn* ret) { 3514 LocationSummary* locations = 3515 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall); 3516 locations->SetInAt(0, parameter_visitor_.GetReturnLocation(ret->InputAt(0)->GetType())); 3517 } 3518 3519 void InstructionCodeGeneratorARMVIXL::VisitReturn(HReturn* ret ATTRIBUTE_UNUSED) { 3520 codegen_->GenerateFrameExit(); 3521 } 3522 3523 void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 3524 // The trampoline uses the same calling convention as dex calling conventions, 3525 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain 3526 // the method_idx. 3527 HandleInvoke(invoke); 3528 } 3529 3530 void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 3531 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); 3532 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 3); 3533 } 3534 3535 void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 3536 // Explicit clinit checks triggered by static invokes must have been pruned by 3537 // art::PrepareForRegisterAllocation. 
3538 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 3539 3540 IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_); 3541 if (intrinsic.TryDispatch(invoke)) { 3542 return; 3543 } 3544 3545 HandleInvoke(invoke); 3546 } 3547 3548 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) { 3549 if (invoke->GetLocations()->Intrinsified()) { 3550 IntrinsicCodeGeneratorARMVIXL intrinsic(codegen); 3551 intrinsic.Dispatch(invoke); 3552 return true; 3553 } 3554 return false; 3555 } 3556 3557 void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 3558 // Explicit clinit checks triggered by static invokes must have been pruned by 3559 // art::PrepareForRegisterAllocation. 3560 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 3561 3562 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 3563 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 4); 3564 return; 3565 } 3566 3567 LocationSummary* locations = invoke->GetLocations(); 3568 codegen_->GenerateStaticOrDirectCall( 3569 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); 3570 3571 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 5); 3572 } 3573 3574 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) { 3575 InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor; 3576 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); 3577 } 3578 3579 void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { 3580 IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_); 3581 if (intrinsic.TryDispatch(invoke)) { 3582 return; 3583 } 3584 3585 HandleInvoke(invoke); 3586 } 3587 3588 void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { 3589 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 3590 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 6); 3591 return; 3592 } 3593 3594 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); 3595 DCHECK(!codegen_->IsLeafMethod()); 3596 3597 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 7); 3598 } 3599 3600 void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) { 3601 HandleInvoke(invoke); 3602 // Add the hidden argument. 3603 invoke->GetLocations()->AddTemp(LocationFrom(r12)); 3604 } 3605 3606 void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) { 3607 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 3608 LocationSummary* locations = invoke->GetLocations(); 3609 vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); 3610 vixl32::Register hidden_reg = RegisterFrom(locations->GetTemp(1)); 3611 Location receiver = locations->InAt(0); 3612 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 3613 3614 DCHECK(!receiver.IsStackSlot()); 3615 3616 // Ensure the pc position is recorded immediately after the `ldr` instruction. 3617 { 3618 ExactAssemblyScope aas(GetVIXLAssembler(), 3619 vixl32::kMaxInstructionSizeInBytes, 3620 CodeBufferCheckScope::kMaximumSize); 3621 // /* HeapReference<Class> */ temp = receiver->klass_ 3622 __ ldr(temp, MemOperand(RegisterFrom(receiver), class_offset)); 3623 codegen_->MaybeRecordImplicitNullCheck(invoke); 3624 } 3625 // Instead of simply (possibly) unpoisoning `temp` here, we should 3626 // emit a read barrier for the previous class reference load. 
3627 // However this is not required in practice, as this is an
3628 // intermediate/temporary reference and because the current
3629 // concurrent copying collector keeps the from-space memory
3630 // intact/accessible until the end of the marking phase (though
3631 // the concurrent copying collector may not do so in the future).
3632 GetAssembler()->MaybeUnpoisonHeapReference(temp);
3633 GetAssembler()->LoadFromOffset(kLoadWord,
3634 temp,
3635 temp,
3636 mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
3637 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3638 invoke->GetImtIndex(), kArmPointerSize));
3639 // temp = temp->GetImtEntryAt(method_offset);
3640 GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
3641 uint32_t entry_point =
3642 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value();
3643 // LR = temp->GetEntryPoint();
3644 GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
3645
3646 // Set the hidden (in r12) argument. It is done here, right before the BLX, to prevent other
3647 // instructions from clobbering it, as they might use r12 as a scratch register.
3648 DCHECK(hidden_reg.Is(r12));
3649
3650 {
3651 // The VIXL macro assembler may clobber any of the scratch registers that are available to it,
3652 // so it checks if the application is using them (by passing them to the macro assembler
3653 // methods). The following application of UseScratchRegisterScope corrects VIXL's notion of
3654 // what is available, and is the opposite of the standard usage: Instead of requesting a
3655 // temporary location, it imposes an external constraint (i.e. a specific register is reserved
3656 // for the hidden argument). Note that this works even if VIXL needs a scratch register itself
3657 // (to materialize the constant), since the destination register becomes available for such use
3658 // internally for the duration of the macro instruction.
3659 UseScratchRegisterScope temps(GetVIXLAssembler());
3660 temps.Exclude(hidden_reg);
3661 __ Mov(hidden_reg, invoke->GetDexMethodIndex());
3662 }
3663 {
3664 // Ensure the pc position is recorded immediately after the `blx` instruction.
3665 // blx in T32 has only a 16-bit encoding, which is why a stricter check for the scope is used.
3666 ExactAssemblyScope aas(GetVIXLAssembler(), 3667 vixl32::k16BitT32InstructionSizeInBytes, 3668 CodeBufferCheckScope::kExactSize); 3669 // LR(); 3670 __ blx(lr); 3671 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 3672 DCHECK(!codegen_->IsLeafMethod()); 3673 } 3674 3675 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 8); 3676 } 3677 3678 void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { 3679 HandleInvoke(invoke); 3680 } 3681 3682 void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { 3683 codegen_->GenerateInvokePolymorphicCall(invoke); 3684 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 9); 3685 } 3686 3687 void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) { 3688 LocationSummary* locations = 3689 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); 3690 switch (neg->GetResultType()) { 3691 case DataType::Type::kInt32: { 3692 locations->SetInAt(0, Location::RequiresRegister()); 3693 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3694 break; 3695 } 3696 case DataType::Type::kInt64: { 3697 locations->SetInAt(0, Location::RequiresRegister()); 3698 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 3699 break; 3700 } 3701 3702 case DataType::Type::kFloat32: 3703 case DataType::Type::kFloat64: 3704 locations->SetInAt(0, Location::RequiresFpuRegister()); 3705 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 3706 break; 3707 3708 default: 3709 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 3710 } 3711 } 3712 3713 void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) { 3714 LocationSummary* locations = neg->GetLocations(); 3715 Location out = locations->Out(); 3716 Location in = locations->InAt(0); 3717 switch (neg->GetResultType()) { 3718 case DataType::Type::kInt32: 3719 __ Rsb(OutputRegister(neg), InputRegisterAt(neg, 0), 0); 3720 break; 3721 3722 case DataType::Type::kInt64: 3723 // out.lo = 0 - in.lo (and update the carry/borrow (C) flag) 3724 __ Rsbs(LowRegisterFrom(out), LowRegisterFrom(in), 0); 3725 // We cannot emit an RSC (Reverse Subtract with Carry) 3726 // instruction here, as it does not exist in the Thumb-2 3727 // instruction set. We use the following approach 3728 // using SBC and SUB instead. 3729 // 3730 // out.hi = -C 3731 __ Sbc(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(out)); 3732 // out.hi = out.hi - in.hi 3733 __ Sub(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(in)); 3734 break; 3735 3736 case DataType::Type::kFloat32: 3737 case DataType::Type::kFloat64: 3738 __ Vneg(OutputVRegister(neg), InputVRegister(neg)); 3739 break; 3740 3741 default: 3742 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 3743 } 3744 } 3745 3746 void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) { 3747 DataType::Type result_type = conversion->GetResultType(); 3748 DataType::Type input_type = conversion->GetInputType(); 3749 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 3750 << input_type << " -> " << result_type; 3751 3752 // The float-to-long, double-to-long and long-to-float type conversions 3753 // rely on a call to the runtime. 
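  // Illustrative mapping (a sketch based on the code generation further down):
  // float->long uses kQuickF2l, double->long uses kQuickD2l, and long->float
  // uses kQuickL2f, while long->double is expanded inline with VCVT/VMLA and
  // therefore needs no runtime call.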
3754 LocationSummary::CallKind call_kind = 3755 (((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64) 3756 && result_type == DataType::Type::kInt64) 3757 || (input_type == DataType::Type::kInt64 && result_type == DataType::Type::kFloat32)) 3758 ? LocationSummary::kCallOnMainOnly 3759 : LocationSummary::kNoCall; 3760 LocationSummary* locations = 3761 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind); 3762 3763 switch (result_type) { 3764 case DataType::Type::kUint8: 3765 case DataType::Type::kInt8: 3766 case DataType::Type::kUint16: 3767 case DataType::Type::kInt16: 3768 DCHECK(DataType::IsIntegralType(input_type)) << input_type; 3769 locations->SetInAt(0, Location::RequiresRegister()); 3770 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3771 break; 3772 3773 case DataType::Type::kInt32: 3774 switch (input_type) { 3775 case DataType::Type::kInt64: 3776 locations->SetInAt(0, Location::Any()); 3777 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3778 break; 3779 3780 case DataType::Type::kFloat32: 3781 locations->SetInAt(0, Location::RequiresFpuRegister()); 3782 locations->SetOut(Location::RequiresRegister()); 3783 locations->AddTemp(Location::RequiresFpuRegister()); 3784 break; 3785 3786 case DataType::Type::kFloat64: 3787 locations->SetInAt(0, Location::RequiresFpuRegister()); 3788 locations->SetOut(Location::RequiresRegister()); 3789 locations->AddTemp(Location::RequiresFpuRegister()); 3790 break; 3791 3792 default: 3793 LOG(FATAL) << "Unexpected type conversion from " << input_type 3794 << " to " << result_type; 3795 } 3796 break; 3797 3798 case DataType::Type::kInt64: 3799 switch (input_type) { 3800 case DataType::Type::kBool: 3801 case DataType::Type::kUint8: 3802 case DataType::Type::kInt8: 3803 case DataType::Type::kUint16: 3804 case DataType::Type::kInt16: 3805 case DataType::Type::kInt32: 3806 locations->SetInAt(0, Location::RequiresRegister()); 3807 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3808 break; 3809 3810 case DataType::Type::kFloat32: { 3811 InvokeRuntimeCallingConventionARMVIXL calling_convention; 3812 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); 3813 locations->SetOut(LocationFrom(r0, r1)); 3814 break; 3815 } 3816 3817 case DataType::Type::kFloat64: { 3818 InvokeRuntimeCallingConventionARMVIXL calling_convention; 3819 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0), 3820 calling_convention.GetFpuRegisterAt(1))); 3821 locations->SetOut(LocationFrom(r0, r1)); 3822 break; 3823 } 3824 3825 default: 3826 LOG(FATAL) << "Unexpected type conversion from " << input_type 3827 << " to " << result_type; 3828 } 3829 break; 3830 3831 case DataType::Type::kFloat32: 3832 switch (input_type) { 3833 case DataType::Type::kBool: 3834 case DataType::Type::kUint8: 3835 case DataType::Type::kInt8: 3836 case DataType::Type::kUint16: 3837 case DataType::Type::kInt16: 3838 case DataType::Type::kInt32: 3839 locations->SetInAt(0, Location::RequiresRegister()); 3840 locations->SetOut(Location::RequiresFpuRegister()); 3841 break; 3842 3843 case DataType::Type::kInt64: { 3844 InvokeRuntimeCallingConventionARMVIXL calling_convention; 3845 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0), 3846 calling_convention.GetRegisterAt(1))); 3847 locations->SetOut(LocationFrom(calling_convention.GetFpuRegisterAt(0))); 3848 break; 3849 } 3850 3851 case DataType::Type::kFloat64: 3852 
locations->SetInAt(0, Location::RequiresFpuRegister()); 3853 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 3854 break; 3855 3856 default: 3857 LOG(FATAL) << "Unexpected type conversion from " << input_type 3858 << " to " << result_type; 3859 } 3860 break; 3861 3862 case DataType::Type::kFloat64: 3863 switch (input_type) { 3864 case DataType::Type::kBool: 3865 case DataType::Type::kUint8: 3866 case DataType::Type::kInt8: 3867 case DataType::Type::kUint16: 3868 case DataType::Type::kInt16: 3869 case DataType::Type::kInt32: 3870 locations->SetInAt(0, Location::RequiresRegister()); 3871 locations->SetOut(Location::RequiresFpuRegister()); 3872 break; 3873 3874 case DataType::Type::kInt64: 3875 locations->SetInAt(0, Location::RequiresRegister()); 3876 locations->SetOut(Location::RequiresFpuRegister()); 3877 locations->AddTemp(Location::RequiresFpuRegister()); 3878 locations->AddTemp(Location::RequiresFpuRegister()); 3879 break; 3880 3881 case DataType::Type::kFloat32: 3882 locations->SetInAt(0, Location::RequiresFpuRegister()); 3883 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 3884 break; 3885 3886 default: 3887 LOG(FATAL) << "Unexpected type conversion from " << input_type 3888 << " to " << result_type; 3889 } 3890 break; 3891 3892 default: 3893 LOG(FATAL) << "Unexpected type conversion from " << input_type 3894 << " to " << result_type; 3895 } 3896 } 3897 3898 void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conversion) { 3899 LocationSummary* locations = conversion->GetLocations(); 3900 Location out = locations->Out(); 3901 Location in = locations->InAt(0); 3902 DataType::Type result_type = conversion->GetResultType(); 3903 DataType::Type input_type = conversion->GetInputType(); 3904 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 3905 << input_type << " -> " << result_type; 3906 switch (result_type) { 3907 case DataType::Type::kUint8: 3908 switch (input_type) { 3909 case DataType::Type::kInt8: 3910 case DataType::Type::kUint16: 3911 case DataType::Type::kInt16: 3912 case DataType::Type::kInt32: 3913 __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8); 3914 break; 3915 case DataType::Type::kInt64: 3916 __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8); 3917 break; 3918 3919 default: 3920 LOG(FATAL) << "Unexpected type conversion from " << input_type 3921 << " to " << result_type; 3922 } 3923 break; 3924 3925 case DataType::Type::kInt8: 3926 switch (input_type) { 3927 case DataType::Type::kUint8: 3928 case DataType::Type::kUint16: 3929 case DataType::Type::kInt16: 3930 case DataType::Type::kInt32: 3931 __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8); 3932 break; 3933 case DataType::Type::kInt64: 3934 __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8); 3935 break; 3936 3937 default: 3938 LOG(FATAL) << "Unexpected type conversion from " << input_type 3939 << " to " << result_type; 3940 } 3941 break; 3942 3943 case DataType::Type::kUint16: 3944 switch (input_type) { 3945 case DataType::Type::kInt8: 3946 case DataType::Type::kInt16: 3947 case DataType::Type::kInt32: 3948 __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16); 3949 break; 3950 case DataType::Type::kInt64: 3951 __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16); 3952 break; 3953 3954 default: 3955 LOG(FATAL) << "Unexpected type conversion from " << input_type 3956 << " to " << result_type; 3957 } 3958 break; 3959 
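    // Illustrative note (commentary only): UBFX above zero-extends the low
    // bits while SBFX below sign-extends them. E.g. int-to-char of 0x00012345
    // yields 0x2345, whereas int-to-short of 0x0000ff80 yields 0xffffff80.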
3960 case DataType::Type::kInt16: 3961 switch (input_type) { 3962 case DataType::Type::kUint16: 3963 case DataType::Type::kInt32: 3964 __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16); 3965 break; 3966 case DataType::Type::kInt64: 3967 __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16); 3968 break; 3969 3970 default: 3971 LOG(FATAL) << "Unexpected type conversion from " << input_type 3972 << " to " << result_type; 3973 } 3974 break; 3975 3976 case DataType::Type::kInt32: 3977 switch (input_type) { 3978 case DataType::Type::kInt64: 3979 DCHECK(out.IsRegister()); 3980 if (in.IsRegisterPair()) { 3981 __ Mov(OutputRegister(conversion), LowRegisterFrom(in)); 3982 } else if (in.IsDoubleStackSlot()) { 3983 GetAssembler()->LoadFromOffset(kLoadWord, 3984 OutputRegister(conversion), 3985 sp, 3986 in.GetStackIndex()); 3987 } else { 3988 DCHECK(in.IsConstant()); 3989 DCHECK(in.GetConstant()->IsLongConstant()); 3990 int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); 3991 __ Mov(OutputRegister(conversion), static_cast<int32_t>(value)); 3992 } 3993 break; 3994 3995 case DataType::Type::kFloat32: { 3996 vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0)); 3997 __ Vcvt(S32, F32, temp, InputSRegisterAt(conversion, 0)); 3998 __ Vmov(OutputRegister(conversion), temp); 3999 break; 4000 } 4001 4002 case DataType::Type::kFloat64: { 4003 vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0)); 4004 __ Vcvt(S32, F64, temp_s, DRegisterFrom(in)); 4005 __ Vmov(OutputRegister(conversion), temp_s); 4006 break; 4007 } 4008 4009 default: 4010 LOG(FATAL) << "Unexpected type conversion from " << input_type 4011 << " to " << result_type; 4012 } 4013 break; 4014 4015 case DataType::Type::kInt64: 4016 switch (input_type) { 4017 case DataType::Type::kBool: 4018 case DataType::Type::kUint8: 4019 case DataType::Type::kInt8: 4020 case DataType::Type::kUint16: 4021 case DataType::Type::kInt16: 4022 case DataType::Type::kInt32: 4023 DCHECK(out.IsRegisterPair()); 4024 DCHECK(in.IsRegister()); 4025 __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0)); 4026 // Sign extension. 
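          // Illustrative values (commentary only): the ASR below replicates
          // bit 31, so int-to-long of -2 produces lo = 0xfffffffe and
          // hi = 0xffffffff, while 2 produces lo = 0x00000002 and hi = 0.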
4027 __ Asr(HighRegisterFrom(out), LowRegisterFrom(out), 31); 4028 break; 4029 4030 case DataType::Type::kFloat32: 4031 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc()); 4032 CheckEntrypointTypes<kQuickF2l, int64_t, float>(); 4033 break; 4034 4035 case DataType::Type::kFloat64: 4036 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc()); 4037 CheckEntrypointTypes<kQuickD2l, int64_t, double>(); 4038 break; 4039 4040 default: 4041 LOG(FATAL) << "Unexpected type conversion from " << input_type 4042 << " to " << result_type; 4043 } 4044 break; 4045 4046 case DataType::Type::kFloat32: 4047 switch (input_type) { 4048 case DataType::Type::kBool: 4049 case DataType::Type::kUint8: 4050 case DataType::Type::kInt8: 4051 case DataType::Type::kUint16: 4052 case DataType::Type::kInt16: 4053 case DataType::Type::kInt32: 4054 __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0)); 4055 __ Vcvt(F32, S32, OutputSRegister(conversion), OutputSRegister(conversion)); 4056 break; 4057 4058 case DataType::Type::kInt64: 4059 codegen_->InvokeRuntime(kQuickL2f, conversion, conversion->GetDexPc()); 4060 CheckEntrypointTypes<kQuickL2f, float, int64_t>(); 4061 break; 4062 4063 case DataType::Type::kFloat64: 4064 __ Vcvt(F32, F64, OutputSRegister(conversion), DRegisterFrom(in)); 4065 break; 4066 4067 default: 4068 LOG(FATAL) << "Unexpected type conversion from " << input_type 4069 << " to " << result_type; 4070 } 4071 break; 4072 4073 case DataType::Type::kFloat64: 4074 switch (input_type) { 4075 case DataType::Type::kBool: 4076 case DataType::Type::kUint8: 4077 case DataType::Type::kInt8: 4078 case DataType::Type::kUint16: 4079 case DataType::Type::kInt16: 4080 case DataType::Type::kInt32: 4081 __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0)); 4082 __ Vcvt(F64, S32, DRegisterFrom(out), LowSRegisterFrom(out)); 4083 break; 4084 4085 case DataType::Type::kInt64: { 4086 vixl32::Register low = LowRegisterFrom(in); 4087 vixl32::Register high = HighRegisterFrom(in); 4088 vixl32::SRegister out_s = LowSRegisterFrom(out); 4089 vixl32::DRegister out_d = DRegisterFrom(out); 4090 vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0)); 4091 vixl32::DRegister temp_d = DRegisterFrom(locations->GetTemp(0)); 4092 vixl32::DRegister constant_d = DRegisterFrom(locations->GetTemp(1)); 4093 4094 // temp_d = int-to-double(high) 4095 __ Vmov(temp_s, high); 4096 __ Vcvt(F64, S32, temp_d, temp_s); 4097 // constant_d = k2Pow32EncodingForDouble 4098 __ Vmov(constant_d, bit_cast<double, int64_t>(k2Pow32EncodingForDouble)); 4099 // out_d = unsigned-to-double(low) 4100 __ Vmov(out_s, low); 4101 __ Vcvt(F64, U32, out_d, out_s); 4102 // out_d += temp_d * constant_d 4103 __ Vmla(F64, out_d, temp_d, constant_d); 4104 break; 4105 } 4106 4107 case DataType::Type::kFloat32: 4108 __ Vcvt(F64, F32, DRegisterFrom(out), InputSRegisterAt(conversion, 0)); 4109 break; 4110 4111 default: 4112 LOG(FATAL) << "Unexpected type conversion from " << input_type 4113 << " to " << result_type; 4114 } 4115 break; 4116 4117 default: 4118 LOG(FATAL) << "Unexpected type conversion from " << input_type 4119 << " to " << result_type; 4120 } 4121 } 4122 4123 void LocationsBuilderARMVIXL::VisitAdd(HAdd* add) { 4124 LocationSummary* locations = 4125 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall); 4126 switch (add->GetResultType()) { 4127 case DataType::Type::kInt32: { 4128 locations->SetInAt(0, Location::RequiresRegister()); 4129 locations->SetInAt(1, 
Location::RegisterOrConstant(add->InputAt(1))); 4130 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 4131 break; 4132 } 4133 4134 case DataType::Type::kInt64: { 4135 locations->SetInAt(0, Location::RequiresRegister()); 4136 locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD)); 4137 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 4138 break; 4139 } 4140 4141 case DataType::Type::kFloat32: 4142 case DataType::Type::kFloat64: { 4143 locations->SetInAt(0, Location::RequiresFpuRegister()); 4144 locations->SetInAt(1, Location::RequiresFpuRegister()); 4145 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 4146 break; 4147 } 4148 4149 default: 4150 LOG(FATAL) << "Unexpected add type " << add->GetResultType(); 4151 } 4152 } 4153 4154 void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) { 4155 LocationSummary* locations = add->GetLocations(); 4156 Location out = locations->Out(); 4157 Location first = locations->InAt(0); 4158 Location second = locations->InAt(1); 4159 4160 switch (add->GetResultType()) { 4161 case DataType::Type::kInt32: { 4162 __ Add(OutputRegister(add), InputRegisterAt(add, 0), InputOperandAt(add, 1)); 4163 } 4164 break; 4165 4166 case DataType::Type::kInt64: { 4167 if (second.IsConstant()) { 4168 uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant())); 4169 GenerateAddLongConst(out, first, value); 4170 } else { 4171 DCHECK(second.IsRegisterPair()); 4172 __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second)); 4173 __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second)); 4174 } 4175 break; 4176 } 4177 4178 case DataType::Type::kFloat32: 4179 case DataType::Type::kFloat64: 4180 __ Vadd(OutputVRegister(add), InputVRegisterAt(add, 0), InputVRegisterAt(add, 1)); 4181 break; 4182 4183 default: 4184 LOG(FATAL) << "Unexpected add type " << add->GetResultType(); 4185 } 4186 } 4187 4188 void LocationsBuilderARMVIXL::VisitSub(HSub* sub) { 4189 LocationSummary* locations = 4190 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall); 4191 switch (sub->GetResultType()) { 4192 case DataType::Type::kInt32: { 4193 locations->SetInAt(0, Location::RequiresRegister()); 4194 locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1))); 4195 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 4196 break; 4197 } 4198 4199 case DataType::Type::kInt64: { 4200 locations->SetInAt(0, Location::RequiresRegister()); 4201 locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB)); 4202 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 4203 break; 4204 } 4205 case DataType::Type::kFloat32: 4206 case DataType::Type::kFloat64: { 4207 locations->SetInAt(0, Location::RequiresFpuRegister()); 4208 locations->SetInAt(1, Location::RequiresFpuRegister()); 4209 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 4210 break; 4211 } 4212 default: 4213 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); 4214 } 4215 } 4216 4217 void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* sub) { 4218 LocationSummary* locations = sub->GetLocations(); 4219 Location out = locations->Out(); 4220 Location first = locations->InAt(0); 4221 Location second = locations->InAt(1); 4222 switch (sub->GetResultType()) { 4223 case DataType::Type::kInt32: { 4224 __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), 
InputOperandAt(sub, 1));
4225 break;
4226 }
4227
4228 case DataType::Type::kInt64: {
4229 if (second.IsConstant()) {
4230 uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4231 GenerateAddLongConst(out, first, -value);
4232 } else {
4233 DCHECK(second.IsRegisterPair());
4234 __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4235 __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4236 }
4237 break;
4238 }
4239
4240 case DataType::Type::kFloat32:
4241 case DataType::Type::kFloat64:
4242 __ Vsub(OutputVRegister(sub), InputVRegisterAt(sub, 0), InputVRegisterAt(sub, 1));
4243 break;
4244
4245 default:
4246 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4247 }
4248 }
4249
4250 void LocationsBuilderARMVIXL::VisitMul(HMul* mul) {
4251 LocationSummary* locations =
4252 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4253 switch (mul->GetResultType()) {
4254 case DataType::Type::kInt32:
4255 case DataType::Type::kInt64: {
4256 locations->SetInAt(0, Location::RequiresRegister());
4257 locations->SetInAt(1, Location::RequiresRegister());
4258 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4259 break;
4260 }
4261
4262 case DataType::Type::kFloat32:
4263 case DataType::Type::kFloat64: {
4264 locations->SetInAt(0, Location::RequiresFpuRegister());
4265 locations->SetInAt(1, Location::RequiresFpuRegister());
4266 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4267 break;
4268 }
4269
4270 default:
4271 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4272 }
4273 }
4274
4275 void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) {
4276 LocationSummary* locations = mul->GetLocations();
4277 Location out = locations->Out();
4278 Location first = locations->InAt(0);
4279 Location second = locations->InAt(1);
4280 switch (mul->GetResultType()) {
4281 case DataType::Type::kInt32: {
4282 __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
4283 break;
4284 }
4285 case DataType::Type::kInt64: {
4286 vixl32::Register out_hi = HighRegisterFrom(out);
4287 vixl32::Register out_lo = LowRegisterFrom(out);
4288 vixl32::Register in1_hi = HighRegisterFrom(first);
4289 vixl32::Register in1_lo = LowRegisterFrom(first);
4290 vixl32::Register in2_hi = HighRegisterFrom(second);
4291 vixl32::Register in2_lo = LowRegisterFrom(second);
4292
4293 // Extra checks to protect against clobbering caused by the existence of pairs like R1_R2.
4294 // The algorithm is wrong if out.hi is either in1.lo or in2.lo:
4295 // (e.g.
in1=r0_r1, in2=r2_r3 and out=r1_r2); 4296 DCHECK(!out_hi.Is(in1_lo)); 4297 DCHECK(!out_hi.Is(in2_lo)); 4298 4299 // input: in1 - 64 bits, in2 - 64 bits 4300 // output: out 4301 // formula: out.hi : out.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo 4302 // parts: out.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32] 4303 // parts: out.lo = (in1.lo * in2.lo)[31:0] 4304 4305 UseScratchRegisterScope temps(GetVIXLAssembler()); 4306 vixl32::Register temp = temps.Acquire(); 4307 // temp <- in1.lo * in2.hi 4308 __ Mul(temp, in1_lo, in2_hi); 4309 // out.hi <- in1.lo * in2.hi + in1.hi * in2.lo 4310 __ Mla(out_hi, in1_hi, in2_lo, temp); 4311 // out.lo <- (in1.lo * in2.lo)[31:0]; 4312 __ Umull(out_lo, temp, in1_lo, in2_lo); 4313 // out.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] 4314 __ Add(out_hi, out_hi, temp); 4315 break; 4316 } 4317 4318 case DataType::Type::kFloat32: 4319 case DataType::Type::kFloat64: 4320 __ Vmul(OutputVRegister(mul), InputVRegisterAt(mul, 0), InputVRegisterAt(mul, 1)); 4321 break; 4322 4323 default: 4324 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 4325 } 4326 } 4327 4328 void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) { 4329 DCHECK(instruction->IsDiv() || instruction->IsRem()); 4330 DCHECK(instruction->GetResultType() == DataType::Type::kInt32); 4331 4332 Location second = instruction->GetLocations()->InAt(1); 4333 DCHECK(second.IsConstant()); 4334 4335 vixl32::Register out = OutputRegister(instruction); 4336 vixl32::Register dividend = InputRegisterAt(instruction, 0); 4337 int32_t imm = Int32ConstantFrom(second); 4338 DCHECK(imm == 1 || imm == -1); 4339 4340 if (instruction->IsRem()) { 4341 __ Mov(out, 0); 4342 } else { 4343 if (imm == 1) { 4344 __ Mov(out, dividend); 4345 } else { 4346 __ Rsb(out, dividend, 0); 4347 } 4348 } 4349 } 4350 4351 void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instruction) { 4352 DCHECK(instruction->IsDiv() || instruction->IsRem()); 4353 DCHECK(instruction->GetResultType() == DataType::Type::kInt32); 4354 4355 LocationSummary* locations = instruction->GetLocations(); 4356 Location second = locations->InAt(1); 4357 DCHECK(second.IsConstant()); 4358 4359 vixl32::Register out = OutputRegister(instruction); 4360 vixl32::Register dividend = InputRegisterAt(instruction, 0); 4361 vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); 4362 int32_t imm = Int32ConstantFrom(second); 4363 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); 4364 int ctz_imm = CTZ(abs_imm); 4365 4366 if (ctz_imm == 1) { 4367 __ Lsr(temp, dividend, 32 - ctz_imm); 4368 } else { 4369 __ Asr(temp, dividend, 31); 4370 __ Lsr(temp, temp, 32 - ctz_imm); 4371 } 4372 __ Add(out, temp, dividend); 4373 4374 if (instruction->IsDiv()) { 4375 __ Asr(out, out, ctz_imm); 4376 if (imm < 0) { 4377 __ Rsb(out, out, 0); 4378 } 4379 } else { 4380 __ Ubfx(out, out, 0, ctz_imm); 4381 __ Sub(out, out, temp); 4382 } 4383 } 4384 4385 void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { 4386 DCHECK(instruction->IsDiv() || instruction->IsRem()); 4387 DCHECK(instruction->GetResultType() == DataType::Type::kInt32); 4388 4389 LocationSummary* locations = instruction->GetLocations(); 4390 Location second = locations->InAt(1); 4391 DCHECK(second.IsConstant()); 4392 4393 vixl32::Register out = OutputRegister(instruction); 4394 vixl32::Register dividend = InputRegisterAt(instruction, 0); 4395 vixl32::Register 
temp1 = RegisterFrom(locations->GetTemp(0));
4396 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
4397 int32_t imm = Int32ConstantFrom(second);
4398
4399 int64_t magic;
4400 int shift;
4401 CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
4402
4403 // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4404 __ Mov(temp1, static_cast<int32_t>(magic));
4405 __ Smull(temp2, temp1, dividend, temp1);
4406
4407 if (imm > 0 && magic < 0) {
4408 __ Add(temp1, temp1, dividend);
4409 } else if (imm < 0 && magic > 0) {
4410 __ Sub(temp1, temp1, dividend);
4411 }
4412
4413 if (shift != 0) {
4414 __ Asr(temp1, temp1, shift);
4415 }
4416
4417 if (instruction->IsDiv()) {
4418 __ Sub(out, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4419 } else {
4420 __ Sub(temp1, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4421 // TODO: Strength reduction for mls.
4422 __ Mov(temp2, imm);
4423 __ Mls(out, temp1, temp2, dividend);
4424 }
4425 }
4426
4427 void InstructionCodeGeneratorARMVIXL::GenerateDivRemConstantIntegral(
4428 HBinaryOperation* instruction) {
4429 DCHECK(instruction->IsDiv() || instruction->IsRem());
4430 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4431
4432 Location second = instruction->GetLocations()->InAt(1);
4433 DCHECK(second.IsConstant());
4434
4435 int32_t imm = Int32ConstantFrom(second);
4436 if (imm == 0) {
4437 // Do not generate anything. DivZeroCheck would prevent any code from being executed.
4438 } else if (imm == 1 || imm == -1) {
4439 DivRemOneOrMinusOne(instruction);
4440 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4441 DivRemByPowerOfTwo(instruction);
4442 } else {
4443 DCHECK(imm <= -2 || imm >= 2);
4444 GenerateDivRemWithAnyConstant(instruction);
4445 }
4446 }
4447
4448 void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) {
4449 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4450 if (div->GetResultType() == DataType::Type::kInt64) {
4451 // pLdiv runtime call.
4452 call_kind = LocationSummary::kCallOnMainOnly;
4453 } else if (div->GetResultType() == DataType::Type::kInt32 && div->InputAt(1)->IsConstant()) {
4454 // sdiv will be replaced by another instruction sequence.
4455 } else if (div->GetResultType() == DataType::Type::kInt32 &&
4456 !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4457 // pIdivmod runtime call.
4458 call_kind = LocationSummary::kCallOnMainOnly;
4459 }
4460
4461 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4462
4463 switch (div->GetResultType()) {
4464 case DataType::Type::kInt32: {
4465 if (div->InputAt(1)->IsConstant()) {
4466 locations->SetInAt(0, Location::RequiresRegister());
4467 locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
4468 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4469 int32_t value = Int32ConstantFrom(div->InputAt(1));
4470 if (value == 1 || value == 0 || value == -1) {
4471 // No temp register required.
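    // (Illustrative note, commentary only: a +/-1 divisor is handled with a
    // plain MOV/RSB in DivRemOneOrMinusOne above, and a 0 divisor generates
    // nothing, so no temp is needed in those cases.)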
4472 } else { 4473 locations->AddTemp(Location::RequiresRegister()); 4474 if (!IsPowerOfTwo(AbsOrMin(value))) { 4475 locations->AddTemp(Location::RequiresRegister()); 4476 } 4477 } 4478 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { 4479 locations->SetInAt(0, Location::RequiresRegister()); 4480 locations->SetInAt(1, Location::RequiresRegister()); 4481 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 4482 } else { 4483 InvokeRuntimeCallingConventionARMVIXL calling_convention; 4484 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 4485 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 4486 // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but 4487 // we only need the former. 4488 locations->SetOut(LocationFrom(r0)); 4489 } 4490 break; 4491 } 4492 case DataType::Type::kInt64: { 4493 InvokeRuntimeCallingConventionARMVIXL calling_convention; 4494 locations->SetInAt(0, LocationFrom( 4495 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); 4496 locations->SetInAt(1, LocationFrom( 4497 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); 4498 locations->SetOut(LocationFrom(r0, r1)); 4499 break; 4500 } 4501 case DataType::Type::kFloat32: 4502 case DataType::Type::kFloat64: { 4503 locations->SetInAt(0, Location::RequiresFpuRegister()); 4504 locations->SetInAt(1, Location::RequiresFpuRegister()); 4505 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 4506 break; 4507 } 4508 4509 default: 4510 LOG(FATAL) << "Unexpected div type " << div->GetResultType(); 4511 } 4512 } 4513 4514 void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) { 4515 Location lhs = div->GetLocations()->InAt(0); 4516 Location rhs = div->GetLocations()->InAt(1); 4517 4518 switch (div->GetResultType()) { 4519 case DataType::Type::kInt32: { 4520 if (rhs.IsConstant()) { 4521 GenerateDivRemConstantIntegral(div); 4522 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { 4523 __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1)); 4524 } else { 4525 InvokeRuntimeCallingConventionARMVIXL calling_convention; 4526 DCHECK(calling_convention.GetRegisterAt(0).Is(RegisterFrom(lhs))); 4527 DCHECK(calling_convention.GetRegisterAt(1).Is(RegisterFrom(rhs))); 4528 DCHECK(r0.Is(OutputRegister(div))); 4529 4530 codegen_->InvokeRuntime(kQuickIdivmod, div, div->GetDexPc()); 4531 CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>(); 4532 } 4533 break; 4534 } 4535 4536 case DataType::Type::kInt64: { 4537 InvokeRuntimeCallingConventionARMVIXL calling_convention; 4538 DCHECK(calling_convention.GetRegisterAt(0).Is(LowRegisterFrom(lhs))); 4539 DCHECK(calling_convention.GetRegisterAt(1).Is(HighRegisterFrom(lhs))); 4540 DCHECK(calling_convention.GetRegisterAt(2).Is(LowRegisterFrom(rhs))); 4541 DCHECK(calling_convention.GetRegisterAt(3).Is(HighRegisterFrom(rhs))); 4542 DCHECK(LowRegisterFrom(div->GetLocations()->Out()).Is(r0)); 4543 DCHECK(HighRegisterFrom(div->GetLocations()->Out()).Is(r1)); 4544 4545 codegen_->InvokeRuntime(kQuickLdiv, div, div->GetDexPc()); 4546 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); 4547 break; 4548 } 4549 4550 case DataType::Type::kFloat32: 4551 case DataType::Type::kFloat64: 4552 __ Vdiv(OutputVRegister(div), InputVRegisterAt(div, 0), InputVRegisterAt(div, 1)); 4553 break; 4554 4555 default: 4556 LOG(FATAL) << "Unexpected div type " << 
div->GetResultType();
4557 }
4558 }
4559
4560 void LocationsBuilderARMVIXL::VisitRem(HRem* rem) {
4561 DataType::Type type = rem->GetResultType();
4562
4563 // Most remainders are implemented in the runtime.
4564 LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
4565 if (rem->GetResultType() == DataType::Type::kInt32 && rem->InputAt(1)->IsConstant()) {
4566 // sdiv will be replaced by another instruction sequence.
4567 call_kind = LocationSummary::kNoCall;
4568 } else if ((rem->GetResultType() == DataType::Type::kInt32)
4569 && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4570 // We have a hardware divide instruction for int, so do it with three instructions.
4571 call_kind = LocationSummary::kNoCall;
4572 }
4573
4574 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4575
4576 switch (type) {
4577 case DataType::Type::kInt32: {
4578 if (rem->InputAt(1)->IsConstant()) {
4579 locations->SetInAt(0, Location::RequiresRegister());
4580 locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
4581 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4582 int32_t value = Int32ConstantFrom(rem->InputAt(1));
4583 if (value == 1 || value == 0 || value == -1) {
4584 // No temp register required.
4585 } else {
4586 locations->AddTemp(Location::RequiresRegister());
4587 if (!IsPowerOfTwo(AbsOrMin(value))) {
4588 locations->AddTemp(Location::RequiresRegister());
4589 }
4590 }
4591 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4592 locations->SetInAt(0, Location::RequiresRegister());
4593 locations->SetInAt(1, Location::RequiresRegister());
4594 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4595 locations->AddTemp(Location::RequiresRegister());
4596 } else {
4597 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4598 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4599 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4600 // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4601 // we only need the latter.
4602 locations->SetOut(LocationFrom(r1));
4603 }
4604 break;
4605 }
4606 case DataType::Type::kInt64: {
4607 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4608 locations->SetInAt(0, LocationFrom(
4609 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4610 locations->SetInAt(1, LocationFrom(
4611 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4612 // The runtime helper puts the output in R2,R3.
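    // Illustrative note (an assumption: kQuickLdiv and kQuickLmod appear to
    // wrap an ldivmod-style helper): the quotient comes back in R0,R1 (see
    // VisitDiv above) and the remainder in R2,R3, hence the pair chosen here.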
4613 locations->SetOut(LocationFrom(r2, r3)); 4614 break; 4615 } 4616 case DataType::Type::kFloat32: { 4617 InvokeRuntimeCallingConventionARMVIXL calling_convention; 4618 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); 4619 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1))); 4620 locations->SetOut(LocationFrom(s0)); 4621 break; 4622 } 4623 4624 case DataType::Type::kFloat64: { 4625 InvokeRuntimeCallingConventionARMVIXL calling_convention; 4626 locations->SetInAt(0, LocationFrom( 4627 calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1))); 4628 locations->SetInAt(1, LocationFrom( 4629 calling_convention.GetFpuRegisterAt(2), calling_convention.GetFpuRegisterAt(3))); 4630 locations->SetOut(LocationFrom(s0, s1)); 4631 break; 4632 } 4633 4634 default: 4635 LOG(FATAL) << "Unexpected rem type " << type; 4636 } 4637 } 4638 4639 void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) { 4640 LocationSummary* locations = rem->GetLocations(); 4641 Location second = locations->InAt(1); 4642 4643 DataType::Type type = rem->GetResultType(); 4644 switch (type) { 4645 case DataType::Type::kInt32: { 4646 vixl32::Register reg1 = InputRegisterAt(rem, 0); 4647 vixl32::Register out_reg = OutputRegister(rem); 4648 if (second.IsConstant()) { 4649 GenerateDivRemConstantIntegral(rem); 4650 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { 4651 vixl32::Register reg2 = RegisterFrom(second); 4652 vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); 4653 4654 // temp = reg1 / reg2 (integer division) 4655 // dest = reg1 - temp * reg2 4656 __ Sdiv(temp, reg1, reg2); 4657 __ Mls(out_reg, temp, reg2, reg1); 4658 } else { 4659 InvokeRuntimeCallingConventionARMVIXL calling_convention; 4660 DCHECK(reg1.Is(calling_convention.GetRegisterAt(0))); 4661 DCHECK(RegisterFrom(second).Is(calling_convention.GetRegisterAt(1))); 4662 DCHECK(out_reg.Is(r1)); 4663 4664 codegen_->InvokeRuntime(kQuickIdivmod, rem, rem->GetDexPc()); 4665 CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>(); 4666 } 4667 break; 4668 } 4669 4670 case DataType::Type::kInt64: { 4671 codegen_->InvokeRuntime(kQuickLmod, rem, rem->GetDexPc()); 4672 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); 4673 break; 4674 } 4675 4676 case DataType::Type::kFloat32: { 4677 codegen_->InvokeRuntime(kQuickFmodf, rem, rem->GetDexPc()); 4678 CheckEntrypointTypes<kQuickFmodf, float, float, float>(); 4679 break; 4680 } 4681 4682 case DataType::Type::kFloat64: { 4683 codegen_->InvokeRuntime(kQuickFmod, rem, rem->GetDexPc()); 4684 CheckEntrypointTypes<kQuickFmod, double, double, double>(); 4685 break; 4686 } 4687 4688 default: 4689 LOG(FATAL) << "Unexpected rem type " << type; 4690 } 4691 } 4692 4693 4694 void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) { 4695 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 4696 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); 4697 } 4698 4699 void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) { 4700 DivZeroCheckSlowPathARMVIXL* slow_path = 4701 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARMVIXL(instruction); 4702 codegen_->AddSlowPath(slow_path); 4703 4704 LocationSummary* locations = instruction->GetLocations(); 4705 Location value = locations->InAt(0); 4706 4707 switch (instruction->GetType()) { 4708 case DataType::Type::kBool: 4709 case DataType::Type::kUint8: 4710 case 
DataType::Type::kInt8: 4711 case DataType::Type::kUint16: 4712 case DataType::Type::kInt16: 4713 case DataType::Type::kInt32: { 4714 if (value.IsRegister()) { 4715 __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel()); 4716 } else { 4717 DCHECK(value.IsConstant()) << value; 4718 if (Int32ConstantFrom(value) == 0) { 4719 __ B(slow_path->GetEntryLabel()); 4720 } 4721 } 4722 break; 4723 } 4724 case DataType::Type::kInt64: { 4725 if (value.IsRegisterPair()) { 4726 UseScratchRegisterScope temps(GetVIXLAssembler()); 4727 vixl32::Register temp = temps.Acquire(); 4728 __ Orrs(temp, LowRegisterFrom(value), HighRegisterFrom(value)); 4729 __ B(eq, slow_path->GetEntryLabel()); 4730 } else { 4731 DCHECK(value.IsConstant()) << value; 4732 if (Int64ConstantFrom(value) == 0) { 4733 __ B(slow_path->GetEntryLabel()); 4734 } 4735 } 4736 break; 4737 } 4738 default: 4739 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType(); 4740 } 4741 } 4742 4743 void InstructionCodeGeneratorARMVIXL::HandleIntegerRotate(HRor* ror) { 4744 LocationSummary* locations = ror->GetLocations(); 4745 vixl32::Register in = InputRegisterAt(ror, 0); 4746 Location rhs = locations->InAt(1); 4747 vixl32::Register out = OutputRegister(ror); 4748 4749 if (rhs.IsConstant()) { 4750 // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31], 4751 // so map all rotations to a +ve. equivalent in that range. 4752 // (e.g. left *or* right by -2 bits == 30 bits in the same direction.) 4753 uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F; 4754 if (rot) { 4755 // Rotate, mapping left rotations to right equivalents if necessary. 4756 // (e.g. left by 2 bits == right by 30.) 4757 __ Ror(out, in, rot); 4758 } else if (!out.Is(in)) { 4759 __ Mov(out, in); 4760 } 4761 } else { 4762 __ Ror(out, in, RegisterFrom(rhs)); 4763 } 4764 } 4765 4766 // Gain some speed by mapping all Long rotates onto equivalent pairs of Integer 4767 // rotates by swapping input regs (effectively rotating by the first 32-bits of 4768 // a larger rotation) or flipping direction (thus treating larger right/left 4769 // rotations as sub-word sized rotations in the other direction) as appropriate. 4770 void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { 4771 LocationSummary* locations = ror->GetLocations(); 4772 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); 4773 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); 4774 Location rhs = locations->InAt(1); 4775 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out()); 4776 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out()); 4777 4778 if (rhs.IsConstant()) { 4779 uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant()); 4780 // Map all rotations to +ve. equivalents on the interval [0,63]. 4781 rot &= kMaxLongShiftDistance; 4782 // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate 4783 // logic below to a simple pair of binary orr. 4784 // (e.g. 34 bits == in_reg swap + 2 bits right.) 4785 if (rot >= kArmBitsPerWord) { 4786 rot -= kArmBitsPerWord; 4787 std::swap(in_reg_hi, in_reg_lo); 4788 } 4789 // Rotate, or mov to out for zero or word size rotations. 
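// Worked example (follows from the normalization above): a constant rotate
// of 34 has already been reduced to rot == 2 with in_reg_hi/in_reg_lo
// swapped, so each output word below is (in_word >> 2) | (other_word << 30).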
4790 if (rot != 0u) { 4791 __ Lsr(out_reg_hi, in_reg_hi, Operand::From(rot)); 4792 __ Orr(out_reg_hi, out_reg_hi, Operand(in_reg_lo, ShiftType::LSL, kArmBitsPerWord - rot)); 4793 __ Lsr(out_reg_lo, in_reg_lo, Operand::From(rot)); 4794 __ Orr(out_reg_lo, out_reg_lo, Operand(in_reg_hi, ShiftType::LSL, kArmBitsPerWord - rot)); 4795 } else { 4796 __ Mov(out_reg_lo, in_reg_lo); 4797 __ Mov(out_reg_hi, in_reg_hi); 4798 } 4799 } else { 4800 vixl32::Register shift_right = RegisterFrom(locations->GetTemp(0)); 4801 vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1)); 4802 vixl32::Label end; 4803 vixl32::Label shift_by_32_plus_shift_right; 4804 vixl32::Label* final_label = codegen_->GetFinalLabel(ror, &end); 4805 4806 __ And(shift_right, RegisterFrom(rhs), 0x1F); 4807 __ Lsrs(shift_left, RegisterFrom(rhs), 6); 4808 __ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord)); 4809 __ B(cc, &shift_by_32_plus_shift_right, /* far_target */ false); 4810 4811 // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right). 4812 // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right). 4813 __ Lsl(out_reg_hi, in_reg_hi, shift_left); 4814 __ Lsr(out_reg_lo, in_reg_lo, shift_right); 4815 __ Add(out_reg_hi, out_reg_hi, out_reg_lo); 4816 __ Lsl(out_reg_lo, in_reg_lo, shift_left); 4817 __ Lsr(shift_left, in_reg_hi, shift_right); 4818 __ Add(out_reg_lo, out_reg_lo, shift_left); 4819 __ B(final_label); 4820 4821 __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right. 4822 // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left). 4823 // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left). 4824 __ Lsr(out_reg_hi, in_reg_hi, shift_right); 4825 __ Lsl(out_reg_lo, in_reg_lo, shift_left); 4826 __ Add(out_reg_hi, out_reg_hi, out_reg_lo); 4827 __ Lsr(out_reg_lo, in_reg_lo, shift_right); 4828 __ Lsl(shift_right, in_reg_hi, shift_left); 4829 __ Add(out_reg_lo, out_reg_lo, shift_right); 4830 4831 if (end.IsReferenced()) { 4832 __ Bind(&end); 4833 } 4834 } 4835 } 4836 4837 void LocationsBuilderARMVIXL::VisitRor(HRor* ror) { 4838 LocationSummary* locations = 4839 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall); 4840 switch (ror->GetResultType()) { 4841 case DataType::Type::kInt32: { 4842 locations->SetInAt(0, Location::RequiresRegister()); 4843 locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1))); 4844 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 4845 break; 4846 } 4847 case DataType::Type::kInt64: { 4848 locations->SetInAt(0, Location::RequiresRegister()); 4849 if (ror->InputAt(1)->IsConstant()) { 4850 locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant())); 4851 } else { 4852 locations->SetInAt(1, Location::RequiresRegister()); 4853 locations->AddTemp(Location::RequiresRegister()); 4854 locations->AddTemp(Location::RequiresRegister()); 4855 } 4856 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 4857 break; 4858 } 4859 default: 4860 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType(); 4861 } 4862 } 4863 4864 void InstructionCodeGeneratorARMVIXL::VisitRor(HRor* ror) { 4865 DataType::Type type = ror->GetResultType(); 4866 switch (type) { 4867 case DataType::Type::kInt32: { 4868 HandleIntegerRotate(ror); 4869 break; 4870 } 4871 case DataType::Type::kInt64: { 4872 HandleLongRotate(ror); 4873 break; 4874 } 4875 default: 4876 LOG(FATAL) << "Unexpected operation type " << type; 4877 UNREACHABLE(); 4878 } 4879 } 4880 
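// For reference, a portable model of the 64-bit rotate implemented above;
// the helper name RorLong is illustrative only and not part of the codegen:
//   uint64_t RorLong(uint64_t value, uint32_t shift) {
//     shift &= 63u;  // Same masking as `rot &= kMaxLongShiftDistance`.
//     if (shift == 0u) return value;  // Avoid the invalid shift by 64 below.
//     return (value >> shift) | (value << (64u - shift));
//   }
// The register-operand path above computes the same result using 32-bit
// shifts only, by splitting the amount into (shift & 31) plus an optional
// extra rotation by 32.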
4881 void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) { 4882 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); 4883 4884 LocationSummary* locations = 4885 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall); 4886 4887 switch (op->GetResultType()) { 4888 case DataType::Type::kInt32: { 4889 locations->SetInAt(0, Location::RequiresRegister()); 4890 if (op->InputAt(1)->IsConstant()) { 4891 locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant())); 4892 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 4893 } else { 4894 locations->SetInAt(1, Location::RequiresRegister()); 4895 // Make the output overlap, as it will be used to hold the masked 4896 // second input. 4897 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 4898 } 4899 break; 4900 } 4901 case DataType::Type::kInt64: { 4902 locations->SetInAt(0, Location::RequiresRegister()); 4903 if (op->InputAt(1)->IsConstant()) { 4904 locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant())); 4905 // For simplicity, use kOutputOverlap even though we only require that low registers 4906 // don't clash with high registers which the register allocator currently guarantees. 4907 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 4908 } else { 4909 locations->SetInAt(1, Location::RequiresRegister()); 4910 locations->AddTemp(Location::RequiresRegister()); 4911 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 4912 } 4913 break; 4914 } 4915 default: 4916 LOG(FATAL) << "Unexpected operation type " << op->GetResultType(); 4917 } 4918 } 4919 4920 void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) { 4921 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); 4922 4923 LocationSummary* locations = op->GetLocations(); 4924 Location out = locations->Out(); 4925 Location first = locations->InAt(0); 4926 Location second = locations->InAt(1); 4927 4928 DataType::Type type = op->GetResultType(); 4929 switch (type) { 4930 case DataType::Type::kInt32: { 4931 vixl32::Register out_reg = OutputRegister(op); 4932 vixl32::Register first_reg = InputRegisterAt(op, 0); 4933 if (second.IsRegister()) { 4934 vixl32::Register second_reg = RegisterFrom(second); 4935 // ARM doesn't mask the shift count so we need to do it ourselves. 4936 __ And(out_reg, second_reg, kMaxIntShiftDistance); 4937 if (op->IsShl()) { 4938 __ Lsl(out_reg, first_reg, out_reg); 4939 } else if (op->IsShr()) { 4940 __ Asr(out_reg, first_reg, out_reg); 4941 } else { 4942 __ Lsr(out_reg, first_reg, out_reg); 4943 } 4944 } else { 4945 int32_t cst = Int32ConstantFrom(second); 4946 uint32_t shift_value = cst & kMaxIntShiftDistance; 4947 if (shift_value == 0) { // ARM does not support shifting with 0 immediate. 
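// (A shift immediate of 0 is not encodable as such: LSL #0 is a plain MOV,
// while the LSR/ASR #0 encodings denote a shift by 32, so the zero case is
// emitted as a MOV below.)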
4948 __ Mov(out_reg, first_reg); 4949 } else if (op->IsShl()) { 4950 __ Lsl(out_reg, first_reg, shift_value); 4951 } else if (op->IsShr()) { 4952 __ Asr(out_reg, first_reg, shift_value); 4953 } else { 4954 __ Lsr(out_reg, first_reg, shift_value); 4955 } 4956 } 4957 break; 4958 } 4959 case DataType::Type::kInt64: { 4960 vixl32::Register o_h = HighRegisterFrom(out); 4961 vixl32::Register o_l = LowRegisterFrom(out); 4962 4963 vixl32::Register high = HighRegisterFrom(first); 4964 vixl32::Register low = LowRegisterFrom(first); 4965 4966 if (second.IsRegister()) { 4967 vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); 4968 4969 vixl32::Register second_reg = RegisterFrom(second); 4970 4971 if (op->IsShl()) { 4972 __ And(o_l, second_reg, kMaxLongShiftDistance); 4973 // Shift the high part 4974 __ Lsl(o_h, high, o_l); 4975 // Shift the low part and `or` what overflew on the high part 4976 __ Rsb(temp, o_l, Operand::From(kArmBitsPerWord)); 4977 __ Lsr(temp, low, temp); 4978 __ Orr(o_h, o_h, temp); 4979 // If the shift is > 32 bits, override the high part 4980 __ Subs(temp, o_l, Operand::From(kArmBitsPerWord)); 4981 { 4982 ExactAssemblyScope guard(GetVIXLAssembler(), 4983 2 * vixl32::kMaxInstructionSizeInBytes, 4984 CodeBufferCheckScope::kMaximumSize); 4985 __ it(pl); 4986 __ lsl(pl, o_h, low, temp); 4987 } 4988 // Shift the low part 4989 __ Lsl(o_l, low, o_l); 4990 } else if (op->IsShr()) { 4991 __ And(o_h, second_reg, kMaxLongShiftDistance); 4992 // Shift the low part 4993 __ Lsr(o_l, low, o_h); 4994 // Shift the high part and `or` what underflew on the low part 4995 __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord)); 4996 __ Lsl(temp, high, temp); 4997 __ Orr(o_l, o_l, temp); 4998 // If the shift is > 32 bits, override the low part 4999 __ Subs(temp, o_h, Operand::From(kArmBitsPerWord)); 5000 { 5001 ExactAssemblyScope guard(GetVIXLAssembler(), 5002 2 * vixl32::kMaxInstructionSizeInBytes, 5003 CodeBufferCheckScope::kMaximumSize); 5004 __ it(pl); 5005 __ asr(pl, o_l, high, temp); 5006 } 5007 // Shift the high part 5008 __ Asr(o_h, high, o_h); 5009 } else { 5010 __ And(o_h, second_reg, kMaxLongShiftDistance); 5011 // same as Shr except we use `Lsr`s and not `Asr`s 5012 __ Lsr(o_l, low, o_h); 5013 __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord)); 5014 __ Lsl(temp, high, temp); 5015 __ Orr(o_l, o_l, temp); 5016 __ Subs(temp, o_h, Operand::From(kArmBitsPerWord)); 5017 { 5018 ExactAssemblyScope guard(GetVIXLAssembler(), 5019 2 * vixl32::kMaxInstructionSizeInBytes, 5020 CodeBufferCheckScope::kMaximumSize); 5021 __ it(pl); 5022 __ lsr(pl, o_l, high, temp); 5023 } 5024 __ Lsr(o_h, high, o_h); 5025 } 5026 } else { 5027 // Register allocator doesn't create partial overlap. 
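// Worked example of the constant cases below: a left shift of 40 takes the
// `shift_value > 32` arm, giving o_h = low << 8 (i.e. 40 - 32) and o_l = 0;
// a shift of exactly 1 instead uses the flag-carrying Lsls/Adc (or
// Asrs/Rrx, Lsrs/Rrx) pairs to move one bit across the register boundary.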
5028 DCHECK(!o_l.Is(high)); 5029 DCHECK(!o_h.Is(low)); 5030 int32_t cst = Int32ConstantFrom(second); 5031 uint32_t shift_value = cst & kMaxLongShiftDistance; 5032 if (shift_value > 32) { 5033 if (op->IsShl()) { 5034 __ Lsl(o_h, low, shift_value - 32); 5035 __ Mov(o_l, 0); 5036 } else if (op->IsShr()) { 5037 __ Asr(o_l, high, shift_value - 32); 5038 __ Asr(o_h, high, 31); 5039 } else { 5040 __ Lsr(o_l, high, shift_value - 32); 5041 __ Mov(o_h, 0); 5042 } 5043 } else if (shift_value == 32) { 5044 if (op->IsShl()) { 5045 __ Mov(o_h, low); 5046 __ Mov(o_l, 0); 5047 } else if (op->IsShr()) { 5048 __ Mov(o_l, high); 5049 __ Asr(o_h, high, 31); 5050 } else { 5051 __ Mov(o_l, high); 5052 __ Mov(o_h, 0); 5053 } 5054 } else if (shift_value == 1) { 5055 if (op->IsShl()) { 5056 __ Lsls(o_l, low, 1); 5057 __ Adc(o_h, high, high); 5058 } else if (op->IsShr()) { 5059 __ Asrs(o_h, high, 1); 5060 __ Rrx(o_l, low); 5061 } else { 5062 __ Lsrs(o_h, high, 1); 5063 __ Rrx(o_l, low); 5064 } 5065 } else { 5066 DCHECK(2 <= shift_value && shift_value < 32) << shift_value; 5067 if (op->IsShl()) { 5068 __ Lsl(o_h, high, shift_value); 5069 __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value)); 5070 __ Lsl(o_l, low, shift_value); 5071 } else if (op->IsShr()) { 5072 __ Lsr(o_l, low, shift_value); 5073 __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value)); 5074 __ Asr(o_h, high, shift_value); 5075 } else { 5076 __ Lsr(o_l, low, shift_value); 5077 __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value)); 5078 __ Lsr(o_h, high, shift_value); 5079 } 5080 } 5081 } 5082 break; 5083 } 5084 default: 5085 LOG(FATAL) << "Unexpected operation type " << type; 5086 UNREACHABLE(); 5087 } 5088 } 5089 5090 void LocationsBuilderARMVIXL::VisitShl(HShl* shl) { 5091 HandleShift(shl); 5092 } 5093 5094 void InstructionCodeGeneratorARMVIXL::VisitShl(HShl* shl) { 5095 HandleShift(shl); 5096 } 5097 5098 void LocationsBuilderARMVIXL::VisitShr(HShr* shr) { 5099 HandleShift(shr); 5100 } 5101 5102 void InstructionCodeGeneratorARMVIXL::VisitShr(HShr* shr) { 5103 HandleShift(shr); 5104 } 5105 5106 void LocationsBuilderARMVIXL::VisitUShr(HUShr* ushr) { 5107 HandleShift(ushr); 5108 } 5109 5110 void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) { 5111 HandleShift(ushr); 5112 } 5113 5114 void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) { 5115 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5116 instruction, LocationSummary::kCallOnMainOnly); 5117 if (instruction->IsStringAlloc()) { 5118 locations->AddTemp(LocationFrom(kMethodRegister)); 5119 } else { 5120 InvokeRuntimeCallingConventionARMVIXL calling_convention; 5121 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5122 } 5123 locations->SetOut(LocationFrom(r0)); 5124 } 5125 5126 void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) { 5127 // Note: if heap poisoning is enabled, the entry point takes care 5128 // of poisoning the reference. 5129 if (instruction->IsStringAlloc()) { 5130 // String is allocated through StringFactory. Call NewEmptyString entry point.
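// The sequence below is roughly equivalent to this C-ish sketch (names are
// illustrative; the entrypoint slot is read off the current thread):
//   ArtMethod* factory = self->pNewEmptyString;  // thread-local entrypoint slot
//   lr = factory->entry_point_from_quick_compiled_code_;
//   blx lr;  // the new string comes back in r0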
5131 vixl32::Register temp = RegisterFrom(instruction->GetLocations()->GetTemp(0)); 5132 MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize); 5133 GetAssembler()->LoadFromOffset(kLoadWord, temp, tr, QUICK_ENTRY_POINT(pNewEmptyString)); 5134 GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, code_offset.Int32Value()); 5135 // blx in T32 has only a 16-bit encoding, which is why a stricter check for the scope is used. 5136 ExactAssemblyScope aas(GetVIXLAssembler(), 5137 vixl32::k16BitT32InstructionSizeInBytes, 5138 CodeBufferCheckScope::kExactSize); 5139 __ blx(lr); 5140 codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); 5141 } else { 5142 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); 5143 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); 5144 } 5145 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 10); 5146 } 5147 5148 void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) { 5149 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5150 instruction, LocationSummary::kCallOnMainOnly); 5151 InvokeRuntimeCallingConventionARMVIXL calling_convention; 5152 locations->SetOut(LocationFrom(r0)); 5153 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5154 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 5155 } 5156 5157 void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) { 5158 // Note: if heap poisoning is enabled, the entry point takes care 5159 // of poisoning the reference. 5160 QuickEntrypointEnum entrypoint = 5161 CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); 5162 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); 5163 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); 5164 DCHECK(!codegen_->IsLeafMethod()); 5165 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 11); 5166 } 5167 5168 void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) { 5169 LocationSummary* locations = 5170 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 5171 Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); 5172 if (location.IsStackSlot()) { 5173 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 5174 } else if (location.IsDoubleStackSlot()) { 5175 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 5176 } 5177 locations->SetOut(location); 5178 } 5179 5180 void InstructionCodeGeneratorARMVIXL::VisitParameterValue( 5181 HParameterValue* instruction ATTRIBUTE_UNUSED) { 5182 // Nothing to do, the parameter is already at its location. 5183 } 5184 5185 void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) { 5186 LocationSummary* locations = 5187 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 5188 locations->SetOut(LocationFrom(kMethodRegister)); 5189 } 5190 5191 void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod( 5192 HCurrentMethod* instruction ATTRIBUTE_UNUSED) { 5193 // Nothing to do, the method is already at its location.
5194 } 5195 5196 void LocationsBuilderARMVIXL::VisitNot(HNot* not_) { 5197 LocationSummary* locations = 5198 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall); 5199 locations->SetInAt(0, Location::RequiresRegister()); 5200 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5201 } 5202 5203 void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) { 5204 LocationSummary* locations = not_->GetLocations(); 5205 Location out = locations->Out(); 5206 Location in = locations->InAt(0); 5207 switch (not_->GetResultType()) { 5208 case DataType::Type::kInt32: 5209 __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0)); 5210 break; 5211 5212 case DataType::Type::kInt64: 5213 __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in)); 5214 __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in)); 5215 break; 5216 5217 default: 5218 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType(); 5219 } 5220 } 5221 5222 void LocationsBuilderARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) { 5223 LocationSummary* locations = 5224 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall); 5225 locations->SetInAt(0, Location::RequiresRegister()); 5226 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5227 } 5228 5229 void InstructionCodeGeneratorARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) { 5230 __ Eor(OutputRegister(bool_not), InputRegister(bool_not), 1); 5231 } 5232 5233 void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) { 5234 LocationSummary* locations = 5235 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); 5236 switch (compare->InputAt(0)->GetType()) { 5237 case DataType::Type::kBool: 5238 case DataType::Type::kUint8: 5239 case DataType::Type::kInt8: 5240 case DataType::Type::kUint16: 5241 case DataType::Type::kInt16: 5242 case DataType::Type::kInt32: 5243 case DataType::Type::kInt64: { 5244 locations->SetInAt(0, Location::RequiresRegister()); 5245 locations->SetInAt(1, Location::RequiresRegister()); 5246 // Output overlaps because it is written before doing the low comparison. 5247 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 5248 break; 5249 } 5250 case DataType::Type::kFloat32: 5251 case DataType::Type::kFloat64: { 5252 locations->SetInAt(0, Location::RequiresFpuRegister()); 5253 locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1))); 5254 locations->SetOut(Location::RequiresRegister()); 5255 break; 5256 } 5257 default: 5258 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType(); 5259 } 5260 } 5261 5262 void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { 5263 LocationSummary* locations = compare->GetLocations(); 5264 vixl32::Register out = OutputRegister(compare); 5265 Location left = locations->InAt(0); 5266 Location right = locations->InAt(1); 5267 5268 vixl32::Label less, greater, done; 5269 vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done); 5270 DataType::Type type = compare->InputAt(0)->GetType(); 5271 vixl32::Condition less_cond = vixl32::Condition::None(); 5272 switch (type) { 5273 case DataType::Type::kBool: 5274 case DataType::Type::kUint8: 5275 case DataType::Type::kInt8: 5276 case DataType::Type::kUint16: 5277 case DataType::Type::kInt16: 5278 case DataType::Type::kInt32: { 5279 // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags. 
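// (The 16-bit Thumb move-immediate encoding is flag-setting (MOVS), so the
// zeroing of `out` has to happen before the comparison whose flags feed the
// conditional branches below.)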
5280 __ Mov(out, 0); 5281 __ Cmp(RegisterFrom(left), RegisterFrom(right)); // Signed compare. 5282 less_cond = lt; 5283 break; 5284 } 5285 case DataType::Type::kInt64: { 5286 __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare. 5287 __ B(lt, &less, /* far_target */ false); 5288 __ B(gt, &greater, /* far_target */ false); 5289 // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags. 5290 __ Mov(out, 0); 5291 __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare. 5292 less_cond = lo; 5293 break; 5294 } 5295 case DataType::Type::kFloat32: 5296 case DataType::Type::kFloat64: { 5297 __ Mov(out, 0); 5298 GenerateVcmp(compare, codegen_); 5299 // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS). 5300 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); 5301 less_cond = ARMFPCondition(kCondLT, compare->IsGtBias()); 5302 break; 5303 } 5304 default: 5305 LOG(FATAL) << "Unexpected compare type " << type; 5306 UNREACHABLE(); 5307 } 5308 5309 __ B(eq, final_label, /* far_target */ false); 5310 __ B(less_cond, &less, /* far_target */ false); 5311 5312 __ Bind(&greater); 5313 __ Mov(out, 1); 5314 __ B(final_label); 5315 5316 __ Bind(&less); 5317 __ Mov(out, -1); 5318 5319 if (done.IsReferenced()) { 5320 __ Bind(&done); 5321 } 5322 } 5323 5324 void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) { 5325 LocationSummary* locations = 5326 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 5327 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { 5328 locations->SetInAt(i, Location::Any()); 5329 } 5330 locations->SetOut(Location::Any()); 5331 } 5332 5333 void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { 5334 LOG(FATAL) << "Unreachable"; 5335 } 5336 5337 void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) { 5338 // TODO (ported from quick): revisit ARM barrier kinds. 5339 DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings. 5340 switch (kind) { 5341 case MemBarrierKind::kAnyStore: 5342 case MemBarrierKind::kLoadAny: 5343 case MemBarrierKind::kAnyAny: { 5344 flavor = DmbOptions::ISH; 5345 break; 5346 } 5347 case MemBarrierKind::kStoreStore: { 5348 flavor = DmbOptions::ISHST; 5349 break; 5350 } 5351 default: 5352 LOG(FATAL) << "Unexpected memory barrier " << kind; 5353 } 5354 __ Dmb(flavor); 5355 } 5356 5357 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr, 5358 uint32_t offset, 5359 vixl32::Register out_lo, 5360 vixl32::Register out_hi) { 5361 UseScratchRegisterScope temps(GetVIXLAssembler()); 5362 if (offset != 0) { 5363 vixl32::Register temp = temps.Acquire(); 5364 __ Add(temp, addr, offset); 5365 addr = temp; 5366 } 5367 __ Ldrexd(out_lo, out_hi, MemOperand(addr)); 5368 } 5369 5370 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr, 5371 uint32_t offset, 5372 vixl32::Register value_lo, 5373 vixl32::Register value_hi, 5374 vixl32::Register temp1, 5375 vixl32::Register temp2, 5376 HInstruction* instruction) { 5377 UseScratchRegisterScope temps(GetVIXLAssembler()); 5378 vixl32::Label fail; 5379 if (offset != 0) { 5380 vixl32::Register temp = temps.Acquire(); 5381 __ Add(temp, addr, offset); 5382 addr = temp; 5383 } 5384 __ Bind(&fail); 5385 { 5386 // Ensure the pc position is recorded immediately after the `ldrexd` instruction. 
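// (As a side effect, the scope below should also keep VIXL from emitting a
// literal pool between the ldrexd and the recorded pc, though its primary
// purpose is the exact pc bookkeeping.)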
5387 ExactAssemblyScope aas(GetVIXLAssembler(), 5388 vixl32::kMaxInstructionSizeInBytes, 5389 CodeBufferCheckScope::kMaximumSize); 5390 // We need a load followed by a store. (The address used in a STREX instruction must 5391 // be the same as the address in the most recently executed LDREX instruction.) 5392 __ ldrexd(temp1, temp2, MemOperand(addr)); 5393 codegen_->MaybeRecordImplicitNullCheck(instruction); 5394 } 5395 __ Strexd(temp1, value_lo, value_hi, MemOperand(addr)); 5396 __ CompareAndBranchIfNonZero(temp1, &fail); 5397 } 5398 5399 void LocationsBuilderARMVIXL::HandleFieldSet( 5400 HInstruction* instruction, const FieldInfo& field_info) { 5401 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); 5402 5403 LocationSummary* locations = 5404 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 5405 locations->SetInAt(0, Location::RequiresRegister()); 5406 5407 DataType::Type field_type = field_info.GetFieldType(); 5408 if (DataType::IsFloatingPointType(field_type)) { 5409 locations->SetInAt(1, Location::RequiresFpuRegister()); 5410 } else { 5411 locations->SetInAt(1, Location::RequiresRegister()); 5412 } 5413 5414 bool is_wide = field_type == DataType::Type::kInt64 || field_type == DataType::Type::kFloat64; 5415 bool generate_volatile = field_info.IsVolatile() 5416 && is_wide 5417 && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); 5418 bool needs_write_barrier = 5419 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); 5420 // Temporary registers for the write barrier. 5421 // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark. 5422 if (needs_write_barrier) { 5423 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 5424 locations->AddTemp(Location::RequiresRegister()); 5425 } else if (generate_volatile) { 5426 // The ARM encodings have some additional constraints for ldrexd/strexd: 5427 // - registers need to be consecutive 5428 // - the first register should be even but not R14. 5429 // We don't test for ARM yet, and the assertion makes sure that we 5430 // revisit this if we ever enable ARM encoding. 5431 DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); 5432 5433 locations->AddTemp(Location::RequiresRegister()); 5434 locations->AddTemp(Location::RequiresRegister()); 5435 if (field_type == DataType::Type::kFloat64) { 5436 // For doubles we need two more registers to copy the value.
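// (r2/r3 are presumably pinned here as an even, consecutive pair so that
// the scratch operands of the ldrexd/strexd in GenerateWideAtomicStore
// would also satisfy the A32 constraints listed above; a plain
// RequiresRegister() cannot express "consecutive even pair".)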
5437 locations->AddTemp(LocationFrom(r2)); 5438 locations->AddTemp(LocationFrom(r3)); 5439 } 5440 } 5441 } 5442 5443 void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, 5444 const FieldInfo& field_info, 5445 bool value_can_be_null) { 5446 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); 5447 5448 LocationSummary* locations = instruction->GetLocations(); 5449 vixl32::Register base = InputRegisterAt(instruction, 0); 5450 Location value = locations->InAt(1); 5451 5452 bool is_volatile = field_info.IsVolatile(); 5453 bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); 5454 DataType::Type field_type = field_info.GetFieldType(); 5455 uint32_t offset = field_info.GetFieldOffset().Uint32Value(); 5456 bool needs_write_barrier = 5457 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); 5458 5459 if (is_volatile) { 5460 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); 5461 } 5462 5463 switch (field_type) { 5464 case DataType::Type::kBool: 5465 case DataType::Type::kUint8: 5466 case DataType::Type::kInt8: 5467 case DataType::Type::kUint16: 5468 case DataType::Type::kInt16: 5469 case DataType::Type::kInt32: { 5470 StoreOperandType operand_type = GetStoreOperandType(field_type); 5471 GetAssembler()->StoreToOffset(operand_type, RegisterFrom(value), base, offset); 5472 break; 5473 } 5474 5475 case DataType::Type::kReference: { 5476 if (kPoisonHeapReferences && needs_write_barrier) { 5477 // Note that in the case where `value` is a null reference, 5478 // we do not enter this block, as a null reference does not 5479 // need poisoning. 5480 DCHECK_EQ(field_type, DataType::Type::kReference); 5481 vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); 5482 __ Mov(temp, RegisterFrom(value)); 5483 GetAssembler()->PoisonHeapReference(temp); 5484 GetAssembler()->StoreToOffset(kStoreWord, temp, base, offset); 5485 } else { 5486 GetAssembler()->StoreToOffset(kStoreWord, RegisterFrom(value), base, offset); 5487 } 5488 break; 5489 } 5490 5491 case DataType::Type::kInt64: { 5492 if (is_volatile && !atomic_ldrd_strd) { 5493 GenerateWideAtomicStore(base, 5494 offset, 5495 LowRegisterFrom(value), 5496 HighRegisterFrom(value), 5497 RegisterFrom(locations->GetTemp(0)), 5498 RegisterFrom(locations->GetTemp(1)), 5499 instruction); 5500 } else { 5501 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), base, offset); 5502 codegen_->MaybeRecordImplicitNullCheck(instruction); 5503 } 5504 break; 5505 } 5506 5507 case DataType::Type::kFloat32: { 5508 GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset); 5509 break; 5510 } 5511 5512 case DataType::Type::kFloat64: { 5513 vixl32::DRegister value_reg = DRegisterFrom(value); 5514 if (is_volatile && !atomic_ldrd_strd) { 5515 vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0)); 5516 vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1)); 5517 5518 __ Vmov(value_reg_lo, value_reg_hi, value_reg); 5519 5520 GenerateWideAtomicStore(base, 5521 offset, 5522 value_reg_lo, 5523 value_reg_hi, 5524 RegisterFrom(locations->GetTemp(2)), 5525 RegisterFrom(locations->GetTemp(3)), 5526 instruction); 5527 } else { 5528 GetAssembler()->StoreDToOffset(value_reg, base, offset); 5529 codegen_->MaybeRecordImplicitNullCheck(instruction); 5530 } 5531 break; 5532 } 5533 5534 case DataType::Type::kUint32: 5535 case DataType::Type::kUint64: 5536 case DataType::Type::kVoid: 5537 LOG(FATAL) << "Unreachable type " << 
field_type; 5538 UNREACHABLE(); 5539 } 5540 5541 // Longs and doubles are handled in the switch. 5542 if (field_type != DataType::Type::kInt64 && field_type != DataType::Type::kFloat64) { 5543 // TODO(VIXL): Here and for other calls to `MaybeRecordImplicitNullCheck` in this method, we 5544 // should use a scope and the assembler to emit the store instruction to guarantee that we 5545 // record the pc at the correct position. But the `Assembler` does not automatically handle 5546 // unencodable offsets. Practically, everything is fine because the helper and VIXL, at the time 5547 // of writing, do generate the store instruction last. 5548 codegen_->MaybeRecordImplicitNullCheck(instruction); 5549 } 5550 5551 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { 5552 vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); 5553 vixl32::Register card = RegisterFrom(locations->GetTemp(1)); 5554 codegen_->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null); 5555 } 5556 5557 if (is_volatile) { 5558 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); 5559 } 5560 } 5561 5562 void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, 5563 const FieldInfo& field_info) { 5564 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 5565 5566 bool object_field_get_with_read_barrier = 5567 kEmitCompilerReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference); 5568 LocationSummary* locations = 5569 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 5570 object_field_get_with_read_barrier 5571 ? LocationSummary::kCallOnSlowPath 5572 : LocationSummary::kNoCall); 5573 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { 5574 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 5575 } 5576 locations->SetInAt(0, Location::RequiresRegister()); 5577 5578 bool volatile_for_double = field_info.IsVolatile() 5579 && (field_info.GetFieldType() == DataType::Type::kFloat64) 5580 && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); 5581 // The output overlaps in the case of a volatile long: we don't want the 5582 // code generated by GenerateWideAtomicLoad to overwrite the 5583 // object's location. Likewise, in the case of an object field get 5584 // with read barriers enabled, we do not want the load to overwrite 5585 // the object's location, as we need it to emit the read barrier. 5586 bool overlap = 5587 (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) || 5588 object_field_get_with_read_barrier; 5589 5590 if (DataType::IsFloatingPointType(instruction->GetType())) { 5591 locations->SetOut(Location::RequiresFpuRegister()); 5592 } else { 5593 locations->SetOut(Location::RequiresRegister(), 5594 (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap)); 5595 } 5596 if (volatile_for_double) { 5597 // The ARM encodings have some additional constraints for ldrexd/strexd: 5598 // - registers need to be consecutive 5599 // - the first register should be even but not R14. 5600 // We don't test for ARM yet, and the assertion makes sure that we 5601 // revisit this if we ever enable ARM encoding.
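// Illustrative encoding detail: in A32, LDREXD/STREXD require Rt even,
// Rt2 == Rt + 1 and Rt != R14 (e.g. `ldrexd r2, r3, [r0]` is valid while
// `ldrexd r1, r2, [r0]` is not), whereas the T32 encodings take two
// independent registers, which is what this Thumb2-only path relies on.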
5602 DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); 5603 locations->AddTemp(Location::RequiresRegister()); 5604 locations->AddTemp(Location::RequiresRegister()); 5605 } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { 5606 // We need a temporary register for the read barrier marking slow 5607 // path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier. 5608 if (kBakerReadBarrierLinkTimeThunksEnableForFields && 5609 !Runtime::Current()->UseJitCompilation()) { 5610 // If link-time thunks for the Baker read barrier are enabled, for AOT 5611 // loads we need a temporary only if the offset is too big. 5612 if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { 5613 locations->AddTemp(Location::RequiresRegister()); 5614 } 5615 // And we always need the reserved entrypoint register. 5616 locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); 5617 } else { 5618 locations->AddTemp(Location::RequiresRegister()); 5619 } 5620 } 5621 } 5622 5623 Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) { 5624 DCHECK(DataType::IsFloatingPointType(input->GetType())) << input->GetType(); 5625 if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) || 5626 (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) { 5627 return Location::ConstantLocation(input->AsConstant()); 5628 } else { 5629 return Location::RequiresFpuRegister(); 5630 } 5631 } 5632 5633 Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant, 5634 Opcode opcode) { 5635 DCHECK(!DataType::IsFloatingPointType(constant->GetType())); 5636 if (constant->IsConstant() && 5637 CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) { 5638 return Location::ConstantLocation(constant->AsConstant()); 5639 } 5640 return Location::RequiresRegister(); 5641 } 5642 5643 static bool CanEncode32BitConstantAsImmediate( 5644 CodeGeneratorARMVIXL* codegen, 5645 uint32_t value, 5646 Opcode opcode, 5647 vixl32::FlagsUpdate flags_update = vixl32::FlagsUpdate::DontCare) { 5648 ArmVIXLAssembler* assembler = codegen->GetAssembler(); 5649 if (assembler->ShifterOperandCanHold(opcode, value, flags_update)) { 5650 return true; 5651 } 5652 Opcode neg_opcode = kNoOperand; 5653 uint32_t neg_value = 0; 5654 switch (opcode) { 5655 case AND: neg_opcode = BIC; neg_value = ~value; break; 5656 case ORR: neg_opcode = ORN; neg_value = ~value; break; 5657 case ADD: neg_opcode = SUB; neg_value = -value; break; 5658 case ADC: neg_opcode = SBC; neg_value = ~value; break; 5659 case SUB: neg_opcode = ADD; neg_value = -value; break; 5660 case SBC: neg_opcode = ADC; neg_value = ~value; break; 5661 case MOV: neg_opcode = MVN; neg_value = ~value; break; 5662 default: 5663 return false; 5664 } 5665 5666 if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, flags_update)) { 5667 return true; 5668 } 5669 5670 return opcode == AND && IsPowerOfTwo(value + 1); 5671 } 5672 5673 bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode) { 5674 uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst)); 5675 if (DataType::Is64BitType(input_cst->GetType())) { 5676 Opcode high_opcode = opcode; 5677 vixl32::FlagsUpdate low_flags_update = vixl32::FlagsUpdate::DontCare; 5678 switch (opcode) { 5679 case SUB: 5680 // Flip the operation to an ADD. 
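// Worked example: a 64-bit SUB of 0x1'00000000 becomes an ADD of
// 0xFFFFFFFF'00000000; Low32Bits is then 0, so only the high word
// 0xFFFFFFFF has to be encodable (it is, as a repeated-byte modified
// immediate).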
5681 value = -value; 5682 opcode = ADD; 5683 FALLTHROUGH_INTENDED; 5684 case ADD: 5685 if (Low32Bits(value) == 0u) { 5686 return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), opcode); 5687 } 5688 high_opcode = ADC; 5689 low_flags_update = vixl32::FlagsUpdate::SetFlags; 5690 break; 5691 default: 5692 break; 5693 } 5694 return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), high_opcode) && 5695 CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode, low_flags_update); 5696 } else { 5697 return CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode); 5698 } 5699 } 5700 5701 void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, 5702 const FieldInfo& field_info) { 5703 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 5704 5705 LocationSummary* locations = instruction->GetLocations(); 5706 vixl32::Register base = InputRegisterAt(instruction, 0); 5707 Location out = locations->Out(); 5708 bool is_volatile = field_info.IsVolatile(); 5709 bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); 5710 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); 5711 DataType::Type load_type = instruction->GetType(); 5712 uint32_t offset = field_info.GetFieldOffset().Uint32Value(); 5713 5714 switch (load_type) { 5715 case DataType::Type::kBool: 5716 case DataType::Type::kUint8: 5717 case DataType::Type::kInt8: 5718 case DataType::Type::kUint16: 5719 case DataType::Type::kInt16: 5720 case DataType::Type::kInt32: { 5721 LoadOperandType operand_type = GetLoadOperandType(load_type); 5722 GetAssembler()->LoadFromOffset(operand_type, RegisterFrom(out), base, offset); 5723 break; 5724 } 5725 5726 case DataType::Type::kReference: { 5727 // /* HeapReference<Object> */ out = *(base + offset) 5728 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 5729 Location temp_loc = locations->GetTemp(0); 5730 // Note that a potential implicit null check is handled in this 5731 // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call. 5732 codegen_->GenerateFieldLoadWithBakerReadBarrier( 5733 instruction, out, base, offset, temp_loc, /* needs_null_check */ true); 5734 if (is_volatile) { 5735 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 5736 } 5737 } else { 5738 GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset); 5739 codegen_->MaybeRecordImplicitNullCheck(instruction); 5740 if (is_volatile) { 5741 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 5742 } 5743 // If read barriers are enabled, emit read barriers other than 5744 // Baker's using a slow path (and also unpoison the loaded 5745 // reference, if heap poisoning is enabled). 
5746 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, locations->InAt(0), offset); 5747 } 5748 break; 5749 } 5750 5751 case DataType::Type::kInt64: 5752 if (is_volatile && !atomic_ldrd_strd) { 5753 GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out)); 5754 } else { 5755 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out), base, offset); 5756 } 5757 break; 5758 5759 case DataType::Type::kFloat32: 5760 GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset); 5761 break; 5762 5763 case DataType::Type::kFloat64: { 5764 vixl32::DRegister out_dreg = DRegisterFrom(out); 5765 if (is_volatile && !atomic_ldrd_strd) { 5766 vixl32::Register lo = RegisterFrom(locations->GetTemp(0)); 5767 vixl32::Register hi = RegisterFrom(locations->GetTemp(1)); 5768 GenerateWideAtomicLoad(base, offset, lo, hi); 5769 // TODO(VIXL): Do we need to be immediately after the ldrexd instruction? If so we need a 5770 // scope. 5771 codegen_->MaybeRecordImplicitNullCheck(instruction); 5772 __ Vmov(out_dreg, lo, hi); 5773 } else { 5774 GetAssembler()->LoadDFromOffset(out_dreg, base, offset); 5775 codegen_->MaybeRecordImplicitNullCheck(instruction); 5776 } 5777 break; 5778 } 5779 5780 case DataType::Type::kUint32: 5781 case DataType::Type::kUint64: 5782 case DataType::Type::kVoid: 5783 LOG(FATAL) << "Unreachable type " << load_type; 5784 UNREACHABLE(); 5785 } 5786 5787 if (load_type == DataType::Type::kReference || load_type == DataType::Type::kFloat64) { 5788 // Potential implicit null checks, in the case of reference or 5789 // double fields, are handled in the previous switch statement. 5790 } else { 5791 // Address cases other than reference and double that may require an implicit null check. 5792 // TODO(VIXL): Here and for other calls to `MaybeRecordImplicitNullCheck` in this method, we 5793 // should use a scope and the assembler to emit the load instruction to guarantee that we 5794 // record the pc at the correct position. But the `Assembler` does not automatically handle 5795 // unencodable offsets. Practically, everything is fine because the helper and VIXL, at the time 5796 // of writing, do generate the load instruction last. 5797 codegen_->MaybeRecordImplicitNullCheck(instruction); 5798 } 5799 5800 if (is_volatile) { 5801 if (load_type == DataType::Type::kReference) { 5802 // Memory barriers, in the case of references, are also handled 5803 // in the previous switch statement.
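// (For the non-reference cases handled in the `else` below, this amounts to
// the `load; dmb ish` pattern: the kLoadAny barrier gives the volatile load
// its acquire semantics.)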
5804 } else { 5805 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 5806 } 5807 } 5808 } 5809 5810 void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 5811 HandleFieldSet(instruction, instruction->GetFieldInfo()); 5812 } 5813 5814 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 5815 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 5816 } 5817 5818 void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 5819 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5820 } 5821 5822 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 5823 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5824 } 5825 5826 void LocationsBuilderARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) { 5827 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5828 } 5829 5830 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) { 5831 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5832 } 5833 5834 void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) { 5835 HandleFieldSet(instruction, instruction->GetFieldInfo()); 5836 } 5837 5838 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) { 5839 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 5840 } 5841 5842 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldGet( 5843 HUnresolvedInstanceFieldGet* instruction) { 5844 FieldAccessCallingConventionARMVIXL calling_convention; 5845 codegen_->CreateUnresolvedFieldLocationSummary( 5846 instruction, instruction->GetFieldType(), calling_convention); 5847 } 5848 5849 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldGet( 5850 HUnresolvedInstanceFieldGet* instruction) { 5851 FieldAccessCallingConventionARMVIXL calling_convention; 5852 codegen_->GenerateUnresolvedFieldAccess(instruction, 5853 instruction->GetFieldType(), 5854 instruction->GetFieldIndex(), 5855 instruction->GetDexPc(), 5856 calling_convention); 5857 } 5858 5859 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldSet( 5860 HUnresolvedInstanceFieldSet* instruction) { 5861 FieldAccessCallingConventionARMVIXL calling_convention; 5862 codegen_->CreateUnresolvedFieldLocationSummary( 5863 instruction, instruction->GetFieldType(), calling_convention); 5864 } 5865 5866 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldSet( 5867 HUnresolvedInstanceFieldSet* instruction) { 5868 FieldAccessCallingConventionARMVIXL calling_convention; 5869 codegen_->GenerateUnresolvedFieldAccess(instruction, 5870 instruction->GetFieldType(), 5871 instruction->GetFieldIndex(), 5872 instruction->GetDexPc(), 5873 calling_convention); 5874 } 5875 5876 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldGet( 5877 HUnresolvedStaticFieldGet* instruction) { 5878 FieldAccessCallingConventionARMVIXL calling_convention; 5879 codegen_->CreateUnresolvedFieldLocationSummary( 5880 instruction, instruction->GetFieldType(), calling_convention); 5881 } 5882 5883 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldGet( 5884 HUnresolvedStaticFieldGet* instruction) { 5885 FieldAccessCallingConventionARMVIXL calling_convention; 5886 codegen_->GenerateUnresolvedFieldAccess(instruction, 5887 instruction->GetFieldType(), 5888 instruction->GetFieldIndex(), 5889 instruction->GetDexPc(), 5890 
calling_convention); 5891 } 5892 5893 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldSet( 5894 HUnresolvedStaticFieldSet* instruction) { 5895 FieldAccessCallingConventionARMVIXL calling_convention; 5896 codegen_->CreateUnresolvedFieldLocationSummary( 5897 instruction, instruction->GetFieldType(), calling_convention); 5898 } 5899 5900 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldSet( 5901 HUnresolvedStaticFieldSet* instruction) { 5902 FieldAccessCallingConventionARMVIXL calling_convention; 5903 codegen_->GenerateUnresolvedFieldAccess(instruction, 5904 instruction->GetFieldType(), 5905 instruction->GetFieldIndex(), 5906 instruction->GetDexPc(), 5907 calling_convention); 5908 } 5909 5910 void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) { 5911 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 5912 locations->SetInAt(0, Location::RequiresRegister()); 5913 } 5914 5915 void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) { 5916 if (CanMoveNullCheckToUser(instruction)) { 5917 return; 5918 } 5919 5920 UseScratchRegisterScope temps(GetVIXLAssembler()); 5921 // Ensure the pc position is recorded immediately after the `ldr` instruction. 5922 ExactAssemblyScope aas(GetVIXLAssembler(), 5923 vixl32::kMaxInstructionSizeInBytes, 5924 CodeBufferCheckScope::kMaximumSize); 5925 __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0))); 5926 RecordPcInfo(instruction, instruction->GetDexPc()); 5927 } 5928 5929 void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* instruction) { 5930 NullCheckSlowPathARMVIXL* slow_path = 5931 new (GetScopedAllocator()) NullCheckSlowPathARMVIXL(instruction); 5932 AddSlowPath(slow_path); 5933 __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel()); 5934 } 5935 5936 void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) { 5937 codegen_->GenerateNullCheck(instruction); 5938 } 5939 5940 void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(DataType::Type type, 5941 Location out_loc, 5942 vixl32::Register base, 5943 vixl32::Register reg_index, 5944 vixl32::Condition cond) { 5945 uint32_t shift_count = DataType::SizeShift(type); 5946 MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count); 5947 5948 switch (type) { 5949 case DataType::Type::kBool: 5950 case DataType::Type::kUint8: 5951 __ Ldrb(cond, RegisterFrom(out_loc), mem_address); 5952 break; 5953 case DataType::Type::kInt8: 5954 __ Ldrsb(cond, RegisterFrom(out_loc), mem_address); 5955 break; 5956 case DataType::Type::kUint16: 5957 __ Ldrh(cond, RegisterFrom(out_loc), mem_address); 5958 break; 5959 case DataType::Type::kInt16: 5960 __ Ldrsh(cond, RegisterFrom(out_loc), mem_address); 5961 break; 5962 case DataType::Type::kReference: 5963 case DataType::Type::kInt32: 5964 __ Ldr(cond, RegisterFrom(out_loc), mem_address); 5965 break; 5966 // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types. 
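// (The shifted register-offset addressing used above, e.g.
// `ldr r0, [r1, r2, lsl #2]`, exists only for the byte/halfword/word
// transfers; LDRD and the VFP loads take immediate offsets only, hence the
// unreachable cases below.)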
5967 case DataType::Type::kInt64: 5968 case DataType::Type::kFloat32: 5969 case DataType::Type::kFloat64: 5970 default: 5971 LOG(FATAL) << "Unreachable type " << type; 5972 UNREACHABLE(); 5973 } 5974 } 5975 5976 void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type, 5977 Location loc, 5978 vixl32::Register base, 5979 vixl32::Register reg_index, 5980 vixl32::Condition cond) { 5981 uint32_t shift_count = DataType::SizeShift(type); 5982 MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count); 5983 5984 switch (type) { 5985 case DataType::Type::kBool: 5986 case DataType::Type::kUint8: 5987 case DataType::Type::kInt8: 5988 __ Strb(cond, RegisterFrom(loc), mem_address); 5989 break; 5990 case DataType::Type::kUint16: 5991 case DataType::Type::kInt16: 5992 __ Strh(cond, RegisterFrom(loc), mem_address); 5993 break; 5994 case DataType::Type::kReference: 5995 case DataType::Type::kInt32: 5996 __ Str(cond, RegisterFrom(loc), mem_address); 5997 break; 5998 // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types. 5999 case DataType::Type::kInt64: 6000 case DataType::Type::kFloat32: 6001 case DataType::Type::kFloat64: 6002 default: 6003 LOG(FATAL) << "Unreachable type " << type; 6004 UNREACHABLE(); 6005 } 6006 } 6007 6008 void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { 6009 bool object_array_get_with_read_barrier = 6010 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 6011 LocationSummary* locations = 6012 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 6013 object_array_get_with_read_barrier 6014 ? LocationSummary::kCallOnSlowPath 6015 : LocationSummary::kNoCall); 6016 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { 6017 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 6018 } 6019 locations->SetInAt(0, Location::RequiresRegister()); 6020 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 6021 if (DataType::IsFloatingPointType(instruction->GetType())) { 6022 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 6023 } else { 6024 // The output overlaps in the case of an object array get with 6025 // read barriers enabled: we do not want the move to overwrite the 6026 // array's location, as we need it to emit the read barrier. 6027 locations->SetOut( 6028 Location::RequiresRegister(), 6029 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); 6030 } 6031 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { 6032 // We need a temporary register for the read barrier marking slow 6033 // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier. 6034 if (kBakerReadBarrierLinkTimeThunksEnableForFields && 6035 !Runtime::Current()->UseJitCompilation() && 6036 instruction->GetIndex()->IsConstant()) { 6037 // Array loads with constant index are treated as field loads. 6038 // If link-time thunks for the Baker read barrier are enabled, for AOT 6039 // constant index loads we need a temporary only if the offset is too big. 6040 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); 6041 uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); 6042 offset += index << DataType::SizeShift(DataType::Type::kReference); 6043 if (offset >= kReferenceLoadMinFarOffset) { 6044 locations->AddTemp(Location::RequiresRegister()); 6045 } 6046 // And we always need the reserved entrypoint register. 
6047 locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); 6048 } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays && 6049 !Runtime::Current()->UseJitCompilation() && 6050 !instruction->GetIndex()->IsConstant()) { 6051 // We need a non-scratch temporary for the array data pointer. 6052 locations->AddTemp(Location::RequiresRegister()); 6053 // And we always need the reserved entrypoint register. 6054 locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); 6055 } else { 6056 locations->AddTemp(Location::RequiresRegister()); 6057 } 6058 } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 6059 // Also need a temporary for String compression feature. 6060 locations->AddTemp(Location::RequiresRegister()); 6061 } 6062 } 6063 6064 void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { 6065 LocationSummary* locations = instruction->GetLocations(); 6066 Location obj_loc = locations->InAt(0); 6067 vixl32::Register obj = InputRegisterAt(instruction, 0); 6068 Location index = locations->InAt(1); 6069 Location out_loc = locations->Out(); 6070 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); 6071 DataType::Type type = instruction->GetType(); 6072 const bool maybe_compressed_char_at = mirror::kUseStringCompression && 6073 instruction->IsStringCharAt(); 6074 HInstruction* array_instr = instruction->GetArray(); 6075 bool has_intermediate_address = array_instr->IsIntermediateAddress(); 6076 6077 switch (type) { 6078 case DataType::Type::kBool: 6079 case DataType::Type::kUint8: 6080 case DataType::Type::kInt8: 6081 case DataType::Type::kUint16: 6082 case DataType::Type::kInt16: 6083 case DataType::Type::kInt32: { 6084 vixl32::Register length; 6085 if (maybe_compressed_char_at) { 6086 length = RegisterFrom(locations->GetTemp(0)); 6087 uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 6088 GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset); 6089 codegen_->MaybeRecordImplicitNullCheck(instruction); 6090 } 6091 if (index.IsConstant()) { 6092 int32_t const_index = Int32ConstantFrom(index); 6093 if (maybe_compressed_char_at) { 6094 vixl32::Label uncompressed_load, done; 6095 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); 6096 __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. 
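// Note: the LSRS above shifts the String compression flag (bit 0 of the count field) into
// the carry flag and leaves the character count in `length`, i.e. the count field is
// (char_count << 1) | flag with flag 0 meaning compressed, as the static_assert below
// checks; the B(cs) that follows therefore branches to the uncompressed path.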
6097 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 6098 "Expecting 0=compressed, 1=uncompressed"); 6099 __ B(cs, &uncompressed_load, /* far_target */ false); 6100 GetAssembler()->LoadFromOffset(kLoadUnsignedByte, 6101 RegisterFrom(out_loc), 6102 obj, 6103 data_offset + const_index); 6104 __ B(final_label); 6105 __ Bind(&uncompressed_load); 6106 GetAssembler()->LoadFromOffset(GetLoadOperandType(DataType::Type::kUint16), 6107 RegisterFrom(out_loc), 6108 obj, 6109 data_offset + (const_index << 1)); 6110 if (done.IsReferenced()) { 6111 __ Bind(&done); 6112 } 6113 } else { 6114 uint32_t full_offset = data_offset + (const_index << DataType::SizeShift(type)); 6115 6116 LoadOperandType load_type = GetLoadOperandType(type); 6117 GetAssembler()->LoadFromOffset(load_type, RegisterFrom(out_loc), obj, full_offset); 6118 } 6119 } else { 6120 UseScratchRegisterScope temps(GetVIXLAssembler()); 6121 vixl32::Register temp = temps.Acquire(); 6122 6123 if (has_intermediate_address) { 6124 // We do not need to compute the intermediate address from the array: the 6125 // input instruction has done it already. See the comment in 6126 // `TryExtractArrayAccessAddress()`. 6127 if (kIsDebugBuild) { 6128 HIntermediateAddress* tmp = array_instr->AsIntermediateAddress(); 6129 DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset); 6130 } 6131 temp = obj; 6132 } else { 6133 __ Add(temp, obj, data_offset); 6134 } 6135 if (maybe_compressed_char_at) { 6136 vixl32::Label uncompressed_load, done; 6137 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); 6138 __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. 6139 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 6140 "Expecting 0=compressed, 1=uncompressed"); 6141 __ B(cs, &uncompressed_load, /* far_target */ false); 6142 __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0)); 6143 __ B(final_label); 6144 __ Bind(&uncompressed_load); 6145 __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1)); 6146 if (done.IsReferenced()) { 6147 __ Bind(&done); 6148 } 6149 } else { 6150 codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index)); 6151 } 6152 } 6153 break; 6154 } 6155 6156 case DataType::Type::kReference: { 6157 // The read barrier instrumentation of object ArrayGet 6158 // instructions does not support the HIntermediateAddress 6159 // instruction. 6160 DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); 6161 6162 static_assert( 6163 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 6164 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 6165 // /* HeapReference<Object> */ out = 6166 // *(obj + data_offset + index * sizeof(HeapReference<Object>)) 6167 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 6168 Location temp = locations->GetTemp(0); 6169 // Note that a potential implicit null check is handled in this 6170 // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call. 6171 DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); 6172 if (index.IsConstant()) { 6173 // Array load with a constant index can be treated as a field load. 
data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type);
6175           codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6176                                                           out_loc,
6177                                                           obj,
6178                                                           data_offset,
6179                                                           locations->GetTemp(0),
6180                                                           /* needs_null_check */ false);
6181         } else {
6182           codegen_->GenerateArrayLoadWithBakerReadBarrier(
6183               instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
6184         }
6185       } else {
6186         vixl32::Register out = OutputRegister(instruction);
6187         if (index.IsConstant()) {
6188           size_t offset =
6189               (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6190           GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
6191           // TODO(VIXL): Here and for other calls to `MaybeRecordImplicitNullCheck` in this method,
6192           // we should use a scope and the assembler to emit the load instruction to guarantee that
6193           // we record the pc at the correct position. But the `Assembler` does not automatically
6194           // handle unencodable offsets. Practically, everything is fine because the helper and
6195           // VIXL, at the time of writing, do generate the load instruction last.
6196           codegen_->MaybeRecordImplicitNullCheck(instruction);
6197           // If read barriers are enabled, emit read barriers other than
6198           // Baker's using a slow path (and also unpoison the loaded
6199           // reference, if heap poisoning is enabled).
6200           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6201         } else {
6202           UseScratchRegisterScope temps(GetVIXLAssembler());
6203           vixl32::Register temp = temps.Acquire();
6204 
6205           if (has_intermediate_address) {
6206             // We do not need to compute the intermediate address from the array: the
6207             // input instruction has done it already. See the comment in
6208             // `TryExtractArrayAccessAddress()`.
6209             if (kIsDebugBuild) {
6210               HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6211               DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6212             }
6213             temp = obj;
6214           } else {
6215             __ Add(temp, obj, data_offset);
6216           }
6217           codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6218           temps.Close();
6219           // TODO(VIXL): Use a scope to ensure that we record the pc position immediately after the
6220           // load instruction. Practically, everything is fine because the helper and VIXL, at the
6221           // time of writing, do generate the load instruction last.
6222           codegen_->MaybeRecordImplicitNullCheck(instruction);
6223           // If read barriers are enabled, emit read barriers other than
6224           // Baker's using a slow path (and also unpoison the loaded
6225           // reference, if heap poisoning is enabled).
6226 codegen_->MaybeGenerateReadBarrierSlow( 6227 instruction, out_loc, out_loc, obj_loc, data_offset, index); 6228 } 6229 } 6230 break; 6231 } 6232 6233 case DataType::Type::kInt64: { 6234 if (index.IsConstant()) { 6235 size_t offset = 6236 (Int32ConstantFrom(index) << TIMES_8) + data_offset; 6237 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), obj, offset); 6238 } else { 6239 UseScratchRegisterScope temps(GetVIXLAssembler()); 6240 vixl32::Register temp = temps.Acquire(); 6241 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8)); 6242 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), temp, data_offset); 6243 } 6244 break; 6245 } 6246 6247 case DataType::Type::kFloat32: { 6248 vixl32::SRegister out = SRegisterFrom(out_loc); 6249 if (index.IsConstant()) { 6250 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset; 6251 GetAssembler()->LoadSFromOffset(out, obj, offset); 6252 } else { 6253 UseScratchRegisterScope temps(GetVIXLAssembler()); 6254 vixl32::Register temp = temps.Acquire(); 6255 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4)); 6256 GetAssembler()->LoadSFromOffset(out, temp, data_offset); 6257 } 6258 break; 6259 } 6260 6261 case DataType::Type::kFloat64: { 6262 if (index.IsConstant()) { 6263 size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset; 6264 GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset); 6265 } else { 6266 UseScratchRegisterScope temps(GetVIXLAssembler()); 6267 vixl32::Register temp = temps.Acquire(); 6268 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8)); 6269 GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), temp, data_offset); 6270 } 6271 break; 6272 } 6273 6274 case DataType::Type::kUint32: 6275 case DataType::Type::kUint64: 6276 case DataType::Type::kVoid: 6277 LOG(FATAL) << "Unreachable type " << type; 6278 UNREACHABLE(); 6279 } 6280 6281 if (type == DataType::Type::kReference) { 6282 // Potential implicit null checks, in the case of reference 6283 // arrays, are handled in the previous switch statement. 6284 } else if (!maybe_compressed_char_at) { 6285 // TODO(VIXL): Use a scope to ensure we record the pc info immediately after 6286 // the preceding load instruction. 6287 codegen_->MaybeRecordImplicitNullCheck(instruction); 6288 } 6289 } 6290 6291 void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) { 6292 DataType::Type value_type = instruction->GetComponentType(); 6293 6294 bool needs_write_barrier = 6295 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); 6296 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); 6297 6298 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 6299 instruction, 6300 may_need_runtime_call_for_type_check ? 6301 LocationSummary::kCallOnSlowPath : 6302 LocationSummary::kNoCall); 6303 6304 locations->SetInAt(0, Location::RequiresRegister()); 6305 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 6306 if (DataType::IsFloatingPointType(value_type)) { 6307 locations->SetInAt(2, Location::RequiresFpuRegister()); 6308 } else { 6309 locations->SetInAt(2, Location::RequiresRegister()); 6310 } 6311 if (needs_write_barrier) { 6312 // Temporary registers for the write barrier. 6313 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. 
6314 locations->AddTemp(Location::RequiresRegister()); 6315 } 6316 } 6317 6318 void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { 6319 LocationSummary* locations = instruction->GetLocations(); 6320 vixl32::Register array = InputRegisterAt(instruction, 0); 6321 Location index = locations->InAt(1); 6322 DataType::Type value_type = instruction->GetComponentType(); 6323 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); 6324 bool needs_write_barrier = 6325 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); 6326 uint32_t data_offset = 6327 mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value(); 6328 Location value_loc = locations->InAt(2); 6329 HInstruction* array_instr = instruction->GetArray(); 6330 bool has_intermediate_address = array_instr->IsIntermediateAddress(); 6331 6332 switch (value_type) { 6333 case DataType::Type::kBool: 6334 case DataType::Type::kUint8: 6335 case DataType::Type::kInt8: 6336 case DataType::Type::kUint16: 6337 case DataType::Type::kInt16: 6338 case DataType::Type::kInt32: { 6339 if (index.IsConstant()) { 6340 int32_t const_index = Int32ConstantFrom(index); 6341 uint32_t full_offset = 6342 data_offset + (const_index << DataType::SizeShift(value_type)); 6343 StoreOperandType store_type = GetStoreOperandType(value_type); 6344 GetAssembler()->StoreToOffset(store_type, RegisterFrom(value_loc), array, full_offset); 6345 } else { 6346 UseScratchRegisterScope temps(GetVIXLAssembler()); 6347 vixl32::Register temp = temps.Acquire(); 6348 6349 if (has_intermediate_address) { 6350 // We do not need to compute the intermediate address from the array: the 6351 // input instruction has done it already. See the comment in 6352 // `TryExtractArrayAccessAddress()`. 6353 if (kIsDebugBuild) { 6354 HIntermediateAddress* tmp = array_instr->AsIntermediateAddress(); 6355 DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset); 6356 } 6357 temp = array; 6358 } else { 6359 __ Add(temp, array, data_offset); 6360 } 6361 codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index)); 6362 } 6363 break; 6364 } 6365 6366 case DataType::Type::kReference: { 6367 vixl32::Register value = RegisterFrom(value_loc); 6368 // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet. 6369 // See the comment in instruction_simplifier_shared.cc. 6370 DCHECK(!has_intermediate_address); 6371 6372 if (instruction->InputAt(2)->IsNullConstant()) { 6373 // Just setting null. 6374 if (index.IsConstant()) { 6375 size_t offset = 6376 (Int32ConstantFrom(index) << TIMES_4) + data_offset; 6377 GetAssembler()->StoreToOffset(kStoreWord, value, array, offset); 6378 } else { 6379 DCHECK(index.IsRegister()) << index; 6380 UseScratchRegisterScope temps(GetVIXLAssembler()); 6381 vixl32::Register temp = temps.Acquire(); 6382 __ Add(temp, array, data_offset); 6383 codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index)); 6384 } 6385 // TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding 6386 // store instruction. 
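// Note: the implicit null check relies on the store above faulting when `array` is null;
// ART's fault handler uses the pc recorded by MaybeRecordImplicitNullCheck below to
// translate the SIGSEGV into a NullPointerException, so the recorded pc must point at the
// faulting instruction.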
6387 codegen_->MaybeRecordImplicitNullCheck(instruction); 6388 DCHECK(!needs_write_barrier); 6389 DCHECK(!may_need_runtime_call_for_type_check); 6390 break; 6391 } 6392 6393 DCHECK(needs_write_barrier); 6394 Location temp1_loc = locations->GetTemp(0); 6395 vixl32::Register temp1 = RegisterFrom(temp1_loc); 6396 Location temp2_loc = locations->GetTemp(1); 6397 vixl32::Register temp2 = RegisterFrom(temp2_loc); 6398 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 6399 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 6400 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 6401 vixl32::Label done; 6402 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); 6403 SlowPathCodeARMVIXL* slow_path = nullptr; 6404 6405 if (may_need_runtime_call_for_type_check) { 6406 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARMVIXL(instruction); 6407 codegen_->AddSlowPath(slow_path); 6408 if (instruction->GetValueCanBeNull()) { 6409 vixl32::Label non_zero; 6410 __ CompareAndBranchIfNonZero(value, &non_zero); 6411 if (index.IsConstant()) { 6412 size_t offset = 6413 (Int32ConstantFrom(index) << TIMES_4) + data_offset; 6414 GetAssembler()->StoreToOffset(kStoreWord, value, array, offset); 6415 } else { 6416 DCHECK(index.IsRegister()) << index; 6417 UseScratchRegisterScope temps(GetVIXLAssembler()); 6418 vixl32::Register temp = temps.Acquire(); 6419 __ Add(temp, array, data_offset); 6420 codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index)); 6421 } 6422 // TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding 6423 // store instruction. 6424 codegen_->MaybeRecordImplicitNullCheck(instruction); 6425 __ B(final_label); 6426 __ Bind(&non_zero); 6427 } 6428 6429 // Note that when read barriers are enabled, the type checks 6430 // are performed without read barriers. This is fine, even in 6431 // the case where a class object is in the from-space after 6432 // the flip, as a comparison involving such a type would not 6433 // produce a false positive; it may of course produce a false 6434 // negative, in which case we would take the ArraySet slow 6435 // path. 6436 6437 { 6438 // Ensure we record the pc position immediately after the `ldr` instruction. 6439 ExactAssemblyScope aas(GetVIXLAssembler(), 6440 vixl32::kMaxInstructionSizeInBytes, 6441 CodeBufferCheckScope::kMaximumSize); 6442 // /* HeapReference<Class> */ temp1 = array->klass_ 6443 __ ldr(temp1, MemOperand(array, class_offset)); 6444 codegen_->MaybeRecordImplicitNullCheck(instruction); 6445 } 6446 GetAssembler()->MaybeUnpoisonHeapReference(temp1); 6447 6448 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 6449 GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, component_offset); 6450 // /* HeapReference<Class> */ temp2 = value->klass_ 6451 GetAssembler()->LoadFromOffset(kLoadWord, temp2, value, class_offset); 6452 // If heap poisoning is enabled, no need to unpoison `temp1` 6453 // nor `temp2`, as we are comparing two poisoned references. 6454 __ Cmp(temp1, temp2); 6455 6456 if (instruction->StaticTypeOfArrayIsObjectArray()) { 6457 vixl32::Label do_put; 6458 __ B(eq, &do_put, /* far_target */ false); 6459 // If heap poisoning is enabled, the `temp1` reference has 6460 // not been unpoisoned yet; unpoison it now. 
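// Note: with heap poisoning enabled, references are stored in negated form, so
// unpoisoning is a single RSB; MaybeUnpoisonHeapReference expands to nothing when
// kPoisonHeapReferences is false.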
6461 GetAssembler()->MaybeUnpoisonHeapReference(temp1); 6462 6463 // /* HeapReference<Class> */ temp1 = temp1->super_class_ 6464 GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, super_offset); 6465 // If heap poisoning is enabled, no need to unpoison 6466 // `temp1`, as we are comparing against null below. 6467 __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); 6468 __ Bind(&do_put); 6469 } else { 6470 __ B(ne, slow_path->GetEntryLabel()); 6471 } 6472 } 6473 6474 vixl32::Register source = value; 6475 if (kPoisonHeapReferences) { 6476 // Note that in the case where `value` is a null reference, 6477 // we do not enter this block, as a null reference does not 6478 // need poisoning. 6479 DCHECK_EQ(value_type, DataType::Type::kReference); 6480 __ Mov(temp1, value); 6481 GetAssembler()->PoisonHeapReference(temp1); 6482 source = temp1; 6483 } 6484 6485 if (index.IsConstant()) { 6486 size_t offset = 6487 (Int32ConstantFrom(index) << TIMES_4) + data_offset; 6488 GetAssembler()->StoreToOffset(kStoreWord, source, array, offset); 6489 } else { 6490 DCHECK(index.IsRegister()) << index; 6491 6492 UseScratchRegisterScope temps(GetVIXLAssembler()); 6493 vixl32::Register temp = temps.Acquire(); 6494 __ Add(temp, array, data_offset); 6495 codegen_->StoreToShiftedRegOffset(value_type, 6496 LocationFrom(source), 6497 temp, 6498 RegisterFrom(index)); 6499 } 6500 6501 if (!may_need_runtime_call_for_type_check) { 6502 // TODO(VIXL): Ensure we record the pc position immediately after the preceding store 6503 // instruction. 6504 codegen_->MaybeRecordImplicitNullCheck(instruction); 6505 } 6506 6507 codegen_->MarkGCCard(temp1, temp2, array, value, instruction->GetValueCanBeNull()); 6508 6509 if (done.IsReferenced()) { 6510 __ Bind(&done); 6511 } 6512 6513 if (slow_path != nullptr) { 6514 __ Bind(slow_path->GetExitLabel()); 6515 } 6516 6517 break; 6518 } 6519 6520 case DataType::Type::kInt64: { 6521 Location value = locations->InAt(2); 6522 if (index.IsConstant()) { 6523 size_t offset = 6524 (Int32ConstantFrom(index) << TIMES_8) + data_offset; 6525 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), array, offset); 6526 } else { 6527 UseScratchRegisterScope temps(GetVIXLAssembler()); 6528 vixl32::Register temp = temps.Acquire(); 6529 __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8)); 6530 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), temp, data_offset); 6531 } 6532 break; 6533 } 6534 6535 case DataType::Type::kFloat32: { 6536 Location value = locations->InAt(2); 6537 DCHECK(value.IsFpuRegister()); 6538 if (index.IsConstant()) { 6539 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset; 6540 GetAssembler()->StoreSToOffset(SRegisterFrom(value), array, offset); 6541 } else { 6542 UseScratchRegisterScope temps(GetVIXLAssembler()); 6543 vixl32::Register temp = temps.Acquire(); 6544 __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4)); 6545 GetAssembler()->StoreSToOffset(SRegisterFrom(value), temp, data_offset); 6546 } 6547 break; 6548 } 6549 6550 case DataType::Type::kFloat64: { 6551 Location value = locations->InAt(2); 6552 DCHECK(value.IsFpuRegisterPair()); 6553 if (index.IsConstant()) { 6554 size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset; 6555 GetAssembler()->StoreDToOffset(DRegisterFrom(value), array, offset); 6556 } else { 6557 UseScratchRegisterScope temps(GetVIXLAssembler()); 6558 vixl32::Register temp = temps.Acquire(); 6559 __ Add(temp, array, Operand(RegisterFrom(index), 
vixl32::LSL, TIMES_8)); 6560 GetAssembler()->StoreDToOffset(DRegisterFrom(value), temp, data_offset); 6561 } 6562 break; 6563 } 6564 6565 case DataType::Type::kUint32: 6566 case DataType::Type::kUint64: 6567 case DataType::Type::kVoid: 6568 LOG(FATAL) << "Unreachable type " << value_type; 6569 UNREACHABLE(); 6570 } 6571 6572 // Objects are handled in the switch. 6573 if (value_type != DataType::Type::kReference) { 6574 // TODO(VIXL): Ensure we record the pc position immediately after the preceding store 6575 // instruction. 6576 codegen_->MaybeRecordImplicitNullCheck(instruction); 6577 } 6578 } 6579 6580 void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) { 6581 LocationSummary* locations = 6582 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 6583 locations->SetInAt(0, Location::RequiresRegister()); 6584 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 6585 } 6586 6587 void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction) { 6588 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); 6589 vixl32::Register obj = InputRegisterAt(instruction, 0); 6590 vixl32::Register out = OutputRegister(instruction); 6591 { 6592 ExactAssemblyScope aas(GetVIXLAssembler(), 6593 vixl32::kMaxInstructionSizeInBytes, 6594 CodeBufferCheckScope::kMaximumSize); 6595 __ ldr(out, MemOperand(obj, offset)); 6596 codegen_->MaybeRecordImplicitNullCheck(instruction); 6597 } 6598 // Mask out compression flag from String's array length. 6599 if (mirror::kUseStringCompression && instruction->IsStringLength()) { 6600 __ Lsr(out, out, 1u); 6601 } 6602 } 6603 6604 void LocationsBuilderARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) { 6605 LocationSummary* locations = 6606 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 6607 6608 locations->SetInAt(0, Location::RequiresRegister()); 6609 locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset())); 6610 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 6611 } 6612 6613 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) { 6614 vixl32::Register out = OutputRegister(instruction); 6615 vixl32::Register first = InputRegisterAt(instruction, 0); 6616 Location second = instruction->GetLocations()->InAt(1); 6617 6618 if (second.IsRegister()) { 6619 __ Add(out, first, RegisterFrom(second)); 6620 } else { 6621 __ Add(out, first, Int32ConstantFrom(second)); 6622 } 6623 } 6624 6625 void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex( 6626 HIntermediateAddressIndex* instruction) { 6627 LOG(FATAL) << "Unreachable " << instruction->GetId(); 6628 } 6629 6630 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex( 6631 HIntermediateAddressIndex* instruction) { 6632 LOG(FATAL) << "Unreachable " << instruction->GetId(); 6633 } 6634 6635 void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) { 6636 RegisterSet caller_saves = RegisterSet::Empty(); 6637 InvokeRuntimeCallingConventionARMVIXL calling_convention; 6638 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); 6639 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1))); 6640 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); 6641 6642 HInstruction* index = instruction->InputAt(0); 6643 HInstruction* length = instruction->InputAt(1); 6644 // If both 
index and length are constants, we can statically check the bounds. But if at least one
6645   // of them is not encodable, ArmEncodableConstantOrRegister will create
6646   // Location::RequiresRegister(), which is not what we want here. Instead we create constant
6647   // locations.
6648   bool both_const = index->IsConstant() && length->IsConstant();
6649   locations->SetInAt(0, both_const
6650                              ? Location::ConstantLocation(index->AsConstant())
6651                              : ArmEncodableConstantOrRegister(index, CMP));
6652   locations->SetInAt(1, both_const
6653                              ? Location::ConstantLocation(length->AsConstant())
6654                              : ArmEncodableConstantOrRegister(length, CMP));
6655 }
6656 
6657 void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
6658   LocationSummary* locations = instruction->GetLocations();
6659   Location index_loc = locations->InAt(0);
6660   Location length_loc = locations->InAt(1);
6661 
6662   if (length_loc.IsConstant()) {
6663     int32_t length = Int32ConstantFrom(length_loc);
6664     if (index_loc.IsConstant()) {
6665       // BCE will remove the bounds check if we are guaranteed to pass.
6666       int32_t index = Int32ConstantFrom(index_loc);
6667       if (index < 0 || index >= length) {
6668         SlowPathCodeARMVIXL* slow_path =
6669             new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
6670         codegen_->AddSlowPath(slow_path);
6671         __ B(slow_path->GetEntryLabel());
6672       } else {
6673         // Some optimization after BCE may have generated this, and we should not
6674         // generate a bounds check if it is a valid range.
6675       }
6676       return;
6677     }
6678 
6679     SlowPathCodeARMVIXL* slow_path =
6680         new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
6681     __ Cmp(RegisterFrom(index_loc), length);
6682     codegen_->AddSlowPath(slow_path);
6683     __ B(hs, slow_path->GetEntryLabel());
6684   } else {
6685     SlowPathCodeARMVIXL* slow_path =
6686         new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
6687     __ Cmp(RegisterFrom(length_loc), InputOperandAt(instruction, 0));
6688     codegen_->AddSlowPath(slow_path);
6689     __ B(ls, slow_path->GetEntryLabel());
6690   }
6691 }
6692 
6693 void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
6694                                       vixl32::Register card,
6695                                       vixl32::Register object,
6696                                       vixl32::Register value,
6697                                       bool can_be_null) {
6698   vixl32::Label is_null;
6699   if (can_be_null) {
6700     __ CompareAndBranchIfZero(value, &is_null);
6701   }
6702   GetAssembler()->LoadFromOffset(
6703       kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
6704   __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
6705   __ Strb(card, MemOperand(card, temp));
6706   if (can_be_null) {
6707     __ Bind(&is_null);
6708   }
6709 }
6710 
6711 void LocationsBuilderARMVIXL::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
6712   LOG(FATAL) << "Unreachable";
6713 }
6714 
6715 void InstructionCodeGeneratorARMVIXL::VisitParallelMove(HParallelMove* instruction) {
6716   if (instruction->GetNext()->IsSuspendCheck() &&
6717       instruction->GetBlock()->GetLoopInformation() != nullptr) {
6718     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6719     // The back edge will generate the suspend check.
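// Only the stack map bookkeeping happens here; the suspend check code itself is emitted
// when the back edge is generated, see GenerateSuspendCheck below.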
6720 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); 6721 } 6722 6723 codegen_->GetMoveResolver()->EmitNativeCode(instruction); 6724 } 6725 6726 void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) { 6727 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 6728 instruction, LocationSummary::kCallOnSlowPath); 6729 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 6730 } 6731 6732 void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) { 6733 HBasicBlock* block = instruction->GetBlock(); 6734 if (block->GetLoopInformation() != nullptr) { 6735 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); 6736 // The back edge will generate the suspend check. 6737 return; 6738 } 6739 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { 6740 // The goto will generate the suspend check. 6741 return; 6742 } 6743 GenerateSuspendCheck(instruction, nullptr); 6744 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 12); 6745 } 6746 6747 void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction, 6748 HBasicBlock* successor) { 6749 SuspendCheckSlowPathARMVIXL* slow_path = 6750 down_cast<SuspendCheckSlowPathARMVIXL*>(instruction->GetSlowPath()); 6751 if (slow_path == nullptr) { 6752 slow_path = 6753 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARMVIXL(instruction, successor); 6754 instruction->SetSlowPath(slow_path); 6755 codegen_->AddSlowPath(slow_path); 6756 if (successor != nullptr) { 6757 DCHECK(successor->IsLoopHeader()); 6758 } 6759 } else { 6760 DCHECK_EQ(slow_path->GetSuccessor(), successor); 6761 } 6762 6763 UseScratchRegisterScope temps(GetVIXLAssembler()); 6764 vixl32::Register temp = temps.Acquire(); 6765 GetAssembler()->LoadFromOffset( 6766 kLoadUnsignedHalfword, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value()); 6767 if (successor == nullptr) { 6768 __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); 6769 __ Bind(slow_path->GetReturnLabel()); 6770 } else { 6771 __ CompareAndBranchIfZero(temp, codegen_->GetLabelOf(successor)); 6772 __ B(slow_path->GetEntryLabel()); 6773 } 6774 } 6775 6776 ArmVIXLAssembler* ParallelMoveResolverARMVIXL::GetAssembler() const { 6777 return codegen_->GetAssembler(); 6778 } 6779 6780 void ParallelMoveResolverARMVIXL::EmitMove(size_t index) { 6781 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); 6782 MoveOperands* move = moves_[index]; 6783 Location source = move->GetSource(); 6784 Location destination = move->GetDestination(); 6785 6786 if (source.IsRegister()) { 6787 if (destination.IsRegister()) { 6788 __ Mov(RegisterFrom(destination), RegisterFrom(source)); 6789 } else if (destination.IsFpuRegister()) { 6790 __ Vmov(SRegisterFrom(destination), RegisterFrom(source)); 6791 } else { 6792 DCHECK(destination.IsStackSlot()); 6793 GetAssembler()->StoreToOffset(kStoreWord, 6794 RegisterFrom(source), 6795 sp, 6796 destination.GetStackIndex()); 6797 } 6798 } else if (source.IsStackSlot()) { 6799 if (destination.IsRegister()) { 6800 GetAssembler()->LoadFromOffset(kLoadWord, 6801 RegisterFrom(destination), 6802 sp, 6803 source.GetStackIndex()); 6804 } else if (destination.IsFpuRegister()) { 6805 GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex()); 6806 } else { 6807 DCHECK(destination.IsStackSlot()); 6808 vixl32::Register temp = temps.Acquire(); 6809 
GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex()); 6810 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); 6811 } 6812 } else if (source.IsFpuRegister()) { 6813 if (destination.IsRegister()) { 6814 __ Vmov(RegisterFrom(destination), SRegisterFrom(source)); 6815 } else if (destination.IsFpuRegister()) { 6816 __ Vmov(SRegisterFrom(destination), SRegisterFrom(source)); 6817 } else { 6818 DCHECK(destination.IsStackSlot()); 6819 GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex()); 6820 } 6821 } else if (source.IsDoubleStackSlot()) { 6822 if (destination.IsDoubleStackSlot()) { 6823 vixl32::DRegister temp = temps.AcquireD(); 6824 GetAssembler()->LoadDFromOffset(temp, sp, source.GetStackIndex()); 6825 GetAssembler()->StoreDToOffset(temp, sp, destination.GetStackIndex()); 6826 } else if (destination.IsRegisterPair()) { 6827 DCHECK(ExpectedPairLayout(destination)); 6828 GetAssembler()->LoadFromOffset( 6829 kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex()); 6830 } else { 6831 DCHECK(destination.IsFpuRegisterPair()) << destination; 6832 GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex()); 6833 } 6834 } else if (source.IsRegisterPair()) { 6835 if (destination.IsRegisterPair()) { 6836 __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source)); 6837 __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source)); 6838 } else if (destination.IsFpuRegisterPair()) { 6839 __ Vmov(DRegisterFrom(destination), LowRegisterFrom(source), HighRegisterFrom(source)); 6840 } else { 6841 DCHECK(destination.IsDoubleStackSlot()) << destination; 6842 DCHECK(ExpectedPairLayout(source)); 6843 GetAssembler()->StoreToOffset(kStoreWordPair, 6844 LowRegisterFrom(source), 6845 sp, 6846 destination.GetStackIndex()); 6847 } 6848 } else if (source.IsFpuRegisterPair()) { 6849 if (destination.IsRegisterPair()) { 6850 __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), DRegisterFrom(source)); 6851 } else if (destination.IsFpuRegisterPair()) { 6852 __ Vmov(DRegisterFrom(destination), DRegisterFrom(source)); 6853 } else { 6854 DCHECK(destination.IsDoubleStackSlot()) << destination; 6855 GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex()); 6856 } 6857 } else { 6858 DCHECK(source.IsConstant()) << source; 6859 HConstant* constant = source.GetConstant(); 6860 if (constant->IsIntConstant() || constant->IsNullConstant()) { 6861 int32_t value = CodeGenerator::GetInt32ValueOf(constant); 6862 if (destination.IsRegister()) { 6863 __ Mov(RegisterFrom(destination), value); 6864 } else { 6865 DCHECK(destination.IsStackSlot()); 6866 vixl32::Register temp = temps.Acquire(); 6867 __ Mov(temp, value); 6868 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); 6869 } 6870 } else if (constant->IsLongConstant()) { 6871 int64_t value = Int64ConstantFrom(source); 6872 if (destination.IsRegisterPair()) { 6873 __ Mov(LowRegisterFrom(destination), Low32Bits(value)); 6874 __ Mov(HighRegisterFrom(destination), High32Bits(value)); 6875 } else { 6876 DCHECK(destination.IsDoubleStackSlot()) << destination; 6877 vixl32::Register temp = temps.Acquire(); 6878 __ Mov(temp, Low32Bits(value)); 6879 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); 6880 __ Mov(temp, High32Bits(value)); 6881 GetAssembler()->StoreToOffset(kStoreWord, 6882 temp, 6883 sp, 6884 destination.GetHighStackIndex(kArmWordSize)); 6885 } 
6886 } else if (constant->IsDoubleConstant()) { 6887 double value = constant->AsDoubleConstant()->GetValue(); 6888 if (destination.IsFpuRegisterPair()) { 6889 __ Vmov(DRegisterFrom(destination), value); 6890 } else { 6891 DCHECK(destination.IsDoubleStackSlot()) << destination; 6892 uint64_t int_value = bit_cast<uint64_t, double>(value); 6893 vixl32::Register temp = temps.Acquire(); 6894 __ Mov(temp, Low32Bits(int_value)); 6895 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); 6896 __ Mov(temp, High32Bits(int_value)); 6897 GetAssembler()->StoreToOffset(kStoreWord, 6898 temp, 6899 sp, 6900 destination.GetHighStackIndex(kArmWordSize)); 6901 } 6902 } else { 6903 DCHECK(constant->IsFloatConstant()) << constant->DebugName(); 6904 float value = constant->AsFloatConstant()->GetValue(); 6905 if (destination.IsFpuRegister()) { 6906 __ Vmov(SRegisterFrom(destination), value); 6907 } else { 6908 DCHECK(destination.IsStackSlot()); 6909 vixl32::Register temp = temps.Acquire(); 6910 __ Mov(temp, bit_cast<int32_t, float>(value)); 6911 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); 6912 } 6913 } 6914 } 6915 } 6916 6917 void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) { 6918 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); 6919 vixl32::Register temp = temps.Acquire(); 6920 __ Mov(temp, reg); 6921 GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, mem); 6922 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem); 6923 } 6924 6925 void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) { 6926 // TODO(VIXL32): Double check the performance of this implementation. 6927 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); 6928 vixl32::Register temp1 = temps.Acquire(); 6929 ScratchRegisterScope ensure_scratch( 6930 this, temp1.GetCode(), r0.GetCode(), codegen_->GetNumberOfCoreRegisters()); 6931 vixl32::Register temp2(ensure_scratch.GetRegister()); 6932 6933 int stack_offset = ensure_scratch.IsSpilled() ? 
kArmWordSize : 0; 6934 GetAssembler()->LoadFromOffset(kLoadWord, temp1, sp, mem1 + stack_offset); 6935 GetAssembler()->LoadFromOffset(kLoadWord, temp2, sp, mem2 + stack_offset); 6936 GetAssembler()->StoreToOffset(kStoreWord, temp1, sp, mem2 + stack_offset); 6937 GetAssembler()->StoreToOffset(kStoreWord, temp2, sp, mem1 + stack_offset); 6938 } 6939 6940 void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) { 6941 MoveOperands* move = moves_[index]; 6942 Location source = move->GetSource(); 6943 Location destination = move->GetDestination(); 6944 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); 6945 6946 if (source.IsRegister() && destination.IsRegister()) { 6947 vixl32::Register temp = temps.Acquire(); 6948 DCHECK(!RegisterFrom(source).Is(temp)); 6949 DCHECK(!RegisterFrom(destination).Is(temp)); 6950 __ Mov(temp, RegisterFrom(destination)); 6951 __ Mov(RegisterFrom(destination), RegisterFrom(source)); 6952 __ Mov(RegisterFrom(source), temp); 6953 } else if (source.IsRegister() && destination.IsStackSlot()) { 6954 Exchange(RegisterFrom(source), destination.GetStackIndex()); 6955 } else if (source.IsStackSlot() && destination.IsRegister()) { 6956 Exchange(RegisterFrom(destination), source.GetStackIndex()); 6957 } else if (source.IsStackSlot() && destination.IsStackSlot()) { 6958 Exchange(source.GetStackIndex(), destination.GetStackIndex()); 6959 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { 6960 vixl32::Register temp = temps.Acquire(); 6961 __ Vmov(temp, SRegisterFrom(source)); 6962 __ Vmov(SRegisterFrom(source), SRegisterFrom(destination)); 6963 __ Vmov(SRegisterFrom(destination), temp); 6964 } else if (source.IsRegisterPair() && destination.IsRegisterPair()) { 6965 vixl32::DRegister temp = temps.AcquireD(); 6966 __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source)); 6967 __ Mov(LowRegisterFrom(source), LowRegisterFrom(destination)); 6968 __ Mov(HighRegisterFrom(source), HighRegisterFrom(destination)); 6969 __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), temp); 6970 } else if (source.IsRegisterPair() || destination.IsRegisterPair()) { 6971 vixl32::Register low_reg = LowRegisterFrom(source.IsRegisterPair() ? source : destination); 6972 int mem = source.IsRegisterPair() ? destination.GetStackIndex() : source.GetStackIndex(); 6973 DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination)); 6974 vixl32::DRegister temp = temps.AcquireD(); 6975 __ Vmov(temp, low_reg, vixl32::Register(low_reg.GetCode() + 1)); 6976 GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem); 6977 GetAssembler()->StoreDToOffset(temp, sp, mem); 6978 } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) { 6979 vixl32::DRegister first = DRegisterFrom(source); 6980 vixl32::DRegister second = DRegisterFrom(destination); 6981 vixl32::DRegister temp = temps.AcquireD(); 6982 __ Vmov(temp, first); 6983 __ Vmov(first, second); 6984 __ Vmov(second, temp); 6985 } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) { 6986 vixl32::DRegister reg = source.IsFpuRegisterPair() 6987 ? DRegisterFrom(source) 6988 : DRegisterFrom(destination); 6989 int mem = source.IsFpuRegisterPair() 6990 ? 
destination.GetStackIndex() 6991 : source.GetStackIndex(); 6992 vixl32::DRegister temp = temps.AcquireD(); 6993 __ Vmov(temp, reg); 6994 GetAssembler()->LoadDFromOffset(reg, sp, mem); 6995 GetAssembler()->StoreDToOffset(temp, sp, mem); 6996 } else if (source.IsFpuRegister() || destination.IsFpuRegister()) { 6997 vixl32::SRegister reg = source.IsFpuRegister() 6998 ? SRegisterFrom(source) 6999 : SRegisterFrom(destination); 7000 int mem = source.IsFpuRegister() 7001 ? destination.GetStackIndex() 7002 : source.GetStackIndex(); 7003 vixl32::Register temp = temps.Acquire(); 7004 __ Vmov(temp, reg); 7005 GetAssembler()->LoadSFromOffset(reg, sp, mem); 7006 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem); 7007 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { 7008 vixl32::DRegister temp1 = temps.AcquireD(); 7009 vixl32::DRegister temp2 = temps.AcquireD(); 7010 __ Vldr(temp1, MemOperand(sp, source.GetStackIndex())); 7011 __ Vldr(temp2, MemOperand(sp, destination.GetStackIndex())); 7012 __ Vstr(temp1, MemOperand(sp, destination.GetStackIndex())); 7013 __ Vstr(temp2, MemOperand(sp, source.GetStackIndex())); 7014 } else { 7015 LOG(FATAL) << "Unimplemented" << source << " <-> " << destination; 7016 } 7017 } 7018 7019 void ParallelMoveResolverARMVIXL::SpillScratch(int reg) { 7020 __ Push(vixl32::Register(reg)); 7021 } 7022 7023 void ParallelMoveResolverARMVIXL::RestoreScratch(int reg) { 7024 __ Pop(vixl32::Register(reg)); 7025 } 7026 7027 HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( 7028 HLoadClass::LoadKind desired_class_load_kind) { 7029 switch (desired_class_load_kind) { 7030 case HLoadClass::LoadKind::kInvalid: 7031 LOG(FATAL) << "UNREACHABLE"; 7032 UNREACHABLE(); 7033 case HLoadClass::LoadKind::kReferrersClass: 7034 break; 7035 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: 7036 case HLoadClass::LoadKind::kBootImageClassTable: 7037 case HLoadClass::LoadKind::kBssEntry: 7038 DCHECK(!Runtime::Current()->UseJitCompilation()); 7039 break; 7040 case HLoadClass::LoadKind::kJitTableAddress: 7041 DCHECK(Runtime::Current()->UseJitCompilation()); 7042 break; 7043 case HLoadClass::LoadKind::kBootImageAddress: 7044 case HLoadClass::LoadKind::kRuntimeCall: 7045 break; 7046 } 7047 return desired_class_load_kind; 7048 } 7049 7050 void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { 7051 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 7052 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 7053 InvokeRuntimeCallingConventionARMVIXL calling_convention; 7054 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( 7055 cls, 7056 LocationFrom(calling_convention.GetRegisterAt(0)), 7057 LocationFrom(r0)); 7058 DCHECK(calling_convention.GetRegisterAt(0).Is(r0)); 7059 return; 7060 } 7061 DCHECK(!cls->NeedsAccessCheck()); 7062 7063 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); 7064 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) 7065 ? LocationSummary::kCallOnSlowPath 7066 : LocationSummary::kNoCall; 7067 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); 7068 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { 7069 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
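// With Baker read barriers this slow path only calls the mark entrypoint, which
// preserves all registers, so the empty caller-save set is sound.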
7070   }
7071 
7072   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
7073     locations->SetInAt(0, Location::RequiresRegister());
7074   }
7075   locations->SetOut(Location::RequiresRegister());
7076   if (load_kind == HLoadClass::LoadKind::kBssEntry) {
7077     if (!kUseReadBarrier || kUseBakerReadBarrier) {
7078       // Rely on the type resolution or initialization and marking to save everything we need.
7079       RegisterSet caller_saves = RegisterSet::Empty();
7080       InvokeRuntimeCallingConventionARMVIXL calling_convention;
7081       caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
7082       // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
7083       // that the kPrimNot result register is the same as the first argument register.
7084       locations->SetCustomSlowPathCallerSaves(caller_saves);
7085     } else {
7086       // For non-Baker read barrier we have a temp-clobbering call.
7087     }
7088   }
7089   if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
7090     if (load_kind == HLoadClass::LoadKind::kBssEntry ||
7091         (load_kind == HLoadClass::LoadKind::kReferrersClass &&
7092          !Runtime::Current()->UseJitCompilation())) {
7093       locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
7094     }
7095   }
7096 }
7097 
7098 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7099 // move.
7100 void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7101   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7102   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7103     codegen_->GenerateLoadClassRuntimeCall(cls);
7104     codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 13);
7105     return;
7106   }
7107   DCHECK(!cls->NeedsAccessCheck());
7108 
7109   LocationSummary* locations = cls->GetLocations();
7110   Location out_loc = locations->Out();
7111   vixl32::Register out = OutputRegister(cls);
7112 
7113   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
7114       ?
kWithoutReadBarrier 7115 : kCompilerReadBarrierOption; 7116 bool generate_null_check = false; 7117 switch (load_kind) { 7118 case HLoadClass::LoadKind::kReferrersClass: { 7119 DCHECK(!cls->CanCallRuntime()); 7120 DCHECK(!cls->MustGenerateClinitCheck()); 7121 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ 7122 vixl32::Register current_method = InputRegisterAt(cls, 0); 7123 GenerateGcRootFieldLoad(cls, 7124 out_loc, 7125 current_method, 7126 ArtMethod::DeclaringClassOffset().Int32Value(), 7127 read_barrier_option); 7128 break; 7129 } 7130 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { 7131 DCHECK(codegen_->GetCompilerOptions().IsBootImage()); 7132 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 7133 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = 7134 codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); 7135 codegen_->EmitMovwMovtPlaceholder(labels, out); 7136 break; 7137 } 7138 case HLoadClass::LoadKind::kBootImageAddress: { 7139 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 7140 uint32_t address = dchecked_integral_cast<uint32_t>( 7141 reinterpret_cast<uintptr_t>(cls->GetClass().Get())); 7142 DCHECK_NE(address, 0u); 7143 __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); 7144 break; 7145 } 7146 case HLoadClass::LoadKind::kBootImageClassTable: { 7147 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 7148 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = 7149 codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); 7150 codegen_->EmitMovwMovtPlaceholder(labels, out); 7151 __ Ldr(out, MemOperand(out, /* offset */ 0)); 7152 // Extract the reference from the slot data, i.e. clear the hash bits. 7153 int32_t masked_hash = ClassTable::TableSlot::MaskHash( 7154 ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); 7155 if (masked_hash != 0) { 7156 __ Sub(out, out, Operand(masked_hash)); 7157 } 7158 break; 7159 } 7160 case HLoadClass::LoadKind::kBssEntry: { 7161 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = 7162 codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); 7163 codegen_->EmitMovwMovtPlaceholder(labels, out); 7164 GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); 7165 generate_null_check = true; 7166 break; 7167 } 7168 case HLoadClass::LoadKind::kJitTableAddress: { 7169 __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(), 7170 cls->GetTypeIndex(), 7171 cls->GetClass())); 7172 // /* GcRoot<mirror::Class> */ out = *out 7173 GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); 7174 break; 7175 } 7176 case HLoadClass::LoadKind::kRuntimeCall: 7177 case HLoadClass::LoadKind::kInvalid: 7178 LOG(FATAL) << "UNREACHABLE"; 7179 UNREACHABLE(); 7180 } 7181 7182 if (generate_null_check || cls->MustGenerateClinitCheck()) { 7183 DCHECK(cls->CanCallRuntime()); 7184 LoadClassSlowPathARMVIXL* slow_path = 7185 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL( 7186 cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); 7187 codegen_->AddSlowPath(slow_path); 7188 if (generate_null_check) { 7189 __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); 7190 } 7191 if (cls->MustGenerateClinitCheck()) { 7192 GenerateClassInitializationCheck(slow_path, out); 7193 } else { 7194 __ Bind(slow_path->GetExitLabel()); 7195 } 7196 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 14); 7197 } 7198 } 7199 7200 void 
LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) { 7201 LocationSummary* locations = 7202 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); 7203 locations->SetInAt(0, Location::RequiresRegister()); 7204 if (check->HasUses()) { 7205 locations->SetOut(Location::SameAsFirstInput()); 7206 } 7207 } 7208 7209 void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) { 7210 // We assume the class is not null. 7211 LoadClassSlowPathARMVIXL* slow_path = 7212 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), 7213 check, 7214 check->GetDexPc(), 7215 /* do_clinit */ true); 7216 codegen_->AddSlowPath(slow_path); 7217 GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); 7218 } 7219 7220 void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( 7221 LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) { 7222 UseScratchRegisterScope temps(GetVIXLAssembler()); 7223 vixl32::Register temp = temps.Acquire(); 7224 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); 7225 const size_t status_byte_offset = 7226 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); 7227 constexpr uint32_t shifted_initialized_value = 7228 enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); 7229 7230 GetAssembler()->LoadFromOffset(kLoadUnsignedByte, temp, class_reg, status_byte_offset); 7231 __ Cmp(temp, shifted_initialized_value); 7232 __ B(lo, slow_path->GetEntryLabel()); 7233 // Even if the initialized flag is set, we may be in a situation where caches are not synced 7234 // properly. Therefore, we do a memory fence. 7235 __ Dmb(ISH); 7236 __ Bind(slow_path->GetExitLabel()); 7237 } 7238 7239 HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( 7240 HLoadString::LoadKind desired_string_load_kind) { 7241 switch (desired_string_load_kind) { 7242 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: 7243 case HLoadString::LoadKind::kBootImageInternTable: 7244 case HLoadString::LoadKind::kBssEntry: 7245 DCHECK(!Runtime::Current()->UseJitCompilation()); 7246 break; 7247 case HLoadString::LoadKind::kJitTableAddress: 7248 DCHECK(Runtime::Current()->UseJitCompilation()); 7249 break; 7250 case HLoadString::LoadKind::kBootImageAddress: 7251 case HLoadString::LoadKind::kRuntimeCall: 7252 break; 7253 } 7254 return desired_string_load_kind; 7255 } 7256 7257 void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { 7258 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); 7259 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); 7260 HLoadString::LoadKind load_kind = load->GetLoadKind(); 7261 if (load_kind == HLoadString::LoadKind::kRuntimeCall) { 7262 locations->SetOut(LocationFrom(r0)); 7263 } else { 7264 locations->SetOut(Location::RequiresRegister()); 7265 if (load_kind == HLoadString::LoadKind::kBssEntry) { 7266 if (!kUseReadBarrier || kUseBakerReadBarrier) { 7267 // Rely on the pResolveString and marking to save everything we need, including temps. 
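// The string resolution slow path uses a save-everything calling convention, so live
// registers survive the call; the only custom caller-save needed is the argument/result
// register added below.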
7268         RegisterSet caller_saves = RegisterSet::Empty();
7269         InvokeRuntimeCallingConventionARMVIXL calling_convention;
7270         caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
7271         // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
7272         // that the kPrimNot result register is the same as the first argument register.
7273         locations->SetCustomSlowPathCallerSaves(caller_saves);
7274         if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
7275           locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
7276         }
7277       } else {
7278         // For non-Baker read barrier we have a temp-clobbering call.
7279       }
7280     }
7281   }
7282 }
7283 
7284 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7285 // move.
7286 void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7287   LocationSummary* locations = load->GetLocations();
7288   Location out_loc = locations->Out();
7289   vixl32::Register out = OutputRegister(load);
7290   HLoadString::LoadKind load_kind = load->GetLoadKind();
7291 
7292   switch (load_kind) {
7293     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7294       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
7295       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7296           codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
7297       codegen_->EmitMovwMovtPlaceholder(labels, out);
7298       return;
7299     }
7300     case HLoadString::LoadKind::kBootImageAddress: {
7301       uint32_t address = dchecked_integral_cast<uint32_t>(
7302           reinterpret_cast<uintptr_t>(load->GetString().Get()));
7303       DCHECK_NE(address, 0u);
7304       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
7305       return;
7306     }
7307     case HLoadString::LoadKind::kBootImageInternTable: {
7308       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7309       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7310           codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
7311       codegen_->EmitMovwMovtPlaceholder(labels, out);
7312       __ Ldr(out, MemOperand(out, /* offset */ 0));
7313       return;
7314     }
7315     case HLoadString::LoadKind::kBssEntry: {
7316       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7317       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7318           codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
7319       codegen_->EmitMovwMovtPlaceholder(labels, out);
7320       GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
7321       LoadStringSlowPathARMVIXL* slow_path =
7322           new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
7323       codegen_->AddSlowPath(slow_path);
7324       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
7325       __ Bind(slow_path->GetExitLabel());
7326       codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 15);
7327       return;
7328     }
7329     case HLoadString::LoadKind::kJitTableAddress: {
7330       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
7331                                                         load->GetStringIndex(),
7332                                                         load->GetString()));
7333       // /* GcRoot<mirror::String> */ out = *out
7334       GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
7335       return;
7336     }
7337     default:
7338       break;
7339   }
7340 
7341   // TODO: Re-add the compiler code to do string dex cache lookup again.
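// Until then, any load kind not handled above falls through to the kQuickResolveString
// runtime call emitted below.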
7342 DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall); 7343 InvokeRuntimeCallingConventionARMVIXL calling_convention; 7344 __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); 7345 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); 7346 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); 7347 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 16); 7348 } 7349 7350 static int32_t GetExceptionTlsOffset() { 7351 return Thread::ExceptionOffset<kArmPointerSize>().Int32Value(); 7352 } 7353 7354 void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) { 7355 LocationSummary* locations = 7356 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); 7357 locations->SetOut(Location::RequiresRegister()); 7358 } 7359 7360 void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) { 7361 vixl32::Register out = OutputRegister(load); 7362 GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset()); 7363 } 7364 7365 7366 void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) { 7367 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); 7368 } 7369 7370 void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { 7371 UseScratchRegisterScope temps(GetVIXLAssembler()); 7372 vixl32::Register temp = temps.Acquire(); 7373 __ Mov(temp, 0); 7374 GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset()); 7375 } 7376 7377 void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) { 7378 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 7379 instruction, LocationSummary::kCallOnMainOnly); 7380 InvokeRuntimeCallingConventionARMVIXL calling_convention; 7381 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 7382 } 7383 7384 void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) { 7385 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); 7386 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); 7387 } 7388 7389 // Temp is used for read barrier. 7390 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { 7391 if (kEmitCompilerReadBarrier && 7392 (kUseBakerReadBarrier || 7393 type_check_kind == TypeCheckKind::kAbstractClassCheck || 7394 type_check_kind == TypeCheckKind::kClassHierarchyCheck || 7395 type_check_kind == TypeCheckKind::kArrayObjectCheck)) { 7396 return 1; 7397 } 7398 return 0; 7399 } 7400 7401 // Interface case has 3 temps, one for holding the number of interfaces, one for the current 7402 // interface pointer, one for loading the current interface. 7403 // The other checks have one temp for loading the object's class. 
static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
  if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
    return 3;
  }
  return 1 + NumberOfInstanceOfTemps(type_check_kind);
}

void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
  bool baker_read_barrier_slow_path = false;
  switch (type_check_kind) {
    case TypeCheckKind::kExactCheck:
    case TypeCheckKind::kAbstractClassCheck:
    case TypeCheckKind::kClassHierarchyCheck:
    case TypeCheckKind::kArrayObjectCheck: {
      bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
      call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
      baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
      break;
    }
    case TypeCheckKind::kArrayCheck:
    case TypeCheckKind::kUnresolvedCheck:
    case TypeCheckKind::kInterfaceCheck:
      call_kind = LocationSummary::kCallOnSlowPath;
      break;
  }

  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
  if (baker_read_barrier_slow_path) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // The "out" register is used as a temporary, so it overlaps with the inputs.
  // Note that TypeCheckSlowPathARM uses this register too.
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
  }
}

void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
  LocationSummary* locations = instruction->GetLocations();
  Location obj_loc = locations->InAt(0);
  vixl32::Register obj = InputRegisterAt(instruction, 0);
  vixl32::Register cls = InputRegisterAt(instruction, 1);
  Location out_loc = locations->Out();
  vixl32::Register out = OutputRegister(instruction);
  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
  DCHECK_LE(num_temps, 1u);
  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  vixl32::Label done;
  vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
  SlowPathCodeARMVIXL* slow_path = nullptr;

  // Return 0 if `obj` is null.
  // Avoid null check if we know obj is not null.
  if (instruction->MustDoNullCheck()) {
    DCHECK(!out.Is(obj));
    __ Mov(out, 0);
    __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false);
  }

  switch (type_check_kind) {
    case TypeCheckKind::kExactCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp_loc,
                                        read_barrier_option);
      // Classes must be equal for the instanceof to succeed.
      __ Cmp(out, cls);
      // We speculatively set the result to false without changing the condition
      // flags, which allows us to avoid some branching later.
      __ Mov(LeaveFlags, out, 0);

      // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
      // we check that the output is in a low register, so that a 16-bit MOV
      // encoding can be used.
      if (out.IsLow()) {
        // We use the scope because of the IT block that follows.
        ExactAssemblyScope guard(GetVIXLAssembler(),
                                 2 * vixl32::k16BitT32InstructionSizeInBytes,
                                 CodeBufferCheckScope::kExactSize);

        __ it(eq);
        __ mov(eq, out, 1);
      } else {
        __ B(ne, final_label, /* far_target */ false);
        __ Mov(out, 1);
      }

      break;
    }

    case TypeCheckKind::kAbstractClassCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp_loc,
                                        read_barrier_option);
      // If the class is abstract, we eagerly fetch the super class of the
      // object to avoid doing a comparison we know will fail.
      vixl32::Label loop;
      __ Bind(&loop);
      // /* HeapReference<Class> */ out = out->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       super_offset,
                                       maybe_temp_loc,
                                       read_barrier_option);
      // If `out` is null, we use it for the result, and jump to the final label.
      __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
      __ Cmp(out, cls);
      __ B(ne, &loop, /* far_target */ false);
      __ Mov(out, 1);
      break;
    }

    case TypeCheckKind::kClassHierarchyCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp_loc,
                                        read_barrier_option);
      // Walk over the class hierarchy to find a match.
      vixl32::Label loop, success;
      __ Bind(&loop);
      __ Cmp(out, cls);
      __ B(eq, &success, /* far_target */ false);
      // /* HeapReference<Class> */ out = out->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       super_offset,
                                       maybe_temp_loc,
                                       read_barrier_option);
      // This is essentially a null check, but it sets the condition flags to the
      // proper value for the code that follows the loop, i.e. not `eq`.
      __ Cmp(out, 1);
      __ B(hs, &loop, /* far_target */ false);

      // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
      // we check that the output is in a low register, so that a 16-bit MOV
      // encoding can be used.
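      // Sketch of the low-register case below (illustrative): the success path
      // reduces to two 16-bit instructions guarded by the ExactAssemblyScope,
      //   it    eq
      //   moveq out, #1
      // whereas a high `out` falls back to a branch plus a full-width MOV.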
      if (out.IsLow()) {
        // If `out` is null, we use it for the result, and the condition flags
        // have already been set to `ne`, so the IT block that comes afterwards
        // (and which handles the successful case) turns into a NOP (instead of
        // overwriting `out`).
        __ Bind(&success);

        // We use the scope because of the IT block that follows.
        ExactAssemblyScope guard(GetVIXLAssembler(),
                                 2 * vixl32::k16BitT32InstructionSizeInBytes,
                                 CodeBufferCheckScope::kExactSize);

        // There is only one branch to the `success` label (which is bound to this
        // IT block), and it has the same condition, `eq`, so in that case the MOV
        // is executed.
        __ it(eq);
        __ mov(eq, out, 1);
      } else {
        // If `out` is null, we use it for the result, and jump to the final label.
        __ B(final_label);
        __ Bind(&success);
        __ Mov(out, 1);
      }

      break;
    }

    case TypeCheckKind::kArrayObjectCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp_loc,
                                        read_barrier_option);
      // Do an exact check.
      vixl32::Label exact_check;
      __ Cmp(out, cls);
      __ B(eq, &exact_check, /* far_target */ false);
      // Otherwise, we need to check that the object's class is a non-primitive array.
      // /* HeapReference<Class> */ out = out->component_type_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       component_offset,
                                       maybe_temp_loc,
                                       read_barrier_option);
      // If `out` is null, we use it for the result, and jump to the final label.
      __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
      GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
      __ Cmp(out, 0);
      // We speculatively set the result to false without changing the condition
      // flags, which allows us to avoid some branching later.
      __ Mov(LeaveFlags, out, 0);

      // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
      // we check that the output is in a low register, so that a 16-bit MOV
      // encoding can be used.
      if (out.IsLow()) {
        __ Bind(&exact_check);

        // We use the scope because of the IT block that follows.
        ExactAssemblyScope guard(GetVIXLAssembler(),
                                 2 * vixl32::k16BitT32InstructionSizeInBytes,
                                 CodeBufferCheckScope::kExactSize);

        __ it(eq);
        __ mov(eq, out, 1);
      } else {
        __ B(ne, final_label, /* far_target */ false);
        __ Bind(&exact_check);
        __ Mov(out, 1);
      }

      break;
    }

    case TypeCheckKind::kArrayCheck: {
      // No read barrier since the slow path will retry upon failure.
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp_loc,
                                        kWithoutReadBarrier);
      __ Cmp(out, cls);
      DCHECK(locations->OnlyCallsOnSlowPath());
      slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
          instruction, /* is_fatal */ false);
      codegen_->AddSlowPath(slow_path);
      __ B(ne, slow_path->GetEntryLabel());
      __ Mov(out, 1);
      break;
    }

    case TypeCheckKind::kUnresolvedCheck:
    case TypeCheckKind::kInterfaceCheck: {
      // Note that we indeed only call on slow path, but we always go
      // into the slow path for the unresolved and interface check
      // cases.
      //
      // We cannot directly call the InstanceofNonTrivial runtime
      // entry point without resorting to a type checking slow path
      // here (i.e. by calling InvokeRuntime directly), as it would
      // require to assign fixed registers for the inputs of this
      // HInstanceOf instruction (following the runtime calling
      // convention), which might be cluttered by the potential first
      // read barrier emission at the beginning of this method.
      //
      // TODO: Introduce a new runtime entry point taking the object
      // to test (instead of its class) as argument, and let it deal
      // with the read barrier issues. This will let us refactor this
      // case of the `switch` code as it was previously (with a direct
      // call to the runtime not using a type checking slow path).
      // This should also be beneficial for the other cases above.
      DCHECK(locations->OnlyCallsOnSlowPath());
      slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
          instruction, /* is_fatal */ false);
      codegen_->AddSlowPath(slow_path);
      __ B(slow_path->GetEntryLabel());
      break;
    }
  }

  if (done.IsReferenced()) {
    __ Bind(&done);
  }

  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
  LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}

void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
  LocationSummary* locations = instruction->GetLocations();
  Location obj_loc = locations->InAt(0);
  vixl32::Register obj = InputRegisterAt(instruction, 0);
  vixl32::Register cls = InputRegisterAt(instruction, 1);
  Location temp_loc = locations->GetTemp(0);
  vixl32::Register temp = RegisterFrom(temp_loc);
  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
  DCHECK_LE(num_temps, 3u);
  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
  Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
  const uint32_t object_array_data_offset =
      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();

  bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
  SlowPathCodeARMVIXL* type_check_slow_path =
      new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
          instruction, is_type_check_slow_path_fatal);
  codegen_->AddSlowPath(type_check_slow_path);

  vixl32::Label done;
  vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
  // Avoid null check if we know obj is not null.
  if (instruction->MustDoNullCheck()) {
    __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false);
  }

  switch (type_check_kind) {
    case TypeCheckKind::kExactCheck:
    case TypeCheckKind::kArrayCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      __ Cmp(temp, cls);
      // Jump to slow path for throwing the exception or doing a
      // more involved array check.
      __ B(ne, type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kAbstractClassCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      // If the class is abstract, we eagerly fetch the super class of the
      // object to avoid doing a comparison we know will fail.
      vixl32::Label loop;
      __ Bind(&loop);
      // /* HeapReference<Class> */ temp = temp->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       super_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the class reference currently in `temp` is null, jump to the slow path to throw the
      // exception.
      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());

      // Otherwise, compare the classes.
      __ Cmp(temp, cls);
      __ B(ne, &loop, /* far_target */ false);
      break;
    }

    case TypeCheckKind::kClassHierarchyCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      // Walk over the class hierarchy to find a match.
      vixl32::Label loop;
      __ Bind(&loop);
      __ Cmp(temp, cls);
      __ B(eq, final_label, /* far_target */ false);

      // /* HeapReference<Class> */ temp = temp->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       super_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the class reference currently in `temp` is null, jump to the slow path to throw the
      // exception.
      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
      // Otherwise, jump to the beginning of the loop.
      __ B(&loop);
      break;
    }

    case TypeCheckKind::kArrayObjectCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      // Do an exact check.
      __ Cmp(temp, cls);
      __ B(eq, final_label, /* far_target */ false);

      // Otherwise, we need to check that the object's class is a non-primitive array.
      // /* HeapReference<Class> */ temp = temp->component_type_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       component_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);
      // If the component type is null, jump to the slow path to throw the exception.
      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
      // Otherwise, the object is indeed an array; further check that its component
      // type is not a primitive type.
      GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
      __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kUnresolvedCheck:
      // We always go into the type check slow path for the unresolved check case.
      // We cannot directly call the CheckCast runtime entry point
      // without resorting to a type checking slow path here (i.e. by
      // calling InvokeRuntime directly), as it would require to
      // assign fixed registers for the inputs of this HInstanceOf
      // instruction (following the runtime calling convention), which
      // might be cluttered by the potential first read barrier
      // emission at the beginning of this method.

      __ B(type_check_slow_path->GetEntryLabel());
      break;

    case TypeCheckKind::kInterfaceCheck: {
      // Avoid read barriers to improve performance of the fast path. We cannot get false
      // positives by doing this.
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      // /* HeapReference<Class> */ temp = temp->iftable_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        temp_loc,
                                        iftable_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);
      // Iftable is never null.
      __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset));
      // Loop through the iftable and check if any class matches.
      vixl32::Label start_loop;
      __ Bind(&start_loop);
      __ CompareAndBranchIfZero(RegisterFrom(maybe_temp2_loc),
                                type_check_slow_path->GetEntryLabel());
      __ Ldr(RegisterFrom(maybe_temp3_loc), MemOperand(temp, object_array_data_offset));
      GetAssembler()->MaybeUnpoisonHeapReference(RegisterFrom(maybe_temp3_loc));
      // Go to next interface.
      __ Add(temp, temp, Operand::From(2 * kHeapReferenceSize));
      __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2);
      // Compare the classes and continue the loop if they do not match.
      __ Cmp(cls, RegisterFrom(maybe_temp3_loc));
      __ B(ne, &start_loop, /* far_target */ false);
      break;
    }
  }
  if (done.IsReferenced()) {
    __ Bind(&done);
  }

  __ Bind(type_check_slow_path->GetExitLabel());
}

void LocationsBuilderARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
      instruction, LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConventionARMVIXL calling_convention;
  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
}

void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
  codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
                          instruction,
                          instruction->GetDexPc());
  if (instruction->IsEnter()) {
    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
  } else {
    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
  }
  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 17);
}

void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
  HandleBitwiseOperation(instruction, AND);
}

void LocationsBuilderARMVIXL::VisitOr(HOr* instruction) {
  HandleBitwiseOperation(instruction, ORR);
}

void LocationsBuilderARMVIXL::VisitXor(HXor* instruction) {
  HandleBitwiseOperation(instruction, EOR);
}

void LocationsBuilderARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
  DCHECK(instruction->GetResultType() == DataType::Type::kInt32
         || instruction->GetResultType() == DataType::Type::kInt64);
  // Note: GVN reorders commutative operations to have the constant on the right hand side.
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode));
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void InstructionCodeGeneratorARMVIXL::VisitAnd(HAnd* instruction) {
  HandleBitwiseOperation(instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitOr(HOr* instruction) {
  HandleBitwiseOperation(instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitXor(HXor* instruction) {
  HandleBitwiseOperation(instruction);
}

void LocationsBuilderARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
  DCHECK(instruction->GetResultType() == DataType::Type::kInt32
         || instruction->GetResultType() == DataType::Type::kInt64);

  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location first = locations->InAt(0);
  Location second = locations->InAt(1);
  Location out = locations->Out();

  if (instruction->GetResultType() == DataType::Type::kInt32) {
    vixl32::Register first_reg = RegisterFrom(first);
    vixl32::Register second_reg = RegisterFrom(second);
    vixl32::Register out_reg = RegisterFrom(out);

    switch (instruction->GetOpKind()) {
      case HInstruction::kAnd:
        __ Bic(out_reg, first_reg, second_reg);
        break;
      case HInstruction::kOr:
        __ Orn(out_reg, first_reg, second_reg);
        break;
      // There is no EON on arm.
      case HInstruction::kXor:
      default:
        LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
        UNREACHABLE();
    }
    return;

  } else {
    DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
    vixl32::Register first_low = LowRegisterFrom(first);
    vixl32::Register first_high = HighRegisterFrom(first);
    vixl32::Register second_low = LowRegisterFrom(second);
    vixl32::Register second_high = HighRegisterFrom(second);
    vixl32::Register out_low = LowRegisterFrom(out);
    vixl32::Register out_high = HighRegisterFrom(out);

    switch (instruction->GetOpKind()) {
      case HInstruction::kAnd:
        __ Bic(out_low, first_low, second_low);
        __ Bic(out_high, first_high, second_high);
        break;
      case HInstruction::kOr:
        __ Orn(out_low, first_low, second_low);
        __ Orn(out_high, first_high, second_high);
        break;
      // There is no EON on arm.
      case HInstruction::kXor:
      default:
        LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
        UNREACHABLE();
    }
  }
}

void LocationsBuilderARMVIXL::VisitDataProcWithShifterOp(
    HDataProcWithShifterOp* instruction) {
  DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
         instruction->GetType() == DataType::Type::kInt64);
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
  const bool overlap = instruction->GetType() == DataType::Type::kInt64 &&
                       HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind());

  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}

void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp(
    HDataProcWithShifterOp* instruction) {
  const LocationSummary* const locations = instruction->GetLocations();
  const HInstruction::InstructionKind kind = instruction->GetInstrKind();
  const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();

  if (instruction->GetType() == DataType::Type::kInt32) {
    const vixl32::Register first = InputRegisterAt(instruction, 0);
    const vixl32::Register output = OutputRegister(instruction);
    const vixl32::Register second = instruction->InputAt(1)->GetType() == DataType::Type::kInt64
        ? LowRegisterFrom(locations->InAt(1))
        : InputRegisterAt(instruction, 1);

    if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
      DCHECK_EQ(kind, HInstruction::kAdd);

      switch (op_kind) {
        case HDataProcWithShifterOp::kUXTB:
          __ Uxtab(output, first, second);
          break;
        case HDataProcWithShifterOp::kUXTH:
          __ Uxtah(output, first, second);
          break;
        case HDataProcWithShifterOp::kSXTB:
          __ Sxtab(output, first, second);
          break;
        case HDataProcWithShifterOp::kSXTH:
          __ Sxtah(output, first, second);
          break;
        default:
          LOG(FATAL) << "Unexpected operation kind: " << op_kind;
          UNREACHABLE();
      }
    } else {
      GenerateDataProcInstruction(kind,
                                  output,
                                  first,
                                  Operand(second,
                                          ShiftFromOpKind(op_kind),
                                          instruction->GetShiftAmount()),
                                  codegen_);
    }
  } else {
    DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);

    if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
      const vixl32::Register second = InputRegisterAt(instruction, 1);

      DCHECK(!LowRegisterFrom(locations->Out()).Is(second));
      GenerateDataProc(kind,
                       locations->Out(),
                       locations->InAt(0),
                       second,
                       Operand(second, ShiftType::ASR, 31),
                       codegen_);
    } else {
      GenerateLongDataProc(instruction, codegen_);
    }
  }
}

// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
                                                       vixl32::Register first,
                                                       uint32_t value) {
  // Optimize special cases for individual halves of `and-long` (`and` is simplified earlier).
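  // Illustrative examples of the encodings chosen below (assuming Thumb-2
  // modified-immediate rules):
  //   value == 0x000000ffu -> AND  out, first, #0xff    (value itself encodable)
  //   value == 0xffffff00u -> BIC  out, first, #0xff    (~value encodable)
  //   value == 0x000fffffu -> UBFX out, first, #0, #20  (mask is 2^20 - 1; neither
  //                                                      value nor ~value encodable)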
  if (value == 0xffffffffu) {
    if (!out.Is(first)) {
      __ Mov(out, first);
    }
    return;
  }
  if (value == 0u) {
    __ Mov(out, 0);
    return;
  }
  if (GetAssembler()->ShifterOperandCanHold(AND, value)) {
    __ And(out, first, value);
  } else if (GetAssembler()->ShifterOperandCanHold(BIC, ~value)) {
    __ Bic(out, first, ~value);
  } else {
    DCHECK(IsPowerOfTwo(value + 1));
    __ Ubfx(out, first, 0, WhichPowerOf2(value + 1));
  }
}

// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
void InstructionCodeGeneratorARMVIXL::GenerateOrrConst(vixl32::Register out,
                                                       vixl32::Register first,
                                                       uint32_t value) {
  // Optimize special cases for individual halves of `or-long` (`or` is simplified earlier).
  if (value == 0u) {
    if (!out.Is(first)) {
      __ Mov(out, first);
    }
    return;
  }
  if (value == 0xffffffffu) {
    __ Mvn(out, 0);
    return;
  }
  if (GetAssembler()->ShifterOperandCanHold(ORR, value)) {
    __ Orr(out, first, value);
  } else {
    DCHECK(GetAssembler()->ShifterOperandCanHold(ORN, ~value));
    __ Orn(out, first, ~value);
  }
}

// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
void InstructionCodeGeneratorARMVIXL::GenerateEorConst(vixl32::Register out,
                                                       vixl32::Register first,
                                                       uint32_t value) {
  // Optimize special case for individual halves of `xor-long` (`xor` is simplified earlier).
  if (value == 0u) {
    if (!out.Is(first)) {
      __ Mov(out, first);
    }
    return;
  }
  __ Eor(out, first, value);
}

void InstructionCodeGeneratorARMVIXL::GenerateAddLongConst(Location out,
                                                           Location first,
                                                           uint64_t value) {
  vixl32::Register out_low = LowRegisterFrom(out);
  vixl32::Register out_high = HighRegisterFrom(out);
  vixl32::Register first_low = LowRegisterFrom(first);
  vixl32::Register first_high = HighRegisterFrom(first);
  uint32_t value_low = Low32Bits(value);
  uint32_t value_high = High32Bits(value);
  if (value_low == 0u) {
    if (!out_low.Is(first_low)) {
      __ Mov(out_low, first_low);
    }
    __ Add(out_high, first_high, value_high);
    return;
  }
  __ Adds(out_low, first_low, value_low);
  if (GetAssembler()->ShifterOperandCanHold(ADC, value_high)) {
    __ Adc(out_high, first_high, value_high);
  } else {
    DCHECK(GetAssembler()->ShifterOperandCanHold(SBC, ~value_high));
    __ Sbc(out_high, first_high, ~value_high);
  }
}

void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location first = locations->InAt(0);
  Location second = locations->InAt(1);
  Location out = locations->Out();

  if (second.IsConstant()) {
    uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
    uint32_t value_low = Low32Bits(value);
    if (instruction->GetResultType() == DataType::Type::kInt32) {
      vixl32::Register first_reg = InputRegisterAt(instruction, 0);
      vixl32::Register out_reg = OutputRegister(instruction);
      if (instruction->IsAnd()) {
        GenerateAndConst(out_reg, first_reg, value_low);
      } else if (instruction->IsOr()) {
        GenerateOrrConst(out_reg, first_reg, value_low);
      } else {
        DCHECK(instruction->IsXor());
        GenerateEorConst(out_reg, first_reg, value_low);
      }
    } else {
      DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
      uint32_t value_high = High32Bits(value);
      vixl32::Register first_low = LowRegisterFrom(first);
      vixl32::Register first_high = HighRegisterFrom(first);
      vixl32::Register out_low = LowRegisterFrom(out);
      vixl32::Register out_high = HighRegisterFrom(out);
      if (instruction->IsAnd()) {
        GenerateAndConst(out_low, first_low, value_low);
        GenerateAndConst(out_high, first_high, value_high);
      } else if (instruction->IsOr()) {
        GenerateOrrConst(out_low, first_low, value_low);
        GenerateOrrConst(out_high, first_high, value_high);
      } else {
        DCHECK(instruction->IsXor());
        GenerateEorConst(out_low, first_low, value_low);
        GenerateEorConst(out_high, first_high, value_high);
      }
    }
    return;
  }

  if (instruction->GetResultType() == DataType::Type::kInt32) {
    vixl32::Register first_reg = InputRegisterAt(instruction, 0);
    vixl32::Register second_reg = InputRegisterAt(instruction, 1);
    vixl32::Register out_reg = OutputRegister(instruction);
    if (instruction->IsAnd()) {
      __ And(out_reg, first_reg, second_reg);
    } else if (instruction->IsOr()) {
      __ Orr(out_reg, first_reg, second_reg);
    } else {
      DCHECK(instruction->IsXor());
      __ Eor(out_reg, first_reg, second_reg);
    }
  } else {
    DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
    vixl32::Register first_low = LowRegisterFrom(first);
    vixl32::Register first_high = HighRegisterFrom(first);
    vixl32::Register second_low = LowRegisterFrom(second);
    vixl32::Register second_high = HighRegisterFrom(second);
    vixl32::Register out_low = LowRegisterFrom(out);
    vixl32::Register out_high = HighRegisterFrom(out);
    if (instruction->IsAnd()) {
      __ And(out_low, first_low, second_low);
      __ And(out_high, first_high, second_high);
    } else if (instruction->IsOr()) {
      __ Orr(out_low, first_low, second_low);
      __ Orr(out_high, first_high, second_high);
    } else {
      DCHECK(instruction->IsXor());
      __ Eor(out_low, first_low, second_low);
      __ Eor(out_high, first_high, second_high);
    }
  }
}

void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
    HInstruction* instruction,
    Location out,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
  vixl32::Register out_reg = RegisterFrom(out);
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(out + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
    } else {
      // Load with slow path based read barrier.
      // Save the value of `out` into `maybe_temp` before overwriting it
      // in the following move operation, as we will need it for the
      // read barrier below.
      __ Mov(RegisterFrom(maybe_temp), out_reg);
      // /* HeapReference<Object> */ out = *(out + offset)
      GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(out + offset)
    GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
    HInstruction* instruction,
    Location out,
    Location obj,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
  vixl32::Register out_reg = RegisterFrom(out);
  vixl32::Register obj_reg = RegisterFrom(obj);
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
    } else {
      // Load with slow path based read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(obj + offset)
    GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
    HInstruction* instruction,
    Location root,
    vixl32::Register obj,
    uint32_t offset,
    ReadBarrierOption read_barrier_option) {
  vixl32::Register root_reg = RegisterFrom(root);
  if (read_barrier_option == kWithReadBarrier) {
    DCHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
      // Baker's read barriers are used.
      if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
          !Runtime::Current()->UseJitCompilation()) {
        // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
        // the Marking Register) to decide whether we need to enter
        // the slow path to mark the GC root.
        //
        // We use link-time generated thunks for the slow path. That thunk
        // checks the reference and jumps to the entrypoint if needed.
        //
        // lr = &return_address;
        // GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
        // if (mr) {  // Thread::Current()->GetIsGcMarking()
        //   goto gc_root_thunk<root_reg>(lr)
        // }
        // return_address:

        UseScratchRegisterScope temps(GetVIXLAssembler());
        ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
        bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
        uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(
            root_reg.GetCode(), narrow);
        vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);

        vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes);
        vixl32::Label return_address;
        EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
        __ cmp(mr, Operand(0));
        // Currently the offset is always within range. If that changes,
        // we shall have to split the load the same way as for fields.
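        // For reference, the expected sequence is roughly (the BNE is a
        // placeholder rewritten by the linker to branch to the thunk):
        //   adr lr, return_address
        //   cmp mr, #0
        //   ldr root_reg, [obj, #offset]   ; narrow or wide encoding
        //   bne <gc root thunk>
        // return_address: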
        DCHECK_LT(offset, kReferenceLoadMinFarOffset);
        ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
        __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
        EmitPlaceholderBne(codegen_, bne_label);
        __ Bind(&return_address);
        DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
                  narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
                         : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
      } else {
        // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
        // the Marking Register) to decide whether we need to enter
        // the slow path to mark the GC root.
        //
        // GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
        // if (mr) {  // Thread::Current()->GetIsGcMarking()
        //   // Slow path.
        //   entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
        //   root = entrypoint(root);  // root = ReadBarrier::Mark(root);  // Entry point call.
        // }

        // Slow path marking the GC root `root`. The entrypoint will
        // be loaded by the slow path code.
        SlowPathCodeARMVIXL* slow_path =
            new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root);
        codegen_->AddSlowPath(slow_path);

        // /* GcRoot<mirror::Object> */ root = *(obj + offset)
        GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
        static_assert(
            sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
            "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
            "have different sizes.");
        static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
                      "art::mirror::CompressedReference<mirror::Object> and int32_t "
                      "have different sizes.");

        __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
        __ Bind(slow_path->GetExitLabel());
      }
    } else {
      // GC root loaded through a slow path for read barriers other
      // than Baker's.
      // /* GcRoot<mirror::Object>* */ root = obj + offset
      __ Add(root_reg, obj, offset);
      // /* mirror::Object* */ root = root->Read()
      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
    }
  } else {
    // Plain GC root load with no read barrier.
    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
    GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
    // Note that GC roots are not affected by heap poisoning, thus we
    // do not have to unpoison `root_reg` here.
  }
  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18);
}

void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);
  if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
    if (!Runtime::Current()->UseJitCompilation()) {
      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
    }
  }
}

void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                 Location ref,
                                                                 vixl32::Register obj,
                                                                 uint32_t offset,
                                                                 Location temp,
                                                                 bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
      !Runtime::Current()->UseJitCompilation()) {
    // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
    // Marking Register) to decide whether we need to enter the slow
    // path to mark the reference. Then, in the slow path, check the
    // gray bit in the lock word of the reference's holder (`obj`) to
    // decide whether to mark `ref` or not.
    //
    // We use link-time generated thunks for the slow path. That thunk checks
    // the holder and jumps to the entrypoint if needed. If the holder is not
    // gray, it creates a fake dependency and returns to the LDR instruction.
    //
    // lr = &gray_return_address;
    // if (mr) {  // Thread::Current()->GetIsGcMarking()
    //   goto field_thunk<holder_reg, base_reg>(lr)
    // }
    // not_gray_return_address:
    //   // Original reference load. If the offset is too large to fit
    //   // into LDR, we use an adjusted base register here.
    //   HeapReference<mirror::Object> reference = *(obj+offset);
    // gray_return_address:

    DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
    vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
    bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
    vixl32::Register base = obj;
    if (offset >= kReferenceLoadMinFarOffset) {
      base = RegisterFrom(temp);
      DCHECK(!base.Is(kBakerCcEntrypointRegister));
      static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
      __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
      offset &= (kReferenceLoadMinFarOffset - 1u);
      // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
      // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
      // increase the overall code size when taking the generated thunks into account.
      DCHECK(!narrow);
    }
    UseScratchRegisterScope temps(GetVIXLAssembler());
    ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
    uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
        base.GetCode(), obj.GetCode(), narrow);
    vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);

    {
      vixl::EmissionCheckScope guard(
          GetVIXLAssembler(),
          (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
      vixl32::Label return_address;
      EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
      __ cmp(mr, Operand(0));
      EmitPlaceholderBne(this, bne_label);
      ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
      __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
      if (needs_null_check) {
        MaybeRecordImplicitNullCheck(instruction);
      }
      // Note: We need a specific width for the unpoisoning NEG.
      if (kPoisonHeapReferences) {
        if (narrow) {
          // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
          __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
        } else {
          __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
        }
      }
      __ Bind(&return_address);
      DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
                narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
                       : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
    }
    MaybeGenerateMarkingRegisterCheck(/* code */ 19, /* temp_loc */ LocationFrom(ip));
    return;
  }

  // /* HeapReference<Object> */ ref = *(obj + offset)
  Location no_index = Location::NoLocation();
  ScaleFactor no_scale_factor = TIMES_1;
  GenerateReferenceLoadWithBakerReadBarrier(
      instruction, ref, obj, offset, no_index, no_scale_factor, temp, needs_null_check);
}

void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                 Location ref,
                                                                 vixl32::Register obj,
                                                                 uint32_t data_offset,
                                                                 Location index,
                                                                 Location temp,
                                                                 bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  static_assert(
      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
  ScaleFactor scale_factor = TIMES_4;

  if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
      !Runtime::Current()->UseJitCompilation()) {
    // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
    // Marking Register) to decide whether we need to enter the slow
    // path to mark the reference. Then, in the slow path, check the
    // gray bit in the lock word of the reference's holder (`obj`) to
    // decide whether to mark `ref` or not.
    //
    // We use link-time generated thunks for the slow path. That thunk checks
    // the holder and jumps to the entrypoint if needed. If the holder is not
    // gray, it creates a fake dependency and returns to the LDR instruction.
    //
    // lr = &gray_return_address;
    // if (mr) {  // Thread::Current()->GetIsGcMarking()
    //   goto array_thunk<base_reg>(lr)
    // }
    // not_gray_return_address:
    //   // Original reference load. If the offset is too large to fit
    //   // into LDR, we use an adjusted base register here.
    //   HeapReference<mirror::Object> reference = data[index];
    // gray_return_address:

    DCHECK(index.IsValid());
    vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
    vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
    vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32);  // Raw pointer.
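    // Sketch of the sequence emitted below (illustrative; the BNE placeholder
    // is patched at link time to branch to the array thunk):
    //   add data_reg, obj, #data_offset
    //   adr lr, return_address
    //   cmp mr, #0
    //   bne <array thunk>
    //   ldr ref_reg, [data_reg, index_reg, lsl #2]
    // return_address: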
    DCHECK(!data_reg.Is(kBakerCcEntrypointRegister));

    UseScratchRegisterScope temps(GetVIXLAssembler());
    ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
    uint32_t custom_data =
        linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode());
    vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);

    __ Add(data_reg, obj, Operand(data_offset));
    {
      vixl::EmissionCheckScope guard(
          GetVIXLAssembler(),
          (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
      vixl32::Label return_address;
      EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
      __ cmp(mr, Operand(0));
      EmitPlaceholderBne(this, bne_label);
      ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
      __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
      DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
      // Note: We need a Wide NEG for the unpoisoning.
      if (kPoisonHeapReferences) {
        __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
      }
      __ Bind(&return_address);
      DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
                BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
    }
    MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip));
    return;
  }

  // /* HeapReference<Object> */ ref =
  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
  GenerateReferenceLoadWithBakerReadBarrier(
      instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
}

void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                     Location ref,
                                                                     vixl32::Register obj,
                                                                     uint32_t offset,
                                                                     Location index,
                                                                     ScaleFactor scale_factor,
                                                                     Location temp,
                                                                     bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
  // Marking Register) to decide whether we need to enter the slow
  // path to mark the reference. Then, in the slow path, check the
  // gray bit in the lock word of the reference's holder (`obj`) to
  // decide whether to mark `ref` or not.
  //
  // if (mr) {  // Thread::Current()->GetIsGcMarking()
  //   // Slow path.
  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
  //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
  //   if (is_gray) {
  //     entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
  //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
  //   }
  // } else {
  //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
  // }

  vixl32::Register temp_reg = RegisterFrom(temp);

  // Slow path marking the object `ref` when the GC is marking. The
  // entrypoint will be loaded by the slow path code.
  SlowPathCodeARMVIXL* slow_path =
      new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
          instruction, ref, obj, offset, index, scale_factor, needs_null_check, temp_reg);
  AddSlowPath(slow_path);

  __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
  // Fast path: the GC is not marking: just load the reference.
  GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
  __ Bind(slow_path->GetExitLabel());
  MaybeGenerateMarkingRegisterCheck(/* code */ 21);
}

void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
                                                                    Location ref,
                                                                    vixl32::Register obj,
                                                                    Location field_offset,
                                                                    Location temp,
                                                                    bool needs_null_check,
                                                                    vixl32::Register temp2) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
  // Marking Register) to decide whether we need to enter the slow
  // path to update the reference field within `obj`. Then, in the
  // slow path, check the gray bit in the lock word of the reference's
  // holder (`obj`) to decide whether to mark `ref` and update the
  // field or not.
  //
  // if (mr) {  // Thread::Current()->GetIsGcMarking()
  //   // Slow path.
  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
  //   HeapReference<mirror::Object> ref = *(obj + field_offset);  // Reference load.
  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
  //   if (is_gray) {
  //     old_ref = ref;
  //     entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
  //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
  //     compareAndSwapObject(obj, field_offset, old_ref, ref);
  //   }
  // }

  vixl32::Register temp_reg = RegisterFrom(temp);

  // Slow path updating the object reference at address `obj + field_offset`
  // when the GC is marking. The entrypoint will be loaded by the slow path code.
  SlowPathCodeARMVIXL* slow_path =
      new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
          instruction,
          ref,
          obj,
          /* offset */ 0u,
          /* index */ field_offset,
          /* scale_factor */ ScaleFactor::TIMES_1,
          needs_null_check,
          temp_reg,
          temp2);
  AddSlowPath(slow_path);

  __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
  // Fast path: the GC is not marking: nothing to do (the field is
  // up-to-date, and we don't need to load the reference).
  __ Bind(slow_path->GetExitLabel());
  MaybeGenerateMarkingRegisterCheck(/* code */ 22);
}

void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
                                                    Location ref,
                                                    vixl::aarch32::Register obj,
                                                    uint32_t offset,
                                                    Location index,
                                                    ScaleFactor scale_factor,
                                                    bool needs_null_check) {
  DataType::Type type = DataType::Type::kReference;
  vixl32::Register ref_reg = RegisterFrom(ref, type);

  // If needed, vixl::EmissionCheckScope guards are used to ensure
  // that no pools are emitted between the load (macro) instruction
  // and MaybeRecordImplicitNullCheck.
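  // For example (illustrative): with offset == 12, scale_factor == TIMES_4 and
  // a constant index of 3, the constant-index case below folds everything into
  // a single "ldr ref_reg, [obj, #24]".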

  if (index.IsValid()) {
    // Load types involving an "index": ArrayGet,
    // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
    // intrinsics.
    // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
    if (index.IsConstant()) {
      size_t computed_offset =
          (Int32ConstantFrom(index) << scale_factor) + offset;
      vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
      GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
      if (needs_null_check) {
        MaybeRecordImplicitNullCheck(instruction);
      }
    } else {
      // Handle the special case of the
      // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
      // intrinsics, which use a register pair as index ("long
      // offset"), of which only the low part contains data.
      vixl32::Register index_reg = index.IsRegisterPair()
          ? LowRegisterFrom(index)
          : RegisterFrom(index);
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register temp = temps.Acquire();
      __ Add(temp, obj, Operand(index_reg, ShiftType::LSL, scale_factor));
      {
        vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
        GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp, offset);
        if (needs_null_check) {
          MaybeRecordImplicitNullCheck(instruction);
        }
      }
    }
  } else {
    // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
    vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
    GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, offset);
    if (needs_null_check) {
      MaybeRecordImplicitNullCheck(instruction);
    }
  }

  // Object* ref = ref_addr->AsMirrorPtr()
  GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
}

void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
  // The following condition is a compile-time one, so it does not have a run-time cost.
  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
    // The following condition is a run-time one; it is executed after the
    // previous compile-time test, to avoid penalizing non-debug builds.
    if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register temp = temp_loc.IsValid() ? RegisterFrom(temp_loc) : temps.Acquire();
      GetAssembler()->GenerateMarkingRegisterCheck(temp,
                                                   kMarkingRegisterCheckBreakCodeBaseCode + code);
    }
  }
}

void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
                                                   Location out,
                                                   Location ref,
                                                   Location obj,
                                                   uint32_t offset,
                                                   Location index) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the reference load.
  //
  // If heap poisoning is enabled, the unpoisoning of the loaded
  // reference will be carried out by the runtime within the slow
  // path.
  //
  // Note that `ref` currently does not get unpoisoned (when heap
  // poisoning is enabled), which is alright as the `ref` argument is
  // not used by the artReadBarrierSlow entry point.
  //
  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
  SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator())
      ReadBarrierForHeapReferenceSlowPathARMVIXL(instruction, out, ref, obj, offset, index);
  AddSlowPath(slow_path);

  __ B(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                                        Location out,
                                                        Location ref,
                                                        Location obj,
                                                        uint32_t offset,
                                                        Location index) {
  if (kEmitCompilerReadBarrier) {
    // Baker's read barriers shall be handled by the fast path
    // (CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier).
    DCHECK(!kUseBakerReadBarrier);
    // If heap poisoning is enabled, unpoisoning will be taken care of
    // by the runtime within the slow path.
    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
  } else if (kPoisonHeapReferences) {
    GetAssembler()->UnpoisonHeapReference(RegisterFrom(out));
  }
}

void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction,
                                                          Location out,
                                                          Location root) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the GC root load.
  //
  // Note that GC roots are not affected by heap poisoning, so we do
  // not need to do anything special for this here.
  SlowPathCodeARMVIXL* slow_path =
      new (GetScopedAllocator()) ReadBarrierForRootSlowPathARMVIXL(instruction, out, root);
  AddSlowPath(slow_path);

  __ B(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

// Check if the desired_dispatch_info is supported. If it is, return it,
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
  return desired_dispatch_info;
}

vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter(
    HInvokeStaticOrDirect* invoke, vixl32::Register temp) {
  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
  if (!invoke->GetLocations()->Intrinsified()) {
    return RegisterFrom(location);
  }
  // For intrinsics we allow any location, so it may be on the stack.
  if (!location.IsRegister()) {
    GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, location.GetStackIndex());
    return temp;
  }
  // For register locations, check if the register was saved. If so, get it from the stack.
  // Note: There is a chance that the register was saved but not overwritten, so we could
  // save one load. However, since this is just an intrinsic slow path we prefer this
  // simple and more robust approach rather than trying to determine if that's the case.
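  //
  // In pseudocode (illustrative only, names are descriptive and not from the
  // runtime):
  //
  //   if (current slow path saved this core register) {
  //     temp = *(sp + save_offset);  // Reload the clobbered value.
  //     return temp;
  //   }
  //   return register;  // Not saved by the slow path, so still live.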
  SlowPathCode* slow_path = GetCurrentSlowPath();
  if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(RegisterFrom(location).GetCode())) {
    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(RegisterFrom(location).GetCode());
    GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, stack_offset);
    return temp;
  }
  return RegisterFrom(location);
}

void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
  switch (invoke->GetMethodLoadKind()) {
    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
      uint32_t offset =
          GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
      // temp = thread->string_init_entrypoint
      GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, offset);
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
      DCHECK(GetCompilerOptions().IsBootImage());
      PcRelativePatchInfo* labels = NewBootImageMethodPatch(invoke->GetTargetMethod());
      vixl32::Register temp_reg = RegisterFrom(temp);
      EmitMovwMovtPlaceholder(labels, temp_reg);
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
      __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress()));
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
      PcRelativePatchInfo* labels = NewMethodBssEntryPatch(
          MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
      vixl32::Register temp_reg = RegisterFrom(temp);
      EmitMovwMovtPlaceholder(labels, temp_reg);
      GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset */ 0);
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
      {
        // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
        ExactAssemblyScope aas(GetVIXLAssembler(),
                               vixl32::k32BitT32InstructionSizeInBytes,
                               CodeBufferCheckScope::kMaximumSize);
        __ bl(GetFrameEntryLabel());
        RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      }
      break;
    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
      // LR = callee_method->entry_point_from_quick_compiled_code_
      GetAssembler()->LoadFromOffset(
          kLoadWord,
          lr,
          RegisterFrom(callee_method),
          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
      {
        // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
        // blx in T32 has only a 16-bit encoding, which is why a stricter
        // (exact-size) check is used for the scope.
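        // (With CodeBufferCheckScope::kExactSize, the scope asserts that
        // exactly one 16-bit instruction is emitted, so the pc recorded by
        // RecordPcInfo() is the return address immediately after the blx.)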
        ExactAssemblyScope aas(GetVIXLAssembler(),
                               vixl32::k16BitT32InstructionSizeInBytes,
                               CodeBufferCheckScope::kExactSize);
        // Call the entry point loaded into LR.
        __ blx(lr);
        RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      }
      break;
  }

  DCHECK(!IsLeafMethod());
}

void CodeGeneratorARMVIXL::GenerateVirtualCall(
    HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
  vixl32::Register temp = RegisterFrom(temp_location);
  uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
      invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();

  // Use the calling convention instead of the location of the receiver, as
  // intrinsics may have put the receiver in a different register. In the intrinsics
  // slow path, the arguments have been moved to the right place, so here we are
  // guaranteed that the receiver is the first register of the calling convention.
  InvokeDexCallingConventionARMVIXL calling_convention;
  vixl32::Register receiver = calling_convention.GetRegisterAt(0);
  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  {
    // Make sure the pc is recorded immediately after the `ldr` instruction.
    ExactAssemblyScope aas(GetVIXLAssembler(),
                           vixl32::kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);
    // /* HeapReference<Class> */ temp = receiver->klass_
    __ ldr(temp, MemOperand(receiver, class_offset));
    MaybeRecordImplicitNullCheck(invoke);
  }
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However, this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (though a
  // future concurrent copying collector may not).
  GetAssembler()->MaybeUnpoisonHeapReference(temp);

  // temp = temp->GetMethodAt(method_offset);
  uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
      kArmPointerSize).Int32Value();
  GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
  // LR = temp->GetEntryPoint();
  GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
  {
    // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
    // blx in T32 has only a 16-bit encoding, which is why a stricter
    // (exact-size) check is used for the scope.
    ExactAssemblyScope aas(GetVIXLAssembler(),
                           vixl32::k16BitT32InstructionSizeInBytes,
                           CodeBufferCheckScope::kExactSize);
    // Call the entry point loaded into LR.
    __ blx(lr);
    RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
  }
}

CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch(
    MethodReference target_method) {
  return NewPcRelativePatch(
      target_method.dex_file, target_method.index, &boot_image_method_patches_);
}

CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch(
    MethodReference target_method) {
  return NewPcRelativePatch(
      target_method.dex_file, target_method.index, &method_bss_entry_patches_);
}

CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageTypePatch(
    const DexFile& dex_file, dex::TypeIndex type_index) {
  return NewPcRelativePatch(&dex_file, type_index.index_, &boot_image_type_patches_);
}

CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntryPatch(
    const DexFile& dex_file, dex::TypeIndex type_index) {
  return NewPcRelativePatch(&dex_file, type_index.index_, &type_bss_entry_patches_);
}

CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageStringPatch(
    const DexFile& dex_file, dex::StringIndex string_index) {
  return NewPcRelativePatch(&dex_file, string_index.index_, &boot_image_string_patches_);
}

CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch(
    const DexFile& dex_file, dex::StringIndex string_index) {
  return NewPcRelativePatch(&dex_file, string_index.index_, &string_bss_entry_patches_);
}

CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
    const DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
  patches->emplace_back(dex_file, offset_or_index);
  return &patches->back();
}

vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) {
  baker_read_barrier_patches_.emplace_back(custom_data);
  return &baker_read_barrier_patches_.back().label;
}

VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
}

VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(
    const DexFile& dex_file,
    dex::StringIndex string_index,
    Handle<mirror::String> handle) {
  ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
  return jit_string_patches_.GetOrCreate(
      StringReference(&dex_file, string_index),
      [this]() {
        return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
      });
}

VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFile& dex_file,
                                                                    dex::TypeIndex type_index,
                                                                    Handle<mirror::Class> handle) {
  ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
  return jit_class_patches_.GetOrCreate(
      TypeReference(&dex_file, type_index),
      [this]() {
        return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
      });
}

template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
    const ArenaDeque<PcRelativePatchInfo>& infos,
    ArenaVector<linker::LinkerPatch>* linker_patches) {
  for (const PcRelativePatchInfo& info : infos) {
    const DexFile* dex_file = info.target_dex_file;
    size_t offset_or_index = info.offset_or_index;
    DCHECK(info.add_pc_label.IsBound());
    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation());
    // Add MOVW patch.
    DCHECK(info.movw_label.IsBound());
    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation());
    linker_patches->push_back(Factory(movw_offset, dex_file, add_pc_offset, offset_or_index));
    // Add MOVT patch.
    DCHECK(info.movt_label.IsBound());
    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation());
    linker_patches->push_back(Factory(movt_offset, dex_file, add_pc_offset, offset_or_index));
  }
}

void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
  DCHECK(linker_patches->empty());
  size_t size =
      /* MOVW+MOVT for each entry */ 2u * boot_image_method_patches_.size() +
      /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() +
      /* MOVW+MOVT for each entry */ 2u * boot_image_type_patches_.size() +
      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
      /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() +
      /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() +
      baker_read_barrier_patches_.size();
  linker_patches->reserve(size);
  if (GetCompilerOptions().IsBootImage()) {
    EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
        boot_image_method_patches_, linker_patches);
    EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
        boot_image_type_patches_, linker_patches);
    EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
        boot_image_string_patches_, linker_patches);
  } else {
    DCHECK(boot_image_method_patches_.empty());
    EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
        boot_image_type_patches_, linker_patches);
    EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
        boot_image_string_patches_, linker_patches);
  }
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
      method_bss_entry_patches_, linker_patches);
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
      type_bss_entry_patches_, linker_patches);
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
      string_bss_entry_patches_, linker_patches);
  for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
    linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
        info.label.GetLocation(), info.custom_data));
  }
  DCHECK_EQ(size, linker_patches->size());
}

VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
    uint32_t value,
    Uint32ToLiteralMap* map) {
  return map->GetOrCreate(
      value,
      [this, value]() {
        return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ value);
      });
}

void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
  locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
                     Location::RequiresRegister());
  locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
  locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
  vixl32::Register res = OutputRegister(instr);
  vixl32::Register accumulator =
      InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
  vixl32::Register mul_left =
      InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
  vixl32::Register mul_right =
      InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);

  if (instr->GetOpKind() == HInstruction::kAdd) {
    __ Mla(res, mul_left, mul_right, accumulator);
  } else {
    __ Mls(res, mul_left, mul_right, accumulator);
  }
}

void LocationsBuilderARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

// Simple implementation of packed switch - generate cascaded compare/jumps.
void LocationsBuilderARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
      codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
    locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
    if (switch_instr->GetStartValue() != 0) {
      locations->AddTemp(Location::RequiresRegister());  // We need a temp for the bias.
    }
  }
}

// TODO(VIXL): Investigate and reach parity with the old ARM codegen.
void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  int32_t lower_bound = switch_instr->GetStartValue();
  uint32_t num_entries = switch_instr->GetNumEntries();
  LocationSummary* locations = switch_instr->GetLocations();
  vixl32::Register value_reg = InputRegisterAt(switch_instr, 0);
  HBasicBlock* default_block = switch_instr->GetDefaultBlock();

  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
      !codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
    // Create a series of compare/jumps.
    UseScratchRegisterScope temps(GetVIXLAssembler());
    vixl32::Register temp_reg = temps.Acquire();
    // Note: It is fine for the AddConstantSetFlags() below to use the IP register to temporarily
    // store the immediate, because IP is the destination register of that operation. For the
    // other AddConstantSetFlags() and GenerateCompareWithImmediate(), the immediate values are
    // constant, and they can be encoded in the instruction without making use of the IP register.
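    // Illustrative example (values chosen for this comment, not from the
    // runtime): for lower_bound == 10 and num_entries == 5, the cascade below
    // first computes value - 10 and dispatches on eq (case 10), then each loop
    // iteration subtracts 2 more and dispatches two cases via lo/eq (11/12,
    // then 13/14) before falling through to the default block.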
    __ Adds(temp_reg, value_reg, -lower_bound);

    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
    // Jump to successors[0] if value == lower_bound.
    __ B(eq, codegen_->GetLabelOf(successors[0]));
    int32_t last_index = 0;
    for (; num_entries - last_index > 2; last_index += 2) {
      __ Adds(temp_reg, temp_reg, -2);
      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
    }
    if (num_entries - last_index == 2) {
      // The last missing case_value.
      __ Cmp(temp_reg, 1);
      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
    }

    // And the default for any other value.
    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
      __ B(codegen_->GetLabelOf(default_block));
    }
  } else {
    // Create a table lookup.
    vixl32::Register table_base = RegisterFrom(locations->GetTemp(0));

    JumpTableARMVIXL* jump_table = codegen_->CreateJumpTable(switch_instr);

    // Remove the bias.
    vixl32::Register key_reg;
    if (lower_bound != 0) {
      key_reg = RegisterFrom(locations->GetTemp(1));
      __ Sub(key_reg, value_reg, lower_bound);
    } else {
      key_reg = value_reg;
    }

    // Check whether the value is in the table, jump to default block if not.
    __ Cmp(key_reg, num_entries - 1);
    __ B(hi, codegen_->GetLabelOf(default_block));

    UseScratchRegisterScope temps(GetVIXLAssembler());
    vixl32::Register jump_offset = temps.Acquire();

    // Load jump offset from the table.
    {
      const size_t jump_size = switch_instr->GetNumEntries() * sizeof(int32_t);
      ExactAssemblyScope aas(GetVIXLAssembler(),
                             (vixl32::kMaxInstructionSizeInBytes * 4) + jump_size,
                             CodeBufferCheckScope::kMaximumSize);
      __ adr(table_base, jump_table->GetTableStartLabel());
      __ ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));

      // Jump to the target block by branching to table_base (PC-relative) + offset.
      vixl32::Register target_address = table_base;
      __ add(target_address, table_base, jump_offset);
      __ bx(target_address);

      jump_table->EmitTable(codegen_);
    }
  }
}

// Copy the result of a call into the given target.
void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, DataType::Type type) {
  if (!trg.IsValid()) {
    DCHECK_EQ(type, DataType::Type::kVoid);
    return;
  }

  DCHECK_NE(type, DataType::Type::kVoid);

  Location return_loc = InvokeDexCallingConventionVisitorARMVIXL().GetReturnLocation(type);
  if (return_loc.Equals(trg)) {
    return;
  }

  // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
  // with the last branch.
  if (type == DataType::Type::kInt64) {
    TODO_VIXL32(FATAL);
  } else if (type == DataType::Type::kFloat64) {
    TODO_VIXL32(FATAL);
  } else {
    // Let the parallel move resolver take care of all of this.
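    // A single-entry HParallelMove is sufficient here: the resolver emits the
    // appropriate core, floating-point or stack move for the two locations.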
    HParallelMove parallel_move(GetGraph()->GetAllocator());
    parallel_move.AddMove(return_loc, trg, type, nullptr);
    GetMoveResolver()->EmitNativeCode(&parallel_move);
  }
}

void LocationsBuilderARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
        instruction->GetIndex(), kArmPointerSize).SizeValue();
    GetAssembler()->LoadFromOffset(kLoadWord,
                                   OutputRegister(instruction),
                                   InputRegisterAt(instruction, 0),
                                   method_offset);
  } else {
    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
        instruction->GetIndex(), kArmPointerSize));
    GetAssembler()->LoadFromOffset(kLoadWord,
                                   OutputRegister(instruction),
                                   InputRegisterAt(instruction, 0),
                                   mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
    GetAssembler()->LoadFromOffset(kLoadWord,
                                   OutputRegister(instruction),
                                   OutputRegister(instruction),
                                   method_offset);
  }
}

static void PatchJitRootUse(uint8_t* code,
                            const uint8_t* roots_data,
                            VIXLUInt32Literal* literal,
                            uint64_t index_in_table) {
  DCHECK(literal->IsBound());
  uint32_t literal_offset = literal->GetLocation();
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  uint8_t* data = code + literal_offset;
  reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
}

void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const auto& entry : jit_string_patches_) {
    const StringReference& string_reference = entry.first;
    VIXLUInt32Literal* table_entry_literal = entry.second;
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
  }
  for (const auto& entry : jit_class_patches_) {
    const TypeReference& type_reference = entry.first;
    VIXLUInt32Literal* table_entry_literal = entry.second;
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
  }
}

void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
    CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
    vixl32::Register out) {
  ExactAssemblyScope aas(GetVIXLAssembler(),
                         3 * vixl32::kMaxInstructionSizeInBytes,
                         CodeBufferCheckScope::kMaximumSize);
  // TODO(VIXL): Think about using mov instead of movw.
  __ bind(&labels->movw_label);
  __ movw(out, /* placeholder */ 0u);
  __ bind(&labels->movt_label);
  __ movt(out, /* placeholder */ 0u);
  __ bind(&labels->add_pc_label);
  __ add(out, out, pc);
}

#undef __
#undef QUICK_ENTRY_POINT
#undef TODO_VIXL32

}  // namespace arm
}  // namespace art