/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm64.h"

#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "code_generator_arm64.h"
#include "common_arm64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/arm64/assembler_arm64.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop

namespace art {

namespace arm64 {

using helpers::DRegisterFrom;
using helpers::FPRegisterFrom;
using helpers::HeapOperand;
using helpers::LocationFrom;
using helpers::OperandFrom;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::InputRegisterAt;
using helpers::OutputRegister;

namespace {

ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
  return MemOperand(XRegisterFrom(location), offset);
}

}  // namespace

MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
  return codegen_->GetVIXLAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

#define __ codegen->GetVIXLAssembler()->

static void MoveFromReturnRegister(Location trg,
                                   DataType::Type type,
                                   CodeGeneratorARM64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == DataType::Type::kVoid);
    return;
  }

  DCHECK_NE(type, DataType::Type::kVoid);

  if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) {
    Register trg_reg = RegisterFrom(trg, type);
    Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
    __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
  } else {
    FPRegister trg_reg = FPRegisterFrom(trg, type);
    FPRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
    __ Fmov(trg_reg, res_reg);
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) {
  InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit IntrinsicSlowPathARM64(HInvoke* invoke)
      : SlowPathCodeARM64(invoke), invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    {
      // Ensure that between the BLR (emitted by Generate*Call) and RecordPcInfo there
      // are no pools emitted.
      vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
      if (invoke_->IsInvokeStaticOrDirect()) {
        codegen->GenerateStaticOrDirectCall(
            invoke_->AsInvokeStaticOrDirect(), LocationFrom(kArtMethodRegister), this);
      } else {
        codegen->GenerateVirtualCall(
            invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister), this);
      }
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM64"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
};

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
      : SlowPathCodeARM64(instruction), tmp_(tmp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    const int32_t element_size = DataType::Size(DataType::Type::kReference);

    Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
    Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
    Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
    Register tmp_reg = WRegisterFrom(tmp_);

    __ Bind(GetEntryLabel());
    vixl::aarch64::Label slow_copy_loop;
    __ Bind(&slow_copy_loop);
    __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
    codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp_reg = ReadBarrier::Mark(tmp_reg);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(tmp_.reg(), LR);
    DCHECK_NE(tmp_.reg(), WSP);
    DCHECK_NE(tmp_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
    DCHECK_NE(tmp_.reg(), IP0);
    DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
    // This runtime call does not require a stack map.
    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
    __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(&slow_copy_loop, ne);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; }

 private:
  Location tmp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
};
#undef __

bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

#define __ masm->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
          is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
          is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
}

void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type type,
                            MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location out = locations->Out();

  switch (type) {
    case DataType::Type::kInt16:
      __ Rev16(WRegisterFrom(out), WRegisterFrom(in));
      __ Sxth(WRegisterFrom(out), WRegisterFrom(out));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      __ Rev(RegisterFrom(out, type), RegisterFrom(in, type));
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << type;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
}

static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    DataType::Type type,
                                    MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     DataType::Type type,
                                     MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
  __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenReverse(LocationSummary* locations,
                       DataType::Type type,
                       MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenBitCount(HInvoke* instr, DataType::Type type, MacroAssembler* masm) {
  DCHECK(DataType::IsIntOrLongType(type)) << type;
  DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
  DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);

  UseScratchRegisterScope temps(masm);

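  // There is no scalar popcount instruction on ARMv8, so the value is moved to a SIMD/FP
  // register, CNT computes a per-byte population count, and ADDV sums the byte counts.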
  Register src = InputRegisterAt(instr, 0);
  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
  FPRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS();

  __ Fmov(fpr, src);
  __ Cnt(fpr.V8B(), fpr.V8B());
  __ Addv(fpr.B(), fpr.V8B());
  __ Fmov(dst, fpr);
}

void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

static void GenHighestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
  size_t high_bit = (type == DataType::Type::kInt64) ? 63u : 31u;
  size_t clz_high_bit = (type == DataType::Type::kInt64) ? 6u : 5u;

  __ Clz(temp, src);
  __ Mov(dst, UINT64_C(1) << high_bit);  // MOV (bitmask immediate)
  __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit));  // Clear dst if src was 0.
  __ Lsr(dst, dst, temp);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenLowestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
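  // In two's complement, `src & -src` isolates the lowest set bit (and yields 0 for a zero input).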

  __ Neg(temp, src);
  __ And(dst, temp, src);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location out = locations->Out();

  FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in);
  FPRegister out_reg = is64bit ? DRegisterFrom(out) : SRegisterFrom(out);

  __ Fabs(out_reg, in_reg);
}

void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}

static void GenAbsInteger(LocationSummary* locations,
                          bool is64bit,
                          MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location output = locations->Out();

  Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in);
  Register out_reg = is64bit ? XRegisterFrom(output) : WRegisterFrom(output);

  __ Cmp(in_reg, Operand(0));
  __ Cneg(out_reg, in_reg, lt);
}

void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        MacroAssembler* masm) {
  Location op1 = locations->InAt(0);
  Location op2 = locations->InAt(1);
  Location out = locations->Out();

  FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1);
  FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2);
  FPRegister out_reg = is_double ? DRegisterFrom(out) : SRegisterFrom(out);
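  // FMIN/FMAX match the Java Math.min/max semantics: they propagate NaN if either input is NaN
  // and treat -0.0 as smaller than +0.0, so a single instruction suffices here.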
  if (is_min) {
    __ Fmin(out_reg, op1_reg, op2_reg);
  } else {
    __ Fmax(out_reg, op1_reg, op2_reg);
  }
}

static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler());
}

static void GenMinMax(LocationSummary* locations,
                      bool is_min,
                      bool is_long,
                      MacroAssembler* masm) {
  Location op1 = locations->InAt(0);
  Location op2 = locations->InAt(1);
  Location out = locations->Out();

  Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
  Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2);
  Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out);

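  // CSEL picks op1 when the condition (lt for min, gt for max) holds after the compare,
  // and op2 otherwise (including the equal case, where either operand is correct).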
  __ Cmp(op1_reg, op2_reg);
  __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt);
}

void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
  // Java 8 API definition for Math.round():
  // Return the closest long or int to the argument, with ties rounding to positive infinity.
  //
  // There is no single instruction in ARMv8 that can support the above definition.
  // We use FCVTAS here because its semantics are the closest to that definition.
  // FCVTAS performs rounding to nearest integer, ties away from zero.
  // For most inputs (positive values, zero or NaN), this instruction is enough.
  // We only need a little extra handling after FCVTAS when the input is a negative tie
  // (a value exactly halfway between two integers).
  //
  // We did not choose FCVTPS because, although it rounds toward positive infinity, it does
  // not round to nearest. For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
  // If we used it, more fix-up code would be needed for most inputs.
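  // For example, Math.round(-2.5) == -2: FCVTAS yields -3 (ties away from zero), and the
  // negative-tie fix-up below adds 1.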
  LocationSummary* l = invoke->GetLocations();
  FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
  FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
  vixl::aarch64::Label done;

  // Round to nearest integer, ties away from zero.
  __ Fcvtas(out_reg, in_reg);

  // For positive values, zero or NaN inputs, rounding is done.
  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, out_reg += 1.
  __ Frinta(tmp_fp, in_reg);
  __ Fsub(tmp_fp, in_reg, tmp_fp);
  __ Fcmp(tmp_fp, 0.5);
  __ Cinc(out_reg, out_reg, eq);

  __ Bind(&done);
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  codegen_->Load(DataType::Type::kReference, WRegisterFrom(invoke->GetLocations()->Out()),
                 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK((type == DataType::Type::kInt32) ||
         (type == DataType::Type::kInt64) ||
         (type == DataType::Type::kReference));
  Location base_loc = locations->InAt(1);
  Register base = WRegisterFrom(base_loc);      // Object pointer.
  Location offset_loc = locations->InAt(2);
  Register offset = XRegisterFrom(offset_loc);  // Long offset.
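  // For reference loads under Baker read barriers the load and the mark are generated together
  // below; otherwise a plain (or acquiring) load is emitted, followed by a slow-path read
  // barrier when needed.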
  Location trg_loc = locations->Out();
  Register trg = RegisterFrom(trg_loc, type);

  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
    Register temp = WRegisterFrom(locations->GetTemp(0));
    codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
                                                       trg_loc,
                                                       base,
                                                       /* offset */ 0u,
                                                       /* index */ offset_loc,
                                                       /* scale_factor */ 0u,
                                                       temp,
                                                       /* needs_null_check */ false,
                                                       is_volatile);
  } else {
    // Other cases.
    MemOperand mem_op(base.X(), offset);
    if (is_volatile) {
      codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
    } else {
      codegen->Load(type, trg, mem_op);
    }

    if (type == DataType::Type::kReference) {
      DCHECK(trg.IsW());
      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
    }
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
    // We need a temporary register for the read barrier marking slow
    // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}

void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}

static void GenUnsafePut(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = codegen->GetVIXLAssembler();

  Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
  Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
  Register value = RegisterFrom(locations->InAt(3), type);
  Register source = value;
  MemOperand mem_op(base.X(), offset);

  {
    // We use a block to end the scratch scope before the write barrier, thus
    // freeing the temporary registers so they can be used in `MarkGCCard`.
    UseScratchRegisterScope temps(masm);

    if (kPoisonHeapReferences && type == DataType::Type::kReference) {
      DCHECK(value.IsW());
      Register temp = temps.AcquireW();
      __ Mov(temp.W(), value.W());
      codegen->GetAssembler()->PoisonHeapReference(temp.W());
      source = temp;
    }

    if (is_volatile || is_ordered) {
      codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check */ false);
    } else {
      codegen->Store(type, source, mem_op);
    }
  }

  if (type == DataType::Type::kReference) {
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
                                       HInvoke* invoke,
                                       DataType::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  // If heap poisoning is enabled, we don't want the unpoisoning
  // operations to potentially clobber the output. Likewise when
  // emitting a (Baker) read barrier, which may call.
  Location::OutputOverlap overlaps =
      ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call)
          ? Location::kOutputOverlap
          : Location::kNoOutputOverlap;
  locations->SetOut(Location::RequiresRegister(), overlaps);
  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // Temporary register for (Baker) read barrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) {
  MacroAssembler* masm = codegen->GetVIXLAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Location out_loc = locations->Out();
  Register out = WRegisterFrom(out_loc);                        // Boolean result.

  Register base = WRegisterFrom(locations->InAt(1));            // Object pointer.
  Location offset_loc = locations->InAt(2);
  Register offset = XRegisterFrom(offset_loc);                  // Long offset.
  Register expected = RegisterFrom(locations->InAt(3), type);   // Expected.
  Register value = RegisterFrom(locations->InAt(4), type);      // Value.

  // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
  if (type == DataType::Type::kReference) {
    // Mark card for object assuming new value is stored.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(base, value, value_can_be_null);

    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      Register temp = WRegisterFrom(locations->GetTemp(0));
      // Need to make sure the reference stored in the field is a to-space
      // one before attempting the CAS or the CAS could fail incorrectly.
      codegen->UpdateReferenceFieldWithBakerReadBarrier(
          invoke,
          out_loc,  // Unused, used only as a "temporary" within the read barrier.
          base,
          /* field_offset */ offset_loc,
          temp,
          /* needs_null_check */ false,
          /* use_load_acquire */ false);
    }
  }

  UseScratchRegisterScope temps(masm);
  Register tmp_ptr = temps.AcquireX();                      // Pointer to actual memory.
  Register tmp_value = temps.AcquireSameSizeAs(value);      // Value in memory.

  Register tmp_32 = tmp_value.W();

  __ Add(tmp_ptr, base.X(), Operand(offset));

  if (kPoisonHeapReferences && type == DataType::Type::kReference) {
    codegen->GetAssembler()->PoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not poison `value`, as it is the same register as
      // `expected`, which has just been poisoned.
    } else {
      codegen->GetAssembler()->PoisonHeapReference(value);
    }
  }

  // do {
  //   tmp_value = [tmp_ptr] - expected;
  // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
  // result = tmp_value != 0;

  vixl::aarch64::Label loop_head, exit_loop;
  __ Bind(&loop_head);
  __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
  __ Cmp(tmp_value, expected);
  __ B(&exit_loop, ne);
  __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
  __ Cbnz(tmp_32, &loop_head);
  __ Bind(&exit_loop);
  __ Cset(out, eq);

  if (kPoisonHeapReferences && type == DataType::Type::kReference) {
    codegen->GetAssembler()->UnpoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not unpoison `value`, as it is the same register as
      // `expected`, which has just been unpoisoned.
    } else {
      codegen->GetAssembler()->UnpoisonHeapReference(value);
    }
  }
}

void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kReference);
}

void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCas(invoke, DataType::Type::kInt32, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCas(invoke, DataType::Type::kInt64, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCas(invoke, DataType::Type::kReference, codegen_);
}

void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke,
                                       invoke->InputAt(1)->CanBeNull()
                                           ? LocationSummary::kCallOnSlowPath
                                           : LocationSummary::kNoCall,
                                       kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  // Need a temporary register for the String compression feature.
  if (mirror::kUseStringCompression) {
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register str = InputRegisterAt(invoke, 0);
  Register arg = InputRegisterAt(invoke, 1);
  DCHECK(str.IsW());
  DCHECK(arg.IsW());
  Register out = OutputRegister(invoke);

  Register temp0 = WRegisterFrom(locations->GetTemp(0));
  Register temp1 = WRegisterFrom(locations->GetTemp(1));
  Register temp2 = WRegisterFrom(locations->GetTemp(2));
  Register temp3;
  if (mirror::kUseStringCompression) {
    temp3 = WRegisterFrom(locations->GetTemp(3));
  }

  vixl::aarch64::Label loop;
  vixl::aarch64::Label find_char_diff;
  vixl::aarch64::Label end;
  vixl::aarch64::Label different_compression;

  // Get offsets of count and value fields within a string object.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Take slow path and throw if input can be and is null.
  SlowPathCodeARM64* slow_path = nullptr;
  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
  if (can_slow_path) {
    slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
    codegen_->AddSlowPath(slow_path);
    __ Cbz(arg, slow_path->GetEntryLabel());
  }

  // Reference equality check, return 0 if same reference.
  __ Subs(out, str, arg);
  __ B(&end, eq);

  if (mirror::kUseStringCompression) {
    // Load `count` fields of this and argument strings.
    __ Ldr(temp3, HeapOperand(str, count_offset));
    __ Ldr(temp2, HeapOperand(arg, count_offset));
    // Clean out compression flag from lengths.
    __ Lsr(temp0, temp3, 1u);
    __ Lsr(temp1, temp2, 1u);
  } else {
    // Load lengths of this and argument strings.
    __ Ldr(temp0, HeapOperand(str, count_offset));
    __ Ldr(temp1, HeapOperand(arg, count_offset));
  }
  // out = length diff.
  __ Subs(out, temp0, temp1);
  // temp0 = min(len(str), len(arg)).
  __ Csel(temp0, temp1, temp0, ge);
  // Shorter string is empty?
  __ Cbz(temp0, &end);

  if (mirror::kUseStringCompression) {
    // Check that both strings use the same compression style before using this comparison loop.
    __ Eor(temp2, temp2, Operand(temp3));
    // Interleave with compression flag extraction which is needed for both paths
    // and also set flags which is needed only for the different compressions path.
    __ Ands(temp3.W(), temp3.W(), Operand(1));
    __ Tbnz(temp2, 0, &different_compression);  // Does not use flags.
  }
  // Store offset of string value in preparation for comparison loop.
  __ Mov(temp1, value_offset);
  if (mirror::kUseStringCompression) {
    // For string compression, calculate the number of bytes to compare (not chars).
    // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
    __ Lsl(temp0, temp0, temp3);
  }

  UseScratchRegisterScope scratch_scope(masm);
  Register temp4 = scratch_scope.AcquireX();

  // Assertions that must hold in order to compare strings 8 bytes at a time.
  DCHECK_ALIGNED(value_offset, 8);
  static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");

  const size_t char_size = DataType::Size(DataType::Type::kUint16);
  DCHECK_EQ(char_size, 2u);

  // Promote temp2 to an X reg, ready for LDR.
  temp2 = temp2.X();

  // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
  __ Bind(&loop);
  __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
  __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
  __ Cmp(temp4, temp2);
  __ B(ne, &find_char_diff);
  __ Add(temp1, temp1, char_size * 4);
  // With string compression, we have compared 8 bytes, otherwise 4 chars.
  __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
  __ B(&loop, hi);
  __ B(&end);

  // Promote temp1 to an X reg, ready for EOR.
  temp1 = temp1.X();

  // Find the single character difference.
  __ Bind(&find_char_diff);
  // Get the bit position of the first character that differs.
  __ Eor(temp1, temp2, temp4);
  __ Rbit(temp1, temp1);
  __ Clz(temp1, temp1);

  // If the number of chars remaining <= the index where the difference occurs (0-3), then
  // the difference occurs outside the remaining string data, so just return length diff (out).
1423 // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the 1424 // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or 1425 // unsigned when string compression is disabled. 1426 // When it's enabled, the comparison must be unsigned. 1427 __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4)); 1428 __ B(ls, &end); 1429 1430 // Extract the characters and calculate the difference. 1431 if (mirror:: kUseStringCompression) { 1432 __ Bic(temp1, temp1, 0x7); 1433 __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u)); 1434 } else { 1435 __ Bic(temp1, temp1, 0xf); 1436 } 1437 __ Lsr(temp2, temp2, temp1); 1438 __ Lsr(temp4, temp4, temp1); 1439 if (mirror::kUseStringCompression) { 1440 // Prioritize the case of compressed strings and calculate such result first. 1441 __ Uxtb(temp1, temp4); 1442 __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB)); 1443 __ Tbz(temp3, 0u, &end); // If actually compressed, we're done. 1444 } 1445 __ Uxth(temp4, temp4); 1446 __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH)); 1447 1448 if (mirror::kUseStringCompression) { 1449 __ B(&end); 1450 __ Bind(&different_compression); 1451 1452 // Comparison for different compression style. 1453 const size_t c_char_size = DataType::Size(DataType::Type::kInt8); 1454 DCHECK_EQ(c_char_size, 1u); 1455 temp1 = temp1.W(); 1456 temp2 = temp2.W(); 1457 temp4 = temp4.W(); 1458 1459 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer. 1460 // Note that flags have been set by the `str` compression flag extraction to `temp3` 1461 // before branching to the `different_compression` label. 1462 __ Csel(temp1, str, arg, eq); // Pointer to the compressed string. 1463 __ Csel(temp2, str, arg, ne); // Pointer to the uncompressed string. 1464 1465 // We want to free up the temp3, currently holding `str` compression flag, for comparison. 1466 // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat 1467 // as unsigned. Start by freeing the bit with a LSL and continue further down by a SUB which 1468 // will allow `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition. 1469 __ Lsl(temp0, temp0, 1u); 1470 1471 // Adjust temp1 and temp2 from string pointers to data pointers. 1472 __ Add(temp1, temp1, Operand(value_offset)); 1473 __ Add(temp2, temp2, Operand(value_offset)); 1474 1475 // Complete the move of the compression flag. 1476 __ Sub(temp0, temp0, Operand(temp3)); 1477 1478 vixl::aarch64::Label different_compression_loop; 1479 vixl::aarch64::Label different_compression_diff; 1480 1481 __ Bind(&different_compression_loop); 1482 __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex)); 1483 __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex)); 1484 __ Subs(temp4, temp4, Operand(temp3)); 1485 __ B(&different_compression_diff, ne); 1486 __ Subs(temp0, temp0, 2); 1487 __ B(&different_compression_loop, hi); 1488 __ B(&end); 1489 1490 // Calculate the difference. 1491 __ Bind(&different_compression_diff); 1492 __ Tst(temp0, Operand(1)); 1493 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1494 "Expecting 0=compressed, 1=uncompressed"); 1495 __ Cneg(out, temp4, ne); 1496 } 1497 1498 __ Bind(&end); 1499 1500 if (can_slow_path) { 1501 __ Bind(slow_path->GetExitLabel()); 1502 } 1503 } 1504 1505 // The cut off for unrolling the loop in String.equals() intrinsic for const strings. 
1506 // The normal loop plus the pre-header is 9 instructions without string compression and 12 1507 // instructions with string compression. We can compare up to 8 bytes in 4 instructions 1508 // (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up 1509 // to 10 instructions for the unrolled loop. 1510 constexpr size_t kShortConstStringEqualsCutoffInBytes = 32; 1511 1512 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) { 1513 if (candidate->IsLoadString()) { 1514 HLoadString* load_string = candidate->AsLoadString(); 1515 const DexFile& dex_file = load_string->GetDexFile(); 1516 return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length); 1517 } 1518 return nullptr; 1519 } 1520 1521 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) { 1522 if (kEmitCompilerReadBarrier && 1523 !StringEqualsOptimizations(invoke).GetArgumentIsString() && 1524 !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { 1525 // No support for this odd case (String class is moveable, not in the boot image). 1526 return; 1527 } 1528 1529 LocationSummary* locations = 1530 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1531 locations->SetInAt(0, Location::RequiresRegister()); 1532 locations->SetInAt(1, Location::RequiresRegister()); 1533 1534 // For the generic implementation and for long const strings we need a temporary. 1535 // We do not need it for short const strings, up to 8 bytes, see code generation below. 1536 uint32_t const_string_length = 0u; 1537 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length); 1538 if (const_string == nullptr) { 1539 const_string = GetConstString(invoke->InputAt(1), &const_string_length); 1540 } 1541 bool is_compressed = 1542 mirror::kUseStringCompression && 1543 const_string != nullptr && 1544 mirror::String::DexFileStringAllASCII(const_string, const_string_length); 1545 if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) { 1546 locations->AddTemp(Location::RequiresRegister()); 1547 } 1548 1549 // TODO: If the String.equals() is used only for an immediately following HIf, we can 1550 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks. 1551 // Then we shall need an extra temporary register instead of the output register. 1552 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 1553 } 1554 1555 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { 1556 MacroAssembler* masm = GetVIXLAssembler(); 1557 LocationSummary* locations = invoke->GetLocations(); 1558 1559 Register str = WRegisterFrom(locations->InAt(0)); 1560 Register arg = WRegisterFrom(locations->InAt(1)); 1561 Register out = XRegisterFrom(locations->Out()); 1562 1563 UseScratchRegisterScope scratch_scope(masm); 1564 Register temp = scratch_scope.AcquireW(); 1565 Register temp1 = scratch_scope.AcquireW(); 1566 1567 vixl::aarch64::Label loop; 1568 vixl::aarch64::Label end; 1569 vixl::aarch64::Label return_true; 1570 vixl::aarch64::Label return_false; 1571 1572 // Get offsets of count, value, and class fields within a string object. 1573 const int32_t count_offset = mirror::String::CountOffset().Int32Value(); 1574 const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); 1575 const int32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 1576 1577 // Note that the null check must have been done earlier. 
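// (The receiver `str` is therefore known to be non-null here; only `arg` may need the explicit
// null check emitted below when GetArgumentNotNull() does not hold.)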
1578 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1579 1580 StringEqualsOptimizations optimizations(invoke); 1581 if (!optimizations.GetArgumentNotNull()) { 1582 // Check if input is null, return false if it is. 1583 __ Cbz(arg, &return_false); 1584 } 1585 1586 // Reference equality check, return true if same reference. 1587 __ Cmp(str, arg); 1588 __ B(&return_true, eq); 1589 1590 if (!optimizations.GetArgumentIsString()) { 1591 // Instanceof check for the argument by comparing class fields. 1592 // All string objects must have the same type since String cannot be subclassed. 1593 // Receiver must be a string object, so its class field is equal to all strings' class fields. 1594 // If the argument is a string object, its class field must be equal to receiver's class field. 1595 __ Ldr(temp, MemOperand(str.X(), class_offset)); 1596 __ Ldr(temp1, MemOperand(arg.X(), class_offset)); 1597 __ Cmp(temp, temp1); 1598 __ B(&return_false, ne); 1599 } 1600 1601 // Check if one of the inputs is a const string. Do not special-case both strings 1602 // being const, such cases should be handled by constant folding if needed. 1603 uint32_t const_string_length = 0u; 1604 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length); 1605 if (const_string == nullptr) { 1606 const_string = GetConstString(invoke->InputAt(1), &const_string_length); 1607 if (const_string != nullptr) { 1608 std::swap(str, arg); // Make sure the const string is in `str`. 1609 } 1610 } 1611 bool is_compressed = 1612 mirror::kUseStringCompression && 1613 const_string != nullptr && 1614 mirror::String::DexFileStringAllASCII(const_string, const_string_length); 1615 1616 if (const_string != nullptr) { 1617 // Load `count` field of the argument string and check if it matches the const string. 1618 // Also compares the compression style, if differs return false. 1619 __ Ldr(temp, MemOperand(arg.X(), count_offset)); 1620 // Temporarily release temp1 as we may not be able to embed the flagged count in CMP immediate. 1621 scratch_scope.Release(temp1); 1622 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed))); 1623 temp1 = scratch_scope.AcquireW(); 1624 __ B(&return_false, ne); 1625 } else { 1626 // Load `count` fields of this and argument strings. 1627 __ Ldr(temp, MemOperand(str.X(), count_offset)); 1628 __ Ldr(temp1, MemOperand(arg.X(), count_offset)); 1629 // Check if `count` fields are equal, return false if they're not. 1630 // Also compares the compression style, if differs return false. 1631 __ Cmp(temp, temp1); 1632 __ B(&return_false, ne); 1633 } 1634 1635 // Assertions that must hold in order to compare strings 8 bytes at a time. 1636 // Ok to do this because strings are zero-padded to kObjectAlignment. 1637 DCHECK_ALIGNED(value_offset, 8); 1638 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); 1639 1640 if (const_string != nullptr && 1641 const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes 1642 : kShortConstStringEqualsCutoffInBytes / 2u)) { 1643 // Load and compare the contents. Though we know the contents of the short const string 1644 // at compile time, materializing constants may be more code than loading from memory. 1645 int32_t offset = value_offset; 1646 size_t remaining_bytes = 1647 RoundUp(is_compressed ? 
const_string_length : const_string_length * 2u, 8u); 1648 temp = temp.X(); 1649 temp1 = temp1.X(); 1650 while (remaining_bytes > sizeof(uint64_t)) { 1651 Register temp2 = XRegisterFrom(locations->GetTemp(0)); 1652 __ Ldp(temp, temp1, MemOperand(str.X(), offset)); 1653 __ Ldp(temp2, out, MemOperand(arg.X(), offset)); 1654 __ Cmp(temp, temp2); 1655 __ Ccmp(temp1, out, NoFlag, eq); 1656 __ B(&return_false, ne); 1657 offset += 2u * sizeof(uint64_t); 1658 remaining_bytes -= 2u * sizeof(uint64_t); 1659 } 1660 if (remaining_bytes != 0u) { 1661 __ Ldr(temp, MemOperand(str.X(), offset)); 1662 __ Ldr(temp1, MemOperand(arg.X(), offset)); 1663 __ Cmp(temp, temp1); 1664 __ B(&return_false, ne); 1665 } 1666 } else { 1667 // Return true if both strings are empty. Even with string compression `count == 0` means empty. 1668 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1669 "Expecting 0=compressed, 1=uncompressed"); 1670 __ Cbz(temp, &return_true); 1671 1672 if (mirror::kUseStringCompression) { 1673 // For string compression, calculate the number of bytes to compare (not chars). 1674 // This could in theory exceed INT32_MAX, so treat temp as unsigned. 1675 __ And(temp1, temp, Operand(1)); // Extract compression flag. 1676 __ Lsr(temp, temp, 1u); // Extract length. 1677 __ Lsl(temp, temp, temp1); // Calculate number of bytes to compare. 1678 } 1679 1680 // Store offset of string value in preparation for comparison loop 1681 __ Mov(temp1, value_offset); 1682 1683 temp1 = temp1.X(); 1684 Register temp2 = XRegisterFrom(locations->GetTemp(0)); 1685 // Loop to compare strings 8 bytes at a time starting at the front of the string. 1686 __ Bind(&loop); 1687 __ Ldr(out, MemOperand(str.X(), temp1)); 1688 __ Ldr(temp2, MemOperand(arg.X(), temp1)); 1689 __ Add(temp1, temp1, Operand(sizeof(uint64_t))); 1690 __ Cmp(out, temp2); 1691 __ B(&return_false, ne); 1692 // With string compression, we have compared 8 bytes, otherwise 4 chars. 1693 __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags); 1694 __ B(&loop, hi); 1695 } 1696 1697 // Return true and exit the function. 1698 // If loop does not result in returning false, we return true. 1699 __ Bind(&return_true); 1700 __ Mov(out, 1); 1701 __ B(&end); 1702 1703 // Return false and exit the function. 1704 __ Bind(&return_false); 1705 __ Mov(out, 0); 1706 __ Bind(&end); 1707 } 1708 1709 static void GenerateVisitStringIndexOf(HInvoke* invoke, 1710 MacroAssembler* masm, 1711 CodeGeneratorARM64* codegen, 1712 bool start_at_zero) { 1713 LocationSummary* locations = invoke->GetLocations(); 1714 1715 // Note that the null check must have been done earlier. 1716 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1717 1718 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, 1719 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char. 1720 SlowPathCodeARM64* slow_path = nullptr; 1721 HInstruction* code_point = invoke->InputAt(1); 1722 if (code_point->IsIntConstant()) { 1723 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) { 1724 // Always needs the slow-path. We could directly dispatch to it, but this case should be 1725 // rare, so for simplicity just put the full slow-path down and branch unconditionally. 
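// (Code points above 0xFFFF are supplementary characters that the inline code does not handle.)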
1726 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); 1727 codegen->AddSlowPath(slow_path); 1728 __ B(slow_path->GetEntryLabel()); 1729 __ Bind(slow_path->GetExitLabel()); 1730 return; 1731 } 1732 } else if (code_point->GetType() != DataType::Type::kUint16) { 1733 Register char_reg = WRegisterFrom(locations->InAt(1)); 1734 __ Tst(char_reg, 0xFFFF0000); 1735 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); 1736 codegen->AddSlowPath(slow_path); 1737 __ B(ne, slow_path->GetEntryLabel()); 1738 } 1739 1740 if (start_at_zero) { 1741 // Start-index = 0. 1742 Register tmp_reg = WRegisterFrom(locations->GetTemp(0)); 1743 __ Mov(tmp_reg, 0); 1744 } 1745 1746 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path); 1747 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>(); 1748 1749 if (slow_path != nullptr) { 1750 __ Bind(slow_path->GetExitLabel()); 1751 } 1752 } 1753 1754 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) { 1755 LocationSummary* locations = new (allocator_) LocationSummary( 1756 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1757 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's 1758 // best to align the inputs accordingly. 1759 InvokeRuntimeCallingConvention calling_convention; 1760 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1761 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1762 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32)); 1763 1764 // Need to send start_index=0. 1765 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); 1766 } 1767 1768 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) { 1769 GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero */ true); 1770 } 1771 1772 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { 1773 LocationSummary* locations = new (allocator_) LocationSummary( 1774 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1775 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's 1776 // best to align the inputs accordingly. 
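// The inputs (string, code point, start index) are therefore pinned directly to the first three
// runtime argument registers below.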
1777 InvokeRuntimeCallingConvention calling_convention; 1778 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1779 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1780 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 1781 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32)); 1782 } 1783 1784 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) { 1785 GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero */ false); 1786 } 1787 1788 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { 1789 LocationSummary* locations = new (allocator_) LocationSummary( 1790 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1791 InvokeRuntimeCallingConvention calling_convention; 1792 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1793 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1794 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 1795 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3))); 1796 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); 1797 } 1798 1799 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { 1800 MacroAssembler* masm = GetVIXLAssembler(); 1801 LocationSummary* locations = invoke->GetLocations(); 1802 1803 Register byte_array = WRegisterFrom(locations->InAt(0)); 1804 __ Cmp(byte_array, 0); 1805 SlowPathCodeARM64* slow_path = 1806 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); 1807 codegen_->AddSlowPath(slow_path); 1808 __ B(eq, slow_path->GetEntryLabel()); 1809 1810 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path); 1811 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); 1812 __ Bind(slow_path->GetExitLabel()); 1813 } 1814 1815 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) { 1816 LocationSummary* locations = 1817 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 1818 InvokeRuntimeCallingConvention calling_convention; 1819 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1820 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1821 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 1822 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); 1823 } 1824 1825 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) { 1826 // No need to emit code checking whether `locations->InAt(2)` is a null 1827 // pointer, as callers of the native method 1828 // 1829 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) 1830 // 1831 // all include a null check on `data` before calling that method. 
1832 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); 1833 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); 1834 } 1835 1836 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) { 1837 LocationSummary* locations = new (allocator_) LocationSummary( 1838 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1839 InvokeRuntimeCallingConvention calling_convention; 1840 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1841 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); 1842 } 1843 1844 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) { 1845 MacroAssembler* masm = GetVIXLAssembler(); 1846 LocationSummary* locations = invoke->GetLocations(); 1847 1848 Register string_to_copy = WRegisterFrom(locations->InAt(0)); 1849 __ Cmp(string_to_copy, 0); 1850 SlowPathCodeARM64* slow_path = 1851 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); 1852 codegen_->AddSlowPath(slow_path); 1853 __ B(eq, slow_path->GetEntryLabel()); 1854 1855 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path); 1856 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); 1857 __ Bind(slow_path->GetExitLabel()); 1858 } 1859 1860 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { 1861 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); 1862 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType())); 1863 DCHECK(DataType::IsFloatingPointType(invoke->GetType())); 1864 1865 LocationSummary* const locations = 1866 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 1867 InvokeRuntimeCallingConvention calling_convention; 1868 1869 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); 1870 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType())); 1871 } 1872 1873 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { 1874 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U); 1875 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType())); 1876 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType())); 1877 DCHECK(DataType::IsFloatingPointType(invoke->GetType())); 1878 1879 LocationSummary* const locations = 1880 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 1881 InvokeRuntimeCallingConvention calling_convention; 1882 1883 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); 1884 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1))); 1885 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType())); 1886 } 1887 1888 static void GenFPToFPCall(HInvoke* invoke, 1889 CodeGeneratorARM64* codegen, 1890 QuickEntrypointEnum entry) { 1891 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); 1892 } 1893 1894 void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) { 1895 CreateFPToFPCallLocations(allocator_, invoke); 1896 } 1897 1898 void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) { 1899 GenFPToFPCall(invoke, codegen_, kQuickCos); 1900 } 1901 1902 void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) { 1903 CreateFPToFPCallLocations(allocator_, invoke); 1904 } 1905 1906 void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) { 1907 GenFPToFPCall(invoke, codegen_, 
kQuickSin); 1908 } 1909 1910 void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) { 1911 CreateFPToFPCallLocations(allocator_, invoke); 1912 } 1913 1914 void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) { 1915 GenFPToFPCall(invoke, codegen_, kQuickAcos); 1916 } 1917 1918 void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) { 1919 CreateFPToFPCallLocations(allocator_, invoke); 1920 } 1921 1922 void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) { 1923 GenFPToFPCall(invoke, codegen_, kQuickAsin); 1924 } 1925 1926 void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) { 1927 CreateFPToFPCallLocations(allocator_, invoke); 1928 } 1929 1930 void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) { 1931 GenFPToFPCall(invoke, codegen_, kQuickAtan); 1932 } 1933 1934 void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) { 1935 CreateFPToFPCallLocations(allocator_, invoke); 1936 } 1937 1938 void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) { 1939 GenFPToFPCall(invoke, codegen_, kQuickCbrt); 1940 } 1941 1942 void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) { 1943 CreateFPToFPCallLocations(allocator_, invoke); 1944 } 1945 1946 void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) { 1947 GenFPToFPCall(invoke, codegen_, kQuickCosh); 1948 } 1949 1950 void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) { 1951 CreateFPToFPCallLocations(allocator_, invoke); 1952 } 1953 1954 void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) { 1955 GenFPToFPCall(invoke, codegen_, kQuickExp); 1956 } 1957 1958 void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) { 1959 CreateFPToFPCallLocations(allocator_, invoke); 1960 } 1961 1962 void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) { 1963 GenFPToFPCall(invoke, codegen_, kQuickExpm1); 1964 } 1965 1966 void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) { 1967 CreateFPToFPCallLocations(allocator_, invoke); 1968 } 1969 1970 void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) { 1971 GenFPToFPCall(invoke, codegen_, kQuickLog); 1972 } 1973 1974 void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) { 1975 CreateFPToFPCallLocations(allocator_, invoke); 1976 } 1977 1978 void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) { 1979 GenFPToFPCall(invoke, codegen_, kQuickLog10); 1980 } 1981 1982 void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) { 1983 CreateFPToFPCallLocations(allocator_, invoke); 1984 } 1985 1986 void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) { 1987 GenFPToFPCall(invoke, codegen_, kQuickSinh); 1988 } 1989 1990 void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) { 1991 CreateFPToFPCallLocations(allocator_, invoke); 1992 } 1993 1994 void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) { 1995 GenFPToFPCall(invoke, codegen_, kQuickTan); 1996 } 1997 1998 void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) { 1999 CreateFPToFPCallLocations(allocator_, invoke); 2000 } 2001 2002 void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) { 2003 GenFPToFPCall(invoke, codegen_, kQuickTanh); 2004 } 2005 2006 void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) { 2007 CreateFPFPToFPCallLocations(allocator_, invoke); 2008 } 2009 2010 void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) { 2011 GenFPToFPCall(invoke, codegen_, 
kQuickAtan2); 2012 } 2013 2014 void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) { 2015 CreateFPFPToFPCallLocations(allocator_, invoke); 2016 } 2017 2018 void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) { 2019 GenFPToFPCall(invoke, codegen_, kQuickPow); 2020 } 2021 2022 void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) { 2023 CreateFPFPToFPCallLocations(allocator_, invoke); 2024 } 2025 2026 void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) { 2027 GenFPToFPCall(invoke, codegen_, kQuickHypot); 2028 } 2029 2030 void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) { 2031 CreateFPFPToFPCallLocations(allocator_, invoke); 2032 } 2033 2034 void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) { 2035 GenFPToFPCall(invoke, codegen_, kQuickNextAfter); 2036 } 2037 2038 void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { 2039 LocationSummary* locations = 2040 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2041 locations->SetInAt(0, Location::RequiresRegister()); 2042 locations->SetInAt(1, Location::RequiresRegister()); 2043 locations->SetInAt(2, Location::RequiresRegister()); 2044 locations->SetInAt(3, Location::RequiresRegister()); 2045 locations->SetInAt(4, Location::RequiresRegister()); 2046 2047 locations->AddTemp(Location::RequiresRegister()); 2048 locations->AddTemp(Location::RequiresRegister()); 2049 locations->AddTemp(Location::RequiresRegister()); 2050 } 2051 2052 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { 2053 MacroAssembler* masm = GetVIXLAssembler(); 2054 LocationSummary* locations = invoke->GetLocations(); 2055 2056 // Check assumption that sizeof(Char) is 2 (used in scaling below). 2057 const size_t char_size = DataType::Size(DataType::Type::kUint16); 2058 DCHECK_EQ(char_size, 2u); 2059 2060 // Location of data in char array buffer. 2061 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); 2062 2063 // Location of char array data in string. 2064 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); 2065 2066 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin); 2067 // Since getChars() calls getCharsNoCheck() - we use registers rather than constants. 2068 Register srcObj = XRegisterFrom(locations->InAt(0)); 2069 Register srcBegin = XRegisterFrom(locations->InAt(1)); 2070 Register srcEnd = XRegisterFrom(locations->InAt(2)); 2071 Register dstObj = XRegisterFrom(locations->InAt(3)); 2072 Register dstBegin = XRegisterFrom(locations->InAt(4)); 2073 2074 Register src_ptr = XRegisterFrom(locations->GetTemp(0)); 2075 Register num_chr = XRegisterFrom(locations->GetTemp(1)); 2076 Register tmp1 = XRegisterFrom(locations->GetTemp(2)); 2077 2078 UseScratchRegisterScope temps(masm); 2079 Register dst_ptr = temps.AcquireX(); 2080 Register tmp2 = temps.AcquireX(); 2081 2082 vixl::aarch64::Label done; 2083 vixl::aarch64::Label compressed_string_loop; 2084 __ Sub(num_chr, srcEnd, srcBegin); 2085 // Early out for valid zero-length retrievals. 2086 __ Cbz(num_chr, &done); 2087 2088 // dst address start to copy to. 2089 __ Add(dst_ptr, dstObj, Operand(data_offset)); 2090 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1)); 2091 2092 // src address to copy from. 
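// Only the value offset is added here; srcBegin is applied further down, scaled by 2 for
// uncompressed strings or unscaled in compressed_string_preloop for compressed ones.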
2093 __ Add(src_ptr, srcObj, Operand(value_offset)); 2094 vixl::aarch64::Label compressed_string_preloop; 2095 if (mirror::kUseStringCompression) { 2096 // Location of count in string. 2097 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 2098 // String's length. 2099 __ Ldr(tmp2, MemOperand(srcObj, count_offset)); 2100 __ Tbz(tmp2, 0, &compressed_string_preloop); 2101 } 2102 __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1)); 2103 2104 // Do the copy. 2105 vixl::aarch64::Label loop; 2106 vixl::aarch64::Label remainder; 2107 2108 // Save repairing the value of num_chr on the < 8 character path. 2109 __ Subs(tmp1, num_chr, 8); 2110 __ B(lt, &remainder); 2111 2112 // Keep the result of the earlier subs, we are going to fetch at least 8 characters. 2113 __ Mov(num_chr, tmp1); 2114 2115 // Main loop used for longer fetches loads and stores 8x16-bit characters at a time. 2116 // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.) 2117 __ Bind(&loop); 2118 __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex)); 2119 __ Subs(num_chr, num_chr, 8); 2120 __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex)); 2121 __ B(ge, &loop); 2122 2123 __ Adds(num_chr, num_chr, 8); 2124 __ B(eq, &done); 2125 2126 // Main loop for < 8 character case and remainder handling. Loads and stores one 2127 // 16-bit Java character at a time. 2128 __ Bind(&remainder); 2129 __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex)); 2130 __ Subs(num_chr, num_chr, 1); 2131 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex)); 2132 __ B(gt, &remainder); 2133 __ B(&done); 2134 2135 if (mirror::kUseStringCompression) { 2136 const size_t c_char_size = DataType::Size(DataType::Type::kInt8); 2137 DCHECK_EQ(c_char_size, 1u); 2138 __ Bind(&compressed_string_preloop); 2139 __ Add(src_ptr, src_ptr, Operand(srcBegin)); 2140 // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. 2141 __ Bind(&compressed_string_loop); 2142 __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex)); 2143 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex)); 2144 __ Subs(num_chr, num_chr, Operand(1)); 2145 __ B(gt, &compressed_string_loop); 2146 } 2147 2148 __ Bind(&done); 2149 } 2150 2151 // Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native 2152 // implementation there for longer copy lengths. 2153 static constexpr int32_t kSystemArrayCopyCharThreshold = 32; 2154 2155 static void SetSystemArrayCopyLocationRequires(LocationSummary* locations, 2156 uint32_t at, 2157 HInstruction* input) { 2158 HIntConstant* const_input = input->AsIntConstant(); 2159 if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) { 2160 locations->SetInAt(at, Location::RequiresRegister()); 2161 } else { 2162 locations->SetInAt(at, Location::RegisterOrConstant(input)); 2163 } 2164 } 2165 2166 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { 2167 // Check to see if we have known failures that will cause us to have to bail out 2168 // to the runtime, and just generate the runtime call directly. 2169 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); 2170 HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant(); 2171 2172 // The positions must be non-negative. 2173 if ((src_pos != nullptr && src_pos->GetValue() < 0) || 2174 (dst_pos != nullptr && dst_pos->GetValue() < 0)) { 2175 // We will have to fail anyways. 
2176 return; 2177 } 2178 2179 // The length must be >= 0 and not so long that we would (currently) prefer libcore's 2180 // native implementation. 2181 HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); 2182 if (length != nullptr) { 2183 int32_t len = length->GetValue(); 2184 if (len < 0 || len > kSystemArrayCopyCharThreshold) { 2185 // Just call as normal. 2186 return; 2187 } 2188 } 2189 2190 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); 2191 LocationSummary* locations = 2192 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); 2193 // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length). 2194 locations->SetInAt(0, Location::RequiresRegister()); 2195 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1)); 2196 locations->SetInAt(2, Location::RequiresRegister()); 2197 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3)); 2198 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4)); 2199 2200 locations->AddTemp(Location::RequiresRegister()); 2201 locations->AddTemp(Location::RequiresRegister()); 2202 locations->AddTemp(Location::RequiresRegister()); 2203 } 2204 2205 static void CheckSystemArrayCopyPosition(MacroAssembler* masm, 2206 const Location& pos, 2207 const Register& input, 2208 const Location& length, 2209 SlowPathCodeARM64* slow_path, 2210 const Register& temp, 2211 bool length_is_input_length = false) { 2212 const int32_t length_offset = mirror::Array::LengthOffset().Int32Value(); 2213 if (pos.IsConstant()) { 2214 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue(); 2215 if (pos_const == 0) { 2216 if (!length_is_input_length) { 2217 // Check that length(input) >= length. 2218 __ Ldr(temp, MemOperand(input, length_offset)); 2219 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32)); 2220 __ B(slow_path->GetEntryLabel(), lt); 2221 } 2222 } else { 2223 // Check that length(input) >= pos. 2224 __ Ldr(temp, MemOperand(input, length_offset)); 2225 __ Subs(temp, temp, pos_const); 2226 __ B(slow_path->GetEntryLabel(), lt); 2227 2228 // Check that (length(input) - pos) >= length. 2229 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32)); 2230 __ B(slow_path->GetEntryLabel(), lt); 2231 } 2232 } else if (length_is_input_length) { 2233 // The only way the copy can succeed is if pos is zero. 2234 __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel()); 2235 } else { 2236 // Check that pos >= 0. 2237 Register pos_reg = WRegisterFrom(pos); 2238 __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel()); 2239 2240 // Check that pos <= length(input) && (length(input) - pos) >= length. 2241 __ Ldr(temp, MemOperand(input, length_offset)); 2242 __ Subs(temp, temp, pos_reg); 2243 // Ccmp if length(input) >= pos, else definitely bail to slow path (N!=V == lt). 2244 __ Ccmp(temp, OperandFrom(length, DataType::Type::kInt32), NFlag, ge); 2245 __ B(slow_path->GetEntryLabel(), lt); 2246 } 2247 } 2248 2249 // Compute base source address, base destination address, and end 2250 // source address for System.arraycopy* intrinsics in `src_base`, 2251 // `dst_base` and `src_end` respectively. 
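// Constant positions and lengths are folded into immediate offsets; register values are added
// with a left shift by the element size shift.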
2252 static void GenSystemArrayCopyAddresses(MacroAssembler* masm, 2253 DataType::Type type, 2254 const Register& src, 2255 const Location& src_pos, 2256 const Register& dst, 2257 const Location& dst_pos, 2258 const Location& copy_length, 2259 const Register& src_base, 2260 const Register& dst_base, 2261 const Register& src_end) { 2262 // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics. 2263 DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16) 2264 << "Unexpected element type: " << type; 2265 const int32_t element_size = DataType::Size(type); 2266 const int32_t element_size_shift = DataType::SizeShift(type); 2267 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); 2268 2269 if (src_pos.IsConstant()) { 2270 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); 2271 __ Add(src_base, src, element_size * constant + data_offset); 2272 } else { 2273 __ Add(src_base, src, data_offset); 2274 __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift)); 2275 } 2276 2277 if (dst_pos.IsConstant()) { 2278 int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue(); 2279 __ Add(dst_base, dst, element_size * constant + data_offset); 2280 } else { 2281 __ Add(dst_base, dst, data_offset); 2282 __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift)); 2283 } 2284 2285 if (copy_length.IsConstant()) { 2286 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue(); 2287 __ Add(src_end, src_base, element_size * constant); 2288 } else { 2289 __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift)); 2290 } 2291 } 2292 2293 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { 2294 MacroAssembler* masm = GetVIXLAssembler(); 2295 LocationSummary* locations = invoke->GetLocations(); 2296 Register src = XRegisterFrom(locations->InAt(0)); 2297 Location src_pos = locations->InAt(1); 2298 Register dst = XRegisterFrom(locations->InAt(2)); 2299 Location dst_pos = locations->InAt(3); 2300 Location length = locations->InAt(4); 2301 2302 SlowPathCodeARM64* slow_path = 2303 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); 2304 codegen_->AddSlowPath(slow_path); 2305 2306 // If source and destination are the same, take the slow path. Overlapping copy regions must be 2307 // copied in reverse and we can't know in all cases if it's needed. 2308 __ Cmp(src, dst); 2309 __ B(slow_path->GetEntryLabel(), eq); 2310 2311 // Bail out if the source is null. 2312 __ Cbz(src, slow_path->GetEntryLabel()); 2313 2314 // Bail out if the destination is null. 2315 __ Cbz(dst, slow_path->GetEntryLabel()); 2316 2317 if (!length.IsConstant()) { 2318 // Merge the following two comparisons into one: 2319 // If the length is negative, bail out (delegate to libcore's native implementation). 2320 // If the length > 32 then (currently) prefer libcore's native implementation. 2321 __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold); 2322 __ B(slow_path->GetEntryLabel(), hi); 2323 } else { 2324 // We have already checked in the LocationsBuilder for the constant case. 
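// (A constant length outside [0, 32] never reaches this code, hence the DCHECKs below.)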
2325 DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2326 DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
2327 }
2328
2329 Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
2330 Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
2331 Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
2332
2333 CheckSystemArrayCopyPosition(masm,
2334 src_pos,
2335 src,
2336 length,
2337 slow_path,
2338 src_curr_addr,
2339 false);
2340
2341 CheckSystemArrayCopyPosition(masm,
2342 dst_pos,
2343 dst,
2344 length,
2345 slow_path,
2346 src_curr_addr,
2347 false);
2348
2349 src_curr_addr = src_curr_addr.X();
2350 dst_curr_addr = dst_curr_addr.X();
2351 src_stop_addr = src_stop_addr.X();
2352
2353 GenSystemArrayCopyAddresses(masm,
2354 DataType::Type::kUint16,
2355 src,
2356 src_pos,
2357 dst,
2358 dst_pos,
2359 length,
2360 src_curr_addr,
2361 dst_curr_addr,
2362 src_stop_addr);
2363
2364 // Iterate over the arrays and do a raw copy of the chars.
2365 const int32_t char_size = DataType::Size(DataType::Type::kUint16);
2366 UseScratchRegisterScope temps(masm);
2367 Register tmp = temps.AcquireW();
2368 vixl::aarch64::Label loop, done;
2369 __ Bind(&loop);
2370 __ Cmp(src_curr_addr, src_stop_addr);
2371 __ B(&done, eq);
2372 __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
2373 __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
2374 __ B(&loop);
2375 __ Bind(&done);
2376
2377 __ Bind(slow_path->GetExitLabel());
2378 }
2379
2380 // As with SystemArrayCopyChar, we can choose to use libcore's native implementation for longer copy lengths.
2381 static constexpr int32_t kSystemArrayCopyThreshold = 128;
2382
2383 // CodeGenerator::CreateSystemArrayCopyLocationSummary uses three temporary registers.
2384 // We want to use two temporary registers in order to reduce the register pressure on arm64.
2385 // So we don't use CodeGenerator::CreateSystemArrayCopyLocationSummary.
2386 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2387 // The only read barrier implementation supporting the
2388 // SystemArrayCopy intrinsic is the Baker-style read barrier.
2389 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2390 return;
2391 }
2392
2393 // Check to see if we have known failures that will cause us to have to bail out
2394 // to the runtime, and just generate the runtime call directly.
2395 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2396 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2397
2398 // The positions must be non-negative.
2399 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2400 (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
2401 // We will have to fail anyways.
2402 return;
2403 }
2404
2405 // The length must be >= 0 and not so long that we would (currently) prefer libcore's native implementation.
2406 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2407 if (length != nullptr) {
2408 int32_t len = length->GetValue();
2409 if (len < 0 || len >= kSystemArrayCopyThreshold) {
2410 // Just call as normal.
2411 return;
2412 }
2413 }
2414
2415 SystemArrayCopyOptimizations optimizations(invoke);
2416
2417 if (optimizations.GetDestinationIsSource()) {
2418 if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
2419 // When source and destination are the same, we only support copying to a lower or equal position (dest_pos <= src_pos).
2420 return;
2421 }
2422 }
2423
2424 if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
2425 // We currently don't intrinsify primitive copying.
2426 return; 2427 } 2428 2429 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); 2430 LocationSummary* locations = 2431 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); 2432 // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length). 2433 locations->SetInAt(0, Location::RequiresRegister()); 2434 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1)); 2435 locations->SetInAt(2, Location::RequiresRegister()); 2436 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3)); 2437 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4)); 2438 2439 locations->AddTemp(Location::RequiresRegister()); 2440 locations->AddTemp(Location::RequiresRegister()); 2441 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2442 // Temporary register IP0, obtained from the VIXL scratch register 2443 // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64 2444 // (because that register is clobbered by ReadBarrierMarkRegX 2445 // entry points). It cannot be used in calls to 2446 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier 2447 // either. For these reasons, get a third extra temporary register 2448 // from the register allocator. 2449 locations->AddTemp(Location::RequiresRegister()); 2450 } else { 2451 // Cases other than Baker read barriers: the third temporary will 2452 // be acquired from the VIXL scratch register pool. 2453 } 2454 } 2455 2456 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { 2457 // The only read barrier implementation supporting the 2458 // SystemArrayCopy intrinsic is the Baker-style read barriers. 2459 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 2460 2461 MacroAssembler* masm = GetVIXLAssembler(); 2462 LocationSummary* locations = invoke->GetLocations(); 2463 2464 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 2465 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 2466 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 2467 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 2468 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); 2469 2470 Register src = XRegisterFrom(locations->InAt(0)); 2471 Location src_pos = locations->InAt(1); 2472 Register dest = XRegisterFrom(locations->InAt(2)); 2473 Location dest_pos = locations->InAt(3); 2474 Location length = locations->InAt(4); 2475 Register temp1 = WRegisterFrom(locations->GetTemp(0)); 2476 Location temp1_loc = LocationFrom(temp1); 2477 Register temp2 = WRegisterFrom(locations->GetTemp(1)); 2478 Location temp2_loc = LocationFrom(temp2); 2479 2480 SlowPathCodeARM64* intrinsic_slow_path = 2481 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); 2482 codegen_->AddSlowPath(intrinsic_slow_path); 2483 2484 vixl::aarch64::Label conditions_on_positions_validated; 2485 SystemArrayCopyOptimizations optimizations(invoke); 2486 2487 // If source and destination are the same, we go to slow path if we need to do 2488 // forward copying. 2489 if (src_pos.IsConstant()) { 2490 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); 2491 if (dest_pos.IsConstant()) { 2492 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); 2493 if (optimizations.GetDestinationIsSource()) { 2494 // Checked when building locations. 
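// (The builder bails out on src_pos < dest_pos when source and destination are the same, so a
// forward copy is safe here.)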
2495 DCHECK_GE(src_pos_constant, dest_pos_constant); 2496 } else if (src_pos_constant < dest_pos_constant) { 2497 __ Cmp(src, dest); 2498 __ B(intrinsic_slow_path->GetEntryLabel(), eq); 2499 } 2500 // Checked when building locations. 2501 DCHECK(!optimizations.GetDestinationIsSource() 2502 || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue())); 2503 } else { 2504 if (!optimizations.GetDestinationIsSource()) { 2505 __ Cmp(src, dest); 2506 __ B(&conditions_on_positions_validated, ne); 2507 } 2508 __ Cmp(WRegisterFrom(dest_pos), src_pos_constant); 2509 __ B(intrinsic_slow_path->GetEntryLabel(), gt); 2510 } 2511 } else { 2512 if (!optimizations.GetDestinationIsSource()) { 2513 __ Cmp(src, dest); 2514 __ B(&conditions_on_positions_validated, ne); 2515 } 2516 __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()), 2517 OperandFrom(dest_pos, invoke->InputAt(3)->GetType())); 2518 __ B(intrinsic_slow_path->GetEntryLabel(), lt); 2519 } 2520 2521 __ Bind(&conditions_on_positions_validated); 2522 2523 if (!optimizations.GetSourceIsNotNull()) { 2524 // Bail out if the source is null. 2525 __ Cbz(src, intrinsic_slow_path->GetEntryLabel()); 2526 } 2527 2528 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { 2529 // Bail out if the destination is null. 2530 __ Cbz(dest, intrinsic_slow_path->GetEntryLabel()); 2531 } 2532 2533 // We have already checked in the LocationsBuilder for the constant case. 2534 if (!length.IsConstant() && 2535 !optimizations.GetCountIsSourceLength() && 2536 !optimizations.GetCountIsDestinationLength()) { 2537 // Merge the following two comparisons into one: 2538 // If the length is negative, bail out (delegate to libcore's native implementation). 2539 // If the length >= 128 then (currently) prefer native implementation. 2540 __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold); 2541 __ B(intrinsic_slow_path->GetEntryLabel(), hs); 2542 } 2543 // Validity checks: source. 2544 CheckSystemArrayCopyPosition(masm, 2545 src_pos, 2546 src, 2547 length, 2548 intrinsic_slow_path, 2549 temp1, 2550 optimizations.GetCountIsSourceLength()); 2551 2552 // Validity checks: dest. 2553 CheckSystemArrayCopyPosition(masm, 2554 dest_pos, 2555 dest, 2556 length, 2557 intrinsic_slow_path, 2558 temp1, 2559 optimizations.GetCountIsDestinationLength()); 2560 { 2561 // We use a block to end the scratch scope before the write barrier, thus 2562 // freeing the temporary registers so they can be used in `MarkGCCard`. 2563 UseScratchRegisterScope temps(masm); 2564 Location temp3_loc; // Used only for Baker read barrier. 2565 Register temp3; 2566 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2567 temp3_loc = locations->GetTemp(2); 2568 temp3 = WRegisterFrom(temp3_loc); 2569 } else { 2570 temp3 = temps.AcquireW(); 2571 } 2572 2573 if (!optimizations.GetDoesNotNeedTypeCheck()) { 2574 // Check whether all elements of the source array are assignable to the component 2575 // type of the destination array. We do two checks: the classes are the same, 2576 // or the destination is Object[]. If none of these checks succeed, we go to the 2577 // slow path. 
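// Roughly, and skipping any check already proven statically by the optimizations:
//
//   if (!(src->klass_->component_type_ is a reference type))   -> slow path
//   if (!(dest->klass_->component_type_ is a reference type))  -> slow path
//   if (src->klass_ != dest->klass_ && dest is not Object[])   -> slow path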
2578
2579 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2580 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2581 // /* HeapReference<Class> */ temp1 = src->klass_
2582 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2583 temp1_loc,
2584 src.W(),
2585 class_offset,
2586 temp3_loc,
2587 /* needs_null_check */ false,
2588 /* use_load_acquire */ false);
2589 // Bail out if the source is not a non-primitive array.
2590 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2591 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2592 temp1_loc,
2593 temp1,
2594 component_offset,
2595 temp3_loc,
2596 /* needs_null_check */ false,
2597 /* use_load_acquire */ false);
2598 __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
2599 // If heap poisoning is enabled, `temp1` has been unpoisoned
2600 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2601 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2602 __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
2603 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2604 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2605 }
2606
2607 // /* HeapReference<Class> */ temp1 = dest->klass_
2608 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2609 temp1_loc,
2610 dest.W(),
2611 class_offset,
2612 temp3_loc,
2613 /* needs_null_check */ false,
2614 /* use_load_acquire */ false);
2615
2616 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2617 // Bail out if the destination is not a non-primitive array.
2618 //
2619 // Register `temp1` is not trashed by the read barrier emitted
2620 // by GenerateFieldLoadWithBakerReadBarrier below, as that
2621 // method produces a call to a ReadBarrierMarkRegX entry point,
2622 // which saves all potentially live registers, including
2623 // temporaries such as `temp1`.
2624 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2625 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2626 temp2_loc,
2627 temp1,
2628 component_offset,
2629 temp3_loc,
2630 /* needs_null_check */ false,
2631 /* use_load_acquire */ false);
2632 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2633 // If heap poisoning is enabled, `temp2` has been unpoisoned
2634 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2635 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2636 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2637 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2638 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2639 }
2640
2641 // For the same reason given earlier, `temp1` is not trashed by the
2642 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2643 // /* HeapReference<Class> */ temp2 = src->klass_
2644 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2645 temp2_loc,
2646 src.W(),
2647 class_offset,
2648 temp3_loc,
2649 /* needs_null_check */ false,
2650 /* use_load_acquire */ false);
2651 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2652 __ Cmp(temp1, temp2); 2653 2654 if (optimizations.GetDestinationIsTypedObjectArray()) { 2655 vixl::aarch64::Label do_copy; 2656 __ B(&do_copy, eq); 2657 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 2658 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, 2659 temp1_loc, 2660 temp1, 2661 component_offset, 2662 temp3_loc, 2663 /* needs_null_check */ false, 2664 /* use_load_acquire */ false); 2665 // /* HeapReference<Class> */ temp1 = temp1->super_class_ 2666 // We do not need to emit a read barrier for the following 2667 // heap reference load, as `temp1` is only used in a 2668 // comparison with null below, and this reference is not 2669 // kept afterwards. 2670 __ Ldr(temp1, HeapOperand(temp1, super_offset)); 2671 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); 2672 __ Bind(&do_copy); 2673 } else { 2674 __ B(intrinsic_slow_path->GetEntryLabel(), ne); 2675 } 2676 } else { 2677 // Non read barrier code. 2678 2679 // /* HeapReference<Class> */ temp1 = dest->klass_ 2680 __ Ldr(temp1, MemOperand(dest, class_offset)); 2681 // /* HeapReference<Class> */ temp2 = src->klass_ 2682 __ Ldr(temp2, MemOperand(src, class_offset)); 2683 bool did_unpoison = false; 2684 if (!optimizations.GetDestinationIsNonPrimitiveArray() || 2685 !optimizations.GetSourceIsNonPrimitiveArray()) { 2686 // One or two of the references need to be unpoisoned. Unpoison them 2687 // both to make the identity check valid. 2688 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); 2689 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2); 2690 did_unpoison = true; 2691 } 2692 2693 if (!optimizations.GetDestinationIsNonPrimitiveArray()) { 2694 // Bail out if the destination is not a non primitive array. 2695 // /* HeapReference<Class> */ temp3 = temp1->component_type_ 2696 __ Ldr(temp3, HeapOperand(temp1, component_offset)); 2697 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); 2698 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); 2699 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); 2700 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); 2701 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 2702 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); 2703 } 2704 2705 if (!optimizations.GetSourceIsNonPrimitiveArray()) { 2706 // Bail out if the source is not a non primitive array. 2707 // /* HeapReference<Class> */ temp3 = temp2->component_type_ 2708 __ Ldr(temp3, HeapOperand(temp2, component_offset)); 2709 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); 2710 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); 2711 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); 2712 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); 2713 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 2714 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); 2715 } 2716 2717 __ Cmp(temp1, temp2); 2718 2719 if (optimizations.GetDestinationIsTypedObjectArray()) { 2720 vixl::aarch64::Label do_copy; 2721 __ B(&do_copy, eq); 2722 if (!did_unpoison) { 2723 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); 2724 } 2725 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 2726 __ Ldr(temp1, HeapOperand(temp1, component_offset)); 2727 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); 2728 // /* HeapReference<Class> */ temp1 = temp1->super_class_ 2729 __ Ldr(temp1, HeapOperand(temp1, super_offset)); 2730 // No need to unpoison the result, we're comparing against null. 
2731 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2732 __ Bind(&do_copy);
2733 } else {
2734 __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2735 }
2736 }
2737 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2738 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2739 // Bail out if the source is not a non-primitive array.
2740 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2741 // /* HeapReference<Class> */ temp1 = src->klass_
2742 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2743 temp1_loc,
2744 src.W(),
2745 class_offset,
2746 temp3_loc,
2747 /* needs_null_check */ false,
2748 /* use_load_acquire */ false);
2749 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2750 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2751 temp2_loc,
2752 temp1,
2753 component_offset,
2754 temp3_loc,
2755 /* needs_null_check */ false,
2756 /* use_load_acquire */ false);
2757 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2758 // If heap poisoning is enabled, `temp2` has been unpoisoned
2759 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2760 } else {
2761 // /* HeapReference<Class> */ temp1 = src->klass_
2762 __ Ldr(temp1, HeapOperand(src.W(), class_offset));
2763 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2764 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2765 __ Ldr(temp2, HeapOperand(temp1, component_offset));
2766 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2767 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2768 }
2769 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2770 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2771 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2772 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2773 }
2774
2775 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
2776 // Constant zero length: no need to emit the loop code at all.
2777 } else {
2778 Register src_curr_addr = temp1.X();
2779 Register dst_curr_addr = temp2.X();
2780 Register src_stop_addr = temp3.X();
2781 vixl::aarch64::Label done;
2782 const DataType::Type type = DataType::Type::kReference;
2783 const int32_t element_size = DataType::Size(type);
2784
2785 if (length.IsRegister()) {
2786 // Don't enter the copy loop if the length is zero.
2787 __ Cbz(WRegisterFrom(length), &done);
2788 }
2789
2790 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2791 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2792
2793 // SystemArrayCopy implementation for Baker read barriers (see
2794 // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
2795 //
2796 // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2797 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
2798 // bool is_gray = (rb_state == ReadBarrier::GrayState());
2799 // if (is_gray) {
2800 // // Slow-path copy.
2801 // do {
2802 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2803 // } while (src_ptr != end_ptr)
2804 // } else {
2805 // // Fast-path copy.
2806 // do {
2807 // *dest_ptr++ = *src_ptr++;
2808 // } while (src_ptr != end_ptr)
2809 // }
2810
2811 // Make sure `tmp` is not IP0, as it is clobbered by
2812 // ReadBarrierMarkRegX entry points in
2813 // ReadBarrierSystemArrayCopySlowPathARM64.
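// Excluding IP0 before the acquire below forces `tmp` onto IP1; IP0 is then put back into the
// scratch pool, as explained in the following comment.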
        DCHECK(temps.IsAvailable(ip0));
        temps.Exclude(ip0);
        Register tmp = temps.AcquireW();
        DCHECK_NE(LocationFrom(tmp).reg(), IP0);
        // Put IP0 back in the pool so that VIXL has at least one
        // scratch register available to emit macro-instructions (note
        // that IP1 is already used for `tmp`). Indeed some
        // macro-instructions used in GenSystemArrayCopyAddresses
        // (invoked hereunder) may require a scratch register (for
        // instance to emit a load with a large constant offset).
        temps.Include(ip0);

        // /* int32_t */ monitor = src->monitor_
        __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
        // /* LockWord */ lock_word = LockWord(monitor)
        static_assert(sizeof(LockWord) == sizeof(int32_t),
                      "art::LockWord and int32_t have different sizes.");

        // Introduce a dependency on the lock_word including rb_state,
        // to prevent load-load reordering, and without using
        // a memory barrier (which would be more expensive).
        // `src` is unchanged by this operation, but its value now depends
        // on `tmp`.
        __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));

        // Compute base source address, base destination address, and end
        // source address for System.arraycopy* intrinsics in `src_base`,
        // `dst_base` and `src_end` respectively.
        // Note that `src_curr_addr` is computed from `src` (and
        // `src_pos`) here, and thus honors the artificial dependency
        // of `src` on `tmp`.
        GenSystemArrayCopyAddresses(masm,
                                    type,
                                    src,
                                    src_pos,
                                    dest,
                                    dest_pos,
                                    length,
                                    src_curr_addr,
                                    dst_curr_addr,
                                    src_stop_addr);

        // Slow path used to copy array when `src` is gray.
        SlowPathCodeARM64* read_barrier_slow_path =
            new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(
                invoke, LocationFrom(tmp));
        codegen_->AddSlowPath(read_barrier_slow_path);

        // Given the numeric representation, it's enough to check the low bit of the rb_state.
        static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
        static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
        __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());

        // Fast-path copy.
        // Iterate over the arrays and do a raw copy of the objects. We don't need to
        // poison/unpoison.
        vixl::aarch64::Label loop;
        __ Bind(&loop);
        __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
        __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
        __ Cmp(src_curr_addr, src_stop_addr);
        __ B(&loop, ne);

        __ Bind(read_barrier_slow_path->GetExitLabel());
      } else {
        // Non read barrier code.
        // Compute base source address, base destination address, and end
        // source address for System.arraycopy* intrinsics in `src_base`,
        // `dst_base` and `src_end` respectively.
        GenSystemArrayCopyAddresses(masm,
                                    type,
                                    src,
                                    src_pos,
                                    dest,
                                    dest_pos,
                                    length,
                                    src_curr_addr,
                                    dst_curr_addr,
                                    src_stop_addr);
        // Iterate over the arrays and do a raw copy of the objects. We don't need to
        // poison/unpoison.
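        // In pseudo-code, the loop below is roughly:
        //
        //   do {
        //     *dst_curr_addr++ = *src_curr_addr++;
        //   } while (src_curr_addr != src_stop_addr);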
        vixl::aarch64::Label loop;
        __ Bind(&loop);
        {
          Register tmp = temps.AcquireW();
          __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
          __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
        }
        __ Cmp(src_curr_addr, src_stop_addr);
        __ B(&loop, ne);
      }
      __ Bind(&done);
    }
  }

  // We only need one card marking on the destination array.
  codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);

  __ Bind(intrinsic_slow_path->GetExitLabel());
}

static void GenIsInfinite(LocationSummary* locations,
                          bool is64bit,
                          MacroAssembler* masm) {
  Operand infinity;
  Register out;

  if (is64bit) {
    infinity = kPositiveInfinityDouble;
    out = XRegisterFrom(locations->Out());
  } else {
    infinity = kPositiveInfinityFloat;
    out = WRegisterFrom(locations->Out());
  }

  const Register zero = vixl::aarch64::Assembler::AppropriateZeroRegFor(out);

  MoveFPToInt(locations, is64bit, masm);
  // XOR-ing the raw bits with +infinity leaves at most the sign bit set
  // exactly when the input is +/- infinity.
  __ Eor(out, out, infinity);
  // We don't care about the sign bit, so shift left.
  __ Cmp(zero, Operand(out, LSL, 1));
  __ Cset(out, eq);
}

void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
  GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
  GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      calling_convention.GetReturnLocation(DataType::Type::kReference),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
}

void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();

  Register out = RegisterFrom(locations->Out(), DataType::Type::kReference);
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireW();
  InvokeRuntimeCallingConvention calling_convention;
  Register argument = calling_convention.GetRegisterAt(0);
  if (invoke->InputAt(0)->IsConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (value >= info.low && value <= info.high) {
      // Just embed the j.l.Integer in the code.
      ScopedObjectAccess soa(Thread::Current());
      mirror::Object* boxed = info.cache->Get(value + (-info.low));
      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
    } else {
      // Allocate and initialize a new j.l.Integer.
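      // The code below passes the boot-image address of the java.lang.Integer
      // class to the allocation entrypoint and then stores the constant into
      // the new object's value field.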
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
      uint32_t address =
          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
      __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
      __ Mov(temp.W(), value);
      __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
      // one.
      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
    }
  } else {
    Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32);
    // Check bounds of our cache.
    __ Add(out.W(), in.W(), -info.low);
    __ Cmp(out.W(), info.high - info.low + 1);
    vixl::aarch64::Label allocate, done;
    __ B(&allocate, hs);
    // If the value is within the bounds, load the j.l.Integer directly from the array.
    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
    __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
    MemOperand source = HeapOperand(
        temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
    codegen_->Load(DataType::Type::kReference, out, source);
    codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
    __ B(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
    __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
    __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
    // one.
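    // The StoreStore barrier ensures the store to the final `value` field is
    // visible before the reference to the new Integer can be published.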
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  Register out = RegisterFrom(invoke->GetLocations()->Out(), DataType::Type::kInt32);
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();

  __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
  __ Ldar(out.W(), MemOperand(temp));

  vixl::aarch64::Label done;
  __ Cbz(out.W(), &done);
  __ Stlr(wzr, MemOperand(temp));
  __ Bind(&done);
}

void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
}

void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }

UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)

UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppend);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(ARM64)

#undef __

}  // namespace arm64
}  // namespace art