1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "intrinsics_arm64.h" 18 19 #include "arch/arm64/instruction_set_features_arm64.h" 20 #include "art_method.h" 21 #include "code_generator_arm64.h" 22 #include "common_arm64.h" 23 #include "entrypoints/quick/quick_entrypoints.h" 24 #include "intrinsics.h" 25 #include "lock_word.h" 26 #include "mirror/array-inl.h" 27 #include "mirror/object_array-inl.h" 28 #include "mirror/reference.h" 29 #include "mirror/string-inl.h" 30 #include "scoped_thread_state_change-inl.h" 31 #include "thread-current-inl.h" 32 #include "utils/arm64/assembler_arm64.h" 33 34 using namespace vixl::aarch64; // NOLINT(build/namespaces) 35 36 // TODO(VIXL): Make VIXL compile with -Wshadow. 37 #pragma GCC diagnostic push 38 #pragma GCC diagnostic ignored "-Wshadow" 39 #include "aarch64/disasm-aarch64.h" 40 #include "aarch64/macro-assembler-aarch64.h" 41 #pragma GCC diagnostic pop 42 43 namespace art { 44 45 namespace arm64 { 46 47 using helpers::DRegisterFrom; 48 using helpers::FPRegisterFrom; 49 using helpers::HeapOperand; 50 using helpers::LocationFrom; 51 using helpers::OperandFrom; 52 using helpers::RegisterFrom; 53 using helpers::SRegisterFrom; 54 using helpers::WRegisterFrom; 55 using helpers::XRegisterFrom; 56 using helpers::InputRegisterAt; 57 using helpers::OutputRegister; 58 59 namespace { 60 61 ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) { 62 return MemOperand(XRegisterFrom(location), offset); 63 } 64 65 } // namespace 66 67 MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() { 68 return codegen_->GetVIXLAssembler(); 69 } 70 71 ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() { 72 return codegen_->GetGraph()->GetArena(); 73 } 74 75 #define __ codegen->GetVIXLAssembler()-> 76 77 static void MoveFromReturnRegister(Location trg, 78 Primitive::Type type, 79 CodeGeneratorARM64* codegen) { 80 if (!trg.IsValid()) { 81 DCHECK(type == Primitive::kPrimVoid); 82 return; 83 } 84 85 DCHECK_NE(type, Primitive::kPrimVoid); 86 87 if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) { 88 Register trg_reg = RegisterFrom(trg, type); 89 Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type); 90 __ Mov(trg_reg, res_reg, kDiscardForSameWReg); 91 } else { 92 FPRegister trg_reg = FPRegisterFrom(trg, type); 93 FPRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type); 94 __ Fmov(trg_reg, res_reg); 95 } 96 } 97 98 static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) { 99 InvokeDexCallingConventionVisitorARM64 calling_convention_visitor; 100 IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); 101 } 102 103 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified 104 // call. This will copy the arguments into the positions for a regular call. 
105 // 106 // Note: The actual parameters are required to be in the locations given by the invoke's location 107 // summary. If an intrinsic modifies those locations before a slowpath call, they must be 108 // restored! 109 class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { 110 public: 111 explicit IntrinsicSlowPathARM64(HInvoke* invoke) 112 : SlowPathCodeARM64(invoke), invoke_(invoke) { } 113 114 void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { 115 CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); 116 __ Bind(GetEntryLabel()); 117 118 SaveLiveRegisters(codegen, invoke_->GetLocations()); 119 120 MoveArguments(invoke_, codegen); 121 122 { 123 // Ensure that between the BLR (emitted by Generate*Call) and RecordPcInfo there 124 // are no pools emitted. 125 vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); 126 if (invoke_->IsInvokeStaticOrDirect()) { 127 codegen->GenerateStaticOrDirectCall( 128 invoke_->AsInvokeStaticOrDirect(), LocationFrom(kArtMethodRegister), this); 129 } else { 130 codegen->GenerateVirtualCall( 131 invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister), this); 132 } 133 } 134 135 // Copy the result back to the expected output. 136 Location out = invoke_->GetLocations()->Out(); 137 if (out.IsValid()) { 138 DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory. 139 DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); 140 MoveFromReturnRegister(out, invoke_->GetType(), codegen); 141 } 142 143 RestoreLiveRegisters(codegen, invoke_->GetLocations()); 144 __ B(GetExitLabel()); 145 } 146 147 const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM64"; } 148 149 private: 150 // The instruction where this slow path is happening. 151 HInvoke* const invoke_; 152 153 DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64); 154 }; 155 156 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. 157 class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { 158 public: 159 ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp) 160 : SlowPathCodeARM64(instruction), tmp_(tmp) { 161 DCHECK(kEmitCompilerReadBarrier); 162 DCHECK(kUseBakerReadBarrier); 163 } 164 165 void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { 166 CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); 167 LocationSummary* locations = instruction_->GetLocations(); 168 DCHECK(locations->CanCall()); 169 DCHECK(instruction_->IsInvokeStaticOrDirect()) 170 << "Unexpected instruction in read barrier arraycopy slow path: " 171 << instruction_->DebugName(); 172 DCHECK(instruction_->GetLocations()->Intrinsified()); 173 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); 174 175 const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); 176 177 Register src_curr_addr = XRegisterFrom(locations->GetTemp(0)); 178 Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1)); 179 Register src_stop_addr = XRegisterFrom(locations->GetTemp(2)); 180 Register tmp_reg = WRegisterFrom(tmp_); 181 182 __ Bind(GetEntryLabel()); 183 vixl::aarch64::Label slow_copy_loop; 184 __ Bind(&slow_copy_loop); 185 __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex)); 186 codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg); 187 // TODO: Inline the mark bit check before calling the runtime? 
188 // tmp_reg = ReadBarrier::Mark(tmp_reg); 189 // No need to save live registers; it's taken care of by the 190 // entrypoint. Also, there is no need to update the stack mask, 191 // as this runtime call will not trigger a garbage collection. 192 // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more 193 // explanations.) 194 DCHECK_NE(tmp_.reg(), LR); 195 DCHECK_NE(tmp_.reg(), WSP); 196 DCHECK_NE(tmp_.reg(), WZR); 197 // IP0 is used internally by the ReadBarrierMarkRegX entry point 198 // as a temporary (and not preserved). It thus cannot be used by 199 // any live register in this slow path. 200 DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0); 201 DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0); 202 DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0); 203 DCHECK_NE(tmp_.reg(), IP0); 204 DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg(); 205 // TODO: Load the entrypoint once before the loop, instead of 206 // loading it at every iteration. 207 int32_t entry_point_offset = 208 Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg()); 209 // This runtime call does not require a stack map. 210 codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); 211 codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg); 212 __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex)); 213 __ Cmp(src_curr_addr, src_stop_addr); 214 __ B(&slow_copy_loop, ne); 215 __ B(GetExitLabel()); 216 } 217 218 const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; } 219 220 private: 221 Location tmp_; 222 223 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64); 224 }; 225 #undef __ 226 227 bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) { 228 Dispatch(invoke); 229 LocationSummary* res = invoke->GetLocations(); 230 if (res == nullptr) { 231 return false; 232 } 233 return res->Intrinsified(); 234 } 235 236 #define __ masm-> 237 238 static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { 239 LocationSummary* locations = new (arena) LocationSummary(invoke, 240 LocationSummary::kNoCall, 241 kIntrinsified); 242 locations->SetInAt(0, Location::RequiresFpuRegister()); 243 locations->SetOut(Location::RequiresRegister()); 244 } 245 246 static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { 247 LocationSummary* locations = new (arena) LocationSummary(invoke, 248 LocationSummary::kNoCall, 249 kIntrinsified); 250 locations->SetInAt(0, Location::RequiresRegister()); 251 locations->SetOut(Location::RequiresFpuRegister()); 252 } 253 254 static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) { 255 Location input = locations->InAt(0); 256 Location output = locations->Out(); 257 __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output), 258 is64bit ? DRegisterFrom(input) : SRegisterFrom(input)); 259 } 260 261 static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) { 262 Location input = locations->InAt(0); 263 Location output = locations->Out(); 264 __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output), 265 is64bit ? 
XRegisterFrom(input) : WRegisterFrom(input)); 266 } 267 268 void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { 269 CreateFPToIntLocations(arena_, invoke); 270 } 271 void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { 272 CreateIntToFPLocations(arena_, invoke); 273 } 274 275 void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { 276 MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); 277 } 278 void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { 279 MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); 280 } 281 282 void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { 283 CreateFPToIntLocations(arena_, invoke); 284 } 285 void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) { 286 CreateIntToFPLocations(arena_, invoke); 287 } 288 289 void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { 290 MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); 291 } 292 void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) { 293 MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); 294 } 295 296 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { 297 LocationSummary* locations = new (arena) LocationSummary(invoke, 298 LocationSummary::kNoCall, 299 kIntrinsified); 300 locations->SetInAt(0, Location::RequiresRegister()); 301 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 302 } 303 304 static void GenReverseBytes(LocationSummary* locations, 305 Primitive::Type type, 306 MacroAssembler* masm) { 307 Location in = locations->InAt(0); 308 Location out = locations->Out(); 309 310 switch (type) { 311 case Primitive::kPrimShort: 312 __ Rev16(WRegisterFrom(out), WRegisterFrom(in)); 313 __ Sxth(WRegisterFrom(out), WRegisterFrom(out)); 314 break; 315 case Primitive::kPrimInt: 316 case Primitive::kPrimLong: 317 __ Rev(RegisterFrom(out, type), RegisterFrom(in, type)); 318 break; 319 default: 320 LOG(FATAL) << "Unexpected size for reverse-bytes: " << type; 321 UNREACHABLE(); 322 } 323 } 324 325 void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) { 326 CreateIntToIntLocations(arena_, invoke); 327 } 328 329 void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) { 330 GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler()); 331 } 332 333 void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) { 334 CreateIntToIntLocations(arena_, invoke); 335 } 336 337 void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) { 338 GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); 339 } 340 341 void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) { 342 CreateIntToIntLocations(arena_, invoke); 343 } 344 345 void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) { 346 GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetVIXLAssembler()); 347 } 348 349 static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { 350 LocationSummary* locations = new (arena) LocationSummary(invoke, 351 LocationSummary::kNoCall, 352 kIntrinsified); 353 locations->SetInAt(0, Location::RequiresRegister()); 354 locations->SetInAt(1, Location::RequiresRegister()); 355 
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 356 } 357 358 static void GenNumberOfLeadingZeros(LocationSummary* locations, 359 Primitive::Type type, 360 MacroAssembler* masm) { 361 DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); 362 363 Location in = locations->InAt(0); 364 Location out = locations->Out(); 365 366 __ Clz(RegisterFrom(out, type), RegisterFrom(in, type)); 367 } 368 369 void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 370 CreateIntToIntLocations(arena_, invoke); 371 } 372 373 void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 374 GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler()); 375 } 376 377 void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 378 CreateIntToIntLocations(arena_, invoke); 379 } 380 381 void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 382 GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); 383 } 384 385 static void GenNumberOfTrailingZeros(LocationSummary* locations, 386 Primitive::Type type, 387 MacroAssembler* masm) { 388 DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); 389 390 Location in = locations->InAt(0); 391 Location out = locations->Out(); 392 393 __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type)); 394 __ Clz(RegisterFrom(out, type), RegisterFrom(out, type)); 395 } 396 397 void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { 398 CreateIntToIntLocations(arena_, invoke); 399 } 400 401 void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { 402 GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler()); 403 } 404 405 void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { 406 CreateIntToIntLocations(arena_, invoke); 407 } 408 409 void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { 410 GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); 411 } 412 413 static void GenReverse(LocationSummary* locations, 414 Primitive::Type type, 415 MacroAssembler* masm) { 416 DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); 417 418 Location in = locations->InAt(0); 419 Location out = locations->Out(); 420 421 __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type)); 422 } 423 424 void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) { 425 CreateIntToIntLocations(arena_, invoke); 426 } 427 428 void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) { 429 GenReverse(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler()); 430 } 431 432 void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) { 433 CreateIntToIntLocations(arena_, invoke); 434 } 435 436 void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) { 437 GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); 438 } 439 440 static void GenBitCount(HInvoke* instr, Primitive::Type type, MacroAssembler* masm) { 441 DCHECK(Primitive::IsIntOrLongType(type)) << type; 442 DCHECK_EQ(instr->GetType(), Primitive::kPrimInt); 443 DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type); 444 445 UseScratchRegisterScope temps(masm); 446 447 Register src = InputRegisterAt(instr, 0); 448 Register dst = 
RegisterFrom(instr->GetLocations()->Out(), type); 449 FPRegister fpr = (type == Primitive::kPrimLong) ? temps.AcquireD() : temps.AcquireS(); 450 451 __ Fmov(fpr, src); 452 __ Cnt(fpr.V8B(), fpr.V8B()); 453 __ Addv(fpr.B(), fpr.V8B()); 454 __ Fmov(dst, fpr); 455 } 456 457 void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) { 458 CreateIntToIntLocations(arena_, invoke); 459 } 460 461 void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) { 462 GenBitCount(invoke, Primitive::kPrimLong, GetVIXLAssembler()); 463 } 464 465 void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) { 466 CreateIntToIntLocations(arena_, invoke); 467 } 468 469 void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) { 470 GenBitCount(invoke, Primitive::kPrimInt, GetVIXLAssembler()); 471 } 472 473 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { 474 LocationSummary* locations = new (arena) LocationSummary(invoke, 475 LocationSummary::kNoCall, 476 kIntrinsified); 477 locations->SetInAt(0, Location::RequiresFpuRegister()); 478 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 479 } 480 481 static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) { 482 Location in = locations->InAt(0); 483 Location out = locations->Out(); 484 485 FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in); 486 FPRegister out_reg = is64bit ? DRegisterFrom(out) : SRegisterFrom(out); 487 488 __ Fabs(out_reg, in_reg); 489 } 490 491 void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) { 492 CreateFPToFPLocations(arena_, invoke); 493 } 494 495 void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) { 496 MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); 497 } 498 499 void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) { 500 CreateFPToFPLocations(arena_, invoke); 501 } 502 503 void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) { 504 MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); 505 } 506 507 static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) { 508 LocationSummary* locations = new (arena) LocationSummary(invoke, 509 LocationSummary::kNoCall, 510 kIntrinsified); 511 locations->SetInAt(0, Location::RequiresRegister()); 512 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 513 } 514 515 static void GenAbsInteger(LocationSummary* locations, 516 bool is64bit, 517 MacroAssembler* masm) { 518 Location in = locations->InAt(0); 519 Location output = locations->Out(); 520 521 Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in); 522 Register out_reg = is64bit ? 
XRegisterFrom(output) : WRegisterFrom(output); 523 524 __ Cmp(in_reg, Operand(0)); 525 __ Cneg(out_reg, in_reg, lt); 526 } 527 528 void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) { 529 CreateIntToInt(arena_, invoke); 530 } 531 532 void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) { 533 GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); 534 } 535 536 void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) { 537 CreateIntToInt(arena_, invoke); 538 } 539 540 void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) { 541 GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); 542 } 543 544 static void GenMinMaxFP(LocationSummary* locations, 545 bool is_min, 546 bool is_double, 547 MacroAssembler* masm) { 548 Location op1 = locations->InAt(0); 549 Location op2 = locations->InAt(1); 550 Location out = locations->Out(); 551 552 FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1); 553 FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2); 554 FPRegister out_reg = is_double ? DRegisterFrom(out) : SRegisterFrom(out); 555 if (is_min) { 556 __ Fmin(out_reg, op1_reg, op2_reg); 557 } else { 558 __ Fmax(out_reg, op1_reg, op2_reg); 559 } 560 } 561 562 static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { 563 LocationSummary* locations = new (arena) LocationSummary(invoke, 564 LocationSummary::kNoCall, 565 kIntrinsified); 566 locations->SetInAt(0, Location::RequiresFpuRegister()); 567 locations->SetInAt(1, Location::RequiresFpuRegister()); 568 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 569 } 570 571 void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { 572 CreateFPFPToFPLocations(arena_, invoke); 573 } 574 575 void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { 576 GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler()); 577 } 578 579 void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) { 580 CreateFPFPToFPLocations(arena_, invoke); 581 } 582 583 void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) { 584 GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler()); 585 } 586 587 void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { 588 CreateFPFPToFPLocations(arena_, invoke); 589 } 590 591 void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { 592 GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler()); 593 } 594 595 void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { 596 CreateFPFPToFPLocations(arena_, invoke); 597 } 598 599 void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { 600 GenMinMaxFP( 601 invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler()); 602 } 603 604 static void GenMinMax(LocationSummary* locations, 605 bool is_min, 606 bool is_long, 607 MacroAssembler* masm) { 608 Location op1 = locations->InAt(0); 609 Location op2 = locations->InAt(1); 610 Location out = locations->Out(); 611 612 Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1); 613 Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2); 614 Register out_reg = is_long ? 
XRegisterFrom(out) : WRegisterFrom(out); 615 616 __ Cmp(op1_reg, op2_reg); 617 __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt); 618 } 619 620 void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) { 621 CreateIntIntToIntLocations(arena_, invoke); 622 } 623 624 void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) { 625 GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler()); 626 } 627 628 void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) { 629 CreateIntIntToIntLocations(arena_, invoke); 630 } 631 632 void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) { 633 GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler()); 634 } 635 636 void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) { 637 CreateIntIntToIntLocations(arena_, invoke); 638 } 639 640 void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) { 641 GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler()); 642 } 643 644 void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) { 645 CreateIntIntToIntLocations(arena_, invoke); 646 } 647 648 void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) { 649 GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler()); 650 } 651 652 void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) { 653 CreateFPToFPLocations(arena_, invoke); 654 } 655 656 void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) { 657 LocationSummary* locations = invoke->GetLocations(); 658 MacroAssembler* masm = GetVIXLAssembler(); 659 __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0))); 660 } 661 662 void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) { 663 CreateFPToFPLocations(arena_, invoke); 664 } 665 666 void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) { 667 LocationSummary* locations = invoke->GetLocations(); 668 MacroAssembler* masm = GetVIXLAssembler(); 669 __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0))); 670 } 671 672 void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) { 673 CreateFPToFPLocations(arena_, invoke); 674 } 675 676 void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) { 677 LocationSummary* locations = invoke->GetLocations(); 678 MacroAssembler* masm = GetVIXLAssembler(); 679 __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0))); 680 } 681 682 void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) { 683 CreateFPToFPLocations(arena_, invoke); 684 } 685 686 void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) { 687 LocationSummary* locations = invoke->GetLocations(); 688 MacroAssembler* masm = GetVIXLAssembler(); 689 __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0))); 690 } 691 692 static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* arena, HInvoke* invoke) { 693 LocationSummary* locations = new (arena) LocationSummary(invoke, 694 LocationSummary::kNoCall, 695 kIntrinsified); 696 locations->SetInAt(0, Location::RequiresFpuRegister()); 697 locations->SetOut(Location::RequiresRegister()); 698 locations->AddTemp(Location::RequiresFpuRegister()); 699 } 700 701 static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) { 702 // Java 8 API definition for 
Math.round(): 703 // Return the closest long or int to the argument, with ties rounding to positive infinity. 704 // 705 // There is no single instruction in ARMv8 that can support the above definition. 706 // We choose to use FCVTAS here, because it has closest semantic. 707 // FCVTAS performs rounding to nearest integer, ties away from zero. 708 // For most inputs (positive values, zero or NaN), this instruction is enough. 709 // We only need a few handling code after FCVTAS if the input is negative half value. 710 // 711 // The reason why we didn't choose FCVTPS instruction here is that 712 // although it performs rounding toward positive infinity, it doesn't perform rounding to nearest. 713 // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2. 714 // If we were using this instruction, for most inputs, more handling code would be needed. 715 LocationSummary* l = invoke->GetLocations(); 716 FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0)); 717 FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0)); 718 Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out()); 719 vixl::aarch64::Label done; 720 721 // Round to nearest integer, ties away from zero. 722 __ Fcvtas(out_reg, in_reg); 723 724 // For positive values, zero or NaN inputs, rounding is done. 725 __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done); 726 727 // Handle input < 0 cases. 728 // If input is negative but not a tie, previous result (round to nearest) is valid. 729 // If input is a negative tie, out_reg += 1. 730 __ Frinta(tmp_fp, in_reg); 731 __ Fsub(tmp_fp, in_reg, tmp_fp); 732 __ Fcmp(tmp_fp, 0.5); 733 __ Cinc(out_reg, out_reg, eq); 734 735 __ Bind(&done); 736 } 737 738 void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) { 739 CreateFPToIntPlusFPTempLocations(arena_, invoke); 740 } 741 742 void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) { 743 GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler()); 744 } 745 746 void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) { 747 CreateFPToIntPlusFPTempLocations(arena_, invoke); 748 } 749 750 void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) { 751 GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler()); 752 } 753 754 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) { 755 CreateIntToIntLocations(arena_, invoke); 756 } 757 758 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) { 759 MacroAssembler* masm = GetVIXLAssembler(); 760 __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()), 761 AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0)); 762 } 763 764 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) { 765 CreateIntToIntLocations(arena_, invoke); 766 } 767 768 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) { 769 MacroAssembler* masm = GetVIXLAssembler(); 770 __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()), 771 AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0)); 772 } 773 774 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) { 775 CreateIntToIntLocations(arena_, invoke); 776 } 777 778 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) { 779 MacroAssembler* masm = GetVIXLAssembler(); 780 __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()), 781 AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0)); 782 
} 783 784 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) { 785 CreateIntToIntLocations(arena_, invoke); 786 } 787 788 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) { 789 MacroAssembler* masm = GetVIXLAssembler(); 790 __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()), 791 AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0)); 792 } 793 794 static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { 795 LocationSummary* locations = new (arena) LocationSummary(invoke, 796 LocationSummary::kNoCall, 797 kIntrinsified); 798 locations->SetInAt(0, Location::RequiresRegister()); 799 locations->SetInAt(1, Location::RequiresRegister()); 800 } 801 802 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) { 803 CreateIntIntToVoidLocations(arena_, invoke); 804 } 805 806 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) { 807 MacroAssembler* masm = GetVIXLAssembler(); 808 __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)), 809 AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0)); 810 } 811 812 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) { 813 CreateIntIntToVoidLocations(arena_, invoke); 814 } 815 816 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) { 817 MacroAssembler* masm = GetVIXLAssembler(); 818 __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)), 819 AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0)); 820 } 821 822 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) { 823 CreateIntIntToVoidLocations(arena_, invoke); 824 } 825 826 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) { 827 MacroAssembler* masm = GetVIXLAssembler(); 828 __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)), 829 AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0)); 830 } 831 832 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) { 833 CreateIntIntToVoidLocations(arena_, invoke); 834 } 835 836 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) { 837 MacroAssembler* masm = GetVIXLAssembler(); 838 __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)), 839 AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0)); 840 } 841 842 void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) { 843 LocationSummary* locations = new (arena_) LocationSummary(invoke, 844 LocationSummary::kNoCall, 845 kIntrinsified); 846 locations->SetOut(Location::RequiresRegister()); 847 } 848 849 void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) { 850 codegen_->Load(Primitive::kPrimNot, WRegisterFrom(invoke->GetLocations()->Out()), 851 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value())); 852 } 853 854 static void GenUnsafeGet(HInvoke* invoke, 855 Primitive::Type type, 856 bool is_volatile, 857 CodeGeneratorARM64* codegen) { 858 LocationSummary* locations = invoke->GetLocations(); 859 DCHECK((type == Primitive::kPrimInt) || 860 (type == Primitive::kPrimLong) || 861 (type == Primitive::kPrimNot)); 862 Location base_loc = locations->InAt(1); 863 Register base = WRegisterFrom(base_loc); // Object pointer. 864 Location offset_loc = locations->InAt(2); 865 Register offset = XRegisterFrom(offset_loc); // Long offset. 
866 Location trg_loc = locations->Out(); 867 Register trg = RegisterFrom(trg_loc, type); 868 869 if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 870 // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case. 871 Register temp = WRegisterFrom(locations->GetTemp(0)); 872 codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, 873 trg_loc, 874 base, 875 /* offset */ 0u, 876 /* index */ offset_loc, 877 /* scale_factor */ 0u, 878 temp, 879 /* needs_null_check */ false, 880 is_volatile); 881 } else { 882 // Other cases. 883 MemOperand mem_op(base.X(), offset); 884 if (is_volatile) { 885 codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true); 886 } else { 887 codegen->Load(type, trg, mem_op); 888 } 889 890 if (type == Primitive::kPrimNot) { 891 DCHECK(trg.IsW()); 892 codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc); 893 } 894 } 895 } 896 897 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { 898 bool can_call = kEmitCompilerReadBarrier && 899 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || 900 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); 901 LocationSummary* locations = new (arena) LocationSummary(invoke, 902 (can_call 903 ? LocationSummary::kCallOnSlowPath 904 : LocationSummary::kNoCall), 905 kIntrinsified); 906 if (can_call && kUseBakerReadBarrier) { 907 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 908 // We need a temporary register for the read barrier marking slow 909 // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier. 910 locations->AddTemp(Location::RequiresRegister()); 911 } 912 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 913 locations->SetInAt(1, Location::RequiresRegister()); 914 locations->SetInAt(2, Location::RequiresRegister()); 915 locations->SetOut(Location::RequiresRegister(), 916 (can_call ? 
Location::kOutputOverlap : Location::kNoOutputOverlap)); 917 } 918 919 void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) { 920 CreateIntIntIntToIntLocations(arena_, invoke); 921 } 922 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) { 923 CreateIntIntIntToIntLocations(arena_, invoke); 924 } 925 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) { 926 CreateIntIntIntToIntLocations(arena_, invoke); 927 } 928 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 929 CreateIntIntIntToIntLocations(arena_, invoke); 930 } 931 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) { 932 CreateIntIntIntToIntLocations(arena_, invoke); 933 } 934 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 935 CreateIntIntIntToIntLocations(arena_, invoke); 936 } 937 938 void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) { 939 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); 940 } 941 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) { 942 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); 943 } 944 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) { 945 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); 946 } 947 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 948 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); 949 } 950 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) { 951 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); 952 } 953 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 954 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); 955 } 956 957 static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) { 958 LocationSummary* locations = new (arena) LocationSummary(invoke, 959 LocationSummary::kNoCall, 960 kIntrinsified); 961 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
962 locations->SetInAt(1, Location::RequiresRegister()); 963 locations->SetInAt(2, Location::RequiresRegister()); 964 locations->SetInAt(3, Location::RequiresRegister()); 965 } 966 967 void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) { 968 CreateIntIntIntIntToVoid(arena_, invoke); 969 } 970 void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) { 971 CreateIntIntIntIntToVoid(arena_, invoke); 972 } 973 void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) { 974 CreateIntIntIntIntToVoid(arena_, invoke); 975 } 976 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) { 977 CreateIntIntIntIntToVoid(arena_, invoke); 978 } 979 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 980 CreateIntIntIntIntToVoid(arena_, invoke); 981 } 982 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 983 CreateIntIntIntIntToVoid(arena_, invoke); 984 } 985 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) { 986 CreateIntIntIntIntToVoid(arena_, invoke); 987 } 988 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) { 989 CreateIntIntIntIntToVoid(arena_, invoke); 990 } 991 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { 992 CreateIntIntIntIntToVoid(arena_, invoke); 993 } 994 995 static void GenUnsafePut(HInvoke* invoke, 996 Primitive::Type type, 997 bool is_volatile, 998 bool is_ordered, 999 CodeGeneratorARM64* codegen) { 1000 LocationSummary* locations = invoke->GetLocations(); 1001 MacroAssembler* masm = codegen->GetVIXLAssembler(); 1002 1003 Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. 1004 Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. 1005 Register value = RegisterFrom(locations->InAt(3), type); 1006 Register source = value; 1007 MemOperand mem_op(base.X(), offset); 1008 1009 { 1010 // We use a block to end the scratch scope before the write barrier, thus 1011 // freeing the temporary registers so they can be used in `MarkGCCard`. 1012 UseScratchRegisterScope temps(masm); 1013 1014 if (kPoisonHeapReferences && type == Primitive::kPrimNot) { 1015 DCHECK(value.IsW()); 1016 Register temp = temps.AcquireW(); 1017 __ Mov(temp.W(), value.W()); 1018 codegen->GetAssembler()->PoisonHeapReference(temp.W()); 1019 source = temp; 1020 } 1021 1022 if (is_volatile || is_ordered) { 1023 codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check */ false); 1024 } else { 1025 codegen->Store(type, source, mem_op); 1026 } 1027 } 1028 1029 if (type == Primitive::kPrimNot) { 1030 bool value_can_be_null = true; // TODO: Worth finding out this information? 
1031 codegen->MarkGCCard(base, value, value_can_be_null); 1032 } 1033 } 1034 1035 void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) { 1036 GenUnsafePut(invoke, 1037 Primitive::kPrimInt, 1038 /* is_volatile */ false, 1039 /* is_ordered */ false, 1040 codegen_); 1041 } 1042 void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) { 1043 GenUnsafePut(invoke, 1044 Primitive::kPrimInt, 1045 /* is_volatile */ false, 1046 /* is_ordered */ true, 1047 codegen_); 1048 } 1049 void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) { 1050 GenUnsafePut(invoke, 1051 Primitive::kPrimInt, 1052 /* is_volatile */ true, 1053 /* is_ordered */ false, 1054 codegen_); 1055 } 1056 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) { 1057 GenUnsafePut(invoke, 1058 Primitive::kPrimNot, 1059 /* is_volatile */ false, 1060 /* is_ordered */ false, 1061 codegen_); 1062 } 1063 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 1064 GenUnsafePut(invoke, 1065 Primitive::kPrimNot, 1066 /* is_volatile */ false, 1067 /* is_ordered */ true, 1068 codegen_); 1069 } 1070 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 1071 GenUnsafePut(invoke, 1072 Primitive::kPrimNot, 1073 /* is_volatile */ true, 1074 /* is_ordered */ false, 1075 codegen_); 1076 } 1077 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) { 1078 GenUnsafePut(invoke, 1079 Primitive::kPrimLong, 1080 /* is_volatile */ false, 1081 /* is_ordered */ false, 1082 codegen_); 1083 } 1084 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) { 1085 GenUnsafePut(invoke, 1086 Primitive::kPrimLong, 1087 /* is_volatile */ false, 1088 /* is_ordered */ true, 1089 codegen_); 1090 } 1091 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { 1092 GenUnsafePut(invoke, 1093 Primitive::kPrimLong, 1094 /* is_volatile */ true, 1095 /* is_ordered */ false, 1096 codegen_); 1097 } 1098 1099 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, 1100 HInvoke* invoke, 1101 Primitive::Type type) { 1102 bool can_call = kEmitCompilerReadBarrier && 1103 kUseBakerReadBarrier && 1104 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); 1105 LocationSummary* locations = new (arena) LocationSummary(invoke, 1106 (can_call 1107 ? LocationSummary::kCallOnSlowPath 1108 : LocationSummary::kNoCall), 1109 kIntrinsified); 1110 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 1111 locations->SetInAt(1, Location::RequiresRegister()); 1112 locations->SetInAt(2, Location::RequiresRegister()); 1113 locations->SetInAt(3, Location::RequiresRegister()); 1114 locations->SetInAt(4, Location::RequiresRegister()); 1115 1116 // If heap poisoning is enabled, we don't want the unpoisoning 1117 // operations to potentially clobber the output. Likewise when 1118 // emitting a (Baker) read barrier, which may call. 1119 Location::OutputOverlap overlaps = 1120 ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call) 1121 ? Location::kOutputOverlap 1122 : Location::kNoOutputOverlap; 1123 locations->SetOut(Location::RequiresRegister(), overlaps); 1124 if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 1125 // Temporary register for (Baker) read barrier. 
1126 locations->AddTemp(Location::RequiresRegister()); 1127 } 1128 } 1129 1130 static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* codegen) { 1131 MacroAssembler* masm = codegen->GetVIXLAssembler(); 1132 LocationSummary* locations = invoke->GetLocations(); 1133 1134 Location out_loc = locations->Out(); 1135 Register out = WRegisterFrom(out_loc); // Boolean result. 1136 1137 Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. 1138 Location offset_loc = locations->InAt(2); 1139 Register offset = XRegisterFrom(offset_loc); // Long offset. 1140 Register expected = RegisterFrom(locations->InAt(3), type); // Expected. 1141 Register value = RegisterFrom(locations->InAt(4), type); // Value. 1142 1143 // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps. 1144 if (type == Primitive::kPrimNot) { 1145 // Mark card for object assuming new value is stored. 1146 bool value_can_be_null = true; // TODO: Worth finding out this information? 1147 codegen->MarkGCCard(base, value, value_can_be_null); 1148 1149 // The only read barrier implementation supporting the 1150 // UnsafeCASObject intrinsic is the Baker-style read barriers. 1151 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 1152 1153 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 1154 Register temp = WRegisterFrom(locations->GetTemp(0)); 1155 // Need to make sure the reference stored in the field is a to-space 1156 // one before attempting the CAS or the CAS could fail incorrectly. 1157 codegen->UpdateReferenceFieldWithBakerReadBarrier( 1158 invoke, 1159 out_loc, // Unused, used only as a "temporary" within the read barrier. 1160 base, 1161 /* field_offset */ offset_loc, 1162 temp, 1163 /* needs_null_check */ false, 1164 /* use_load_acquire */ false); 1165 } 1166 } 1167 1168 UseScratchRegisterScope temps(masm); 1169 Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory. 1170 Register tmp_value = temps.AcquireSameSizeAs(value); // Value in memory. 1171 1172 Register tmp_32 = tmp_value.W(); 1173 1174 __ Add(tmp_ptr, base.X(), Operand(offset)); 1175 1176 if (kPoisonHeapReferences && type == Primitive::kPrimNot) { 1177 codegen->GetAssembler()->PoisonHeapReference(expected); 1178 if (value.Is(expected)) { 1179 // Do not poison `value`, as it is the same register as 1180 // `expected`, which has just been poisoned. 1181 } else { 1182 codegen->GetAssembler()->PoisonHeapReference(value); 1183 } 1184 } 1185 1186 // do { 1187 // tmp_value = [tmp_ptr] - expected; 1188 // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); 1189 // result = tmp_value != 0; 1190 1191 vixl::aarch64::Label loop_head, exit_loop; 1192 __ Bind(&loop_head); 1193 __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); 1194 __ Cmp(tmp_value, expected); 1195 __ B(&exit_loop, ne); 1196 __ Stlxr(tmp_32, value, MemOperand(tmp_ptr)); 1197 __ Cbnz(tmp_32, &loop_head); 1198 __ Bind(&exit_loop); 1199 __ Cset(out, eq); 1200 1201 if (kPoisonHeapReferences && type == Primitive::kPrimNot) { 1202 codegen->GetAssembler()->UnpoisonHeapReference(expected); 1203 if (value.Is(expected)) { 1204 // Do not unpoison `value`, as it is the same register as 1205 // `expected`, which has just been unpoisoned. 
1206 } else { 1207 codegen->GetAssembler()->UnpoisonHeapReference(value); 1208 } 1209 } 1210 } 1211 1212 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) { 1213 CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimInt); 1214 } 1215 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) { 1216 CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong); 1217 } 1218 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { 1219 // The only read barrier implementation supporting the 1220 // UnsafeCASObject intrinsic is the Baker-style read barriers. 1221 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { 1222 return; 1223 } 1224 1225 CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimNot); 1226 } 1227 1228 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) { 1229 GenCas(invoke, Primitive::kPrimInt, codegen_); 1230 } 1231 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) { 1232 GenCas(invoke, Primitive::kPrimLong, codegen_); 1233 } 1234 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) { 1235 // The only read barrier implementation supporting the 1236 // UnsafeCASObject intrinsic is the Baker-style read barriers. 1237 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 1238 1239 GenCas(invoke, Primitive::kPrimNot, codegen_); 1240 } 1241 1242 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { 1243 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1244 invoke->InputAt(1)->CanBeNull() 1245 ? LocationSummary::kCallOnSlowPath 1246 : LocationSummary::kNoCall, 1247 kIntrinsified); 1248 locations->SetInAt(0, Location::RequiresRegister()); 1249 locations->SetInAt(1, Location::RequiresRegister()); 1250 locations->AddTemp(Location::RequiresRegister()); 1251 locations->AddTemp(Location::RequiresRegister()); 1252 locations->AddTemp(Location::RequiresRegister()); 1253 // Need temporary registers for String compression's feature. 1254 if (mirror::kUseStringCompression) { 1255 locations->AddTemp(Location::RequiresRegister()); 1256 } 1257 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 1258 } 1259 1260 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { 1261 MacroAssembler* masm = GetVIXLAssembler(); 1262 LocationSummary* locations = invoke->GetLocations(); 1263 1264 Register str = InputRegisterAt(invoke, 0); 1265 Register arg = InputRegisterAt(invoke, 1); 1266 DCHECK(str.IsW()); 1267 DCHECK(arg.IsW()); 1268 Register out = OutputRegister(invoke); 1269 1270 Register temp0 = WRegisterFrom(locations->GetTemp(0)); 1271 Register temp1 = WRegisterFrom(locations->GetTemp(1)); 1272 Register temp2 = WRegisterFrom(locations->GetTemp(2)); 1273 Register temp3; 1274 if (mirror::kUseStringCompression) { 1275 temp3 = WRegisterFrom(locations->GetTemp(3)); 1276 } 1277 1278 vixl::aarch64::Label loop; 1279 vixl::aarch64::Label find_char_diff; 1280 vixl::aarch64::Label end; 1281 vixl::aarch64::Label different_compression; 1282 1283 // Get offsets of count and value fields within a string object. 1284 const int32_t count_offset = mirror::String::CountOffset().Int32Value(); 1285 const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); 1286 1287 // Note that the null check must have been done earlier. 1288 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1289 1290 // Take slow path and throw if input can be and is null. 
1291 SlowPathCodeARM64* slow_path = nullptr; 1292 const bool can_slow_path = invoke->InputAt(1)->CanBeNull(); 1293 if (can_slow_path) { 1294 slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); 1295 codegen_->AddSlowPath(slow_path); 1296 __ Cbz(arg, slow_path->GetEntryLabel()); 1297 } 1298 1299 // Reference equality check, return 0 if same reference. 1300 __ Subs(out, str, arg); 1301 __ B(&end, eq); 1302 1303 if (mirror::kUseStringCompression) { 1304 // Load `count` fields of this and argument strings. 1305 __ Ldr(temp3, HeapOperand(str, count_offset)); 1306 __ Ldr(temp2, HeapOperand(arg, count_offset)); 1307 // Clean out compression flag from lengths. 1308 __ Lsr(temp0, temp3, 1u); 1309 __ Lsr(temp1, temp2, 1u); 1310 } else { 1311 // Load lengths of this and argument strings. 1312 __ Ldr(temp0, HeapOperand(str, count_offset)); 1313 __ Ldr(temp1, HeapOperand(arg, count_offset)); 1314 } 1315 // out = length diff. 1316 __ Subs(out, temp0, temp1); 1317 // temp0 = min(len(str), len(arg)). 1318 __ Csel(temp0, temp1, temp0, ge); 1319 // Shorter string is empty? 1320 __ Cbz(temp0, &end); 1321 1322 if (mirror::kUseStringCompression) { 1323 // Check if both strings using same compression style to use this comparison loop. 1324 __ Eor(temp2, temp2, Operand(temp3)); 1325 // Interleave with compression flag extraction which is needed for both paths 1326 // and also set flags which is needed only for the different compressions path. 1327 __ Ands(temp3.W(), temp3.W(), Operand(1)); 1328 __ Tbnz(temp2, 0, &different_compression); // Does not use flags. 1329 } 1330 // Store offset of string value in preparation for comparison loop. 1331 __ Mov(temp1, value_offset); 1332 if (mirror::kUseStringCompression) { 1333 // For string compression, calculate the number of bytes to compare (not chars). 1334 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned. 1335 __ Lsl(temp0, temp0, temp3); 1336 } 1337 1338 UseScratchRegisterScope scratch_scope(masm); 1339 Register temp4 = scratch_scope.AcquireX(); 1340 1341 // Assertions that must hold in order to compare strings 8 bytes at a time. 1342 DCHECK_ALIGNED(value_offset, 8); 1343 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); 1344 1345 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); 1346 DCHECK_EQ(char_size, 2u); 1347 1348 // Promote temp2 to an X reg, ready for LDR. 1349 temp2 = temp2.X(); 1350 1351 // Loop to compare 4x16-bit characters at a time (ok because of string data alignment). 1352 __ Bind(&loop); 1353 __ Ldr(temp4, MemOperand(str.X(), temp1.X())); 1354 __ Ldr(temp2, MemOperand(arg.X(), temp1.X())); 1355 __ Cmp(temp4, temp2); 1356 __ B(ne, &find_char_diff); 1357 __ Add(temp1, temp1, char_size * 4); 1358 // With string compression, we have compared 8 bytes, otherwise 4 chars. 1359 __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4); 1360 __ B(&loop, hi); 1361 __ B(&end); 1362 1363 // Promote temp1 to an X reg, ready for EOR. 1364 temp1 = temp1.X(); 1365 1366 // Find the single character difference. 1367 __ Bind(&find_char_diff); 1368 // Get the bit position of the first character that differs. 1369 __ Eor(temp1, temp2, temp4); 1370 __ Rbit(temp1, temp1); 1371 __ Clz(temp1, temp1); 1372 1373 // If the number of chars remaining <= the index where the difference occurs (0-3), then 1374 // the difference occurs outside the remaining string data, so just return length diff (out). 
1375 // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the 1376 // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or 1377 // unsigned when string compression is disabled. 1378 // When it's enabled, the comparison must be unsigned. 1379 __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4)); 1380 __ B(ls, &end); 1381 1382 // Extract the characters and calculate the difference. 1383 if (mirror:: kUseStringCompression) { 1384 __ Bic(temp1, temp1, 0x7); 1385 __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u)); 1386 } else { 1387 __ Bic(temp1, temp1, 0xf); 1388 } 1389 __ Lsr(temp2, temp2, temp1); 1390 __ Lsr(temp4, temp4, temp1); 1391 if (mirror::kUseStringCompression) { 1392 // Prioritize the case of compressed strings and calculate such result first. 1393 __ Uxtb(temp1, temp4); 1394 __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB)); 1395 __ Tbz(temp3, 0u, &end); // If actually compressed, we're done. 1396 } 1397 __ Uxth(temp4, temp4); 1398 __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH)); 1399 1400 if (mirror::kUseStringCompression) { 1401 __ B(&end); 1402 __ Bind(&different_compression); 1403 1404 // Comparison for different compression style. 1405 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); 1406 DCHECK_EQ(c_char_size, 1u); 1407 temp1 = temp1.W(); 1408 temp2 = temp2.W(); 1409 temp4 = temp4.W(); 1410 1411 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer. 1412 // Note that flags have been set by the `str` compression flag extraction to `temp3` 1413 // before branching to the `different_compression` label. 1414 __ Csel(temp1, str, arg, eq); // Pointer to the compressed string. 1415 __ Csel(temp2, str, arg, ne); // Pointer to the uncompressed string. 1416 1417 // We want to free up the temp3, currently holding `str` compression flag, for comparison. 1418 // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat 1419 // as unsigned. Start by freeing the bit with a LSL and continue further down by a SUB which 1420 // will allow `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition. 1421 __ Lsl(temp0, temp0, 1u); 1422 1423 // Adjust temp1 and temp2 from string pointers to data pointers. 1424 __ Add(temp1, temp1, Operand(value_offset)); 1425 __ Add(temp2, temp2, Operand(value_offset)); 1426 1427 // Complete the move of the compression flag. 1428 __ Sub(temp0, temp0, Operand(temp3)); 1429 1430 vixl::aarch64::Label different_compression_loop; 1431 vixl::aarch64::Label different_compression_diff; 1432 1433 __ Bind(&different_compression_loop); 1434 __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex)); 1435 __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex)); 1436 __ Subs(temp4, temp4, Operand(temp3)); 1437 __ B(&different_compression_diff, ne); 1438 __ Subs(temp0, temp0, 2); 1439 __ B(&different_compression_loop, hi); 1440 __ B(&end); 1441 1442 // Calculate the difference. 1443 __ Bind(&different_compression_diff); 1444 __ Tst(temp0, Operand(1)); 1445 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1446 "Expecting 0=compressed, 1=uncompressed"); 1447 __ Cneg(out, temp4, ne); 1448 } 1449 1450 __ Bind(&end); 1451 1452 if (can_slow_path) { 1453 __ Bind(slow_path->GetExitLabel()); 1454 } 1455 } 1456 1457 // The cut off for unrolling the loop in String.equals() intrinsic for const strings. 
1458 // The normal loop plus the pre-header is 9 instructions without string compression and 12 1459 // instructions with string compression. We can compare up to 8 bytes in 4 instructions 1460 // (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up 1461 // to 10 instructions for the unrolled loop. 1462 constexpr size_t kShortConstStringEqualsCutoffInBytes = 32; 1463 1464 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) { 1465 if (candidate->IsLoadString()) { 1466 HLoadString* load_string = candidate->AsLoadString(); 1467 const DexFile& dex_file = load_string->GetDexFile(); 1468 return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length); 1469 } 1470 return nullptr; 1471 } 1472 1473 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) { 1474 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1475 LocationSummary::kNoCall, 1476 kIntrinsified); 1477 locations->SetInAt(0, Location::RequiresRegister()); 1478 locations->SetInAt(1, Location::RequiresRegister()); 1479 1480 // For the generic implementation and for long const strings we need a temporary. 1481 // We do not need it for short const strings, up to 8 bytes, see code generation below. 1482 uint32_t const_string_length = 0u; 1483 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length); 1484 if (const_string == nullptr) { 1485 const_string = GetConstString(invoke->InputAt(1), &const_string_length); 1486 } 1487 bool is_compressed = 1488 mirror::kUseStringCompression && 1489 const_string != nullptr && 1490 mirror::String::DexFileStringAllASCII(const_string, const_string_length); 1491 if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) { 1492 locations->AddTemp(Location::RequiresRegister()); 1493 } 1494 1495 // TODO: If the String.equals() is used only for an immediately following HIf, we can 1496 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks. 1497 // Then we shall need an extra temporary register instead of the output register. 1498 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 1499 } 1500 1501 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { 1502 MacroAssembler* masm = GetVIXLAssembler(); 1503 LocationSummary* locations = invoke->GetLocations(); 1504 1505 Register str = WRegisterFrom(locations->InAt(0)); 1506 Register arg = WRegisterFrom(locations->InAt(1)); 1507 Register out = XRegisterFrom(locations->Out()); 1508 1509 UseScratchRegisterScope scratch_scope(masm); 1510 Register temp = scratch_scope.AcquireW(); 1511 Register temp1 = scratch_scope.AcquireW(); 1512 1513 vixl::aarch64::Label loop; 1514 vixl::aarch64::Label end; 1515 vixl::aarch64::Label return_true; 1516 vixl::aarch64::Label return_false; 1517 1518 // Get offsets of count, value, and class fields within a string object. 1519 const int32_t count_offset = mirror::String::CountOffset().Int32Value(); 1520 const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); 1521 const int32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 1522 1523 // Note that the null check must have been done earlier. 1524 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1525 1526 StringEqualsOptimizations optimizations(invoke); 1527 if (!optimizations.GetArgumentNotNull()) { 1528 // Check if input is null, return false if it is. 
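// (A sketch of the Java-level contract this early-out implements, not actual library code:
//
//   if (anObject == null) return false;   // String.equals(null) is false, never an NPE.
//
// so a plain CBZ to return_false is sufficient here.)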
1529 __ Cbz(arg, &return_false); 1530 } 1531 1532 // Reference equality check, return true if same reference. 1533 __ Cmp(str, arg); 1534 __ B(&return_true, eq); 1535 1536 if (!optimizations.GetArgumentIsString()) { 1537 // Instanceof check for the argument by comparing class fields. 1538 // All string objects must have the same type since String cannot be subclassed. 1539 // Receiver must be a string object, so its class field is equal to all strings' class fields. 1540 // If the argument is a string object, its class field must be equal to receiver's class field. 1541 __ Ldr(temp, MemOperand(str.X(), class_offset)); 1542 __ Ldr(temp1, MemOperand(arg.X(), class_offset)); 1543 __ Cmp(temp, temp1); 1544 __ B(&return_false, ne); 1545 } 1546 1547 // Check if one of the inputs is a const string. Do not special-case both strings 1548 // being const, such cases should be handled by constant folding if needed. 1549 uint32_t const_string_length = 0u; 1550 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length); 1551 if (const_string == nullptr) { 1552 const_string = GetConstString(invoke->InputAt(1), &const_string_length); 1553 if (const_string != nullptr) { 1554 std::swap(str, arg); // Make sure the const string is in `str`. 1555 } 1556 } 1557 bool is_compressed = 1558 mirror::kUseStringCompression && 1559 const_string != nullptr && 1560 mirror::String::DexFileStringAllASCII(const_string, const_string_length); 1561 1562 if (const_string != nullptr) { 1563 // Load `count` field of the argument string and check if it matches the const string. 1564 // Also compares the compression style, if differs return false. 1565 __ Ldr(temp, MemOperand(arg.X(), count_offset)); 1566 // Temporarily release temp1 as we may not be able to embed the flagged count in CMP immediate. 1567 scratch_scope.Release(temp1); 1568 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed))); 1569 temp1 = scratch_scope.AcquireW(); 1570 __ B(&return_false, ne); 1571 } else { 1572 // Load `count` fields of this and argument strings. 1573 __ Ldr(temp, MemOperand(str.X(), count_offset)); 1574 __ Ldr(temp1, MemOperand(arg.X(), count_offset)); 1575 // Check if `count` fields are equal, return false if they're not. 1576 // Also compares the compression style, if differs return false. 1577 __ Cmp(temp, temp1); 1578 __ B(&return_false, ne); 1579 } 1580 1581 // Assertions that must hold in order to compare strings 8 bytes at a time. 1582 DCHECK_ALIGNED(value_offset, 8); 1583 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); 1584 1585 if (const_string != nullptr && 1586 const_string_length < (is_compressed ? kShortConstStringEqualsCutoffInBytes 1587 : kShortConstStringEqualsCutoffInBytes / 2u)) { 1588 // Load and compare the contents. Though we know the contents of the short const string 1589 // at compile time, materializing constants may be more code than loading from memory. 1590 int32_t offset = value_offset; 1591 size_t remaining_bytes = 1592 RoundUp(is_compressed ? 
const_string_length : const_string_length * 2u, 8u); 1593 temp = temp.X(); 1594 temp1 = temp1.X(); 1595 while (remaining_bytes > 8u) { 1596 Register temp2 = XRegisterFrom(locations->GetTemp(0)); 1597 __ Ldp(temp, temp1, MemOperand(str.X(), offset)); 1598 __ Ldp(temp2, out, MemOperand(arg.X(), offset)); 1599 __ Cmp(temp, temp2); 1600 __ Ccmp(temp1, out, NoFlag, eq); 1601 __ B(&return_false, ne); 1602 offset += 2u * sizeof(uint64_t); 1603 remaining_bytes -= 2u * sizeof(uint64_t); 1604 } 1605 if (remaining_bytes != 0u) { 1606 __ Ldr(temp, MemOperand(str.X(), offset)); 1607 __ Ldr(temp1, MemOperand(arg.X(), offset)); 1608 __ Cmp(temp, temp1); 1609 __ B(&return_false, ne); 1610 } 1611 } else { 1612 // Return true if both strings are empty. Even with string compression `count == 0` means empty. 1613 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1614 "Expecting 0=compressed, 1=uncompressed"); 1615 __ Cbz(temp, &return_true); 1616 1617 if (mirror::kUseStringCompression) { 1618 // For string compression, calculate the number of bytes to compare (not chars). 1619 // This could in theory exceed INT32_MAX, so treat temp as unsigned. 1620 __ And(temp1, temp, Operand(1)); // Extract compression flag. 1621 __ Lsr(temp, temp, 1u); // Extract length. 1622 __ Lsl(temp, temp, temp1); // Calculate number of bytes to compare. 1623 } 1624 1625 // Store offset of string value in preparation for comparison loop 1626 __ Mov(temp1, value_offset); 1627 1628 temp1 = temp1.X(); 1629 Register temp2 = XRegisterFrom(locations->GetTemp(0)); 1630 // Loop to compare strings 8 bytes at a time starting at the front of the string. 1631 // Ok to do this because strings are zero-padded to kObjectAlignment. 1632 __ Bind(&loop); 1633 __ Ldr(out, MemOperand(str.X(), temp1)); 1634 __ Ldr(temp2, MemOperand(arg.X(), temp1)); 1635 __ Add(temp1, temp1, Operand(sizeof(uint64_t))); 1636 __ Cmp(out, temp2); 1637 __ B(&return_false, ne); 1638 // With string compression, we have compared 8 bytes, otherwise 4 chars. 1639 __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags); 1640 __ B(&loop, hi); 1641 } 1642 1643 // Return true and exit the function. 1644 // If loop does not result in returning false, we return true. 1645 __ Bind(&return_true); 1646 __ Mov(out, 1); 1647 __ B(&end); 1648 1649 // Return false and exit the function. 1650 __ Bind(&return_false); 1651 __ Mov(out, 0); 1652 __ Bind(&end); 1653 } 1654 1655 static void GenerateVisitStringIndexOf(HInvoke* invoke, 1656 MacroAssembler* masm, 1657 CodeGeneratorARM64* codegen, 1658 ArenaAllocator* allocator, 1659 bool start_at_zero) { 1660 LocationSummary* locations = invoke->GetLocations(); 1661 1662 // Note that the null check must have been done earlier. 1663 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1664 1665 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, 1666 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char. 1667 SlowPathCodeARM64* slow_path = nullptr; 1668 HInstruction* code_point = invoke->InputAt(1); 1669 if (code_point->IsIntConstant()) { 1670 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) { 1671 // Always needs the slow-path. We could directly dispatch to it, but this case should be 1672 // rare, so for simplicity just put the full slow-path down and branch unconditionally. 
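// (Background, for readers of this path: code points above 0xFFFF are supplementary
// characters stored as a surrogate pair in the UTF-16 string data, so the simple
// 16-bit char search performed by the fast path cannot match them; the slow path
// re-dispatches to the managed String.indexOf implementation instead.)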
1673 slow_path = new (allocator) IntrinsicSlowPathARM64(invoke); 1674 codegen->AddSlowPath(slow_path); 1675 __ B(slow_path->GetEntryLabel()); 1676 __ Bind(slow_path->GetExitLabel()); 1677 return; 1678 } 1679 } else if (code_point->GetType() != Primitive::kPrimChar) { 1680 Register char_reg = WRegisterFrom(locations->InAt(1)); 1681 __ Tst(char_reg, 0xFFFF0000); 1682 slow_path = new (allocator) IntrinsicSlowPathARM64(invoke); 1683 codegen->AddSlowPath(slow_path); 1684 __ B(ne, slow_path->GetEntryLabel()); 1685 } 1686 1687 if (start_at_zero) { 1688 // Start-index = 0. 1689 Register tmp_reg = WRegisterFrom(locations->GetTemp(0)); 1690 __ Mov(tmp_reg, 0); 1691 } 1692 1693 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path); 1694 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>(); 1695 1696 if (slow_path != nullptr) { 1697 __ Bind(slow_path->GetExitLabel()); 1698 } 1699 } 1700 1701 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) { 1702 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1703 LocationSummary::kCallOnMainAndSlowPath, 1704 kIntrinsified); 1705 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's 1706 // best to align the inputs accordingly. 1707 InvokeRuntimeCallingConvention calling_convention; 1708 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1709 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1710 locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); 1711 1712 // Need to send start_index=0. 1713 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); 1714 } 1715 1716 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) { 1717 GenerateVisitStringIndexOf( 1718 invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true); 1719 } 1720 1721 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { 1722 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1723 LocationSummary::kCallOnMainAndSlowPath, 1724 kIntrinsified); 1725 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's 1726 // best to align the inputs accordingly. 
1727 InvokeRuntimeCallingConvention calling_convention; 1728 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1729 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1730 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 1731 locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); 1732 } 1733 1734 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) { 1735 GenerateVisitStringIndexOf( 1736 invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false); 1737 } 1738 1739 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { 1740 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1741 LocationSummary::kCallOnMainAndSlowPath, 1742 kIntrinsified); 1743 InvokeRuntimeCallingConvention calling_convention; 1744 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1745 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1746 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 1747 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3))); 1748 locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); 1749 } 1750 1751 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { 1752 MacroAssembler* masm = GetVIXLAssembler(); 1753 LocationSummary* locations = invoke->GetLocations(); 1754 1755 Register byte_array = WRegisterFrom(locations->InAt(0)); 1756 __ Cmp(byte_array, 0); 1757 SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); 1758 codegen_->AddSlowPath(slow_path); 1759 __ B(eq, slow_path->GetEntryLabel()); 1760 1761 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path); 1762 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); 1763 __ Bind(slow_path->GetExitLabel()); 1764 } 1765 1766 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) { 1767 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1768 LocationSummary::kCallOnMainOnly, 1769 kIntrinsified); 1770 InvokeRuntimeCallingConvention calling_convention; 1771 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1772 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1773 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 1774 locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); 1775 } 1776 1777 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) { 1778 // No need to emit code checking whether `locations->InAt(2)` is a null 1779 // pointer, as callers of the native method 1780 // 1781 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) 1782 // 1783 // all include a null check on `data` before calling that method. 
1784 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); 1785 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); 1786 } 1787 1788 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) { 1789 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1790 LocationSummary::kCallOnMainAndSlowPath, 1791 kIntrinsified); 1792 InvokeRuntimeCallingConvention calling_convention; 1793 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1794 locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); 1795 } 1796 1797 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) { 1798 MacroAssembler* masm = GetVIXLAssembler(); 1799 LocationSummary* locations = invoke->GetLocations(); 1800 1801 Register string_to_copy = WRegisterFrom(locations->InAt(0)); 1802 __ Cmp(string_to_copy, 0); 1803 SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); 1804 codegen_->AddSlowPath(slow_path); 1805 __ B(eq, slow_path->GetEntryLabel()); 1806 1807 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path); 1808 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); 1809 __ Bind(slow_path->GetExitLabel()); 1810 } 1811 1812 static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { 1813 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); 1814 DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType())); 1815 DCHECK(Primitive::IsFloatingPointType(invoke->GetType())); 1816 1817 LocationSummary* const locations = new (arena) LocationSummary(invoke, 1818 LocationSummary::kCallOnMainOnly, 1819 kIntrinsified); 1820 InvokeRuntimeCallingConvention calling_convention; 1821 1822 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); 1823 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType())); 1824 } 1825 1826 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { 1827 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U); 1828 DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType())); 1829 DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(1)->GetType())); 1830 DCHECK(Primitive::IsFloatingPointType(invoke->GetType())); 1831 1832 LocationSummary* const locations = new (arena) LocationSummary(invoke, 1833 LocationSummary::kCallOnMainOnly, 1834 kIntrinsified); 1835 InvokeRuntimeCallingConvention calling_convention; 1836 1837 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); 1838 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1))); 1839 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType())); 1840 } 1841 1842 static void GenFPToFPCall(HInvoke* invoke, 1843 CodeGeneratorARM64* codegen, 1844 QuickEntrypointEnum entry) { 1845 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); 1846 } 1847 1848 void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) { 1849 CreateFPToFPCallLocations(arena_, invoke); 1850 } 1851 1852 void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) { 1853 GenFPToFPCall(invoke, codegen_, kQuickCos); 1854 } 1855 1856 void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) { 1857 CreateFPToFPCallLocations(arena_, invoke); 1858 } 1859 1860 void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) { 1861 GenFPToFPCall(invoke, codegen_, kQuickSin); 1862 } 1863 1864 void 
IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) { 1865 CreateFPToFPCallLocations(arena_, invoke); 1866 } 1867 1868 void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) { 1869 GenFPToFPCall(invoke, codegen_, kQuickAcos); 1870 } 1871 1872 void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) { 1873 CreateFPToFPCallLocations(arena_, invoke); 1874 } 1875 1876 void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) { 1877 GenFPToFPCall(invoke, codegen_, kQuickAsin); 1878 } 1879 1880 void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) { 1881 CreateFPToFPCallLocations(arena_, invoke); 1882 } 1883 1884 void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) { 1885 GenFPToFPCall(invoke, codegen_, kQuickAtan); 1886 } 1887 1888 void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) { 1889 CreateFPToFPCallLocations(arena_, invoke); 1890 } 1891 1892 void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) { 1893 GenFPToFPCall(invoke, codegen_, kQuickCbrt); 1894 } 1895 1896 void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) { 1897 CreateFPToFPCallLocations(arena_, invoke); 1898 } 1899 1900 void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) { 1901 GenFPToFPCall(invoke, codegen_, kQuickCosh); 1902 } 1903 1904 void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) { 1905 CreateFPToFPCallLocations(arena_, invoke); 1906 } 1907 1908 void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) { 1909 GenFPToFPCall(invoke, codegen_, kQuickExp); 1910 } 1911 1912 void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) { 1913 CreateFPToFPCallLocations(arena_, invoke); 1914 } 1915 1916 void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) { 1917 GenFPToFPCall(invoke, codegen_, kQuickExpm1); 1918 } 1919 1920 void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) { 1921 CreateFPToFPCallLocations(arena_, invoke); 1922 } 1923 1924 void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) { 1925 GenFPToFPCall(invoke, codegen_, kQuickLog); 1926 } 1927 1928 void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) { 1929 CreateFPToFPCallLocations(arena_, invoke); 1930 } 1931 1932 void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) { 1933 GenFPToFPCall(invoke, codegen_, kQuickLog10); 1934 } 1935 1936 void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) { 1937 CreateFPToFPCallLocations(arena_, invoke); 1938 } 1939 1940 void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) { 1941 GenFPToFPCall(invoke, codegen_, kQuickSinh); 1942 } 1943 1944 void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) { 1945 CreateFPToFPCallLocations(arena_, invoke); 1946 } 1947 1948 void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) { 1949 GenFPToFPCall(invoke, codegen_, kQuickTan); 1950 } 1951 1952 void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) { 1953 CreateFPToFPCallLocations(arena_, invoke); 1954 } 1955 1956 void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) { 1957 GenFPToFPCall(invoke, codegen_, kQuickTanh); 1958 } 1959 1960 void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) { 1961 CreateFPFPToFPCallLocations(arena_, invoke); 1962 } 1963 1964 void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) { 1965 GenFPToFPCall(invoke, codegen_, kQuickAtan2); 1966 } 1967 1968 void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* 
invoke) { 1969 CreateFPFPToFPCallLocations(arena_, invoke); 1970 } 1971 1972 void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) { 1973 GenFPToFPCall(invoke, codegen_, kQuickHypot); 1974 } 1975 1976 void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) { 1977 CreateFPFPToFPCallLocations(arena_, invoke); 1978 } 1979 1980 void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) { 1981 GenFPToFPCall(invoke, codegen_, kQuickNextAfter); 1982 } 1983 1984 void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1985 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1986 LocationSummary::kNoCall, 1987 kIntrinsified); 1988 locations->SetInAt(0, Location::RequiresRegister()); 1989 locations->SetInAt(1, Location::RequiresRegister()); 1990 locations->SetInAt(2, Location::RequiresRegister()); 1991 locations->SetInAt(3, Location::RequiresRegister()); 1992 locations->SetInAt(4, Location::RequiresRegister()); 1993 1994 locations->AddTemp(Location::RequiresRegister()); 1995 locations->AddTemp(Location::RequiresRegister()); 1996 locations->AddTemp(Location::RequiresRegister()); 1997 } 1998 1999 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { 2000 MacroAssembler* masm = GetVIXLAssembler(); 2001 LocationSummary* locations = invoke->GetLocations(); 2002 2003 // Check assumption that sizeof(Char) is 2 (used in scaling below). 2004 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); 2005 DCHECK_EQ(char_size, 2u); 2006 2007 // Location of data in char array buffer. 2008 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); 2009 2010 // Location of char array data in string. 2011 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); 2012 2013 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin); 2014 // Since getChars() calls getCharsNoCheck() - we use registers rather than constants. 2015 Register srcObj = XRegisterFrom(locations->InAt(0)); 2016 Register srcBegin = XRegisterFrom(locations->InAt(1)); 2017 Register srcEnd = XRegisterFrom(locations->InAt(2)); 2018 Register dstObj = XRegisterFrom(locations->InAt(3)); 2019 Register dstBegin = XRegisterFrom(locations->InAt(4)); 2020 2021 Register src_ptr = XRegisterFrom(locations->GetTemp(0)); 2022 Register num_chr = XRegisterFrom(locations->GetTemp(1)); 2023 Register tmp1 = XRegisterFrom(locations->GetTemp(2)); 2024 2025 UseScratchRegisterScope temps(masm); 2026 Register dst_ptr = temps.AcquireX(); 2027 Register tmp2 = temps.AcquireX(); 2028 2029 vixl::aarch64::Label done; 2030 vixl::aarch64::Label compressed_string_loop; 2031 __ Sub(num_chr, srcEnd, srcBegin); 2032 // Early out for valid zero-length retrievals. 2033 __ Cbz(num_chr, &done); 2034 2035 // dst address start to copy to. 2036 __ Add(dst_ptr, dstObj, Operand(data_offset)); 2037 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1)); 2038 2039 // src address to copy from. 2040 __ Add(src_ptr, srcObj, Operand(value_offset)); 2041 vixl::aarch64::Label compressed_string_preloop; 2042 if (mirror::kUseStringCompression) { 2043 // Location of count in string. 2044 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 2045 // String's length. 2046 __ Ldr(tmp2, MemOperand(srcObj, count_offset)); 2047 __ Tbz(tmp2, 0, &compressed_string_preloop); 2048 } 2049 __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1)); 2050 2051 // Do the copy. 
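// (Overview of the copy strategy implemented below: while at least 8 chars remain, copy
// 16 bytes per iteration with LDP/STP, then finish with a 1-char-at-a-time remainder loop.
// Compressed source strings skip this and use the byte-to-halfword loop at the end.)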
2052 vixl::aarch64::Label loop; 2053 vixl::aarch64::Label remainder; 2054 2055 // Save repairing the value of num_chr on the < 8 character path. 2056 __ Subs(tmp1, num_chr, 8); 2057 __ B(lt, &remainder); 2058 2059 // Keep the result of the earlier subs, we are going to fetch at least 8 characters. 2060 __ Mov(num_chr, tmp1); 2061 2062 // Main loop used for longer fetches loads and stores 8x16-bit characters at a time. 2063 // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.) 2064 __ Bind(&loop); 2065 __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex)); 2066 __ Subs(num_chr, num_chr, 8); 2067 __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex)); 2068 __ B(ge, &loop); 2069 2070 __ Adds(num_chr, num_chr, 8); 2071 __ B(eq, &done); 2072 2073 // Main loop for < 8 character case and remainder handling. Loads and stores one 2074 // 16-bit Java character at a time. 2075 __ Bind(&remainder); 2076 __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex)); 2077 __ Subs(num_chr, num_chr, 1); 2078 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex)); 2079 __ B(gt, &remainder); 2080 __ B(&done); 2081 2082 if (mirror::kUseStringCompression) { 2083 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); 2084 DCHECK_EQ(c_char_size, 1u); 2085 __ Bind(&compressed_string_preloop); 2086 __ Add(src_ptr, src_ptr, Operand(srcBegin)); 2087 // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. 2088 __ Bind(&compressed_string_loop); 2089 __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex)); 2090 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex)); 2091 __ Subs(num_chr, num_chr, Operand(1)); 2092 __ B(gt, &compressed_string_loop); 2093 } 2094 2095 __ Bind(&done); 2096 } 2097 2098 // Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native 2099 // implementation there for longer copy lengths. 2100 static constexpr int32_t kSystemArrayCopyCharThreshold = 32; 2101 2102 static void SetSystemArrayCopyLocationRequires(LocationSummary* locations, 2103 uint32_t at, 2104 HInstruction* input) { 2105 HIntConstant* const_input = input->AsIntConstant(); 2106 if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) { 2107 locations->SetInAt(at, Location::RequiresRegister()); 2108 } else { 2109 locations->SetInAt(at, Location::RegisterOrConstant(input)); 2110 } 2111 } 2112 2113 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { 2114 // Check to see if we have known failures that will cause us to have to bail out 2115 // to the runtime, and just generate the runtime call directly. 2116 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); 2117 HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant(); 2118 2119 // The positions must be non-negative. 2120 if ((src_pos != nullptr && src_pos->GetValue() < 0) || 2121 (dst_pos != nullptr && dst_pos->GetValue() < 0)) { 2122 // We will have to fail anyways. 2123 return; 2124 } 2125 2126 // The length must be >= 0 and not so long that we would (currently) prefer libcore's 2127 // native implementation. 2128 HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); 2129 if (length != nullptr) { 2130 int32_t len = length->GetValue(); 2131 if (len < 0 || len > kSystemArrayCopyCharThreshold) { 2132 // Just call as normal. 
2133 return; 2134 } 2135 } 2136 2137 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena(); 2138 LocationSummary* locations = new (allocator) LocationSummary(invoke, 2139 LocationSummary::kCallOnSlowPath, 2140 kIntrinsified); 2141 // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length). 2142 locations->SetInAt(0, Location::RequiresRegister()); 2143 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1)); 2144 locations->SetInAt(2, Location::RequiresRegister()); 2145 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3)); 2146 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4)); 2147 2148 locations->AddTemp(Location::RequiresRegister()); 2149 locations->AddTemp(Location::RequiresRegister()); 2150 locations->AddTemp(Location::RequiresRegister()); 2151 } 2152 2153 static void CheckSystemArrayCopyPosition(MacroAssembler* masm, 2154 const Location& pos, 2155 const Register& input, 2156 const Location& length, 2157 SlowPathCodeARM64* slow_path, 2158 const Register& temp, 2159 bool length_is_input_length = false) { 2160 const int32_t length_offset = mirror::Array::LengthOffset().Int32Value(); 2161 if (pos.IsConstant()) { 2162 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue(); 2163 if (pos_const == 0) { 2164 if (!length_is_input_length) { 2165 // Check that length(input) >= length. 2166 __ Ldr(temp, MemOperand(input, length_offset)); 2167 __ Cmp(temp, OperandFrom(length, Primitive::kPrimInt)); 2168 __ B(slow_path->GetEntryLabel(), lt); 2169 } 2170 } else { 2171 // Check that length(input) >= pos. 2172 __ Ldr(temp, MemOperand(input, length_offset)); 2173 __ Subs(temp, temp, pos_const); 2174 __ B(slow_path->GetEntryLabel(), lt); 2175 2176 // Check that (length(input) - pos) >= length. 2177 __ Cmp(temp, OperandFrom(length, Primitive::kPrimInt)); 2178 __ B(slow_path->GetEntryLabel(), lt); 2179 } 2180 } else if (length_is_input_length) { 2181 // The only way the copy can succeed is if pos is zero. 2182 __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel()); 2183 } else { 2184 // Check that pos >= 0. 2185 Register pos_reg = WRegisterFrom(pos); 2186 __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel()); 2187 2188 // Check that pos <= length(input) && (length(input) - pos) >= length. 2189 __ Ldr(temp, MemOperand(input, length_offset)); 2190 __ Subs(temp, temp, pos_reg); 2191 // Ccmp if length(input) >= pos, else definitely bail to slow path (N!=V == lt). 2192 __ Ccmp(temp, OperandFrom(length, Primitive::kPrimInt), NFlag, ge); 2193 __ B(slow_path->GetEntryLabel(), lt); 2194 } 2195 } 2196 2197 // Compute base source address, base destination address, and end 2198 // source address for System.arraycopy* intrinsics in `src_base`, 2199 // `dst_base` and `src_end` respectively. 2200 static void GenSystemArrayCopyAddresses(MacroAssembler* masm, 2201 Primitive::Type type, 2202 const Register& src, 2203 const Location& src_pos, 2204 const Register& dst, 2205 const Location& dst_pos, 2206 const Location& copy_length, 2207 const Register& src_base, 2208 const Register& dst_base, 2209 const Register& src_end) { 2210 // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics. 
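// A worked example with hypothetical values, assuming the usual 12-byte array data offset:
// for a char[] copy with constant src_pos = 4 and length = 3, element_size = 2, so
//   src_base = src + 12 + 4 * 2 = src + 20,  and  src_end = src_base + 3 * 2 = src_base + 6.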
2211 DCHECK(type == Primitive::kPrimNot || type == Primitive::kPrimChar) 2212 << "Unexpected element type: " << type; 2213 const int32_t element_size = Primitive::ComponentSize(type); 2214 const int32_t element_size_shift = Primitive::ComponentSizeShift(type); 2215 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); 2216 2217 if (src_pos.IsConstant()) { 2218 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); 2219 __ Add(src_base, src, element_size * constant + data_offset); 2220 } else { 2221 __ Add(src_base, src, data_offset); 2222 __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift)); 2223 } 2224 2225 if (dst_pos.IsConstant()) { 2226 int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue(); 2227 __ Add(dst_base, dst, element_size * constant + data_offset); 2228 } else { 2229 __ Add(dst_base, dst, data_offset); 2230 __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift)); 2231 } 2232 2233 if (copy_length.IsConstant()) { 2234 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue(); 2235 __ Add(src_end, src_base, element_size * constant); 2236 } else { 2237 __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift)); 2238 } 2239 } 2240 2241 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { 2242 MacroAssembler* masm = GetVIXLAssembler(); 2243 LocationSummary* locations = invoke->GetLocations(); 2244 Register src = XRegisterFrom(locations->InAt(0)); 2245 Location src_pos = locations->InAt(1); 2246 Register dst = XRegisterFrom(locations->InAt(2)); 2247 Location dst_pos = locations->InAt(3); 2248 Location length = locations->InAt(4); 2249 2250 SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); 2251 codegen_->AddSlowPath(slow_path); 2252 2253 // If source and destination are the same, take the slow path. Overlapping copy regions must be 2254 // copied in reverse and we can't know in all cases if it's needed. 2255 __ Cmp(src, dst); 2256 __ B(slow_path->GetEntryLabel(), eq); 2257 2258 // Bail out if the source is null. 2259 __ Cbz(src, slow_path->GetEntryLabel()); 2260 2261 // Bail out if the destination is null. 2262 __ Cbz(dst, slow_path->GetEntryLabel()); 2263 2264 if (!length.IsConstant()) { 2265 // Merge the following two comparisons into one: 2266 // If the length is negative, bail out (delegate to libcore's native implementation). 2267 // If the length > 32 then (currently) prefer libcore's native implementation. 2268 __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold); 2269 __ B(slow_path->GetEntryLabel(), hi); 2270 } else { 2271 // We have already checked in the LocationsBuilder for the constant case. 
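// (The builder only created an intrinsic LocationSummary when the constant length was in
// [0, kSystemArrayCopyCharThreshold], which is what the two DCHECKs below re-assert.)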
2272 DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0); 2273 DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32); 2274 } 2275 2276 Register src_curr_addr = WRegisterFrom(locations->GetTemp(0)); 2277 Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1)); 2278 Register src_stop_addr = WRegisterFrom(locations->GetTemp(2)); 2279 2280 CheckSystemArrayCopyPosition(masm, 2281 src_pos, 2282 src, 2283 length, 2284 slow_path, 2285 src_curr_addr, 2286 false); 2287 2288 CheckSystemArrayCopyPosition(masm, 2289 dst_pos, 2290 dst, 2291 length, 2292 slow_path, 2293 src_curr_addr, 2294 false); 2295 2296 src_curr_addr = src_curr_addr.X(); 2297 dst_curr_addr = dst_curr_addr.X(); 2298 src_stop_addr = src_stop_addr.X(); 2299 2300 GenSystemArrayCopyAddresses(masm, 2301 Primitive::kPrimChar, 2302 src, 2303 src_pos, 2304 dst, 2305 dst_pos, 2306 length, 2307 src_curr_addr, 2308 dst_curr_addr, 2309 src_stop_addr); 2310 2311 // Iterate over the arrays and do a raw copy of the chars. 2312 const int32_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); 2313 UseScratchRegisterScope temps(masm); 2314 Register tmp = temps.AcquireW(); 2315 vixl::aarch64::Label loop, done; 2316 __ Bind(&loop); 2317 __ Cmp(src_curr_addr, src_stop_addr); 2318 __ B(&done, eq); 2319 __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex)); 2320 __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex)); 2321 __ B(&loop); 2322 __ Bind(&done); 2323 2324 __ Bind(slow_path->GetExitLabel()); 2325 } 2326 2327 // We can choose to use the native implementation there for longer copy lengths. 2328 static constexpr int32_t kSystemArrayCopyThreshold = 128; 2329 2330 // CodeGenerator::CreateSystemArrayCopyLocationSummary use three temporary registers. 2331 // We want to use two temporary registers in order to reduce the register pressure in arm64. 2332 // So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary. 2333 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { 2334 // The only read barrier implementation supporting the 2335 // SystemArrayCopy intrinsic is the Baker-style read barriers. 2336 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { 2337 return; 2338 } 2339 2340 // Check to see if we have known failures that will cause us to have to bail out 2341 // to the runtime, and just generate the runtime call directly. 2342 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); 2343 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); 2344 2345 // The positions must be non-negative. 2346 if ((src_pos != nullptr && src_pos->GetValue() < 0) || 2347 (dest_pos != nullptr && dest_pos->GetValue() < 0)) { 2348 // We will have to fail anyways. 2349 return; 2350 } 2351 2352 // The length must be >= 0. 2353 HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); 2354 if (length != nullptr) { 2355 int32_t len = length->GetValue(); 2356 if (len < 0 || len >= kSystemArrayCopyThreshold) { 2357 // Just call as normal. 2358 return; 2359 } 2360 } 2361 2362 SystemArrayCopyOptimizations optimizations(invoke); 2363 2364 if (optimizations.GetDestinationIsSource()) { 2365 if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) { 2366 // We only support backward copying if source and destination are the same. 2367 return; 2368 } 2369 } 2370 2371 if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) { 2372 // We currently don't intrinsify primitive copying. 
2373 return; 2374 } 2375 2376 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena(); 2377 LocationSummary* locations = new (allocator) LocationSummary(invoke, 2378 LocationSummary::kCallOnSlowPath, 2379 kIntrinsified); 2380 // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length). 2381 locations->SetInAt(0, Location::RequiresRegister()); 2382 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1)); 2383 locations->SetInAt(2, Location::RequiresRegister()); 2384 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3)); 2385 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4)); 2386 2387 locations->AddTemp(Location::RequiresRegister()); 2388 locations->AddTemp(Location::RequiresRegister()); 2389 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2390 // Temporary register IP0, obtained from the VIXL scratch register 2391 // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64 2392 // (because that register is clobbered by ReadBarrierMarkRegX 2393 // entry points). It cannot be used in calls to 2394 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier 2395 // either. For these reasons, get a third extra temporary register 2396 // from the register allocator. 2397 locations->AddTemp(Location::RequiresRegister()); 2398 } else { 2399 // Cases other than Baker read barriers: the third temporary will 2400 // be acquired from the VIXL scratch register pool. 2401 } 2402 } 2403 2404 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { 2405 // The only read barrier implementation supporting the 2406 // SystemArrayCopy intrinsic is the Baker-style read barriers. 2407 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 2408 2409 MacroAssembler* masm = GetVIXLAssembler(); 2410 LocationSummary* locations = invoke->GetLocations(); 2411 2412 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 2413 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 2414 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 2415 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 2416 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); 2417 2418 Register src = XRegisterFrom(locations->InAt(0)); 2419 Location src_pos = locations->InAt(1); 2420 Register dest = XRegisterFrom(locations->InAt(2)); 2421 Location dest_pos = locations->InAt(3); 2422 Location length = locations->InAt(4); 2423 Register temp1 = WRegisterFrom(locations->GetTemp(0)); 2424 Location temp1_loc = LocationFrom(temp1); 2425 Register temp2 = WRegisterFrom(locations->GetTemp(1)); 2426 Location temp2_loc = LocationFrom(temp2); 2427 2428 SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); 2429 codegen_->AddSlowPath(intrinsic_slow_path); 2430 2431 vixl::aarch64::Label conditions_on_positions_validated; 2432 SystemArrayCopyOptimizations optimizations(invoke); 2433 2434 // If source and destination are the same, we go to slow path if we need to do 2435 // forward copying. 2436 if (src_pos.IsConstant()) { 2437 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); 2438 if (dest_pos.IsConstant()) { 2439 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); 2440 if (optimizations.GetDestinationIsSource()) { 2441 // Checked when building locations. 
2442 DCHECK_GE(src_pos_constant, dest_pos_constant); 2443 } else if (src_pos_constant < dest_pos_constant) { 2444 __ Cmp(src, dest); 2445 __ B(intrinsic_slow_path->GetEntryLabel(), eq); 2446 } 2447 // Checked when building locations. 2448 DCHECK(!optimizations.GetDestinationIsSource() 2449 || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue())); 2450 } else { 2451 if (!optimizations.GetDestinationIsSource()) { 2452 __ Cmp(src, dest); 2453 __ B(&conditions_on_positions_validated, ne); 2454 } 2455 __ Cmp(WRegisterFrom(dest_pos), src_pos_constant); 2456 __ B(intrinsic_slow_path->GetEntryLabel(), gt); 2457 } 2458 } else { 2459 if (!optimizations.GetDestinationIsSource()) { 2460 __ Cmp(src, dest); 2461 __ B(&conditions_on_positions_validated, ne); 2462 } 2463 __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()), 2464 OperandFrom(dest_pos, invoke->InputAt(3)->GetType())); 2465 __ B(intrinsic_slow_path->GetEntryLabel(), lt); 2466 } 2467 2468 __ Bind(&conditions_on_positions_validated); 2469 2470 if (!optimizations.GetSourceIsNotNull()) { 2471 // Bail out if the source is null. 2472 __ Cbz(src, intrinsic_slow_path->GetEntryLabel()); 2473 } 2474 2475 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { 2476 // Bail out if the destination is null. 2477 __ Cbz(dest, intrinsic_slow_path->GetEntryLabel()); 2478 } 2479 2480 // We have already checked in the LocationsBuilder for the constant case. 2481 if (!length.IsConstant() && 2482 !optimizations.GetCountIsSourceLength() && 2483 !optimizations.GetCountIsDestinationLength()) { 2484 // Merge the following two comparisons into one: 2485 // If the length is negative, bail out (delegate to libcore's native implementation). 2486 // If the length >= 128 then (currently) prefer native implementation. 2487 __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold); 2488 __ B(intrinsic_slow_path->GetEntryLabel(), hs); 2489 } 2490 // Validity checks: source. 2491 CheckSystemArrayCopyPosition(masm, 2492 src_pos, 2493 src, 2494 length, 2495 intrinsic_slow_path, 2496 temp1, 2497 optimizations.GetCountIsSourceLength()); 2498 2499 // Validity checks: dest. 2500 CheckSystemArrayCopyPosition(masm, 2501 dest_pos, 2502 dest, 2503 length, 2504 intrinsic_slow_path, 2505 temp1, 2506 optimizations.GetCountIsDestinationLength()); 2507 { 2508 // We use a block to end the scratch scope before the write barrier, thus 2509 // freeing the temporary registers so they can be used in `MarkGCCard`. 2510 UseScratchRegisterScope temps(masm); 2511 Location temp3_loc; // Used only for Baker read barrier. 2512 Register temp3; 2513 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2514 temp3_loc = locations->GetTemp(2); 2515 temp3 = WRegisterFrom(temp3_loc); 2516 } else { 2517 temp3 = temps.AcquireW(); 2518 } 2519 2520 if (!optimizations.GetDoesNotNeedTypeCheck()) { 2521 // Check whether all elements of the source array are assignable to the component 2522 // type of the destination array. We do two checks: the classes are the same, 2523 // or the destination is Object[]. If none of these checks succeed, we go to the 2524 // slow path. 
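// (For instance, with hypothetical types: copying from an Integer[] into a Number[] fails
// both fast checks, since the classes differ and the destination is not Object[], so we
// defer to the runtime even though the copy is legal; the runtime then performs the
// per-element assignability checks.)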
2525
2526 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2527 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2528 // /* HeapReference<Class> */ temp1 = src->klass_
2529 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2530 temp1_loc,
2531 src.W(),
2532 class_offset,
2533 temp3_loc,
2534 /* needs_null_check */ false,
2535 /* use_load_acquire */ false);
2536 // Bail out if the source is not a non primitive array.
2537 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2538 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2539 temp1_loc,
2540 temp1,
2541 component_offset,
2542 temp3_loc,
2543 /* needs_null_check */ false,
2544 /* use_load_acquire */ false);
2545 __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
2546 // If heap poisoning is enabled, `temp1` has been unpoisoned
2547 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2548 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2549 __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
2550 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2551 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2552 }
2553
2554 // /* HeapReference<Class> */ temp1 = dest->klass_
2555 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2556 temp1_loc,
2557 dest.W(),
2558 class_offset,
2559 temp3_loc,
2560 /* needs_null_check */ false,
2561 /* use_load_acquire */ false);
2562
2563 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2564 // Bail out if the destination is not a non primitive array.
2565 //
2566 // Register `temp1` is not trashed by the read barrier emitted
2567 // by GenerateFieldLoadWithBakerReadBarrier below, as that
2568 // method produces a call to a ReadBarrierMarkRegX entry point,
2569 // which saves all potentially live registers, including
2570 // temporaries such as `temp1`.
2571 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2572 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2573 temp2_loc,
2574 temp1,
2575 component_offset,
2576 temp3_loc,
2577 /* needs_null_check */ false,
2578 /* use_load_acquire */ false);
2579 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2580 // If heap poisoning is enabled, `temp2` has been unpoisoned
2581 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2582 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2583 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2584 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2585 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2586 }
2587
2588 // For the same reason given earlier, `temp1` is not trashed by the
2589 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2590 // /* HeapReference<Class> */ temp2 = src->klass_
2591 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2592 temp2_loc,
2593 src.W(),
2594 class_offset,
2595 temp3_loc,
2596 /* needs_null_check */ false,
2597 /* use_load_acquire */ false);
2598 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2599 __ Cmp(temp1, temp2); 2600 2601 if (optimizations.GetDestinationIsTypedObjectArray()) { 2602 vixl::aarch64::Label do_copy; 2603 __ B(&do_copy, eq); 2604 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 2605 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, 2606 temp1_loc, 2607 temp1, 2608 component_offset, 2609 temp3_loc, 2610 /* needs_null_check */ false, 2611 /* use_load_acquire */ false); 2612 // /* HeapReference<Class> */ temp1 = temp1->super_class_ 2613 // We do not need to emit a read barrier for the following 2614 // heap reference load, as `temp1` is only used in a 2615 // comparison with null below, and this reference is not 2616 // kept afterwards. 2617 __ Ldr(temp1, HeapOperand(temp1, super_offset)); 2618 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); 2619 __ Bind(&do_copy); 2620 } else { 2621 __ B(intrinsic_slow_path->GetEntryLabel(), ne); 2622 } 2623 } else { 2624 // Non read barrier code. 2625 2626 // /* HeapReference<Class> */ temp1 = dest->klass_ 2627 __ Ldr(temp1, MemOperand(dest, class_offset)); 2628 // /* HeapReference<Class> */ temp2 = src->klass_ 2629 __ Ldr(temp2, MemOperand(src, class_offset)); 2630 bool did_unpoison = false; 2631 if (!optimizations.GetDestinationIsNonPrimitiveArray() || 2632 !optimizations.GetSourceIsNonPrimitiveArray()) { 2633 // One or two of the references need to be unpoisoned. Unpoison them 2634 // both to make the identity check valid. 2635 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); 2636 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2); 2637 did_unpoison = true; 2638 } 2639 2640 if (!optimizations.GetDestinationIsNonPrimitiveArray()) { 2641 // Bail out if the destination is not a non primitive array. 2642 // /* HeapReference<Class> */ temp3 = temp1->component_type_ 2643 __ Ldr(temp3, HeapOperand(temp1, component_offset)); 2644 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); 2645 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); 2646 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); 2647 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); 2648 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 2649 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); 2650 } 2651 2652 if (!optimizations.GetSourceIsNonPrimitiveArray()) { 2653 // Bail out if the source is not a non primitive array. 2654 // /* HeapReference<Class> */ temp3 = temp2->component_type_ 2655 __ Ldr(temp3, HeapOperand(temp2, component_offset)); 2656 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); 2657 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); 2658 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); 2659 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); 2660 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 2661 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); 2662 } 2663 2664 __ Cmp(temp1, temp2); 2665 2666 if (optimizations.GetDestinationIsTypedObjectArray()) { 2667 vixl::aarch64::Label do_copy; 2668 __ B(&do_copy, eq); 2669 if (!did_unpoison) { 2670 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); 2671 } 2672 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 2673 __ Ldr(temp1, HeapOperand(temp1, component_offset)); 2674 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); 2675 // /* HeapReference<Class> */ temp1 = temp1->super_class_ 2676 __ Ldr(temp1, HeapOperand(temp1, super_offset)); 2677 // No need to unpoison the result, we're comparing against null. 
2678 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2679 __ Bind(&do_copy);
2680 } else {
2681 __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2682 }
2683 }
2684 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2685 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2686 // Bail out if the source is not a non primitive array.
2687 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2688 // /* HeapReference<Class> */ temp1 = src->klass_
2689 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2690 temp1_loc,
2691 src.W(),
2692 class_offset,
2693 temp3_loc,
2694 /* needs_null_check */ false,
2695 /* use_load_acquire */ false);
2696 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2697 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2698 temp2_loc,
2699 temp1,
2700 component_offset,
2701 temp3_loc,
2702 /* needs_null_check */ false,
2703 /* use_load_acquire */ false);
2704 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2705 // If heap poisoning is enabled, `temp2` has been unpoisoned
2706 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2707 } else {
2708 // /* HeapReference<Class> */ temp1 = src->klass_
2709 __ Ldr(temp1, HeapOperand(src.W(), class_offset));
2710 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2711 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2712 __ Ldr(temp2, HeapOperand(temp1, component_offset));
2713 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2714 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2715 }
2716 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2717 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2718 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2719 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2720 }
2721
2722 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
2723 // Constant zero length: no need to emit the loop code at all.
2724 } else {
2725 Register src_curr_addr = temp1.X();
2726 Register dst_curr_addr = temp2.X();
2727 Register src_stop_addr = temp3.X();
2728 vixl::aarch64::Label done;
2729 const Primitive::Type type = Primitive::kPrimNot;
2730 const int32_t element_size = Primitive::ComponentSize(type);
2731
2732 if (length.IsRegister()) {
2733 // Don't enter the copy loop if the length is zero.
2734 __ Cbz(WRegisterFrom(length), &done);
2735 }
2736
2737 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2738 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2739
2740 // SystemArrayCopy implementation for Baker read barriers (see
2741 // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
2742 //
2743 // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2744 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
2745 // bool is_gray = (rb_state == ReadBarrier::GrayState());
2746 // if (is_gray) {
2747 // // Slow-path copy.
2748 // do {
2749 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2750 // } while (src_ptr != end_ptr)
2751 // } else {
2752 // // Fast-path copy.
2753 // do {
2754 // *dest_ptr++ = *src_ptr++;
2755 // } while (src_ptr != end_ptr)
2756 // }
2757
2758 // Make sure `tmp` is not IP0, as it is clobbered by
2759 // ReadBarrierMarkRegX entry points in
2760 // ReadBarrierSystemArrayCopySlowPathARM64.
2761 DCHECK(temps.IsAvailable(ip0));
2762 temps.Exclude(ip0);
2763 Register tmp = temps.AcquireW();
2764 DCHECK_NE(LocationFrom(tmp).reg(), IP0);
2765 // Put IP0 back in the pool so that VIXL has at least one
2766 // scratch register available to emit macro-instructions (note
2767 // that IP1 is already used for `tmp`). Indeed some
2768 // macro-instructions used in GenSystemArrayCopyAddresses
2769 // (invoked hereunder) may require a scratch register (for
2770 // instance to emit a load with a large constant offset).
2771 temps.Include(ip0);
2772
2773 // /* int32_t */ monitor = src->monitor_
2774 __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
2775 // /* LockWord */ lock_word = LockWord(monitor)
2776 static_assert(sizeof(LockWord) == sizeof(int32_t),
2777 "art::LockWord and int32_t have different sizes.");
2778
2779 // Introduce a dependency on the lock_word including rb_state,
2780 // to prevent load-load reordering, and without using
2781 // a memory barrier (which would be more expensive).
2782 // `src` is unchanged by this operation, but its value now depends
2783 // on `tmp`.
2784 __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
2785
2786 // Compute base source address, base destination address, and end
2787 // source address for System.arraycopy* intrinsics in `src_base`,
2788 // `dst_base` and `src_end` respectively.
2789 // Note that `src_curr_addr` is computed from `src` (and
2790 // `src_pos`) here, and thus honors the artificial dependency
2791 // of `src` on `tmp`.
2792 GenSystemArrayCopyAddresses(masm,
2793 type,
2794 src,
2795 src_pos,
2796 dest,
2797 dest_pos,
2798 length,
2799 src_curr_addr,
2800 dst_curr_addr,
2801 src_stop_addr);
2802
2803 // Slow path used to copy array when `src` is gray.
2804 SlowPathCodeARM64* read_barrier_slow_path =
2805 new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
2806 codegen_->AddSlowPath(read_barrier_slow_path);
2807
2808 // Given the numeric representation, it's enough to check the low bit of the rb_state.
2809 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2810 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2811 __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
2812
2813 // Fast-path copy.
2814 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2815 // poison/unpoison.
2816 vixl::aarch64::Label loop;
2817 __ Bind(&loop);
2818 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
2819 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
2820 __ Cmp(src_curr_addr, src_stop_addr);
2821 __ B(&loop, ne);
2822
2823 __ Bind(read_barrier_slow_path->GetExitLabel());
2824 } else {
2825 // Non read barrier code.
2826 // Compute base source address, base destination address, and end
2827 // source address for System.arraycopy* intrinsics in `src_base`,
2828 // `dst_base` and `src_end` respectively.
2829 GenSystemArrayCopyAddresses(masm,
2830 type,
2831 src,
2832 src_pos,
2833 dest,
2834 dest_pos,
2835 length,
2836 src_curr_addr,
2837 dst_curr_addr,
2838 src_stop_addr);
2839 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2840 // poison/unpoison.
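// (Raw copy is safe with heap poisoning because the reference values are moved verbatim:
// they are already poisoned in the source array and the destination expects poisoned
// values as well, so no unpoison/repoison step is needed.)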
        vixl::aarch64::Label loop;
        __ Bind(&loop);
        {
          Register tmp = temps.AcquireW();
          __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
          __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
        }
        __ Cmp(src_curr_addr, src_stop_addr);
        __ B(&loop, ne);
      }
      __ Bind(&done);
    }
  }

  // We only need one card marking on the destination array.
  codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);

  __ Bind(intrinsic_slow_path->GetExitLabel());
}

static void GenIsInfinite(LocationSummary* locations,
                          bool is64bit,
                          MacroAssembler* masm) {
  Operand infinity;
  Register out;

  if (is64bit) {
    infinity = kPositiveInfinityDouble;
    out = XRegisterFrom(locations->Out());
  } else {
    infinity = kPositiveInfinityFloat;
    out = WRegisterFrom(locations->Out());
  }

  const Register zero = vixl::aarch64::Assembler::AppropriateZeroRegFor(out);

  MoveFPToInt(locations, is64bit, masm);
  // After the Eor, `out` is zero if the input was +infinity, and has only the
  // sign bit set if it was -infinity.
  __ Eor(out, out, infinity);
  // We don't care about the sign bit, so shift left.
  __ Cmp(zero, Operand(out, LSL, 1));
  __ Cset(out, eq);
}

void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
  GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
  GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      calling_convention.GetReturnLocation(Primitive::kPrimNot),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
}

void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();

  Register out = RegisterFrom(locations->Out(), Primitive::kPrimNot);
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireW();
  InvokeRuntimeCallingConvention calling_convention;
  Register argument = calling_convention.GetRegisterAt(0);
  if (invoke->InputAt(0)->IsConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (value >= info.low && value <= info.high) {
      // Just embed the j.l.Integer in the code.
      ScopedObjectAccess soa(Thread::Current());
      mirror::Object* boxed = info.cache->Get(value + (-info.low));
      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
    } else {
      // Allocate and initialize a new j.l.Integer.
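      // Illustrative note: assuming the usual IntegerCache bounds in the boot
      // image (info.low == -128, info.high == 127), Integer.valueOf(42) takes
      // the branch above and embeds the pre-boxed boot-image object directly,
      // while Integer.valueOf(1000) falls through to this allocation path.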
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
      uint32_t address =
          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
      __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
      __ Mov(temp.W(), value);
      __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
      // one.
      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
    }
  } else {
    Register in = RegisterFrom(locations->InAt(0), Primitive::kPrimInt);
    // Check bounds of our cache.
    __ Add(out.W(), in.W(), -info.low);
    __ Cmp(out.W(), info.high - info.low + 1);
    vixl::aarch64::Label allocate, done;
    __ B(&allocate, hs);
    // If the value is within the bounds, load the j.l.Integer directly from the array.
    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
    __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
    MemOperand source = HeapOperand(
        temp, out.X(), LSL, Primitive::ComponentSizeShift(Primitive::kPrimNot));
    codegen_->Load(Primitive::kPrimNot, out, source);
    codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
    __ B(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
    __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
    __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
    // one.
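    // Illustrative note: the kStoreStore barrier below keeps the store to the
    // `value` field ordered before any subsequent publication of the new object
    // reference, mirroring the barrier normally required for final fields at the
    // end of a constructor.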
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  Register out = RegisterFrom(invoke->GetLocations()->Out(), Primitive::kPrimInt);
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();

  __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
  // Load-acquire the interrupted flag of the current thread.
  __ Ldar(out.W(), MemOperand(temp));

  vixl::aarch64::Label done;
  __ Cbz(out.W(), &done);
  // The flag was set: clear it with a store-release.
  __ Stlr(wzr, MemOperand(temp));
  __ Bind(&done);
}

UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM64, LongLowestOneBit)

UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf)
UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend)
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength)
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString)
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppend)
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength)
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString)

// 1.8.
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(ARM64)

#undef __

}  // namespace arm64
}  // namespace art