      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "intrinsics_arm64.h"
     18 
     19 #include "arch/arm64/instruction_set_features_arm64.h"
     20 #include "art_method.h"
     21 #include "code_generator_arm64.h"
     22 #include "common_arm64.h"
     23 #include "entrypoints/quick/quick_entrypoints.h"
     24 #include "heap_poisoning.h"
     25 #include "intrinsics.h"
     26 #include "lock_word.h"
     27 #include "mirror/array-inl.h"
     28 #include "mirror/object_array-inl.h"
     29 #include "mirror/reference.h"
     30 #include "mirror/string-inl.h"
     31 #include "scoped_thread_state_change-inl.h"
     32 #include "thread-current-inl.h"
     33 #include "utils/arm64/assembler_arm64.h"
     34 
     35 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
     36 
     37 // TODO(VIXL): Make VIXL compile with -Wshadow.
     38 #pragma GCC diagnostic push
     39 #pragma GCC diagnostic ignored "-Wshadow"
     40 #include "aarch64/disasm-aarch64.h"
     41 #include "aarch64/macro-assembler-aarch64.h"
     42 #pragma GCC diagnostic pop
     43 
     44 namespace art {
     45 
     46 namespace arm64 {
     47 
     48 using helpers::DRegisterFrom;
     49 using helpers::FPRegisterFrom;
     50 using helpers::HeapOperand;
     51 using helpers::LocationFrom;
     52 using helpers::OperandFrom;
     53 using helpers::RegisterFrom;
     54 using helpers::SRegisterFrom;
     55 using helpers::WRegisterFrom;
     56 using helpers::XRegisterFrom;
     57 using helpers::InputRegisterAt;
     58 using helpers::OutputRegister;
     59 
     60 namespace {
     61 
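        // Builds a MemOperand from a raw 64-bit address held in a core register. Used by the
        // Memory peek/poke intrinsics below, which operate on absolute native addresses rather
        // than on managed heap references.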
     62 ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
     63   return MemOperand(XRegisterFrom(location), offset);
     64 }
     65 
     66 }  // namespace
     67 
     68 MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
     69   return codegen_->GetVIXLAssembler();
     70 }
     71 
     72 ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
     73   return codegen_->GetGraph()->GetAllocator();
     74 }
     75 
     76 #define __ codegen->GetVIXLAssembler()->
     77 
     78 static void MoveFromReturnRegister(Location trg,
     79                                    DataType::Type type,
     80                                    CodeGeneratorARM64* codegen) {
     81   if (!trg.IsValid()) {
     82     DCHECK(type == DataType::Type::kVoid);
     83     return;
     84   }
     85 
     86   DCHECK_NE(type, DataType::Type::kVoid);
     87 
     88   if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) {
     89     Register trg_reg = RegisterFrom(trg, type);
     90     Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
     91     __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
     92   } else {
     93     FPRegister trg_reg = FPRegisterFrom(trg, type);
     94     FPRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
     95     __ Fmov(trg_reg, res_reg);
     96   }
     97 }
     98 
     99 static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) {
    100   InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
    101   IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
    102 }
    103 
    104 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
    105 // call. This will copy the arguments into the positions for a regular call.
    106 //
    107 // Note: The actual parameters are required to be in the locations given by the invoke's location
    108 //       summary. If an intrinsic modifies those locations before a slowpath call, they must be
    109 //       restored!
    110 class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
    111  public:
    112   explicit IntrinsicSlowPathARM64(HInvoke* invoke)
    113       : SlowPathCodeARM64(invoke), invoke_(invoke) { }
    114 
    115   void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    116     CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    117     __ Bind(GetEntryLabel());
    118 
    119     SaveLiveRegisters(codegen, invoke_->GetLocations());
    120 
    121     MoveArguments(invoke_, codegen);
    122 
    123     {
    124       // Ensure that no pools are emitted between the BLR (emitted by Generate*Call)
    125       // and RecordPcInfo.
    126       vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
    127       if (invoke_->IsInvokeStaticOrDirect()) {
    128         codegen->GenerateStaticOrDirectCall(
    129             invoke_->AsInvokeStaticOrDirect(), LocationFrom(kArtMethodRegister), this);
    130       } else {
    131         codegen->GenerateVirtualCall(
    132             invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister), this);
    133       }
    134     }
    135 
    136     // Copy the result back to the expected output.
    137     Location out = invoke_->GetLocations()->Out();
    138     if (out.IsValid()) {
    139       DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
    140       DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
    141       MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    142     }
    143 
    144     RestoreLiveRegisters(codegen, invoke_->GetLocations());
    145     __ B(GetExitLabel());
    146   }
    147 
    148   const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM64"; }
    149 
    150  private:
    151   // The instruction where this slow path is happening.
    152   HInvoke* const invoke_;
    153 
    154   DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
    155 };
    156 
    157 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
    158 class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
    159  public:
    160   ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
    161       : SlowPathCodeARM64(instruction), tmp_(tmp) {
    162     DCHECK(kEmitCompilerReadBarrier);
    163     DCHECK(kUseBakerReadBarrier);
    164   }
    165 
    166   void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    167     CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    168     LocationSummary* locations = instruction_->GetLocations();
    169     DCHECK(locations->CanCall());
    170     DCHECK(instruction_->IsInvokeStaticOrDirect())
    171         << "Unexpected instruction in read barrier arraycopy slow path: "
    172         << instruction_->DebugName();
    173     DCHECK(instruction_->GetLocations()->Intrinsified());
    174     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
    175 
    176     const int32_t element_size = DataType::Size(DataType::Type::kReference);
    177 
    178     Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
    179     Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
    180     Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
    181     Register tmp_reg = WRegisterFrom(tmp_);
    182 
    183     __ Bind(GetEntryLabel());
    184     vixl::aarch64::Label slow_copy_loop;
    185     __ Bind(&slow_copy_loop);
    186     __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
    187     codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
    188     // TODO: Inline the mark bit check before calling the runtime?
    189     // tmp_reg = ReadBarrier::Mark(tmp_reg);
    190     // No need to save live registers; it's taken care of by the
    191     // entrypoint. Also, there is no need to update the stack mask,
    192     // as this runtime call will not trigger a garbage collection.
    193     // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
    194     // explanations.)
    195     DCHECK_NE(tmp_.reg(), LR);
    196     DCHECK_NE(tmp_.reg(), WSP);
    197     DCHECK_NE(tmp_.reg(), WZR);
    198     // IP0 is used internally by the ReadBarrierMarkRegX entry point
    199     // as a temporary (and is not preserved). It therefore must not be
    200     // used for any register that is live in this slow path.
    201     DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
    202     DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
    203     DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
    204     DCHECK_NE(tmp_.reg(), IP0);
    205     DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
    206     // TODO: Load the entrypoint once before the loop, instead of
    207     // loading it at every iteration.
    208     int32_t entry_point_offset =
    209         Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
    210     // This runtime call does not require a stack map.
    211     codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    212     codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
    213     __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
    214     __ Cmp(src_curr_addr, src_stop_addr);
    215     __ B(&slow_copy_loop, ne);
    216     __ B(GetExitLabel());
    217   }
    218 
    219   const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; }
    220 
    221  private:
    222   Location tmp_;
    223 
    224   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
    225 };
    226 #undef __
    227 
    228 bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
    229   Dispatch(invoke);
    230   LocationSummary* res = invoke->GetLocations();
    231   if (res == nullptr) {
    232     return false;
    233   }
    234   return res->Intrinsified();
    235 }
    236 
    237 #define __ masm->
    238 
    239 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    240   LocationSummary* locations =
    241       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    242   locations->SetInAt(0, Location::RequiresFpuRegister());
    243   locations->SetOut(Location::RequiresRegister());
    244 }
    245 
    246 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    247   LocationSummary* locations =
    248       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    249   locations->SetInAt(0, Location::RequiresRegister());
    250   locations->SetOut(Location::RequiresFpuRegister());
    251 }
    252 
    253 static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
    254   Location input = locations->InAt(0);
    255   Location output = locations->Out();
    256   __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
    257           is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
    258 }
    259 
    260 static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
    261   Location input = locations->InAt(0);
    262   Location output = locations->Out();
    263   __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
    264           is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
    265 }
    266 
    267 void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
    268   CreateFPToIntLocations(allocator_, invoke);
    269 }
    270 void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
    271   CreateIntToFPLocations(allocator_, invoke);
    272 }
    273 
    274 void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
    275   MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
    276 }
    277 void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
    278   MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
    279 }
    280 
    281 void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
    282   CreateFPToIntLocations(allocator_, invoke);
    283 }
    284 void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
    285   CreateIntToFPLocations(allocator_, invoke);
    286 }
    287 
    288 void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
    289   MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
    290 }
    291 void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
    292   MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
    293 }
    294 
    295 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    296   LocationSummary* locations =
    297       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    298   locations->SetInAt(0, Location::RequiresRegister());
    299   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
    300 }
    301 
    302 static void GenReverseBytes(LocationSummary* locations,
    303                             DataType::Type type,
    304                             MacroAssembler* masm) {
    305   Location in = locations->InAt(0);
    306   Location out = locations->Out();
    307 
    308   switch (type) {
    309     case DataType::Type::kInt16:
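              // REV16 swaps the bytes within each halfword; SXTH then sign-extends the low
              // 16 bits, since Java's Short.reverseBytes returns a signed short.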
    310       __ Rev16(WRegisterFrom(out), WRegisterFrom(in));
    311       __ Sxth(WRegisterFrom(out), WRegisterFrom(out));
    312       break;
    313     case DataType::Type::kInt32:
    314     case DataType::Type::kInt64:
    315       __ Rev(RegisterFrom(out, type), RegisterFrom(in, type));
    316       break;
    317     default:
    318       LOG(FATAL) << "Unexpected size for reverse-bytes: " << type;
    319       UNREACHABLE();
    320   }
    321 }
    322 
    323 void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
    324   CreateIntToIntLocations(allocator_, invoke);
    325 }
    326 
    327 void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
    328   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
    329 }
    330 
    331 void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
    332   CreateIntToIntLocations(allocator_, invoke);
    333 }
    334 
    335 void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
    336   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
    337 }
    338 
    339 void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
    340   CreateIntToIntLocations(allocator_, invoke);
    341 }
    342 
    343 void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
    344   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
    345 }
    346 
    347 static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    348   LocationSummary* locations =
    349       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    350   locations->SetInAt(0, Location::RequiresRegister());
    351   locations->SetInAt(1, Location::RequiresRegister());
    352   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
    353 }
    354 
    355 static void GenNumberOfLeadingZeros(LocationSummary* locations,
    356                                     DataType::Type type,
    357                                     MacroAssembler* masm) {
    358   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
    359 
    360   Location in = locations->InAt(0);
    361   Location out = locations->Out();
    362 
    363   __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
    364 }
    365 
    366 void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
    367   CreateIntToIntLocations(allocator_, invoke);
    368 }
    369 
    370 void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
    371   GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
    372 }
    373 
    374 void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
    375   CreateIntToIntLocations(allocator_, invoke);
    376 }
    377 
    378 void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
    379   GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
    380 }
    381 
    382 static void GenNumberOfTrailingZeros(LocationSummary* locations,
    383                                      DataType::Type type,
    384                                      MacroAssembler* masm) {
    385   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
    386 
    387   Location in = locations->InAt(0);
    388   Location out = locations->Out();
    389 
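          // A64 has no count-trailing-zeros instruction, so reverse the bits with RBIT and
          // count leading zeros with CLZ instead.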
    390   __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
    391   __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
    392 }
    393 
    394 void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
    395   CreateIntToIntLocations(allocator_, invoke);
    396 }
    397 
    398 void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
    399   GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
    400 }
    401 
    402 void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
    403   CreateIntToIntLocations(allocator_, invoke);
    404 }
    405 
    406 void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
    407   GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
    408 }
    409 
    410 static void GenReverse(LocationSummary* locations,
    411                        DataType::Type type,
    412                        MacroAssembler* masm) {
    413   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
    414 
    415   Location in = locations->InAt(0);
    416   Location out = locations->Out();
    417 
    418   __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
    419 }
    420 
    421 void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
    422   CreateIntToIntLocations(allocator_, invoke);
    423 }
    424 
    425 void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
    426   GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
    427 }
    428 
    429 void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
    430   CreateIntToIntLocations(allocator_, invoke);
    431 }
    432 
    433 void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
    434   GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
    435 }
    436 
    437 static void GenBitCount(HInvoke* instr, DataType::Type type, MacroAssembler* masm) {
    438   DCHECK(DataType::IsIntOrLongType(type)) << type;
    439   DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
    440   DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
    441 
    442   UseScratchRegisterScope temps(masm);
    443 
    444   Register src = InputRegisterAt(instr, 0);
    445   Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
    446   FPRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS();
    447 
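          // There is no scalar population-count instruction, so go through the FP/SIMD
          // registers: CNT counts the set bits in each byte, ADDV sums those byte counts
          // into a single lane, and the final FMOV moves the result back to a core register.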
    448   __ Fmov(fpr, src);
    449   __ Cnt(fpr.V8B(), fpr.V8B());
    450   __ Addv(fpr.B(), fpr.V8B());
    451   __ Fmov(dst, fpr);
    452 }
    453 
    454 void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
    455   CreateIntToIntLocations(allocator_, invoke);
    456 }
    457 
    458 void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
    459   GenBitCount(invoke, DataType::Type::kInt64, GetVIXLAssembler());
    460 }
    461 
    462 void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
    463   CreateIntToIntLocations(allocator_, invoke);
    464 }
    465 
    466 void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
    467   GenBitCount(invoke, DataType::Type::kInt32, GetVIXLAssembler());
    468 }
    469 
    470 static void GenHighestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
    471   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
    472 
    473   UseScratchRegisterScope temps(masm);
    474 
    475   Register src = InputRegisterAt(invoke, 0);
    476   Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
    477   Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
    478   size_t high_bit = (type == DataType::Type::kInt64) ? 63u : 31u;
    479   size_t clz_high_bit = (type == DataType::Type::kInt64) ? 6u : 5u;
    480 
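          // Compute 2^(high_bit - CLZ(src)): start from the top bit, clear it when src == 0
          // (CLZ then equals the register width, whose single set bit BIC shifts up to the
          // top-bit position), and shift right by CLZ(src) to land on the highest set bit.
          // E.g. for a 32-bit src = 0x00F0: CLZ = 24, dst = 0x80000000 >> 24 = 0x80.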
    481   __ Clz(temp, src);
    482   __ Mov(dst, UINT64_C(1) << high_bit);  // MOV (bitmask immediate)
    483   __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit));  // Clear dst if src was 0.
    484   __ Lsr(dst, dst, temp);
    485 }
    486 
    487 void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
    488   CreateIntToIntLocations(allocator_, invoke);
    489 }
    490 
    491 void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
    492   GenHighestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
    493 }
    494 
    495 void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) {
    496   CreateIntToIntLocations(allocator_, invoke);
    497 }
    498 
    499 void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) {
    500   GenHighestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
    501 }
    502 
    503 static void GenLowestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
    504   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
    505 
    506   UseScratchRegisterScope temps(masm);
    507 
    508   Register src = InputRegisterAt(invoke, 0);
    509   Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
    510   Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
    511 
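          // Isolate the lowest set bit with the two's-complement identity src & -src; a zero
          // input yields zero, matching Integer/Long.lowestOneBit.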
    512   __ Neg(temp, src);
    513   __ And(dst, temp, src);
    514 }
    515 
    516 void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
    517   CreateIntToIntLocations(allocator_, invoke);
    518 }
    519 
    520 void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
    521   GenLowestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
    522 }
    523 
    524 void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) {
    525   CreateIntToIntLocations(allocator_, invoke);
    526 }
    527 
    528 void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
    529   GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
    530 }
    531 
    532 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    533   LocationSummary* locations =
    534       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    535   locations->SetInAt(0, Location::RequiresFpuRegister());
    536   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
    537 }
    538 
    539 static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
    540   Location in = locations->InAt(0);
    541   Location out = locations->Out();
    542 
    543   FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in);
    544   FPRegister out_reg = is64bit ? DRegisterFrom(out) : SRegisterFrom(out);
    545 
    546   __ Fabs(out_reg, in_reg);
    547 }
    548 
    549 void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) {
    550   CreateFPToFPLocations(allocator_, invoke);
    551 }
    552 
    553 void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) {
    554   MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
    555 }
    556 
    557 void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) {
    558   CreateFPToFPLocations(allocator_, invoke);
    559 }
    560 
    561 void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) {
    562   MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
    563 }
    564 
    565 static void GenAbsInteger(LocationSummary* locations,
    566                           bool is64bit,
    567                           MacroAssembler* masm) {
    568   Location in = locations->InAt(0);
    569   Location output = locations->Out();
    570 
    571   Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in);
    572   Register out_reg = is64bit ? XRegisterFrom(output) : WRegisterFrom(output);
    573 
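          // Conditionally negate when the input is negative. Note that, as in Java,
          // Math.abs(Integer.MIN_VALUE) and Math.abs(Long.MIN_VALUE) wrap around to the
          // minimum value, which is exactly what the conditional negation produces.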
    574   __ Cmp(in_reg, Operand(0));
    575   __ Cneg(out_reg, in_reg, lt);
    576 }
    577 
    578 void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) {
    579   CreateIntToIntLocations(allocator_, invoke);
    580 }
    581 
    582 void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
    583   GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
    584 }
    585 
    586 void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) {
    587   CreateIntToIntLocations(allocator_, invoke);
    588 }
    589 
    590 void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) {
    591   GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
    592 }
    593 
    594 static void GenMinMaxFP(LocationSummary* locations,
    595                         bool is_min,
    596                         bool is_double,
    597                         MacroAssembler* masm) {
    598   Location op1 = locations->InAt(0);
    599   Location op2 = locations->InAt(1);
    600   Location out = locations->Out();
    601 
    602   FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1);
    603   FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2);
    604   FPRegister out_reg = is_double ? DRegisterFrom(out) : SRegisterFrom(out);
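          // A single FMIN/FMAX matches Java Math.min/max semantics for floats and doubles:
          // a NaN operand produces NaN, and -0.0 compares as smaller than +0.0.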
    605   if (is_min) {
    606     __ Fmin(out_reg, op1_reg, op2_reg);
    607   } else {
    608     __ Fmax(out_reg, op1_reg, op2_reg);
    609   }
    610 }
    611 
    612 static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    613   LocationSummary* locations =
    614       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    615   locations->SetInAt(0, Location::RequiresFpuRegister());
    616   locations->SetInAt(1, Location::RequiresFpuRegister());
    617   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
    618 }
    619 
    620 void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
    621   CreateFPFPToFPLocations(allocator_, invoke);
    622 }
    623 
    624 void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
    625   GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler());
    626 }
    627 
    628 void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
    629   CreateFPFPToFPLocations(allocator_, invoke);
    630 }
    631 
    632 void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
    633   GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler());
    634 }
    635 
    636 void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
    637   CreateFPFPToFPLocations(allocator_, invoke);
    638 }
    639 
    640 void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
    641   GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler());
    642 }
    643 
    644 void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
    645   CreateFPFPToFPLocations(allocator_, invoke);
    646 }
    647 
    648 void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
    649   GenMinMaxFP(
    650       invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler());
    651 }
    652 
    653 static void GenMinMax(LocationSummary* locations,
    654                       bool is_min,
    655                       bool is_long,
    656                       MacroAssembler* masm) {
    657   Location op1 = locations->InAt(0);
    658   Location op2 = locations->InAt(1);
    659   Location out = locations->Out();
    660 
    661   Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
    662   Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2);
    663   Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out);
    664 
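          // Compare and conditionally select; the signed lt/gt conditions match Java
          // int/long min/max semantics.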
    665   __ Cmp(op1_reg, op2_reg);
    666   __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt);
    667 }
    668 
    669 void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) {
    670   CreateIntIntToIntLocations(allocator_, invoke);
    671 }
    672 
    673 void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) {
    674   GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler());
    675 }
    676 
    677 void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) {
    678   CreateIntIntToIntLocations(allocator_, invoke);
    679 }
    680 
    681 void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) {
    682   GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler());
    683 }
    684 
    685 void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) {
    686   CreateIntIntToIntLocations(allocator_, invoke);
    687 }
    688 
    689 void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) {
    690   GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler());
    691 }
    692 
    693 void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) {
    694   CreateIntIntToIntLocations(allocator_, invoke);
    695 }
    696 
    697 void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) {
    698   GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler());
    699 }
    700 
    701 void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
    702   CreateFPToFPLocations(allocator_, invoke);
    703 }
    704 
    705 void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
    706   LocationSummary* locations = invoke->GetLocations();
    707   MacroAssembler* masm = GetVIXLAssembler();
    708   __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
    709 }
    710 
    711 void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
    712   CreateFPToFPLocations(allocator_, invoke);
    713 }
    714 
    715 void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
    716   LocationSummary* locations = invoke->GetLocations();
    717   MacroAssembler* masm = GetVIXLAssembler();
    718   __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
    719 }
    720 
    721 void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
    722   CreateFPToFPLocations(allocator_, invoke);
    723 }
    724 
    725 void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
    726   LocationSummary* locations = invoke->GetLocations();
    727   MacroAssembler* masm = GetVIXLAssembler();
    728   __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
    729 }
    730 
    731 void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
    732   CreateFPToFPLocations(allocator_, invoke);
    733 }
    734 
    735 void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
    736   LocationSummary* locations = invoke->GetLocations();
    737   MacroAssembler* masm = GetVIXLAssembler();
    738   __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
    739 }
    740 
    741 static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    742   LocationSummary* locations =
    743       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    744   locations->SetInAt(0, Location::RequiresFpuRegister());
    745   locations->SetOut(Location::RequiresRegister());
    746   locations->AddTemp(Location::RequiresFpuRegister());
    747 }
    748 
    749 static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
    750   // Java 8 API definition for Math.round():
    751   // Return the closest long or int to the argument, with ties rounding to positive infinity.
    752   //
    753   // There is no single instruction in ARMv8 that can support the above definition.
    754   // We choose FCVTAS here because it has the closest semantics.
    755   // FCVTAS rounds to the nearest integer, with ties away from zero.
    756   // For most inputs (positive values, zero, or NaN), this instruction is enough.
    757   // We only need a little extra handling after FCVTAS when the input is a negative half value.
    758   //
    759   // We did not choose the FCVTPS instruction because, although it rounds toward
    760   // positive infinity, it does not round to nearest.
    761   // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
    762   // If we used this instruction, more handling code would be needed for most inputs.
    763   LocationSummary* l = invoke->GetLocations();
    764   FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
    765   FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
    766   Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
    767   vixl::aarch64::Label done;
    768 
    769   // Round to nearest integer, ties away from zero.
    770   __ Fcvtas(out_reg, in_reg);
    771 
    772   // For positive values, zero or NaN inputs, rounding is done.
    773   __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);
    774 
    775   // Handle input < 0 cases.
    776   // If input is negative but not a tie, previous result (round to nearest) is valid.
    777   // If input is a negative tie, out_reg += 1.
    778   __ Frinta(tmp_fp, in_reg);
    779   __ Fsub(tmp_fp, in_reg, tmp_fp);
    780   __ Fcmp(tmp_fp, 0.5);
    781   __ Cinc(out_reg, out_reg, eq);
    782 
    783   __ Bind(&done);
    784 }
    785 
    786 void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
    787   CreateFPToIntPlusFPTempLocations(allocator_, invoke);
    788 }
    789 
    790 void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
    791   GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler());
    792 }
    793 
    794 void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
    795   CreateFPToIntPlusFPTempLocations(allocator_, invoke);
    796 }
    797 
    798 void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
    799   GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler());
    800 }
    801 
    802 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
    803   CreateIntToIntLocations(allocator_, invoke);
    804 }
    805 
    806 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
    807   MacroAssembler* masm = GetVIXLAssembler();
    808   __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
    809           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    810 }
    811 
    812 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
    813   CreateIntToIntLocations(allocator_, invoke);
    814 }
    815 
    816 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
    817   MacroAssembler* masm = GetVIXLAssembler();
    818   __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
    819          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    820 }
    821 
    822 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
    823   CreateIntToIntLocations(allocator_, invoke);
    824 }
    825 
    826 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
    827   MacroAssembler* masm = GetVIXLAssembler();
    828   __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
    829          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    830 }
    831 
    832 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
    833   CreateIntToIntLocations(allocator_, invoke);
    834 }
    835 
    836 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
    837   MacroAssembler* masm = GetVIXLAssembler();
    838   __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
    839            AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    840 }
    841 
    842 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    843   LocationSummary* locations =
    844       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    845   locations->SetInAt(0, Location::RequiresRegister());
    846   locations->SetInAt(1, Location::RequiresRegister());
    847 }
    848 
    849 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
    850   CreateIntIntToVoidLocations(allocator_, invoke);
    851 }
    852 
    853 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
    854   MacroAssembler* masm = GetVIXLAssembler();
    855   __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
    856           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    857 }
    858 
    859 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
    860   CreateIntIntToVoidLocations(allocator_, invoke);
    861 }
    862 
    863 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
    864   MacroAssembler* masm = GetVIXLAssembler();
    865   __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
    866          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    867 }
    868 
    869 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
    870   CreateIntIntToVoidLocations(allocator_, invoke);
    871 }
    872 
    873 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
    874   MacroAssembler* masm = GetVIXLAssembler();
    875   __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
    876          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    877 }
    878 
    879 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
    880   CreateIntIntToVoidLocations(allocator_, invoke);
    881 }
    882 
    883 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
    884   MacroAssembler* masm = GetVIXLAssembler();
    885   __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
    886           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    887 }
    888 
    889 void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
    890   LocationSummary* locations =
    891       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    892   locations->SetOut(Location::RequiresRegister());
    893 }
    894 
    895 void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
    896   codegen_->Load(DataType::Type::kReference, WRegisterFrom(invoke->GetLocations()->Out()),
    897                  MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
    898 }
    899 
    900 static void GenUnsafeGet(HInvoke* invoke,
    901                          DataType::Type type,
    902                          bool is_volatile,
    903                          CodeGeneratorARM64* codegen) {
    904   LocationSummary* locations = invoke->GetLocations();
    905   DCHECK((type == DataType::Type::kInt32) ||
    906          (type == DataType::Type::kInt64) ||
    907          (type == DataType::Type::kReference));
    908   Location base_loc = locations->InAt(1);
    909   Register base = WRegisterFrom(base_loc);      // Object pointer.
    910   Location offset_loc = locations->InAt(2);
    911   Register offset = XRegisterFrom(offset_loc);  // Long offset.
    912   Location trg_loc = locations->Out();
    913   Register trg = RegisterFrom(trg_loc, type);
    914 
    915   if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    916     // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
    917     Register temp = WRegisterFrom(locations->GetTemp(0));
    918     codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
    919                                                        trg_loc,
    920                                                        base,
    921                                                        /* offset */ 0u,
    922                                                        /* index */ offset_loc,
    923                                                        /* scale_factor */ 0u,
    924                                                        temp,
    925                                                        /* needs_null_check */ false,
    926                                                        is_volatile);
    927   } else {
    928     // Other cases.
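            // Plain load at (object + offset); for the volatile variants, LoadAcquire provides
            // the required acquire ordering.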
    929     MemOperand mem_op(base.X(), offset);
    930     if (is_volatile) {
    931       codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
    932     } else {
    933       codegen->Load(type, trg, mem_op);
    934     }
    935 
    936     if (type == DataType::Type::kReference) {
    937       DCHECK(trg.IsW());
    938       codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
    939     }
    940   }
    941 }
    942 
    943 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    944   bool can_call = kEmitCompilerReadBarrier &&
    945       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
    946        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
    947   LocationSummary* locations =
    948       new (allocator) LocationSummary(invoke,
    949                                       can_call
    950                                           ? LocationSummary::kCallOnSlowPath
    951                                           : LocationSummary::kNoCall,
    952                                       kIntrinsified);
    953   if (can_call && kUseBakerReadBarrier) {
    954     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
    955     // We need a temporary register for the read barrier marking slow
    956     // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier.
    957     locations->AddTemp(Location::RequiresRegister());
    958   }
    959   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
    960   locations->SetInAt(1, Location::RequiresRegister());
    961   locations->SetInAt(2, Location::RequiresRegister());
    962   locations->SetOut(Location::RequiresRegister(),
    963                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
    964 }
    965 
    966 void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
    967   CreateIntIntIntToIntLocations(allocator_, invoke);
    968 }
    969 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
    970   CreateIntIntIntToIntLocations(allocator_, invoke);
    971 }
    972 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
    973   CreateIntIntIntToIntLocations(allocator_, invoke);
    974 }
    975 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
    976   CreateIntIntIntToIntLocations(allocator_, invoke);
    977 }
    978 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
    979   CreateIntIntIntToIntLocations(allocator_, invoke);
    980 }
    981 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
    982   CreateIntIntIntToIntLocations(allocator_, invoke);
    983 }
    984 
    985 void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
    986   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_);
    987 }
    988 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
    989   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_);
    990 }
    991 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
    992   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_);
    993 }
    994 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
    995   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_);
    996 }
    997 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
    998   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_);
    999 }
   1000 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
   1001   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_);
   1002 }
   1003 
   1004 static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) {
   1005   LocationSummary* locations =
   1006       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   1007   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   1008   locations->SetInAt(1, Location::RequiresRegister());
   1009   locations->SetInAt(2, Location::RequiresRegister());
   1010   locations->SetInAt(3, Location::RequiresRegister());
   1011 }
   1012 
   1013 void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
   1014   CreateIntIntIntIntToVoid(allocator_, invoke);
   1015 }
   1016 void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
   1017   CreateIntIntIntIntToVoid(allocator_, invoke);
   1018 }
   1019 void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
   1020   CreateIntIntIntIntToVoid(allocator_, invoke);
   1021 }
   1022 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
   1023   CreateIntIntIntIntToVoid(allocator_, invoke);
   1024 }
   1025 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
   1026   CreateIntIntIntIntToVoid(allocator_, invoke);
   1027 }
   1028 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
   1029   CreateIntIntIntIntToVoid(allocator_, invoke);
   1030 }
   1031 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
   1032   CreateIntIntIntIntToVoid(allocator_, invoke);
   1033 }
   1034 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
   1035   CreateIntIntIntIntToVoid(allocator_, invoke);
   1036 }
   1037 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
   1038   CreateIntIntIntIntToVoid(allocator_, invoke);
   1039 }
   1040 
   1041 static void GenUnsafePut(HInvoke* invoke,
   1042                          DataType::Type type,
   1043                          bool is_volatile,
   1044                          bool is_ordered,
   1045                          CodeGeneratorARM64* codegen) {
   1046   LocationSummary* locations = invoke->GetLocations();
   1047   MacroAssembler* masm = codegen->GetVIXLAssembler();
   1048 
   1049   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
   1050   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
   1051   Register value = RegisterFrom(locations->InAt(3), type);
   1052   Register source = value;
   1053   MemOperand mem_op(base.X(), offset);
   1054 
   1055   {
   1056     // We use a block to end the scratch scope before the write barrier, thus
   1057     // freeing the temporary registers so they can be used in `MarkGCCard`.
   1058     UseScratchRegisterScope temps(masm);
   1059 
   1060     if (kPoisonHeapReferences && type == DataType::Type::kReference) {
   1061       DCHECK(value.IsW());
   1062       Register temp = temps.AcquireW();
   1063       __ Mov(temp.W(), value.W());
   1064       codegen->GetAssembler()->PoisonHeapReference(temp.W());
   1065       source = temp;
   1066     }
   1067 
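            // Both the volatile and the ordered (lazySet) variants are implemented with a
            // store-release; plain puts use a normal store.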
   1068     if (is_volatile || is_ordered) {
   1069       codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check */ false);
   1070     } else {
   1071       codegen->Store(type, source, mem_op);
   1072     }
   1073   }
   1074 
   1075   if (type == DataType::Type::kReference) {
   1076     bool value_can_be_null = true;  // TODO: Worth finding out this information?
   1077     codegen->MarkGCCard(base, value, value_can_be_null);
   1078   }
   1079 }
   1080 
   1081 void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
   1082   GenUnsafePut(invoke,
   1083                DataType::Type::kInt32,
   1084                /* is_volatile */ false,
   1085                /* is_ordered */ false,
   1086                codegen_);
   1087 }
   1088 void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
   1089   GenUnsafePut(invoke,
   1090                DataType::Type::kInt32,
   1091                /* is_volatile */ false,
   1092                /* is_ordered */ true,
   1093                codegen_);
   1094 }
   1095 void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
   1096   GenUnsafePut(invoke,
   1097                DataType::Type::kInt32,
   1098                /* is_volatile */ true,
   1099                /* is_ordered */ false,
   1100                codegen_);
   1101 }
   1102 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
   1103   GenUnsafePut(invoke,
   1104                DataType::Type::kReference,
   1105                /* is_volatile */ false,
   1106                /* is_ordered */ false,
   1107                codegen_);
   1108 }
   1109 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
   1110   GenUnsafePut(invoke,
   1111                DataType::Type::kReference,
   1112                /* is_volatile */ false,
   1113                /* is_ordered */ true,
   1114                codegen_);
   1115 }
   1116 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
   1117   GenUnsafePut(invoke,
   1118                DataType::Type::kReference,
   1119                /* is_volatile */ true,
   1120                /* is_ordered */ false,
   1121                codegen_);
   1122 }
   1123 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
   1124   GenUnsafePut(invoke,
   1125                DataType::Type::kInt64,
   1126                /* is_volatile */ false,
   1127                /* is_ordered */ false,
   1128                codegen_);
   1129 }
   1130 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
   1131   GenUnsafePut(invoke,
   1132                DataType::Type::kInt64,
   1133                /* is_volatile */ false,
   1134                /* is_ordered */ true,
   1135                codegen_);
   1136 }
   1137 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
   1138   GenUnsafePut(invoke,
   1139                DataType::Type::kInt64,
   1140                /* is_volatile */ true,
   1141                /* is_ordered */ false,
   1142                codegen_);
   1143 }
   1144 
   1145 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
   1146                                        HInvoke* invoke,
   1147                                        DataType::Type type) {
   1148   bool can_call = kEmitCompilerReadBarrier &&
   1149       kUseBakerReadBarrier &&
   1150       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   1151   LocationSummary* locations =
   1152       new (allocator) LocationSummary(invoke,
   1153                                       can_call
   1154                                           ? LocationSummary::kCallOnSlowPath
   1155                                           : LocationSummary::kNoCall,
   1156                                       kIntrinsified);
   1157   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   1158   locations->SetInAt(1, Location::RequiresRegister());
   1159   locations->SetInAt(2, Location::RequiresRegister());
   1160   locations->SetInAt(3, Location::RequiresRegister());
   1161   locations->SetInAt(4, Location::RequiresRegister());
   1162 
   1163   // If heap poisoning is enabled, we don't want the unpoisoning
   1164   // operations to potentially clobber the output. Likewise when
   1165   // emitting a (Baker) read barrier, which may call.
   1166   Location::OutputOverlap overlaps =
   1167       ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call)
   1168       ? Location::kOutputOverlap
   1169       : Location::kNoOutputOverlap;
   1170   locations->SetOut(Location::RequiresRegister(), overlaps);
   1171   if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   1172     // Temporary register for (Baker) read barrier.
   1173     locations->AddTemp(Location::RequiresRegister());
   1174   }
   1175 }
   1176 
   1177 static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) {
   1178   MacroAssembler* masm = codegen->GetVIXLAssembler();
   1179   LocationSummary* locations = invoke->GetLocations();
   1180 
   1181   Location out_loc = locations->Out();
   1182   Register out = WRegisterFrom(out_loc);                           // Boolean result.
   1183 
   1184   Register base = WRegisterFrom(locations->InAt(1));               // Object pointer.
   1185   Location offset_loc = locations->InAt(2);
   1186   Register offset = XRegisterFrom(offset_loc);                     // Long offset.
   1187   Register expected = RegisterFrom(locations->InAt(3), type);      // Expected.
   1188   Register value = RegisterFrom(locations->InAt(4), type);         // Value.
   1189 
   1190   // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
   1191   if (type == DataType::Type::kReference) {
   1192     // Mark card for object assuming new value is stored.
   1193     bool value_can_be_null = true;  // TODO: Worth finding out this information?
   1194     codegen->MarkGCCard(base, value, value_can_be_null);
   1195 
   1196     // The only read barrier implementation supporting the
    1197     // UnsafeCASObject intrinsic is the Baker-style read barrier.
   1198     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   1199 
   1200     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   1201       Register temp = WRegisterFrom(locations->GetTemp(0));
   1202       // Need to make sure the reference stored in the field is a to-space
   1203       // one before attempting the CAS or the CAS could fail incorrectly.
   1204       codegen->UpdateReferenceFieldWithBakerReadBarrier(
   1205           invoke,
    1206           out_loc,  // Unused as a result; only used as a "temporary" within the read barrier.
   1207           base,
   1208           /* field_offset */ offset_loc,
   1209           temp,
   1210           /* needs_null_check */ false,
   1211           /* use_load_acquire */ false);
   1212     }
   1213   }
   1214 
   1215   UseScratchRegisterScope temps(masm);
   1216   Register tmp_ptr = temps.AcquireX();                             // Pointer to actual memory.
   1217   Register tmp_value = temps.AcquireSameSizeAs(value);             // Value in memory.
   1218 
   1219   Register tmp_32 = tmp_value.W();
   1220 
   1221   __ Add(tmp_ptr, base.X(), Operand(offset));
   1222 
   1223   if (kPoisonHeapReferences && type == DataType::Type::kReference) {
   1224     codegen->GetAssembler()->PoisonHeapReference(expected);
   1225     if (value.Is(expected)) {
   1226       // Do not poison `value`, as it is the same register as
   1227       // `expected`, which has just been poisoned.
   1228     } else {
   1229       codegen->GetAssembler()->PoisonHeapReference(value);
   1230     }
   1231   }
   1232 
    1233   // do {
    1234   //   tmp_value = [tmp_ptr];
    1235   // } while (tmp_value == expected && failure([tmp_ptr] <- value));
    1236   // result = (tmp_value == expected);
   1237 
   1238   vixl::aarch64::Label loop_head, exit_loop;
   1239   __ Bind(&loop_head);
   1240   __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
   1241   __ Cmp(tmp_value, expected);
   1242   __ B(&exit_loop, ne);
   1243   __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
   1244   __ Cbnz(tmp_32, &loop_head);
   1245   __ Bind(&exit_loop);
   1246   __ Cset(out, eq);
   1247 
   1248   if (kPoisonHeapReferences && type == DataType::Type::kReference) {
   1249     codegen->GetAssembler()->UnpoisonHeapReference(expected);
   1250     if (value.Is(expected)) {
   1251       // Do not unpoison `value`, as it is the same register as
   1252       // `expected`, which has just been unpoisoned.
   1253     } else {
   1254       codegen->GetAssembler()->UnpoisonHeapReference(value);
   1255     }
   1256   }
   1257 }
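         // Roughly, the exclusive-monitor loop emitted above behaves like the following sketch
         // (the helper names are illustrative only; heap poisoning and read barriers are ignored,
         // and `field` stands for the memory at base + offset):
         //
         //   T observed;
         //   do {
         //     observed = load_acquire_exclusive(field);          // LDAXR
         //     if (observed != expected) break;                    // CMP + B.ne
         //   } while (!store_release_exclusive(field, value));     // STLXR + CBNZ
         //   out = (observed == expected);                         // CSET eq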
   1258 
   1259 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
   1260   CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt32);
   1261 }
   1262 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
   1263   CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt64);
   1264 }
   1265 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
   1266   // The only read barrier implementation supporting the
    1267   // UnsafeCASObject intrinsic is the Baker-style read barrier.
   1268   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
   1269     return;
   1270   }
   1271 
   1272   CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kReference);
   1273 }
   1274 
   1275 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
   1276   GenCas(invoke, DataType::Type::kInt32, codegen_);
   1277 }
   1278 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
   1279   GenCas(invoke, DataType::Type::kInt64, codegen_);
   1280 }
   1281 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
   1282   // The only read barrier implementation supporting the
    1283   // UnsafeCASObject intrinsic is the Baker-style read barrier.
   1284   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   1285 
   1286   GenCas(invoke, DataType::Type::kReference, codegen_);
   1287 }
   1288 
   1289 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
   1290   LocationSummary* locations =
   1291       new (allocator_) LocationSummary(invoke,
   1292                                        invoke->InputAt(1)->CanBeNull()
   1293                                            ? LocationSummary::kCallOnSlowPath
   1294                                            : LocationSummary::kNoCall,
   1295                                        kIntrinsified);
   1296   locations->SetInAt(0, Location::RequiresRegister());
   1297   locations->SetInAt(1, Location::RequiresRegister());
   1298   locations->AddTemp(Location::RequiresRegister());
   1299   locations->AddTemp(Location::RequiresRegister());
   1300   locations->AddTemp(Location::RequiresRegister());
    1301   // Need a temporary register for the String compression feature.
   1302   if (mirror::kUseStringCompression) {
   1303     locations->AddTemp(Location::RequiresRegister());
   1304   }
   1305   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   1306 }
   1307 
   1308 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
   1309   MacroAssembler* masm = GetVIXLAssembler();
   1310   LocationSummary* locations = invoke->GetLocations();
   1311 
   1312   Register str = InputRegisterAt(invoke, 0);
   1313   Register arg = InputRegisterAt(invoke, 1);
   1314   DCHECK(str.IsW());
   1315   DCHECK(arg.IsW());
   1316   Register out = OutputRegister(invoke);
   1317 
   1318   Register temp0 = WRegisterFrom(locations->GetTemp(0));
   1319   Register temp1 = WRegisterFrom(locations->GetTemp(1));
   1320   Register temp2 = WRegisterFrom(locations->GetTemp(2));
   1321   Register temp3;
   1322   if (mirror::kUseStringCompression) {
   1323     temp3 = WRegisterFrom(locations->GetTemp(3));
   1324   }
   1325 
   1326   vixl::aarch64::Label loop;
   1327   vixl::aarch64::Label find_char_diff;
   1328   vixl::aarch64::Label end;
   1329   vixl::aarch64::Label different_compression;
   1330 
   1331   // Get offsets of count and value fields within a string object.
   1332   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
   1333   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
   1334 
   1335   // Note that the null check must have been done earlier.
   1336   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1337 
   1338   // Take slow path and throw if input can be and is null.
   1339   SlowPathCodeARM64* slow_path = nullptr;
   1340   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
   1341   if (can_slow_path) {
   1342     slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
   1343     codegen_->AddSlowPath(slow_path);
   1344     __ Cbz(arg, slow_path->GetEntryLabel());
   1345   }
   1346 
   1347   // Reference equality check, return 0 if same reference.
   1348   __ Subs(out, str, arg);
   1349   __ B(&end, eq);
   1350 
   1351   if (mirror::kUseStringCompression) {
   1352     // Load `count` fields of this and argument strings.
   1353     __ Ldr(temp3, HeapOperand(str, count_offset));
   1354     __ Ldr(temp2, HeapOperand(arg, count_offset));
   1355     // Clean out compression flag from lengths.
   1356     __ Lsr(temp0, temp3, 1u);
   1357     __ Lsr(temp1, temp2, 1u);
   1358   } else {
   1359     // Load lengths of this and argument strings.
   1360     __ Ldr(temp0, HeapOperand(str, count_offset));
   1361     __ Ldr(temp1, HeapOperand(arg, count_offset));
   1362   }
   1363   // out = length diff.
   1364   __ Subs(out, temp0, temp1);
   1365   // temp0 = min(len(str), len(arg)).
   1366   __ Csel(temp0, temp1, temp0, ge);
   1367   // Shorter string is empty?
   1368   __ Cbz(temp0, &end);
   1369 
   1370   if (mirror::kUseStringCompression) {
    1371     // Check that both strings use the same compression style before using this comparison loop.
   1372     __ Eor(temp2, temp2, Operand(temp3));
    1373     // Interleave with the compression flag extraction, which is needed for both paths,
    1374     // and also set flags, which are needed only for the different-compression path.
   1375     __ Ands(temp3.W(), temp3.W(), Operand(1));
   1376     __ Tbnz(temp2, 0, &different_compression);  // Does not use flags.
   1377   }
   1378   // Store offset of string value in preparation for comparison loop.
   1379   __ Mov(temp1, value_offset);
   1380   if (mirror::kUseStringCompression) {
   1381     // For string compression, calculate the number of bytes to compare (not chars).
   1382     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
   1383     __ Lsl(temp0, temp0, temp3);
   1384   }
   1385 
   1386   UseScratchRegisterScope scratch_scope(masm);
   1387   Register temp4 = scratch_scope.AcquireX();
   1388 
   1389   // Assertions that must hold in order to compare strings 8 bytes at a time.
   1390   DCHECK_ALIGNED(value_offset, 8);
   1391   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
   1392 
   1393   const size_t char_size = DataType::Size(DataType::Type::kUint16);
   1394   DCHECK_EQ(char_size, 2u);
   1395 
   1396   // Promote temp2 to an X reg, ready for LDR.
   1397   temp2 = temp2.X();
   1398 
   1399   // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
   1400   __ Bind(&loop);
   1401   __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
   1402   __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
   1403   __ Cmp(temp4, temp2);
   1404   __ B(ne, &find_char_diff);
   1405   __ Add(temp1, temp1, char_size * 4);
   1406   // With string compression, we have compared 8 bytes, otherwise 4 chars.
   1407   __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
   1408   __ B(&loop, hi);
   1409   __ B(&end);
   1410 
   1411   // Promote temp1 to an X reg, ready for EOR.
   1412   temp1 = temp1.X();
   1413 
   1414   // Find the single character difference.
   1415   __ Bind(&find_char_diff);
   1416   // Get the bit position of the first character that differs.
   1417   __ Eor(temp1, temp2, temp4);
   1418   __ Rbit(temp1, temp1);
   1419   __ Clz(temp1, temp1);
   1420 
   1421   // If the number of chars remaining <= the index where the difference occurs (0-3), then
   1422   // the difference occurs outside the remaining string data, so just return length diff (out).
   1423   // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
   1424   // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
   1425   // unsigned when string compression is disabled.
   1426   // When it's enabled, the comparison must be unsigned.
   1427   __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
   1428   __ B(ls, &end);
   1429 
   1430   // Extract the characters and calculate the difference.
    1431   if (mirror::kUseStringCompression) {
   1432     __ Bic(temp1, temp1, 0x7);
   1433     __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
   1434   } else {
   1435     __ Bic(temp1, temp1, 0xf);
   1436   }
   1437   __ Lsr(temp2, temp2, temp1);
   1438   __ Lsr(temp4, temp4, temp1);
   1439   if (mirror::kUseStringCompression) {
    1440     // Prioritize the case of compressed strings and calculate that result first.
   1441     __ Uxtb(temp1, temp4);
   1442     __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
   1443     __ Tbz(temp3, 0u, &end);  // If actually compressed, we're done.
   1444   }
   1445   __ Uxth(temp4, temp4);
   1446   __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
   1447 
   1448   if (mirror::kUseStringCompression) {
   1449     __ B(&end);
   1450     __ Bind(&different_compression);
   1451 
   1452     // Comparison for different compression style.
   1453     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
   1454     DCHECK_EQ(c_char_size, 1u);
   1455     temp1 = temp1.W();
   1456     temp2 = temp2.W();
   1457     temp4 = temp4.W();
   1458 
   1459     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
    1460     // Note that the flags were set by the extraction of the `str` compression flag into `temp3`
    1461     // before branching to the `different_compression` label.
   1462     __ Csel(temp1, str, arg, eq);   // Pointer to the compressed string.
   1463     __ Csel(temp2, str, arg, ne);   // Pointer to the uncompressed string.
   1464 
    1465     // We want to free up temp3, which currently holds the `str` compression flag, for comparison.
    1466     // So we move it into the bottom bit of the iteration count `temp0`, which we then need to treat
    1467     // as unsigned. Start by freeing the bit with an LSL and complete the move further down with a SUB,
    1468     // which allows `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
   1469     __ Lsl(temp0, temp0, 1u);
   1470 
   1471     // Adjust temp1 and temp2 from string pointers to data pointers.
   1472     __ Add(temp1, temp1, Operand(value_offset));
   1473     __ Add(temp2, temp2, Operand(value_offset));
   1474 
   1475     // Complete the move of the compression flag.
   1476     __ Sub(temp0, temp0, Operand(temp3));
   1477 
   1478     vixl::aarch64::Label different_compression_loop;
   1479     vixl::aarch64::Label different_compression_diff;
   1480 
   1481     __ Bind(&different_compression_loop);
   1482     __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
   1483     __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
   1484     __ Subs(temp4, temp4, Operand(temp3));
   1485     __ B(&different_compression_diff, ne);
   1486     __ Subs(temp0, temp0, 2);
   1487     __ B(&different_compression_loop, hi);
   1488     __ B(&end);
   1489 
   1490     // Calculate the difference.
   1491     __ Bind(&different_compression_diff);
   1492     __ Tst(temp0, Operand(1));
   1493     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1494                   "Expecting 0=compressed, 1=uncompressed");
   1495     __ Cneg(out, temp4, ne);
   1496   }
   1497 
   1498   __ Bind(&end);
   1499 
   1500   if (can_slow_path) {
   1501     __ Bind(slow_path->GetExitLabel());
   1502   }
   1503 }
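         // At the Java level, the code above computes String.compareTo() along these lines
         // (sketch; the generated code additionally handles compressed strings and compares
         // several characters per iteration):
         //
         //   if (str == arg) return 0;
         //   int prefix_len = min(str.length(), arg.length());
         //   for (int i = 0; i < prefix_len; ++i) {
         //     if (str.charAt(i) != arg.charAt(i)) return str.charAt(i) - arg.charAt(i);
         //   }
         //   return str.length() - arg.length();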
   1504 
   1505 // The cut off for unrolling the loop in String.equals() intrinsic for const strings.
   1506 // The normal loop plus the pre-header is 9 instructions without string compression and 12
   1507 // instructions with string compression. We can compare up to 8 bytes in 4 instructions
   1508 // (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up
   1509 // to 10 instructions for the unrolled loop.
   1510 constexpr size_t kShortConstStringEqualsCutoffInBytes = 32;
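         // Worked example for the cut-off: 32 bytes is two 16-byte compares, i.e.
         // 2 * (LDP+LDP+CMP+CCMP+BNE) = 10 instructions, exactly the budget above. That
         // corresponds to a const string of up to 16 chars when uncompressed, or up to
         // 32 chars when compressed to ASCII.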
   1511 
   1512 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
   1513   if (candidate->IsLoadString()) {
   1514     HLoadString* load_string = candidate->AsLoadString();
   1515     const DexFile& dex_file = load_string->GetDexFile();
   1516     return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
   1517   }
   1518   return nullptr;
   1519 }
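         // GetConstString() returns the dex file's (modified-UTF-8) data for a string literal
         // that is known at compile time, or nullptr, and reports its UTF-16 length through
         // `utf16_length`. It is used below to specialize String.equals() against a constant.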
   1520 
   1521 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
   1522   if (kEmitCompilerReadBarrier &&
   1523       !StringEqualsOptimizations(invoke).GetArgumentIsString() &&
   1524       !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) {
   1525     // No support for this odd case (String class is moveable, not in the boot image).
   1526     return;
   1527   }
   1528 
   1529   LocationSummary* locations =
   1530       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   1531   locations->SetInAt(0, Location::RequiresRegister());
   1532   locations->SetInAt(1, Location::RequiresRegister());
   1533 
   1534   // For the generic implementation and for long const strings we need a temporary.
   1535   // We do not need it for short const strings, up to 8 bytes, see code generation below.
   1536   uint32_t const_string_length = 0u;
   1537   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
   1538   if (const_string == nullptr) {
   1539     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
   1540   }
   1541   bool is_compressed =
   1542       mirror::kUseStringCompression &&
   1543       const_string != nullptr &&
   1544       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
   1545   if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) {
   1546     locations->AddTemp(Location::RequiresRegister());
   1547   }
   1548 
   1549   // TODO: If the String.equals() is used only for an immediately following HIf, we can
   1550   // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
   1551   // Then we shall need an extra temporary register instead of the output register.
   1552   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   1553 }
   1554 
   1555 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
   1556   MacroAssembler* masm = GetVIXLAssembler();
   1557   LocationSummary* locations = invoke->GetLocations();
   1558 
   1559   Register str = WRegisterFrom(locations->InAt(0));
   1560   Register arg = WRegisterFrom(locations->InAt(1));
   1561   Register out = XRegisterFrom(locations->Out());
   1562 
   1563   UseScratchRegisterScope scratch_scope(masm);
   1564   Register temp = scratch_scope.AcquireW();
   1565   Register temp1 = scratch_scope.AcquireW();
   1566 
   1567   vixl::aarch64::Label loop;
   1568   vixl::aarch64::Label end;
   1569   vixl::aarch64::Label return_true;
   1570   vixl::aarch64::Label return_false;
   1571 
   1572   // Get offsets of count, value, and class fields within a string object.
   1573   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
   1574   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
   1575   const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   1576 
   1577   // Note that the null check must have been done earlier.
   1578   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1579 
   1580   StringEqualsOptimizations optimizations(invoke);
   1581   if (!optimizations.GetArgumentNotNull()) {
   1582     // Check if input is null, return false if it is.
   1583     __ Cbz(arg, &return_false);
   1584   }
   1585 
   1586   // Reference equality check, return true if same reference.
   1587   __ Cmp(str, arg);
   1588   __ B(&return_true, eq);
   1589 
   1590   if (!optimizations.GetArgumentIsString()) {
   1591     // Instanceof check for the argument by comparing class fields.
   1592     // All string objects must have the same type since String cannot be subclassed.
   1593     // Receiver must be a string object, so its class field is equal to all strings' class fields.
   1594     // If the argument is a string object, its class field must be equal to receiver's class field.
   1595     __ Ldr(temp, MemOperand(str.X(), class_offset));
   1596     __ Ldr(temp1, MemOperand(arg.X(), class_offset));
   1597     __ Cmp(temp, temp1);
   1598     __ B(&return_false, ne);
   1599   }
   1600 
   1601   // Check if one of the inputs is a const string. Do not special-case both strings
    1602   // being const; such cases should be handled by constant folding if needed.
   1603   uint32_t const_string_length = 0u;
   1604   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
   1605   if (const_string == nullptr) {
   1606     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
   1607     if (const_string != nullptr) {
   1608       std::swap(str, arg);  // Make sure the const string is in `str`.
   1609     }
   1610   }
   1611   bool is_compressed =
   1612       mirror::kUseStringCompression &&
   1613       const_string != nullptr &&
   1614       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
   1615 
   1616   if (const_string != nullptr) {
   1617     // Load `count` field of the argument string and check if it matches the const string.
    1618     // This also compares the compression style; if it differs, return false.
   1619     __ Ldr(temp, MemOperand(arg.X(), count_offset));
    1620     // Temporarily release temp1 as we may not be able to embed the flagged count in the CMP immediate.
   1621     scratch_scope.Release(temp1);
   1622     __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
   1623     temp1 = scratch_scope.AcquireW();
   1624     __ B(&return_false, ne);
   1625   } else {
   1626     // Load `count` fields of this and argument strings.
   1627     __ Ldr(temp, MemOperand(str.X(), count_offset));
   1628     __ Ldr(temp1, MemOperand(arg.X(), count_offset));
   1629     // Check if `count` fields are equal, return false if they're not.
    1630     // Check whether the `count` fields are equal; return false if they're not.
    1631     // This also compares the compression style; if it differs, return false.
   1632     __ B(&return_false, ne);
   1633   }
   1634 
   1635   // Assertions that must hold in order to compare strings 8 bytes at a time.
   1636   // Ok to do this because strings are zero-padded to kObjectAlignment.
   1637   DCHECK_ALIGNED(value_offset, 8);
   1638   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
   1639 
   1640   if (const_string != nullptr &&
   1641       const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
   1642                                             : kShortConstStringEqualsCutoffInBytes / 2u)) {
   1643     // Load and compare the contents. Though we know the contents of the short const string
   1644     // at compile time, materializing constants may be more code than loading from memory.
   1645     int32_t offset = value_offset;
   1646     size_t remaining_bytes =
   1647         RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
   1648     temp = temp.X();
   1649     temp1 = temp1.X();
   1650     while (remaining_bytes > sizeof(uint64_t)) {
   1651       Register temp2 = XRegisterFrom(locations->GetTemp(0));
   1652       __ Ldp(temp, temp1, MemOperand(str.X(), offset));
   1653       __ Ldp(temp2, out, MemOperand(arg.X(), offset));
   1654       __ Cmp(temp, temp2);
   1655       __ Ccmp(temp1, out, NoFlag, eq);
   1656       __ B(&return_false, ne);
   1657       offset += 2u * sizeof(uint64_t);
   1658       remaining_bytes -= 2u * sizeof(uint64_t);
   1659     }
   1660     if (remaining_bytes != 0u) {
   1661       __ Ldr(temp, MemOperand(str.X(), offset));
   1662       __ Ldr(temp1, MemOperand(arg.X(), offset));
   1663       __ Cmp(temp, temp1);
   1664       __ B(&return_false, ne);
   1665     }
   1666   } else {
   1667     // Return true if both strings are empty. Even with string compression `count == 0` means empty.
   1668     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1669                   "Expecting 0=compressed, 1=uncompressed");
   1670     __ Cbz(temp, &return_true);
   1671 
   1672     if (mirror::kUseStringCompression) {
   1673       // For string compression, calculate the number of bytes to compare (not chars).
   1674       // This could in theory exceed INT32_MAX, so treat temp as unsigned.
   1675       __ And(temp1, temp, Operand(1));    // Extract compression flag.
   1676       __ Lsr(temp, temp, 1u);             // Extract length.
   1677       __ Lsl(temp, temp, temp1);          // Calculate number of bytes to compare.
   1678     }
   1679 
    1680     // Store offset of string value in preparation for comparison loop.
   1681     __ Mov(temp1, value_offset);
   1682 
   1683     temp1 = temp1.X();
   1684     Register temp2 = XRegisterFrom(locations->GetTemp(0));
   1685     // Loop to compare strings 8 bytes at a time starting at the front of the string.
   1686     __ Bind(&loop);
   1687     __ Ldr(out, MemOperand(str.X(), temp1));
   1688     __ Ldr(temp2, MemOperand(arg.X(), temp1));
   1689     __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
   1690     __ Cmp(out, temp2);
   1691     __ B(&return_false, ne);
   1692     // With string compression, we have compared 8 bytes, otherwise 4 chars.
   1693     __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
   1694     __ B(&loop, hi);
   1695   }
   1696 
   1697   // Return true and exit the function.
   1698   // If loop does not result in returning false, we return true.
   1699   __ Bind(&return_true);
   1700   __ Mov(out, 1);
   1701   __ B(&end);
   1702 
   1703   // Return false and exit the function.
   1704   __ Bind(&return_false);
   1705   __ Mov(out, 0);
   1706   __ Bind(&end);
   1707 }
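         // In outline, the fast paths generated above are (sketch):
         //
         //   if (arg == null) return false;                        // unless known non-null
         //   if (str == arg) return true;                          // same reference
         //   if (str.getClass() != arg.getClass()) return false;   // unless arg is known to be a String
         //   if (str.count != arg.count) return false;             // length and compression flag
         //   // ... then the character data is compared 8 bytes at a time.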
   1708 
   1709 static void GenerateVisitStringIndexOf(HInvoke* invoke,
   1710                                        MacroAssembler* masm,
   1711                                        CodeGeneratorARM64* codegen,
   1712                                        bool start_at_zero) {
   1713   LocationSummary* locations = invoke->GetLocations();
   1714 
   1715   // Note that the null check must have been done earlier.
   1716   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1717 
   1718   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
   1719   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   1720   SlowPathCodeARM64* slow_path = nullptr;
   1721   HInstruction* code_point = invoke->InputAt(1);
   1722   if (code_point->IsIntConstant()) {
   1723     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
   1724       // Always needs the slow-path. We could directly dispatch to it, but this case should be
   1725       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
   1726       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
   1727       codegen->AddSlowPath(slow_path);
   1728       __ B(slow_path->GetEntryLabel());
   1729       __ Bind(slow_path->GetExitLabel());
   1730       return;
   1731     }
   1732   } else if (code_point->GetType() != DataType::Type::kUint16) {
   1733     Register char_reg = WRegisterFrom(locations->InAt(1));
   1734     __ Tst(char_reg, 0xFFFF0000);
   1735     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
   1736     codegen->AddSlowPath(slow_path);
   1737     __ B(ne, slow_path->GetEntryLabel());
   1738   }
   1739 
   1740   if (start_at_zero) {
   1741     // Start-index = 0.
   1742     Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
   1743     __ Mov(tmp_reg, 0);
   1744   }
   1745 
   1746   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
   1747   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
   1748 
   1749   if (slow_path != nullptr) {
   1750     __ Bind(slow_path->GetExitLabel());
   1751   }
   1752 }
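         // Sketch of the dispatch above for the searched code point `ch`:
         //
         //   if (ch is an int constant) {
         //     if ((uint32_t)ch > 0xFFFF) branch to the slow path unconditionally;  // e.g. 0x1F600
         //     // else no check is needed, e.g. indexOf('a') with 'a' == 0x61.
         //   } else if (ch is not typed as a char) {
         //     if ((ch & 0xFFFF0000) != 0) take the slow path;                      // TST + B.ne
         //   }
         //   // Then set start_index to 0 if requested and call the kQuickIndexOf stub.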
   1753 
   1754 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
   1755   LocationSummary* locations = new (allocator_) LocationSummary(
   1756       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
   1757   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   1758   // best to align the inputs accordingly.
   1759   InvokeRuntimeCallingConvention calling_convention;
   1760   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1761   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   1762   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
   1763 
   1764   // Need to send start_index=0.
   1765   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
   1766 }
   1767 
   1768 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
   1769   GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero */ true);
   1770 }
   1771 
   1772 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
   1773   LocationSummary* locations = new (allocator_) LocationSummary(
   1774       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
   1775   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   1776   // best to align the inputs accordingly.
   1777   InvokeRuntimeCallingConvention calling_convention;
   1778   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1779   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   1780   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   1781   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
   1782 }
   1783 
   1784 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
   1785   GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero */ false);
   1786 }
   1787 
   1788 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   1789   LocationSummary* locations = new (allocator_) LocationSummary(
   1790       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
   1791   InvokeRuntimeCallingConvention calling_convention;
   1792   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1793   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   1794   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   1795   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
   1796   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
   1797 }
   1798 
   1799 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   1800   MacroAssembler* masm = GetVIXLAssembler();
   1801   LocationSummary* locations = invoke->GetLocations();
   1802 
   1803   Register byte_array = WRegisterFrom(locations->InAt(0));
   1804   __ Cmp(byte_array, 0);
   1805   SlowPathCodeARM64* slow_path =
   1806       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
   1807   codegen_->AddSlowPath(slow_path);
   1808   __ B(eq, slow_path->GetEntryLabel());
   1809 
   1810   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
   1811   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   1812   __ Bind(slow_path->GetExitLabel());
   1813 }
   1814 
   1815 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
   1816   LocationSummary* locations =
   1817       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
   1818   InvokeRuntimeCallingConvention calling_convention;
   1819   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1820   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   1821   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   1822   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
   1823 }
   1824 
   1825 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
   1826   // No need to emit code checking whether `locations->InAt(2)` is a null
   1827   // pointer, as callers of the native method
   1828   //
   1829   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
   1830   //
   1831   // all include a null check on `data` before calling that method.
   1832   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
   1833   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   1834 }
   1835 
   1836 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
   1837   LocationSummary* locations = new (allocator_) LocationSummary(
   1838       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
   1839   InvokeRuntimeCallingConvention calling_convention;
   1840   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1841   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
   1842 }
   1843 
   1844 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
   1845   MacroAssembler* masm = GetVIXLAssembler();
   1846   LocationSummary* locations = invoke->GetLocations();
   1847 
   1848   Register string_to_copy = WRegisterFrom(locations->InAt(0));
   1849   __ Cmp(string_to_copy, 0);
   1850   SlowPathCodeARM64* slow_path =
   1851       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
   1852   codegen_->AddSlowPath(slow_path);
   1853   __ B(eq, slow_path->GetEntryLabel());
   1854 
   1855   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
   1856   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   1857   __ Bind(slow_path->GetExitLabel());
   1858 }
   1859 
   1860 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
   1861   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
   1862   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
   1863   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
   1864 
   1865   LocationSummary* const locations =
   1866       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
   1867   InvokeRuntimeCallingConvention calling_convention;
   1868 
   1869   locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
   1870   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
   1871 }
   1872 
   1873 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
   1874   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
   1875   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
   1876   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
   1877   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
   1878 
   1879   LocationSummary* const locations =
   1880       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
   1881   InvokeRuntimeCallingConvention calling_convention;
   1882 
   1883   locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
   1884   locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
   1885   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
   1886 }
   1887 
   1888 static void GenFPToFPCall(HInvoke* invoke,
   1889                           CodeGeneratorARM64* codegen,
   1890                           QuickEntrypointEnum entry) {
   1891   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
   1892 }
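         // These transcendental intrinsics simply call the corresponding quick entrypoint, with
         // the argument(s) and result in the FP registers picked by CreateFPToFPCallLocations()
         // and CreateFPFPToFPCallLocations() above (presumably d0/d1 for the arguments and d0 for
         // the result under the AArch64 calling convention). For example, Math.cos(x) below
         // becomes a call to kQuickCos.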
   1893 
   1894 void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
   1895   CreateFPToFPCallLocations(allocator_, invoke);
   1896 }
   1897 
   1898 void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
   1899   GenFPToFPCall(invoke, codegen_, kQuickCos);
   1900 }
   1901 
   1902 void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
   1903   CreateFPToFPCallLocations(allocator_, invoke);
   1904 }
   1905 
   1906 void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
   1907   GenFPToFPCall(invoke, codegen_, kQuickSin);
   1908 }
   1909 
   1910 void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
   1911   CreateFPToFPCallLocations(allocator_, invoke);
   1912 }
   1913 
   1914 void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
   1915   GenFPToFPCall(invoke, codegen_, kQuickAcos);
   1916 }
   1917 
   1918 void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
   1919   CreateFPToFPCallLocations(allocator_, invoke);
   1920 }
   1921 
   1922 void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
   1923   GenFPToFPCall(invoke, codegen_, kQuickAsin);
   1924 }
   1925 
   1926 void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
   1927   CreateFPToFPCallLocations(allocator_, invoke);
   1928 }
   1929 
   1930 void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
   1931   GenFPToFPCall(invoke, codegen_, kQuickAtan);
   1932 }
   1933 
   1934 void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
   1935   CreateFPToFPCallLocations(allocator_, invoke);
   1936 }
   1937 
   1938 void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
   1939   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
   1940 }
   1941 
   1942 void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
   1943   CreateFPToFPCallLocations(allocator_, invoke);
   1944 }
   1945 
   1946 void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
   1947   GenFPToFPCall(invoke, codegen_, kQuickCosh);
   1948 }
   1949 
   1950 void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
   1951   CreateFPToFPCallLocations(allocator_, invoke);
   1952 }
   1953 
   1954 void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
   1955   GenFPToFPCall(invoke, codegen_, kQuickExp);
   1956 }
   1957 
   1958 void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
   1959   CreateFPToFPCallLocations(allocator_, invoke);
   1960 }
   1961 
   1962 void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
   1963   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
   1964 }
   1965 
   1966 void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
   1967   CreateFPToFPCallLocations(allocator_, invoke);
   1968 }
   1969 
   1970 void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
   1971   GenFPToFPCall(invoke, codegen_, kQuickLog);
   1972 }
   1973 
   1974 void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
   1975   CreateFPToFPCallLocations(allocator_, invoke);
   1976 }
   1977 
   1978 void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
   1979   GenFPToFPCall(invoke, codegen_, kQuickLog10);
   1980 }
   1981 
   1982 void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
   1983   CreateFPToFPCallLocations(allocator_, invoke);
   1984 }
   1985 
   1986 void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
   1987   GenFPToFPCall(invoke, codegen_, kQuickSinh);
   1988 }
   1989 
   1990 void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
   1991   CreateFPToFPCallLocations(allocator_, invoke);
   1992 }
   1993 
   1994 void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
   1995   GenFPToFPCall(invoke, codegen_, kQuickTan);
   1996 }
   1997 
   1998 void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
   1999   CreateFPToFPCallLocations(allocator_, invoke);
   2000 }
   2001 
   2002 void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
   2003   GenFPToFPCall(invoke, codegen_, kQuickTanh);
   2004 }
   2005 
   2006 void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
   2007   CreateFPFPToFPCallLocations(allocator_, invoke);
   2008 }
   2009 
   2010 void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
   2011   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
   2012 }
   2013 
   2014 void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
   2015   CreateFPFPToFPCallLocations(allocator_, invoke);
   2016 }
   2017 
   2018 void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
   2019   GenFPToFPCall(invoke, codegen_, kQuickPow);
   2020 }
   2021 
   2022 void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
   2023   CreateFPFPToFPCallLocations(allocator_, invoke);
   2024 }
   2025 
   2026 void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
   2027   GenFPToFPCall(invoke, codegen_, kQuickHypot);
   2028 }
   2029 
   2030 void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
   2031   CreateFPFPToFPCallLocations(allocator_, invoke);
   2032 }
   2033 
   2034 void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
   2035   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
   2036 }
   2037 
   2038 void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   2039   LocationSummary* locations =
   2040       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   2041   locations->SetInAt(0, Location::RequiresRegister());
   2042   locations->SetInAt(1, Location::RequiresRegister());
   2043   locations->SetInAt(2, Location::RequiresRegister());
   2044   locations->SetInAt(3, Location::RequiresRegister());
   2045   locations->SetInAt(4, Location::RequiresRegister());
   2046 
   2047   locations->AddTemp(Location::RequiresRegister());
   2048   locations->AddTemp(Location::RequiresRegister());
   2049   locations->AddTemp(Location::RequiresRegister());
   2050 }
   2051 
   2052 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   2053   MacroAssembler* masm = GetVIXLAssembler();
   2054   LocationSummary* locations = invoke->GetLocations();
   2055 
   2056   // Check assumption that sizeof(Char) is 2 (used in scaling below).
   2057   const size_t char_size = DataType::Size(DataType::Type::kUint16);
   2058   DCHECK_EQ(char_size, 2u);
   2059 
   2060   // Location of data in char array buffer.
   2061   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
   2062 
   2063   // Location of char array data in string.
   2064   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
   2065 
   2066   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
    2067   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
   2068   Register srcObj = XRegisterFrom(locations->InAt(0));
   2069   Register srcBegin = XRegisterFrom(locations->InAt(1));
   2070   Register srcEnd = XRegisterFrom(locations->InAt(2));
   2071   Register dstObj = XRegisterFrom(locations->InAt(3));
   2072   Register dstBegin = XRegisterFrom(locations->InAt(4));
   2073 
   2074   Register src_ptr = XRegisterFrom(locations->GetTemp(0));
   2075   Register num_chr = XRegisterFrom(locations->GetTemp(1));
   2076   Register tmp1 = XRegisterFrom(locations->GetTemp(2));
   2077 
   2078   UseScratchRegisterScope temps(masm);
   2079   Register dst_ptr = temps.AcquireX();
   2080   Register tmp2 = temps.AcquireX();
   2081 
   2082   vixl::aarch64::Label done;
   2083   vixl::aarch64::Label compressed_string_loop;
   2084   __ Sub(num_chr, srcEnd, srcBegin);
   2085   // Early out for valid zero-length retrievals.
   2086   __ Cbz(num_chr, &done);
   2087 
    2088   // dst address to start copying to.
   2089   __ Add(dst_ptr, dstObj, Operand(data_offset));
   2090   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
   2091 
   2092   // src address to copy from.
   2093   __ Add(src_ptr, srcObj, Operand(value_offset));
   2094   vixl::aarch64::Label compressed_string_preloop;
   2095   if (mirror::kUseStringCompression) {
   2096     // Location of count in string.
   2097     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
   2098     // String's length.
   2099     __ Ldr(tmp2, MemOperand(srcObj, count_offset));
   2100     __ Tbz(tmp2, 0, &compressed_string_preloop);
   2101   }
   2102   __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
   2103 
   2104   // Do the copy.
   2105   vixl::aarch64::Label loop;
   2106   vixl::aarch64::Label remainder;
   2107 
    2108   // Avoid having to repair the value of num_chr on the < 8 character path.
   2109   __ Subs(tmp1, num_chr, 8);
   2110   __ B(lt, &remainder);
   2111 
    2112   // Keep the result of the earlier subs; we are going to fetch at least 8 characters.
   2113   __ Mov(num_chr, tmp1);
   2114 
    2115   // The main loop, used for longer fetches, loads and stores 8x16-bit characters at a time.
   2116   // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
   2117   __ Bind(&loop);
   2118   __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
   2119   __ Subs(num_chr, num_chr, 8);
   2120   __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
   2121   __ B(ge, &loop);
   2122 
   2123   __ Adds(num_chr, num_chr, 8);
   2124   __ B(eq, &done);
   2125 
   2126   // Main loop for < 8 character case and remainder handling. Loads and stores one
   2127   // 16-bit Java character at a time.
   2128   __ Bind(&remainder);
   2129   __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
   2130   __ Subs(num_chr, num_chr, 1);
   2131   __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
   2132   __ B(gt, &remainder);
   2133   __ B(&done);
   2134 
   2135   if (mirror::kUseStringCompression) {
   2136     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
   2137     DCHECK_EQ(c_char_size, 1u);
   2138     __ Bind(&compressed_string_preloop);
   2139     __ Add(src_ptr, src_ptr, Operand(srcBegin));
    2140     // Copy loop for a compressed src, widening one 8-bit character to 16 bits at a time.
   2141     __ Bind(&compressed_string_loop);
   2142     __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
   2143     __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
   2144     __ Subs(num_chr, num_chr, Operand(1));
   2145     __ B(gt, &compressed_string_loop);
   2146   }
   2147 
   2148   __ Bind(&done);
   2149 }
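         // Copy strategy used above, in outline: num_chr = srcEnd - srcBegin characters are
         // copied; for an uncompressed source, 8 chars (16 bytes) per LDP/STP iteration and then
         // one 16-bit char at a time for the remainder; for a compressed source, one byte is
         // widened to one 16-bit char per iteration (LDRB + STRH).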
   2150 
   2151 // Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
   2152 // implementation there for longer copy lengths.
   2153 static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
   2154 
   2155 static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
   2156                                                uint32_t at,
   2157                                                HInstruction* input) {
   2158   HIntConstant* const_input = input->AsIntConstant();
   2159   if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
   2160     locations->SetInAt(at, Location::RequiresRegister());
   2161   } else {
   2162     locations->SetInAt(at, Location::RegisterOrConstant(input));
   2163   }
   2164 }
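         // IsImmAddSub() accepts the AArch64 ADD/SUB immediate form: a 12-bit unsigned value,
         // optionally shifted left by 12. For example, 4095 or 0x10000 can stay as constants,
         // while a value such as 4097 cannot be encoded and is therefore forced into a register.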
   2165 
   2166 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
   2167   // Check to see if we have known failures that will cause us to have to bail out
   2168   // to the runtime, and just generate the runtime call directly.
   2169   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
   2170   HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
   2171 
   2172   // The positions must be non-negative.
   2173   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
   2174       (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
   2175     // We will have to fail anyways.
    2176     // We will have to fail anyway.
   2177   }
   2178 
   2179   // The length must be >= 0 and not so long that we would (currently) prefer libcore's
   2180   // native implementation.
   2181   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
   2182   if (length != nullptr) {
   2183     int32_t len = length->GetValue();
   2184     if (len < 0 || len > kSystemArrayCopyCharThreshold) {
   2185       // Just call as normal.
   2186       return;
   2187     }
   2188   }
   2189 
   2190   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
   2191   LocationSummary* locations =
   2192       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
   2193   // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
   2194   locations->SetInAt(0, Location::RequiresRegister());
   2195   SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
   2196   locations->SetInAt(2, Location::RequiresRegister());
   2197   SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
   2198   SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
   2199 
   2200   locations->AddTemp(Location::RequiresRegister());
   2201   locations->AddTemp(Location::RequiresRegister());
   2202   locations->AddTemp(Location::RequiresRegister());
   2203 }
   2204 
   2205 static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
   2206                                          const Location& pos,
   2207                                          const Register& input,
   2208                                          const Location& length,
   2209                                          SlowPathCodeARM64* slow_path,
   2210                                          const Register& temp,
   2211                                          bool length_is_input_length = false) {
   2212   const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
   2213   if (pos.IsConstant()) {
   2214     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
   2215     if (pos_const == 0) {
   2216       if (!length_is_input_length) {
   2217         // Check that length(input) >= length.
   2218         __ Ldr(temp, MemOperand(input, length_offset));
   2219         __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
   2220         __ B(slow_path->GetEntryLabel(), lt);
   2221       }
   2222     } else {
   2223       // Check that length(input) >= pos.
   2224       __ Ldr(temp, MemOperand(input, length_offset));
   2225       __ Subs(temp, temp, pos_const);
   2226       __ B(slow_path->GetEntryLabel(), lt);
   2227 
   2228       // Check that (length(input) - pos) >= length.
   2229       __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
   2230       __ B(slow_path->GetEntryLabel(), lt);
   2231     }
   2232   } else if (length_is_input_length) {
   2233     // The only way the copy can succeed is if pos is zero.
   2234     __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
   2235   } else {
   2236     // Check that pos >= 0.
   2237     Register pos_reg = WRegisterFrom(pos);
   2238     __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
   2239 
   2240     // Check that pos <= length(input) && (length(input) - pos) >= length.
   2241     __ Ldr(temp, MemOperand(input, length_offset));
   2242     __ Subs(temp, temp, pos_reg);
    2243     // Ccmp only if length(input) >= pos; else force NFlag so the lt branch below is taken (N != V).
   2244     __ Ccmp(temp, OperandFrom(length, DataType::Type::kInt32), NFlag, ge);
   2245     __ B(slow_path->GetEntryLabel(), lt);
   2246   }
   2247 }
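         // The checks above amount to the following (sketch, with `len(input)` the array length):
         //
         //   if (pos is the constant 0)       require len(input) >= length
         //                                    (skipped if length_is_input_length);
         //   else if (pos is a constant)      require len(input) >= pos and len(input) - pos >= length;
         //   else if (length_is_input_length) require pos == 0;
         //   else                             require pos >= 0, pos <= len(input) and
         //                                    len(input) - pos >= length.
         //
         // Any violated condition branches to `slow_path`.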
   2248 
   2249 // Compute base source address, base destination address, and end
   2250 // source address for System.arraycopy* intrinsics in `src_base`,
   2251 // `dst_base` and `src_end` respectively.
   2252 static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
   2253                                         DataType::Type type,
   2254                                         const Register& src,
   2255                                         const Location& src_pos,
   2256                                         const Register& dst,
   2257                                         const Location& dst_pos,
   2258                                         const Location& copy_length,
   2259                                         const Register& src_base,
   2260                                         const Register& dst_base,
   2261                                         const Register& src_end) {
   2262   // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
   2263   DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16)
   2264       << "Unexpected element type: " << type;
   2265   const int32_t element_size = DataType::Size(type);
   2266   const int32_t element_size_shift = DataType::SizeShift(type);
   2267   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
   2268 
   2269   if (src_pos.IsConstant()) {
   2270     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
   2271     __ Add(src_base, src, element_size * constant + data_offset);
   2272   } else {
   2273     __ Add(src_base, src, data_offset);
   2274     __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
   2275   }
   2276 
   2277   if (dst_pos.IsConstant()) {
   2278     int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
   2279     __ Add(dst_base, dst, element_size * constant + data_offset);
   2280   } else {
   2281     __ Add(dst_base, dst, data_offset);
   2282     __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
   2283   }
   2284 
   2285   if (copy_length.IsConstant()) {
   2286     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
   2287     __ Add(src_end, src_base, element_size * constant);
   2288   } else {
   2289     __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
   2290   }
   2291 }
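         // With element_size = DataType::Size(type) (e.g. 2 for kUint16), the addresses computed
         // above are:
         //
         //   src_base = src + data_offset + src_pos * element_size
         //   dst_base = dst + data_offset + dst_pos * element_size
         //   src_end  = src_base + copy_length * element_size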
   2292 
   2293 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
   2294   MacroAssembler* masm = GetVIXLAssembler();
   2295   LocationSummary* locations = invoke->GetLocations();
   2296   Register src = XRegisterFrom(locations->InAt(0));
   2297   Location src_pos = locations->InAt(1);
   2298   Register dst = XRegisterFrom(locations->InAt(2));
   2299   Location dst_pos = locations->InAt(3);
   2300   Location length = locations->InAt(4);
   2301 
   2302   SlowPathCodeARM64* slow_path =
   2303       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
   2304   codegen_->AddSlowPath(slow_path);
   2305 
   2306   // If source and destination are the same, take the slow path. Overlapping copy regions must be
    2307   // copied in reverse, and we can't always tell here whether that is needed.
   2308   __ Cmp(src, dst);
   2309   __ B(slow_path->GetEntryLabel(), eq);
   2310 
   2311   // Bail out if the source is null.
   2312   __ Cbz(src, slow_path->GetEntryLabel());
   2313 
   2314   // Bail out if the destination is null.
   2315   __ Cbz(dst, slow_path->GetEntryLabel());
   2316 
   2317   if (!length.IsConstant()) {
   2318     // Merge the following two comparisons into one:
   2319     //   If the length is negative, bail out (delegate to libcore's native implementation).
   2320     //   If the length > 32 then (currently) prefer libcore's native implementation.
   2321     __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
   2322     __ B(slow_path->GetEntryLabel(), hi);
   2323   } else {
   2324     // We have already checked in the LocationsBuilder for the constant case.
   2325     DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
   2326     DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
   2327   }
   2328 
   2329   Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
   2330   Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
   2331   Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
   2332 
   2333   CheckSystemArrayCopyPosition(masm,
   2334                                src_pos,
   2335                                src,
   2336                                length,
   2337                                slow_path,
   2338                                src_curr_addr,
   2339                                false);
   2340 
   2341   CheckSystemArrayCopyPosition(masm,
   2342                                dst_pos,
   2343                                dst,
   2344                                length,
   2345                                slow_path,
   2346                                src_curr_addr,
   2347                                false);
   2348 
   2349   src_curr_addr = src_curr_addr.X();
   2350   dst_curr_addr = dst_curr_addr.X();
   2351   src_stop_addr = src_stop_addr.X();
   2352 
   2353   GenSystemArrayCopyAddresses(masm,
   2354                               DataType::Type::kUint16,
   2355                               src,
   2356                               src_pos,
   2357                               dst,
   2358                               dst_pos,
   2359                               length,
   2360                               src_curr_addr,
   2361                               dst_curr_addr,
   2362                               src_stop_addr);
   2363 
   2364   // Iterate over the arrays and do a raw copy of the chars.
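          // Roughly equivalent pseudo-code (sketch):
          //   while (src_curr_addr != src_stop_addr) {
          //     *dst_curr_addr++ = *src_curr_addr++;  // one 16-bit char per iteration
          //   }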
   2365   const int32_t char_size = DataType::Size(DataType::Type::kUint16);
   2366   UseScratchRegisterScope temps(masm);
   2367   Register tmp = temps.AcquireW();
   2368   vixl::aarch64::Label loop, done;
   2369   __ Bind(&loop);
   2370   __ Cmp(src_curr_addr, src_stop_addr);
   2371   __ B(&done, eq);
   2372   __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
   2373   __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
   2374   __ B(&loop);
   2375   __ Bind(&done);
   2376 
   2377   __ Bind(slow_path->GetExitLabel());
   2378 }
   2379 
   2380 // Prefer libcore's native implementation for copy lengths at or above this threshold.
   2381 static constexpr int32_t kSystemArrayCopyThreshold = 128;
   2382 
   2383 // CodeGenerator::CreateSystemArrayCopyLocationSummary uses three temporary registers.
   2384 // We want to use two temporary registers in order to reduce register pressure on arm64,
   2385 // so we do not use CodeGenerator::CreateSystemArrayCopyLocationSummary.
   2386 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
   2387   // The only read barrier implementation supporting the
   2388   // SystemArrayCopy intrinsic is the Baker-style read barrier.
   2389   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
   2390     return;
   2391   }
   2392 
   2393   // Check to see if we have known failures that will cause us to have to bail out
   2394   // to the runtime, and just generate the runtime call directly.
   2395   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
   2396   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
   2397 
   2398   // The positions must be non-negative.
   2399   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
   2400       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
   2401     // We will have to fail anyway.
   2402     return;
   2403   }
   2404 
   2405   // The length must be >= 0.
   2406   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
   2407   if (length != nullptr) {
   2408     int32_t len = length->GetValue();
   2409     if (len < 0 || len >= kSystemArrayCopyThreshold) {
   2410       // Just call as normal.
   2411       return;
   2412     }
   2413   }
   2414 
   2415   SystemArrayCopyOptimizations optimizations(invoke);
   2416 
   2417   if (optimizations.GetDestinationIsSource()) {
   2418     if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
   2419       // We only support backward copying if source and destination are the same.
   2420       return;
   2421     }
   2422   }
   2423 
   2424   if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
   2425     // We currently don't intrinsify primitive copying.
   2426     return;
   2427   }
   2428 
   2429   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
   2430   LocationSummary* locations =
   2431       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
   2432   // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
   2433   locations->SetInAt(0, Location::RequiresRegister());
   2434   SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
   2435   locations->SetInAt(2, Location::RequiresRegister());
   2436   SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
   2437   SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
   2438 
   2439   locations->AddTemp(Location::RequiresRegister());
   2440   locations->AddTemp(Location::RequiresRegister());
   2441   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2442     // Temporary register IP0, obtained from the VIXL scratch register
   2443     // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
   2444     // (because that register is clobbered by ReadBarrierMarkRegX
   2445     // entry points). It cannot be used in calls to
   2446     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
   2447     // either. For these reasons, get a third extra temporary register
   2448     // from the register allocator.
   2449     locations->AddTemp(Location::RequiresRegister());
   2450   } else {
   2451     // Cases other than Baker read barriers: the third temporary will
   2452     // be acquired from the VIXL scratch register pool.
   2453   }
   2454 }
   2455 
   2456 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
   2457   // The only read barrier implementation supporting the
   2458   // SystemArrayCopy intrinsic is the Baker-style read barrier.
   2459   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   2460 
   2461   MacroAssembler* masm = GetVIXLAssembler();
   2462   LocationSummary* locations = invoke->GetLocations();
   2463 
   2464   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   2465   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   2466   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   2467   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   2468   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
   2469 
   2470   Register src = XRegisterFrom(locations->InAt(0));
   2471   Location src_pos = locations->InAt(1);
   2472   Register dest = XRegisterFrom(locations->InAt(2));
   2473   Location dest_pos = locations->InAt(3);
   2474   Location length = locations->InAt(4);
   2475   Register temp1 = WRegisterFrom(locations->GetTemp(0));
   2476   Location temp1_loc = LocationFrom(temp1);
   2477   Register temp2 = WRegisterFrom(locations->GetTemp(1));
   2478   Location temp2_loc = LocationFrom(temp2);
   2479 
   2480   SlowPathCodeARM64* intrinsic_slow_path =
   2481       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
   2482   codegen_->AddSlowPath(intrinsic_slow_path);
   2483 
   2484   vixl::aarch64::Label conditions_on_positions_validated;
   2485   SystemArrayCopyOptimizations optimizations(invoke);
   2486 
   2487   // If source and destination are the same, we go to the slow path when the copy
   2488   // would have to be done backward (i.e. when src_pos < dest_pos).
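          // In pseudo-code (sketch): if (src == dest && src_pos < dest_pos) goto slow_path;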
   2489   if (src_pos.IsConstant()) {
   2490     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
   2491     if (dest_pos.IsConstant()) {
   2492       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
   2493       if (optimizations.GetDestinationIsSource()) {
   2494         // Checked when building locations.
   2495         DCHECK_GE(src_pos_constant, dest_pos_constant);
   2496       } else if (src_pos_constant < dest_pos_constant) {
   2497         __ Cmp(src, dest);
   2498         __ B(intrinsic_slow_path->GetEntryLabel(), eq);
   2499       }
   2500       // Checked when building locations.
   2501       DCHECK(!optimizations.GetDestinationIsSource()
   2502              || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
   2503     } else {
   2504       if (!optimizations.GetDestinationIsSource()) {
   2505         __ Cmp(src, dest);
   2506         __ B(&conditions_on_positions_validated, ne);
   2507       }
   2508       __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
   2509       __ B(intrinsic_slow_path->GetEntryLabel(), gt);
   2510     }
   2511   } else {
   2512     if (!optimizations.GetDestinationIsSource()) {
   2513       __ Cmp(src, dest);
   2514       __ B(&conditions_on_positions_validated, ne);
   2515     }
   2516     __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
   2517            OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
   2518     __ B(intrinsic_slow_path->GetEntryLabel(), lt);
   2519   }
   2520 
   2521   __ Bind(&conditions_on_positions_validated);
   2522 
   2523   if (!optimizations.GetSourceIsNotNull()) {
   2524     // Bail out if the source is null.
   2525     __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
   2526   }
   2527 
   2528   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
   2529     // Bail out if the destination is null.
   2530     __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
   2531   }
   2532 
   2533   // We have already checked in the LocationsBuilder for the constant case.
   2534   if (!length.IsConstant() &&
   2535       !optimizations.GetCountIsSourceLength() &&
   2536       !optimizations.GetCountIsDestinationLength()) {
   2537     // Merge the following two comparisons into one:
   2538     //   If the length is negative, bail out (delegate to libcore's native implementation).
   2539     //   If the length >= 128 then (currently) prefer libcore's native implementation.
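            // As in the char copy above, the unsigned `hs` comparison below catches negative
            // lengths as well.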
   2540     __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
   2541     __ B(intrinsic_slow_path->GetEntryLabel(), hs);
   2542   }
   2543   // Validity checks: source.
   2544   CheckSystemArrayCopyPosition(masm,
   2545                                src_pos,
   2546                                src,
   2547                                length,
   2548                                intrinsic_slow_path,
   2549                                temp1,
   2550                                optimizations.GetCountIsSourceLength());
   2551 
   2552   // Validity checks: dest.
   2553   CheckSystemArrayCopyPosition(masm,
   2554                                dest_pos,
   2555                                dest,
   2556                                length,
   2557                                intrinsic_slow_path,
   2558                                temp1,
   2559                                optimizations.GetCountIsDestinationLength());
   2560   {
   2561     // We use a block to end the scratch scope before the write barrier, thus
   2562     // freeing the temporary registers so they can be used in `MarkGCCard`.
   2563     UseScratchRegisterScope temps(masm);
   2564     Location temp3_loc;  // Used only for Baker read barrier.
   2565     Register temp3;
   2566     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2567       temp3_loc = locations->GetTemp(2);
   2568       temp3 = WRegisterFrom(temp3_loc);
   2569     } else {
   2570       temp3 = temps.AcquireW();
   2571     }
   2572 
   2573     if (!optimizations.GetDoesNotNeedTypeCheck()) {
   2574       // Check whether all elements of the source array are assignable to the component
   2575       // type of the destination array. We do two checks: the classes are the same,
   2576       // or the destination is Object[]. If none of these checks succeed, we go to the
   2577       // slow path.
   2578 
   2579       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2580         if (!optimizations.GetSourceIsNonPrimitiveArray()) {
   2581           // /* HeapReference<Class> */ temp1 = src->klass_
   2582           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2583                                                           temp1_loc,
   2584                                                           src.W(),
   2585                                                           class_offset,
   2586                                                           temp3_loc,
   2587                                                           /* needs_null_check */ false,
   2588                                                           /* use_load_acquire */ false);
   2589           // Bail out if the source is not a non-primitive array.
   2590           // /* HeapReference<Class> */ temp1 = temp1->component_type_
   2591           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2592                                                           temp1_loc,
   2593                                                           temp1,
   2594                                                           component_offset,
   2595                                                           temp3_loc,
   2596                                                           /* needs_null_check */ false,
   2597                                                           /* use_load_acquire */ false);
   2598           __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
   2599           // If heap poisoning is enabled, `temp1` has been unpoisoned
   2600           // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
   2601           // /* uint16_t */ temp1 = static_cast<uint16_t>(temp1->primitive_type_);
   2602           __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
   2603           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2604           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
   2605         }
   2606 
   2607         // /* HeapReference<Class> */ temp1 = dest->klass_
   2608         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2609                                                         temp1_loc,
   2610                                                         dest.W(),
   2611                                                         class_offset,
   2612                                                         temp3_loc,
   2613                                                         /* needs_null_check */ false,
   2614                                                         /* use_load_acquire */ false);
   2615 
   2616         if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
   2617           // Bail out if the destination is not a non-primitive array.
   2618           //
   2619           // Register `temp1` is not trashed by the read barrier emitted
   2620           // by GenerateFieldLoadWithBakerReadBarrier below, as that
   2621           // method produces a call to a ReadBarrierMarkRegX entry point,
   2622           // which saves all potentially live registers, including
   2623           // temporaries such as `temp1`.
   2624           // /* HeapReference<Class> */ temp2 = temp1->component_type_
   2625           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2626                                                           temp2_loc,
   2627                                                           temp1,
   2628                                                           component_offset,
   2629                                                           temp3_loc,
   2630                                                           /* needs_null_check */ false,
   2631                                                           /* use_load_acquire */ false);
   2632           __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
   2633           // If heap poisoning is enabled, `temp2` has been unpoisoned
   2634           // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
   2635           // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
   2636           __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
   2637           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2638           __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
   2639         }
   2640 
   2641         // For the same reason given earlier, `temp1` is not trashed by the
   2642         // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
   2643         // /* HeapReference<Class> */ temp2 = src->klass_
   2644         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2645                                                         temp2_loc,
   2646                                                         src.W(),
   2647                                                         class_offset,
   2648                                                         temp3_loc,
   2649                                                         /* needs_null_check */ false,
   2650                                                         /* use_load_acquire */ false);
   2651         // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
   2652         __ Cmp(temp1, temp2);
   2653 
   2654         if (optimizations.GetDestinationIsTypedObjectArray()) {
   2655           vixl::aarch64::Label do_copy;
   2656           __ B(&do_copy, eq);
   2657           // /* HeapReference<Class> */ temp1 = temp1->component_type_
   2658           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2659                                                           temp1_loc,
   2660                                                           temp1,
   2661                                                           component_offset,
   2662                                                           temp3_loc,
   2663                                                           /* needs_null_check */ false,
   2664                                                           /* use_load_acquire */ false);
   2665           // /* HeapReference<Class> */ temp1 = temp1->super_class_
   2666           // We do not need to emit a read barrier for the following
   2667           // heap reference load, as `temp1` is only used in a
   2668           // comparison with null below, and this reference is not
   2669           // kept afterwards.
   2670           __ Ldr(temp1, HeapOperand(temp1, super_offset));
   2671           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
   2672           __ Bind(&do_copy);
   2673         } else {
   2674           __ B(intrinsic_slow_path->GetEntryLabel(), ne);
   2675         }
   2676       } else {
   2677         // Non read barrier code.
   2678 
   2679         // /* HeapReference<Class> */ temp1 = dest->klass_
   2680         __ Ldr(temp1, MemOperand(dest, class_offset));
   2681         // /* HeapReference<Class> */ temp2 = src->klass_
   2682         __ Ldr(temp2, MemOperand(src, class_offset));
   2683         bool did_unpoison = false;
   2684         if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
   2685             !optimizations.GetSourceIsNonPrimitiveArray()) {
   2686           // One or two of the references need to be unpoisoned. Unpoison them
   2687           // both to make the identity check valid.
   2688           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
   2689           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
   2690           did_unpoison = true;
   2691         }
   2692 
   2693         if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
   2694           // Bail out if the destination is not a non-primitive array.
   2695           // /* HeapReference<Class> */ temp3 = temp1->component_type_
   2696           __ Ldr(temp3, HeapOperand(temp1, component_offset));
   2697           __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
   2698           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
   2699           // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
   2700           __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
   2701           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2702           __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
   2703         }
   2704 
   2705         if (!optimizations.GetSourceIsNonPrimitiveArray()) {
   2706           // Bail out if the source is not a non-primitive array.
   2707           // /* HeapReference<Class> */ temp3 = temp2->component_type_
   2708           __ Ldr(temp3, HeapOperand(temp2, component_offset));
   2709           __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
   2710           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
   2711           // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
   2712           __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
   2713           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2714           __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
   2715         }
   2716 
   2717         __ Cmp(temp1, temp2);
   2718 
   2719         if (optimizations.GetDestinationIsTypedObjectArray()) {
   2720           vixl::aarch64::Label do_copy;
   2721           __ B(&do_copy, eq);
   2722           if (!did_unpoison) {
   2723             codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
   2724           }
   2725           // /* HeapReference<Class> */ temp1 = temp1->component_type_
   2726           __ Ldr(temp1, HeapOperand(temp1, component_offset));
   2727           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
   2728           // /* HeapReference<Class> */ temp1 = temp1->super_class_
   2729           __ Ldr(temp1, HeapOperand(temp1, super_offset));
   2730           // No need to unpoison the result, we're comparing against null.
   2731           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
   2732           __ Bind(&do_copy);
   2733         } else {
   2734           __ B(intrinsic_slow_path->GetEntryLabel(), ne);
   2735         }
   2736       }
   2737     } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
   2738       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
   2739       // Bail out if the source is not a non-primitive array.
   2740       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2741         // /* HeapReference<Class> */ temp1 = src->klass_
   2742         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2743                                                         temp1_loc,
   2744                                                         src.W(),
   2745                                                         class_offset,
   2746                                                         temp3_loc,
   2747                                                         /* needs_null_check */ false,
   2748                                                         /* use_load_acquire */ false);
   2749         // /* HeapReference<Class> */ temp2 = temp1->component_type_
   2750         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2751                                                         temp2_loc,
   2752                                                         temp1,
   2753                                                         component_offset,
   2754                                                         temp3_loc,
   2755                                                         /* needs_null_check */ false,
   2756                                                         /* use_load_acquire */ false);
   2757         __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
   2758         // If heap poisoning is enabled, `temp2` has been unpoisoned
   2759         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
   2760       } else {
   2761         // /* HeapReference<Class> */ temp1 = src->klass_
   2762         __ Ldr(temp1, HeapOperand(src.W(), class_offset));
   2763         codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
   2764         // /* HeapReference<Class> */ temp2 = temp1->component_type_
   2765         __ Ldr(temp2, HeapOperand(temp1, component_offset));
   2766         __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
   2767         codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
   2768       }
   2769       // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
   2770       __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
   2771       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2772       __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
   2773     }
   2774 
   2775     if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
   2776       // Constant length of zero: no need to emit the loop code at all.
   2777     } else {
   2778       Register src_curr_addr = temp1.X();
   2779       Register dst_curr_addr = temp2.X();
   2780       Register src_stop_addr = temp3.X();
   2781       vixl::aarch64::Label done;
   2782       const DataType::Type type = DataType::Type::kReference;
   2783       const int32_t element_size = DataType::Size(type);
   2784 
   2785       if (length.IsRegister()) {
   2786         // Don't enter the copy loop if the length is zero.
   2787         __ Cbz(WRegisterFrom(length), &done);
   2788       }
   2789 
   2790       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2791         // TODO: Also convert this intrinsic to the IsGcMarking strategy?
   2792 
   2793         // SystemArrayCopy implementation for Baker read barriers (see
   2794         // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
   2795         //
   2796         //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
   2797         //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   2798         //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   2799         //   if (is_gray) {
   2800         //     // Slow-path copy.
   2801         //     do {
   2802         //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
   2803         //     } while (src_ptr != end_ptr)
   2804         //   } else {
   2805         //     // Fast-path copy.
   2806         //     do {
   2807         //       *dest_ptr++ = *src_ptr++;
   2808         //     } while (src_ptr != end_ptr)
   2809         //   }
   2810 
   2811         // Make sure `tmp` is not IP0, as it is clobbered by
   2812         // ReadBarrierMarkRegX entry points in
   2813         // ReadBarrierSystemArrayCopySlowPathARM64.
   2814         DCHECK(temps.IsAvailable(ip0));
   2815         temps.Exclude(ip0);
   2816         Register tmp = temps.AcquireW();
   2817         DCHECK_NE(LocationFrom(tmp).reg(), IP0);
   2818         // Put IP0 back in the pool so that VIXL has at least one
   2819         // scratch register available to emit macro-instructions (note
   2820         // that IP1 is already used for `tmp`). Indeed some
   2821         // macro-instructions used in GenSystemArrayCopyAddresses
   2822         // (invoked hereunder) may require a scratch register (for
   2823         // instance to emit a load with a large constant offset).
   2824         temps.Include(ip0);
   2825 
   2826         // /* int32_t */ monitor = src->monitor_
   2827         __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
   2828         // /* LockWord */ lock_word = LockWord(monitor)
   2829         static_assert(sizeof(LockWord) == sizeof(int32_t),
   2830                       "art::LockWord and int32_t have different sizes.");
   2831 
   2832         // Introduce a dependency on the lock_word including rb_state,
   2833         // to prevent load-load reordering, and without using
   2834         // a memory barrier (which would be more expensive).
   2835         // `src` is unchanged by this operation, but its value now depends
   2836         // on `tmp`.
   2837         __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
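                // Note (sketch of why this works): the 32-bit load of `tmp` above
                // zero-extends, so (tmp.X() >> 32) is zero and the Add leaves `src`
                // numerically unchanged while still creating the data dependency.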
   2838 
   2839         // Compute the base source address, the base destination address, and the
   2840         // end source address for the System.arraycopy intrinsic in `src_curr_addr`,
   2841         // `dst_curr_addr` and `src_stop_addr` respectively.
   2842         // Note that `src_curr_addr` is computed from `src` (and
   2843         // `src_pos`) here, and thus honors the artificial dependency
   2844         // of `src` on `tmp`.
   2845         GenSystemArrayCopyAddresses(masm,
   2846                                     type,
   2847                                     src,
   2848                                     src_pos,
   2849                                     dest,
   2850                                     dest_pos,
   2851                                     length,
   2852                                     src_curr_addr,
   2853                                     dst_curr_addr,
   2854                                     src_stop_addr);
   2855 
   2856         // Slow path used to copy array when `src` is gray.
   2857         SlowPathCodeARM64* read_barrier_slow_path =
   2858             new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(
   2859                 invoke, LocationFrom(tmp));
   2860         codegen_->AddSlowPath(read_barrier_slow_path);
   2861 
   2862         // Given the numeric representation, it's enough to check the low bit of the rb_state.
   2863         static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
   2864         static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   2865         __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
   2866 
   2867         // Fast-path copy.
   2868         // Iterate over the arrays and do a raw copy of the objects. We don't need to
   2869         // poison/unpoison.
   2870         vixl::aarch64::Label loop;
   2871         __ Bind(&loop);
   2872         __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
   2873         __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
   2874         __ Cmp(src_curr_addr, src_stop_addr);
   2875         __ B(&loop, ne);
   2876 
   2877         __ Bind(read_barrier_slow_path->GetExitLabel());
   2878       } else {
   2879         // Non read barrier code.
   2880         // Compute the base source address, the base destination address, and the
   2881         // end source address for the System.arraycopy intrinsic in `src_curr_addr`,
   2882         // `dst_curr_addr` and `src_stop_addr` respectively.
   2883         GenSystemArrayCopyAddresses(masm,
   2884                                     type,
   2885                                     src,
   2886                                     src_pos,
   2887                                     dest,
   2888                                     dest_pos,
   2889                                     length,
   2890                                     src_curr_addr,
   2891                                     dst_curr_addr,
   2892                                     src_stop_addr);
   2893         // Iterate over the arrays and do a raw copy of the objects. We don't need to
   2894         // poison/unpoison.
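                // Sketch: do { *dst_curr_addr++ = *src_curr_addr++; } while (src_curr_addr != src_stop_addr);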
   2895         vixl::aarch64::Label loop;
   2896         __ Bind(&loop);
   2897         {
   2898           Register tmp = temps.AcquireW();
   2899           __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
   2900           __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
   2901         }
   2902         __ Cmp(src_curr_addr, src_stop_addr);
   2903         __ B(&loop, ne);
   2904       }
   2905       __ Bind(&done);
   2906     }
   2907   }
   2908 
   2909   // We only need one card marking on the destination array.
   2910   codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
   2911 
   2912   __ Bind(intrinsic_slow_path->GetExitLabel());
   2913 }
   2914 
   2915 static void GenIsInfinite(LocationSummary* locations,
   2916                           bool is64bit,
   2917                           MacroAssembler* masm) {
   2918   Operand infinity;
   2919   Register out;
   2920 
   2921   if (is64bit) {
   2922     infinity = kPositiveInfinityDouble;
   2923     out = XRegisterFrom(locations->Out());
   2924   } else {
   2925     infinity = kPositiveInfinityFloat;
   2926     out = WRegisterFrom(locations->Out());
   2927   }
   2928 
   2929   const Register zero = vixl::aarch64::Assembler::AppropriateZeroRegFor(out);
   2930 
   2931   MoveFPToInt(locations, is64bit, masm);
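          // Sketch of the check below, with `bits` the raw bits of the input:
          //   out = bits ^ bits(+infinity);
          //   result = ((out << 1) == 0);  // the shift discards the sign bit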
   2932   __ Eor(out, out, infinity);
   2933   // We don't care about the sign bit, so shift left.
   2934   __ Cmp(zero, Operand(out, LSL, 1));
   2935   __ Cset(out, eq);
   2936 }
   2937 
   2938 void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
   2939   CreateFPToIntLocations(allocator_, invoke);
   2940 }
   2941 
   2942 void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
   2943   GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
   2944 }
   2945 
   2946 void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
   2947   CreateFPToIntLocations(allocator_, invoke);
   2948 }
   2949 
   2950 void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
   2951   GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
   2952 }
   2953 
   2954 void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
   2955   InvokeRuntimeCallingConvention calling_convention;
   2956   IntrinsicVisitor::ComputeIntegerValueOfLocations(
   2957       invoke,
   2958       codegen_,
   2959       calling_convention.GetReturnLocation(DataType::Type::kReference),
   2960       Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
   2961 }
   2962 
   2963 void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
   2964   IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
   2965   LocationSummary* locations = invoke->GetLocations();
   2966   MacroAssembler* masm = GetVIXLAssembler();
   2967 
   2968   Register out = RegisterFrom(locations->Out(), DataType::Type::kReference);
   2969   UseScratchRegisterScope temps(masm);
   2970   Register temp = temps.AcquireW();
   2971   InvokeRuntimeCallingConvention calling_convention;
   2972   Register argument = calling_convention.GetRegisterAt(0);
   2973   if (invoke->InputAt(0)->IsConstant()) {
   2974     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
   2975     if (value >= info.low && value <= info.high) {
   2976       // Just embed the j.l.Integer in the code.
   2977       ScopedObjectAccess soa(Thread::Current());
   2978       mirror::Object* boxed = info.cache->Get(value + (-info.low));
   2979       DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
   2980       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
   2981       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
   2982     } else {
   2983       // Allocate and initialize a new j.l.Integer.
   2984       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
   2985       // JIT object table.
   2986       uint32_t address =
   2987           dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
   2988       __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
   2989       codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
   2990       CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   2991       __ Mov(temp.W(), value);
   2992       __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
   2993       // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
   2994       // one.
   2995       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
   2996     }
   2997   } else {
   2998     Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32);
   2999     // Check bounds of our cache.
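            // Sketch (assuming the boot-image cache covers [info.low, info.high]):
            //   uint32_t index = in - info.low;
            //   if (index >= info.high - info.low + 1) goto allocate;  // unsigned: also catches in < info.low
            //   out = info.cache[index];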
   3000     __ Add(out.W(), in.W(), -info.low);
   3001     __ Cmp(out.W(), info.high - info.low + 1);
   3002     vixl::aarch64::Label allocate, done;
   3003     __ B(&allocate, hs);
   3004     // If the value is within the bounds, load the j.l.Integer directly from the array.
   3005     uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
   3006     uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
   3007     __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
   3008     MemOperand source = HeapOperand(
   3009         temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
   3010     codegen_->Load(DataType::Type::kReference, out, source);
   3011     codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
   3012     __ B(&done);
   3013     __ Bind(&allocate);
   3014     // Otherwise allocate and initialize a new j.l.Integer.
   3015     address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
   3016     __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
   3017     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
   3018     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   3019     __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
   3020     // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
   3021     // one.
   3022     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
   3023     __ Bind(&done);
   3024   }
   3025 }
   3026 
   3027 void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
   3028   LocationSummary* locations =
   3029       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   3030   locations->SetOut(Location::RequiresRegister());
   3031 }
   3032 
   3033 void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
   3034   MacroAssembler* masm = GetVIXLAssembler();
   3035   Register out = RegisterFrom(invoke->GetLocations()->Out(), DataType::Type::kInt32);
   3036   UseScratchRegisterScope temps(masm);
   3037   Register temp = temps.AcquireX();
   3038 
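          // Sketch: load-acquire the thread's interrupted flag into `out`;
          // if it was set, clear it with a store-release.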
   3039   __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
   3040   __ Ldar(out.W(), MemOperand(temp));
   3041 
   3042   vixl::aarch64::Label done;
   3043   __ Cbz(out.W(), &done);
   3044   __ Stlr(wzr, MemOperand(temp));
   3045   __ Bind(&done);
   3046 }
   3047 
   3048 void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
   3049   LocationSummary* locations =
   3050       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   3051   locations->SetInAt(0, Location::Any());
   3052 }
   3053 
   3054 void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
   3055 
   3056 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
   3057 
   3058 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf)
   3059 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter)
   3060 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend)
   3061 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength)
   3062 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString)
   3063 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppend)
   3064 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength)
   3065 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString)
   3066 
   3067 // 1.8.
   3068 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
   3069 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
   3070 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
   3071 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
   3072 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
   3073 
   3074 UNREACHABLE_INTRINSICS(ARM64)
   3075 
   3076 #undef __
   3077 
   3078 }  // namespace arm64
   3079 }  // namespace art
   3080