      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "intrinsics_arm64.h"
     18 
     19 #include "arch/arm64/instruction_set_features_arm64.h"
     20 #include "art_method.h"
     21 #include "code_generator_arm64.h"
     22 #include "common_arm64.h"
     23 #include "entrypoints/quick/quick_entrypoints.h"
     24 #include "intrinsics.h"
     25 #include "lock_word.h"
     26 #include "mirror/array-inl.h"
     27 #include "mirror/object_array-inl.h"
     28 #include "mirror/reference.h"
     29 #include "mirror/string-inl.h"
     30 #include "scoped_thread_state_change-inl.h"
     31 #include "thread-current-inl.h"
     32 #include "utils/arm64/assembler_arm64.h"
     33 
     34 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
     35 
     36 // TODO(VIXL): Make VIXL compile with -Wshadow.
     37 #pragma GCC diagnostic push
     38 #pragma GCC diagnostic ignored "-Wshadow"
     39 #include "aarch64/disasm-aarch64.h"
     40 #include "aarch64/macro-assembler-aarch64.h"
     41 #pragma GCC diagnostic pop
     42 
     43 namespace art {
     44 
     45 namespace arm64 {
     46 
     47 using helpers::DRegisterFrom;
     48 using helpers::FPRegisterFrom;
     49 using helpers::HeapOperand;
     50 using helpers::LocationFrom;
     51 using helpers::OperandFrom;
     52 using helpers::RegisterFrom;
     53 using helpers::SRegisterFrom;
     54 using helpers::WRegisterFrom;
     55 using helpers::XRegisterFrom;
     56 using helpers::InputRegisterAt;
     57 using helpers::OutputRegister;
     58 
     59 namespace {
     60 
     61 ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
     62   return MemOperand(XRegisterFrom(location), offset);
     63 }
     64 
     65 }  // namespace
     66 
     67 MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
     68   return codegen_->GetVIXLAssembler();
     69 }
     70 
     71 ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
     72   return codegen_->GetGraph()->GetArena();
     73 }
     74 
     75 #define __ codegen->GetVIXLAssembler()->
     76 
     77 static void MoveFromReturnRegister(Location trg,
     78                                    Primitive::Type type,
     79                                    CodeGeneratorARM64* codegen) {
     80   if (!trg.IsValid()) {
     81     DCHECK(type == Primitive::kPrimVoid);
     82     return;
     83   }
     84 
     85   DCHECK_NE(type, Primitive::kPrimVoid);
     86 
     87   if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) {
     88     Register trg_reg = RegisterFrom(trg, type);
     89     Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
     90     __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
     91   } else {
     92     FPRegister trg_reg = FPRegisterFrom(trg, type);
     93     FPRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
     94     __ Fmov(trg_reg, res_reg);
     95   }
     96 }
     97 
     98 static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) {
     99   InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
    100   IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
    101 }
    102 
    103 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
    104 // call. This will copy the arguments into the positions for a regular call.
    105 //
    106 // Note: The actual parameters are required to be in the locations given by the invoke's location
    107 //       summary. If an intrinsic modifies those locations before a slow path call, they must be
    108 //       restored!
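        // The emitted sequence is: save the live registers, move the arguments into their
        // calling-convention positions, call the original method, copy the return value to the
        // expected output location, restore the live registers, and branch to the exit label.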
    109 class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
    110  public:
    111   explicit IntrinsicSlowPathARM64(HInvoke* invoke)
    112       : SlowPathCodeARM64(invoke), invoke_(invoke) { }
    113 
    114   void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    115     CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    116     __ Bind(GetEntryLabel());
    117 
    118     SaveLiveRegisters(codegen, invoke_->GetLocations());
    119 
    120     MoveArguments(invoke_, codegen);
    121 
    122     {
    123       // Ensure that no pools are emitted between the BLR (emitted by Generate*Call)
    124       // and RecordPcInfo.
    125       vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
    126       if (invoke_->IsInvokeStaticOrDirect()) {
    127         codegen->GenerateStaticOrDirectCall(
    128             invoke_->AsInvokeStaticOrDirect(), LocationFrom(kArtMethodRegister), this);
    129       } else {
    130         codegen->GenerateVirtualCall(
    131             invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister), this);
    132       }
    133     }
    134 
    135     // Copy the result back to the expected output.
    136     Location out = invoke_->GetLocations()->Out();
    137     if (out.IsValid()) {
    138       DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
    139       DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
    140       MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    141     }
    142 
    143     RestoreLiveRegisters(codegen, invoke_->GetLocations());
    144     __ B(GetExitLabel());
    145   }
    146 
    147   const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM64"; }
    148 
    149  private:
    150   // The instruction for which this slow path is emitted.
    151   HInvoke* const invoke_;
    152 
    153   DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
    154 };
    155 
    156 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
    157 class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
    158  public:
    159   ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
    160       : SlowPathCodeARM64(instruction), tmp_(tmp) {
    161     DCHECK(kEmitCompilerReadBarrier);
    162     DCHECK(kUseBakerReadBarrier);
    163   }
    164 
    165   void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    166     CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    167     LocationSummary* locations = instruction_->GetLocations();
    168     DCHECK(locations->CanCall());
    169     DCHECK(instruction_->IsInvokeStaticOrDirect())
    170         << "Unexpected instruction in read barrier arraycopy slow path: "
    171         << instruction_->DebugName();
    172     DCHECK(instruction_->GetLocations()->Intrinsified());
    173     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
    174 
    175     const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
    176 
    177     Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
    178     Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
    179     Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
    180     Register tmp_reg = WRegisterFrom(tmp_);
    181 
    182     __ Bind(GetEntryLabel());
    183     vixl::aarch64::Label slow_copy_loop;
    184     __ Bind(&slow_copy_loop);
    185     __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
    186     codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
    187     // TODO: Inline the mark bit check before calling the runtime?
    188     // tmp_reg = ReadBarrier::Mark(tmp_reg);
    189     // No need to save live registers; it's taken care of by the
    190     // entrypoint. Also, there is no need to update the stack mask,
    191     // as this runtime call will not trigger a garbage collection.
    192     // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
    193     // explanations.)
    194     DCHECK_NE(tmp_.reg(), LR);
    195     DCHECK_NE(tmp_.reg(), WSP);
    196     DCHECK_NE(tmp_.reg(), WZR);
    197     // IP0 is used internally by the ReadBarrierMarkRegX entry point
    198     // as a temporary (and is not preserved).  It thus must not be used
    199     // for any register that is live in this slow path.
    200     DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
    201     DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
    202     DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
    203     DCHECK_NE(tmp_.reg(), IP0);
    204     DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
    205     // TODO: Load the entrypoint once before the loop, instead of
    206     // loading it at every iteration.
    207     int32_t entry_point_offset =
    208         Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
    209     // This runtime call does not require a stack map.
    210     codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    211     codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
    212     __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
    213     __ Cmp(src_curr_addr, src_stop_addr);
    214     __ B(&slow_copy_loop, ne);
    215     __ B(GetExitLabel());
    216   }
    217 
    218   const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; }
    219 
    220  private:
    221   Location tmp_;
    222 
    223   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
    224 };
    225 #undef __
    226 
    227 bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
    228   Dispatch(invoke);
    229   LocationSummary* res = invoke->GetLocations();
    230   if (res == nullptr) {
    231     return false;
    232   }
    233   return res->Intrinsified();
    234 }
    235 
    236 #define __ masm->
    237 
    238 static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
    239   LocationSummary* locations = new (arena) LocationSummary(invoke,
    240                                                            LocationSummary::kNoCall,
    241                                                            kIntrinsified);
    242   locations->SetInAt(0, Location::RequiresFpuRegister());
    243   locations->SetOut(Location::RequiresRegister());
    244 }
    245 
    246 static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
    247   LocationSummary* locations = new (arena) LocationSummary(invoke,
    248                                                            LocationSummary::kNoCall,
    249                                                            kIntrinsified);
    250   locations->SetInAt(0, Location::RequiresRegister());
    251   locations->SetOut(Location::RequiresFpuRegister());
    252 }
    253 
    254 static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
    255   Location input = locations->InAt(0);
    256   Location output = locations->Out();
    257   __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
    258           is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
    259 }
    260 
    261 static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
    262   Location input = locations->InAt(0);
    263   Location output = locations->Out();
    264   __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
    265           is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
    266 }
    267 
    268 void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
    269   CreateFPToIntLocations(arena_, invoke);
    270 }
    271 void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
    272   CreateIntToFPLocations(arena_, invoke);
    273 }
    274 
    275 void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
    276   MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
    277 }
    278 void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
    279   MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
    280 }
    281 
    282 void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
    283   CreateFPToIntLocations(arena_, invoke);
    284 }
    285 void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
    286   CreateIntToFPLocations(arena_, invoke);
    287 }
    288 
    289 void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
    290   MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
    291 }
    292 void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
    293   MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
    294 }
    295 
    296 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
    297   LocationSummary* locations = new (arena) LocationSummary(invoke,
    298                                                            LocationSummary::kNoCall,
    299                                                            kIntrinsified);
    300   locations->SetInAt(0, Location::RequiresRegister());
    301   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
    302 }
    303 
    304 static void GenReverseBytes(LocationSummary* locations,
    305                             Primitive::Type type,
    306                             MacroAssembler* masm) {
    307   Location in = locations->InAt(0);
    308   Location out = locations->Out();
    309 
    310   switch (type) {
    311     case Primitive::kPrimShort:
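              // Rev16 reverses the bytes within each 16-bit halfword; Sxth then sign-extends the
              // swapped halfword so the result is a properly sign-extended Java short.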
    312       __ Rev16(WRegisterFrom(out), WRegisterFrom(in));
    313       __ Sxth(WRegisterFrom(out), WRegisterFrom(out));
    314       break;
    315     case Primitive::kPrimInt:
    316     case Primitive::kPrimLong:
    317       __ Rev(RegisterFrom(out, type), RegisterFrom(in, type));
    318       break;
    319     default:
    320       LOG(FATAL) << "Unexpected size for reverse-bytes: " << type;
    321       UNREACHABLE();
    322   }
    323 }
    324 
    325 void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
    326   CreateIntToIntLocations(arena_, invoke);
    327 }
    328 
    329 void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
    330   GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
    331 }
    332 
    333 void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
    334   CreateIntToIntLocations(arena_, invoke);
    335 }
    336 
    337 void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
    338   GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
    339 }
    340 
    341 void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
    342   CreateIntToIntLocations(arena_, invoke);
    343 }
    344 
    345 void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
    346   GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetVIXLAssembler());
    347 }
    348 
    349 static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
    350   LocationSummary* locations = new (arena) LocationSummary(invoke,
    351                                                            LocationSummary::kNoCall,
    352                                                            kIntrinsified);
    353   locations->SetInAt(0, Location::RequiresRegister());
    354   locations->SetInAt(1, Location::RequiresRegister());
    355   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
    356 }
    357 
    358 static void GenNumberOfLeadingZeros(LocationSummary* locations,
    359                                     Primitive::Type type,
    360                                     MacroAssembler* masm) {
    361   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
    362 
    363   Location in = locations->InAt(0);
    364   Location out = locations->Out();
    365 
    366   __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
    367 }
    368 
    369 void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
    370   CreateIntToIntLocations(arena_, invoke);
    371 }
    372 
    373 void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
    374   GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
    375 }
    376 
    377 void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
    378   CreateIntToIntLocations(arena_, invoke);
    379 }
    380 
    381 void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
    382   GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
    383 }
    384 
    385 static void GenNumberOfTrailingZeros(LocationSummary* locations,
    386                                      Primitive::Type type,
    387                                      MacroAssembler* masm) {
    388   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
    389 
    390   Location in = locations->InAt(0);
    391   Location out = locations->Out();
    392 
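          // ARM64 has no count-trailing-zeros instruction, so reverse the bits with Rbit and
          // count leading zeros: CTZ(x) == CLZ(reverse_bits(x)).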
    393   __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
    394   __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
    395 }
    396 
    397 void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
    398   CreateIntToIntLocations(arena_, invoke);
    399 }
    400 
    401 void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
    402   GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
    403 }
    404 
    405 void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
    406   CreateIntToIntLocations(arena_, invoke);
    407 }
    408 
    409 void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
    410   GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
    411 }
    412 
    413 static void GenReverse(LocationSummary* locations,
    414                        Primitive::Type type,
    415                        MacroAssembler* masm) {
    416   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
    417 
    418   Location in = locations->InAt(0);
    419   Location out = locations->Out();
    420 
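          // A single Rbit reverses the bit order of the 32-bit or 64-bit input register.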
    421   __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
    422 }
    423 
    424 void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
    425   CreateIntToIntLocations(arena_, invoke);
    426 }
    427 
    428 void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
    429   GenReverse(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
    430 }
    431 
    432 void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
    433   CreateIntToIntLocations(arena_, invoke);
    434 }
    435 
    436 void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
    437   GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
    438 }
    439 
    440 static void GenBitCount(HInvoke* instr, Primitive::Type type, MacroAssembler* masm) {
    441   DCHECK(Primitive::IsIntOrLongType(type)) << type;
    442   DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
    443   DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
    444 
    445   UseScratchRegisterScope temps(masm);
    446 
    447   Register src = InputRegisterAt(instr, 0);
    448   Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
    449   FPRegister fpr = (type == Primitive::kPrimLong) ? temps.AcquireD() : temps.AcquireS();
    450 
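          // ARM64 has no general-purpose popcount instruction, so the value is moved into a SIMD
          // register, Cnt computes a per-byte bit count, Addv sums the byte lanes into one byte,
          // and the total is moved back to the core destination register.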
    451   __ Fmov(fpr, src);
    452   __ Cnt(fpr.V8B(), fpr.V8B());
    453   __ Addv(fpr.B(), fpr.V8B());
    454   __ Fmov(dst, fpr);
    455 }
    456 
    457 void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
    458   CreateIntToIntLocations(arena_, invoke);
    459 }
    460 
    461 void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
    462   GenBitCount(invoke, Primitive::kPrimLong, GetVIXLAssembler());
    463 }
    464 
    465 void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
    466   CreateIntToIntLocations(arena_, invoke);
    467 }
    468 
    469 void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
    470   GenBitCount(invoke, Primitive::kPrimInt, GetVIXLAssembler());
    471 }
    472 
    473 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
    474   LocationSummary* locations = new (arena) LocationSummary(invoke,
    475                                                            LocationSummary::kNoCall,
    476                                                            kIntrinsified);
    477   locations->SetInAt(0, Location::RequiresFpuRegister());
    478   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
    479 }
    480 
    481 static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
    482   Location in = locations->InAt(0);
    483   Location out = locations->Out();
    484 
    485   FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in);
    486   FPRegister out_reg = is64bit ? DRegisterFrom(out) : SRegisterFrom(out);
    487 
    488   __ Fabs(out_reg, in_reg);
    489 }
    490 
    491 void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) {
    492   CreateFPToFPLocations(arena_, invoke);
    493 }
    494 
    495 void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) {
    496   MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
    497 }
    498 
    499 void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) {
    500   CreateFPToFPLocations(arena_, invoke);
    501 }
    502 
    503 void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) {
    504   MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
    505 }
    506 
    507 static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
    508   LocationSummary* locations = new (arena) LocationSummary(invoke,
    509                                                            LocationSummary::kNoCall,
    510                                                            kIntrinsified);
    511   locations->SetInAt(0, Location::RequiresRegister());
    512   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
    513 }
    514 
    515 static void GenAbsInteger(LocationSummary* locations,
    516                           bool is64bit,
    517                           MacroAssembler* masm) {
    518   Location in = locations->InAt(0);
    519   Location output = locations->Out();
    520 
    521   Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in);
    522   Register out_reg = is64bit ? XRegisterFrom(output) : WRegisterFrom(output);
    523 
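          // Compare against zero and conditionally negate: out = (in < 0) ? -in : in.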
    524   __ Cmp(in_reg, Operand(0));
    525   __ Cneg(out_reg, in_reg, lt);
    526 }
    527 
    528 void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) {
    529   CreateIntToInt(arena_, invoke);
    530 }
    531 
    532 void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
    533   GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
    534 }
    535 
    536 void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) {
    537   CreateIntToInt(arena_, invoke);
    538 }
    539 
    540 void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) {
    541   GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
    542 }
    543 
    544 static void GenMinMaxFP(LocationSummary* locations,
    545                         bool is_min,
    546                         bool is_double,
    547                         MacroAssembler* masm) {
    548   Location op1 = locations->InAt(0);
    549   Location op2 = locations->InAt(1);
    550   Location out = locations->Out();
    551 
    552   FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1);
    553   FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2);
    554   FPRegister out_reg = is_double ? DRegisterFrom(out) : SRegisterFrom(out);
    555   if (is_min) {
    556     __ Fmin(out_reg, op1_reg, op2_reg);
    557   } else {
    558     __ Fmax(out_reg, op1_reg, op2_reg);
    559   }
    560 }
    561 
    562 static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
    563   LocationSummary* locations = new (arena) LocationSummary(invoke,
    564                                                            LocationSummary::kNoCall,
    565                                                            kIntrinsified);
    566   locations->SetInAt(0, Location::RequiresFpuRegister());
    567   locations->SetInAt(1, Location::RequiresFpuRegister());
    568   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
    569 }
    570 
    571 void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
    572   CreateFPFPToFPLocations(arena_, invoke);
    573 }
    574 
    575 void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
    576   GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler());
    577 }
    578 
    579 void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
    580   CreateFPFPToFPLocations(arena_, invoke);
    581 }
    582 
    583 void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
    584   GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler());
    585 }
    586 
    587 void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
    588   CreateFPFPToFPLocations(arena_, invoke);
    589 }
    590 
    591 void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
    592   GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler());
    593 }
    594 
    595 void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
    596   CreateFPFPToFPLocations(arena_, invoke);
    597 }
    598 
    599 void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
    600   GenMinMaxFP(
    601       invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler());
    602 }
    603 
    604 static void GenMinMax(LocationSummary* locations,
    605                       bool is_min,
    606                       bool is_long,
    607                       MacroAssembler* masm) {
    608   Location op1 = locations->InAt(0);
    609   Location op2 = locations->InAt(1);
    610   Location out = locations->Out();
    611 
    612   Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
    613   Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2);
    614   Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out);
    615 
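          // A compare followed by a conditional select yields a branchless min/max:
          // out = is_min ? ((op1 < op2) ? op1 : op2) : ((op1 > op2) ? op1 : op2).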
    616   __ Cmp(op1_reg, op2_reg);
    617   __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt);
    618 }
    619 
    620 void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) {
    621   CreateIntIntToIntLocations(arena_, invoke);
    622 }
    623 
    624 void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) {
    625   GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler());
    626 }
    627 
    628 void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) {
    629   CreateIntIntToIntLocations(arena_, invoke);
    630 }
    631 
    632 void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) {
    633   GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler());
    634 }
    635 
    636 void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) {
    637   CreateIntIntToIntLocations(arena_, invoke);
    638 }
    639 
    640 void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) {
    641   GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler());
    642 }
    643 
    644 void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) {
    645   CreateIntIntToIntLocations(arena_, invoke);
    646 }
    647 
    648 void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) {
    649   GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler());
    650 }
    651 
    652 void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
    653   CreateFPToFPLocations(arena_, invoke);
    654 }
    655 
    656 void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
    657   LocationSummary* locations = invoke->GetLocations();
    658   MacroAssembler* masm = GetVIXLAssembler();
    659   __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
    660 }
    661 
    662 void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
    663   CreateFPToFPLocations(arena_, invoke);
    664 }
    665 
    666 void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
    667   LocationSummary* locations = invoke->GetLocations();
    668   MacroAssembler* masm = GetVIXLAssembler();
    669   __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
    670 }
    671 
    672 void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
    673   CreateFPToFPLocations(arena_, invoke);
    674 }
    675 
    676 void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
    677   LocationSummary* locations = invoke->GetLocations();
    678   MacroAssembler* masm = GetVIXLAssembler();
    679   __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
    680 }
    681 
    682 void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
    683   CreateFPToFPLocations(arena_, invoke);
    684 }
    685 
    686 void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
    687   LocationSummary* locations = invoke->GetLocations();
    688   MacroAssembler* masm = GetVIXLAssembler();
    689   __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
    690 }
    691 
    692 static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
    693   LocationSummary* locations = new (arena) LocationSummary(invoke,
    694                                                            LocationSummary::kNoCall,
    695                                                            kIntrinsified);
    696   locations->SetInAt(0, Location::RequiresFpuRegister());
    697   locations->SetOut(Location::RequiresRegister());
    698   locations->AddTemp(Location::RequiresFpuRegister());
    699 }
    700 
    701 static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
    702   // Java 8 API definition for Math.round():
    703   // Return the closest long or int to the argument, with ties rounding to positive infinity.
    704   //
    705   // There is no single instruction in ARMv8 that can support the above definition.
    706   // We choose to use FCVTAS here because it has the closest semantics.
    707   // FCVTAS performs rounding to nearest integer, ties away from zero.
    708   // For most inputs (positive values, zero or NaN), this instruction is enough.
    709   // We only need a little extra handling after FCVTAS when the input is a negative halfway value.
    710   //
    711   // We did not choose the FCVTPS instruction because, although it rounds toward
    712   // positive infinity, it does not round to nearest.
    713   // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
    714   // If we used this instruction, more handling code would be needed for most inputs.
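          // For example, for an input of -2.5: FCVTAS produces -3 (ties away from zero), the
          // sign bit is set so the fix-up below runs, FRINTA gives -3.0, -2.5 - (-3.0) == 0.5,
          // and the CINC bumps the result to -2, matching Math.round(-2.5) == -2.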
    715   LocationSummary* l = invoke->GetLocations();
    716   FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
    717   FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
    718   Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
    719   vixl::aarch64::Label done;
    720 
    721   // Round to nearest integer, ties away from zero.
    722   __ Fcvtas(out_reg, in_reg);
    723 
    724   // For positive values, zero or NaN inputs, rounding is done.
    725   __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);
    726 
    727   // Handle input < 0 cases.
    728   // If input is negative but not a tie, previous result (round to nearest) is valid.
    729   // If input is a negative tie, out_reg += 1.
    730   __ Frinta(tmp_fp, in_reg);
    731   __ Fsub(tmp_fp, in_reg, tmp_fp);
    732   __ Fcmp(tmp_fp, 0.5);
    733   __ Cinc(out_reg, out_reg, eq);
    734 
    735   __ Bind(&done);
    736 }
    737 
    738 void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
    739   CreateFPToIntPlusFPTempLocations(arena_, invoke);
    740 }
    741 
    742 void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
    743   GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler());
    744 }
    745 
    746 void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
    747   CreateFPToIntPlusFPTempLocations(arena_, invoke);
    748 }
    749 
    750 void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
    751   GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler());
    752 }
    753 
    754 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
    755   CreateIntToIntLocations(arena_, invoke);
    756 }
    757 
    758 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
    759   MacroAssembler* masm = GetVIXLAssembler();
    760   __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
    761           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    762 }
    763 
    764 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
    765   CreateIntToIntLocations(arena_, invoke);
    766 }
    767 
    768 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
    769   MacroAssembler* masm = GetVIXLAssembler();
    770   __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
    771          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    772 }
    773 
    774 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
    775   CreateIntToIntLocations(arena_, invoke);
    776 }
    777 
    778 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
    779   MacroAssembler* masm = GetVIXLAssembler();
    780   __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
    781          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    782 }
    783 
    784 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
    785   CreateIntToIntLocations(arena_, invoke);
    786 }
    787 
    788 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
    789   MacroAssembler* masm = GetVIXLAssembler();
    790   __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
    791            AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    792 }
    793 
    794 static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
    795   LocationSummary* locations = new (arena) LocationSummary(invoke,
    796                                                            LocationSummary::kNoCall,
    797                                                            kIntrinsified);
    798   locations->SetInAt(0, Location::RequiresRegister());
    799   locations->SetInAt(1, Location::RequiresRegister());
    800 }
    801 
    802 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
    803   CreateIntIntToVoidLocations(arena_, invoke);
    804 }
    805 
    806 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
    807   MacroAssembler* masm = GetVIXLAssembler();
    808   __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
    809           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    810 }
    811 
    812 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
    813   CreateIntIntToVoidLocations(arena_, invoke);
    814 }
    815 
    816 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
    817   MacroAssembler* masm = GetVIXLAssembler();
    818   __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
    819          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    820 }
    821 
    822 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
    823   CreateIntIntToVoidLocations(arena_, invoke);
    824 }
    825 
    826 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
    827   MacroAssembler* masm = GetVIXLAssembler();
    828   __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
    829          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    830 }
    831 
    832 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
    833   CreateIntIntToVoidLocations(arena_, invoke);
    834 }
    835 
    836 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
    837   MacroAssembler* masm = GetVIXLAssembler();
    838   __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
    839           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
    840 }
    841 
    842 void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
    843   LocationSummary* locations = new (arena_) LocationSummary(invoke,
    844                                                             LocationSummary::kNoCall,
    845                                                             kIntrinsified);
    846   locations->SetOut(Location::RequiresRegister());
    847 }
    848 
    849 void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
    850   codegen_->Load(Primitive::kPrimNot, WRegisterFrom(invoke->GetLocations()->Out()),
    851                  MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
    852 }
    853 
    854 static void GenUnsafeGet(HInvoke* invoke,
    855                          Primitive::Type type,
    856                          bool is_volatile,
    857                          CodeGeneratorARM64* codegen) {
    858   LocationSummary* locations = invoke->GetLocations();
    859   DCHECK((type == Primitive::kPrimInt) ||
    860          (type == Primitive::kPrimLong) ||
    861          (type == Primitive::kPrimNot));
    862   Location base_loc = locations->InAt(1);
    863   Register base = WRegisterFrom(base_loc);      // Object pointer.
    864   Location offset_loc = locations->InAt(2);
    865   Register offset = XRegisterFrom(offset_loc);  // Long offset.
    866   Location trg_loc = locations->Out();
    867   Register trg = RegisterFrom(trg_loc, type);
    868 
    869   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    870     // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
    871     Register temp = WRegisterFrom(locations->GetTemp(0));
    872     codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
    873                                                        trg_loc,
    874                                                        base,
    875                                                        /* offset */ 0u,
    876                                                        /* index */ offset_loc,
    877                                                        /* scale_factor */ 0u,
    878                                                        temp,
    879                                                        /* needs_null_check */ false,
    880                                                        is_volatile);
    881   } else {
    882     // Other cases.
    883     MemOperand mem_op(base.X(), offset);
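            // A volatile get is implemented with a load-acquire, which orders this load before
            // any subsequent memory accesses; a non-volatile get uses a plain load.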
    884     if (is_volatile) {
    885       codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
    886     } else {
    887       codegen->Load(type, trg, mem_op);
    888     }
    889 
    890     if (type == Primitive::kPrimNot) {
    891       DCHECK(trg.IsW());
    892       codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
    893     }
    894   }
    895 }
    896 
    897 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
    898   bool can_call = kEmitCompilerReadBarrier &&
    899       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
    900        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
    901   LocationSummary* locations = new (arena) LocationSummary(invoke,
    902                                                            (can_call
    903                                                                 ? LocationSummary::kCallOnSlowPath
    904                                                                 : LocationSummary::kNoCall),
    905                                                            kIntrinsified);
    906   if (can_call && kUseBakerReadBarrier) {
    907     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
    908     // We need a temporary register for the read barrier marking slow
    909     // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier.
    910     locations->AddTemp(Location::RequiresRegister());
    911   }
    912   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
    913   locations->SetInAt(1, Location::RequiresRegister());
    914   locations->SetInAt(2, Location::RequiresRegister());
    915   locations->SetOut(Location::RequiresRegister(),
    916                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
    917 }
    918 
    919 void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
    920   CreateIntIntIntToIntLocations(arena_, invoke);
    921 }
    922 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
    923   CreateIntIntIntToIntLocations(arena_, invoke);
    924 }
    925 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
    926   CreateIntIntIntToIntLocations(arena_, invoke);
    927 }
    928 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
    929   CreateIntIntIntToIntLocations(arena_, invoke);
    930 }
    931 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
    932   CreateIntIntIntToIntLocations(arena_, invoke);
    933 }
    934 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
    935   CreateIntIntIntToIntLocations(arena_, invoke);
    936 }
    937 
    938 void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
    939   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
    940 }
    941 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
    942   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
    943 }
    944 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
    945   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
    946 }
    947 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
    948   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
    949 }
    950 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
    951   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
    952 }
    953 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
    954   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
    955 }
    956 
    957 static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) {
    958   LocationSummary* locations = new (arena) LocationSummary(invoke,
    959                                                            LocationSummary::kNoCall,
    960                                                            kIntrinsified);
    961   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
    962   locations->SetInAt(1, Location::RequiresRegister());
    963   locations->SetInAt(2, Location::RequiresRegister());
    964   locations->SetInAt(3, Location::RequiresRegister());
    965 }
    966 
    967 void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
    968   CreateIntIntIntIntToVoid(arena_, invoke);
    969 }
    970 void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
    971   CreateIntIntIntIntToVoid(arena_, invoke);
    972 }
    973 void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
    974   CreateIntIntIntIntToVoid(arena_, invoke);
    975 }
    976 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
    977   CreateIntIntIntIntToVoid(arena_, invoke);
    978 }
    979 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
    980   CreateIntIntIntIntToVoid(arena_, invoke);
    981 }
    982 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
    983   CreateIntIntIntIntToVoid(arena_, invoke);
    984 }
    985 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
    986   CreateIntIntIntIntToVoid(arena_, invoke);
    987 }
    988 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
    989   CreateIntIntIntIntToVoid(arena_, invoke);
    990 }
    991 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
    992   CreateIntIntIntIntToVoid(arena_, invoke);
    993 }
    994 
    995 static void GenUnsafePut(HInvoke* invoke,
    996                          Primitive::Type type,
    997                          bool is_volatile,
    998                          bool is_ordered,
    999                          CodeGeneratorARM64* codegen) {
   1000   LocationSummary* locations = invoke->GetLocations();
   1001   MacroAssembler* masm = codegen->GetVIXLAssembler();
   1002 
   1003   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
   1004   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
   1005   Register value = RegisterFrom(locations->InAt(3), type);
   1006   Register source = value;
   1007   MemOperand mem_op(base.X(), offset);
   1008 
   1009   {
   1010     // We use a block to end the scratch scope before the write barrier, thus
   1011     // freeing the temporary registers so they can be used in `MarkGCCard`.
   1012     UseScratchRegisterScope temps(masm);
   1013 
   1014     if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
   1015       DCHECK(value.IsW());
   1016       Register temp = temps.AcquireW();
   1017       __ Mov(temp.W(), value.W());
   1018       codegen->GetAssembler()->PoisonHeapReference(temp.W());
   1019       source = temp;
   1020     }
   1021 
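            // Volatile and ordered puts are implemented with a store-release, which makes all
            // earlier writes visible before the store itself; plain puts use a normal store.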
   1022     if (is_volatile || is_ordered) {
   1023       codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check */ false);
   1024     } else {
   1025       codegen->Store(type, source, mem_op);
   1026     }
   1027   }
   1028 
   1029   if (type == Primitive::kPrimNot) {
   1030     bool value_can_be_null = true;  // TODO: Worth finding out this information?
   1031     codegen->MarkGCCard(base, value, value_can_be_null);
   1032   }
   1033 }
   1034 
   1035 void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
   1036   GenUnsafePut(invoke,
   1037                Primitive::kPrimInt,
   1038                /* is_volatile */ false,
   1039                /* is_ordered */ false,
   1040                codegen_);
   1041 }
   1042 void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
   1043   GenUnsafePut(invoke,
   1044                Primitive::kPrimInt,
   1045                /* is_volatile */ false,
   1046                /* is_ordered */ true,
   1047                codegen_);
   1048 }
   1049 void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
   1050   GenUnsafePut(invoke,
   1051                Primitive::kPrimInt,
   1052                /* is_volatile */ true,
   1053                /* is_ordered */ false,
   1054                codegen_);
   1055 }
   1056 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
   1057   GenUnsafePut(invoke,
   1058                Primitive::kPrimNot,
   1059                /* is_volatile */ false,
   1060                /* is_ordered */ false,
   1061                codegen_);
   1062 }
   1063 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
   1064   GenUnsafePut(invoke,
   1065                Primitive::kPrimNot,
   1066                /* is_volatile */ false,
   1067                /* is_ordered */ true,
   1068                codegen_);
   1069 }
   1070 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
   1071   GenUnsafePut(invoke,
   1072                Primitive::kPrimNot,
   1073                /* is_volatile */ true,
   1074                /* is_ordered */ false,
   1075                codegen_);
   1076 }
   1077 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
   1078   GenUnsafePut(invoke,
   1079                Primitive::kPrimLong,
   1080                /* is_volatile */ false,
   1081                /* is_ordered */ false,
   1082                codegen_);
   1083 }
   1084 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
   1085   GenUnsafePut(invoke,
   1086                Primitive::kPrimLong,
   1087                /* is_volatile */ false,
   1088                /* is_ordered */ true,
   1089                codegen_);
   1090 }
   1091 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
   1092   GenUnsafePut(invoke,
   1093                Primitive::kPrimLong,
   1094                /* is_volatile */ true,
   1095                /* is_ordered */ false,
   1096                codegen_);
   1097 }
   1098 
   1099 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
   1100                                        HInvoke* invoke,
   1101                                        Primitive::Type type) {
   1102   bool can_call = kEmitCompilerReadBarrier &&
   1103       kUseBakerReadBarrier &&
   1104       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   1105   LocationSummary* locations = new (arena) LocationSummary(invoke,
   1106                                                            (can_call
   1107                                                                 ? LocationSummary::kCallOnSlowPath
   1108                                                                 : LocationSummary::kNoCall),
   1109                                                            kIntrinsified);
   1110   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   1111   locations->SetInAt(1, Location::RequiresRegister());
   1112   locations->SetInAt(2, Location::RequiresRegister());
   1113   locations->SetInAt(3, Location::RequiresRegister());
   1114   locations->SetInAt(4, Location::RequiresRegister());
   1115 
   1116   // If heap poisoning is enabled, we don't want the unpoisoning
   1117   // operations to potentially clobber the output. Likewise when
   1118   // emitting a (Baker) read barrier, which may make a runtime call.
   1119   Location::OutputOverlap overlaps =
   1120       ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
   1121       ? Location::kOutputOverlap
   1122       : Location::kNoOutputOverlap;
   1123   locations->SetOut(Location::RequiresRegister(), overlaps);
   1124   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   1125     // Temporary register for (Baker) read barrier.
   1126     locations->AddTemp(Location::RequiresRegister());
   1127   }
   1128 }
   1129 
   1130 static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* codegen) {
   1131   MacroAssembler* masm = codegen->GetVIXLAssembler();
   1132   LocationSummary* locations = invoke->GetLocations();
   1133 
   1134   Location out_loc = locations->Out();
   1135   Register out = WRegisterFrom(out_loc);                           // Boolean result.
   1136 
   1137   Register base = WRegisterFrom(locations->InAt(1));               // Object pointer.
   1138   Location offset_loc = locations->InAt(2);
   1139   Register offset = XRegisterFrom(offset_loc);                     // Long offset.
   1140   Register expected = RegisterFrom(locations->InAt(3), type);      // Expected.
   1141   Register value = RegisterFrom(locations->InAt(4), type);         // Value.
   1142 
   1143   // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
   1144   if (type == Primitive::kPrimNot) {
   1145     // Mark card for object assuming new value is stored.
   1146     bool value_can_be_null = true;  // TODO: Worth finding out this information?
   1147     codegen->MarkGCCard(base, value, value_can_be_null);
   1148 
   1149     // The only read barrier implementation supporting the
   1150     // UnsafeCASObject intrinsic is the Baker-style read barriers.
   1151     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   1152 
   1153     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   1154       Register temp = WRegisterFrom(locations->GetTemp(0));
   1155       // Need to make sure the reference stored in the field is a to-space
   1156       // one before attempting the CAS; otherwise the CAS could fail even though it ought to succeed.
   1157       codegen->UpdateReferenceFieldWithBakerReadBarrier(
   1158           invoke,
   1159           out_loc,  // Not used as an output; serves only as a "temporary" within the read barrier.
   1160           base,
   1161           /* field_offset */ offset_loc,
   1162           temp,
   1163           /* needs_null_check */ false,
   1164           /* use_load_acquire */ false);
   1165     }
   1166   }
   1167 
   1168   UseScratchRegisterScope temps(masm);
   1169   Register tmp_ptr = temps.AcquireX();                             // Pointer to actual memory.
   1170   Register tmp_value = temps.AcquireSameSizeAs(value);             // Value in memory.
   1171 
   1172   Register tmp_32 = tmp_value.W();
   1173 
   1174   __ Add(tmp_ptr, base.X(), Operand(offset));
   1175 
   1176   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
   1177     codegen->GetAssembler()->PoisonHeapReference(expected);
   1178     if (value.Is(expected)) {
   1179       // Do not poison `value`, as it is the same register as
   1180       // `expected`, which has just been poisoned.
   1181     } else {
   1182       codegen->GetAssembler()->PoisonHeapReference(value);
   1183     }
   1184   }
   1185 
   1186   // do {
   1187   //   tmp_value = [tmp_ptr];
   1188   // } while (tmp_value == expected && failure([tmp_ptr] <- value));
   1189   // result = (tmp_value == expected);
   1190 
   1191   vixl::aarch64::Label loop_head, exit_loop;
   1192   __ Bind(&loop_head);
   1193   __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
   1194   __ Cmp(tmp_value, expected);
   1195   __ B(&exit_loop, ne);
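          // Try the exclusive store. Stlxr writes a nonzero status to tmp_32 if the store failed
          // (the exclusive monitor was lost), in which case we retry the whole CAS.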
   1196   __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
   1197   __ Cbnz(tmp_32, &loop_head);
   1198   __ Bind(&exit_loop);
   1199   __ Cset(out, eq);
   1200 
   1201   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
   1202     codegen->GetAssembler()->UnpoisonHeapReference(expected);
   1203     if (value.Is(expected)) {
   1204       // Do not unpoison `value`, as it is the same register as
   1205       // `expected`, which has just been unpoisoned.
   1206     } else {
   1207       codegen->GetAssembler()->UnpoisonHeapReference(value);
   1208     }
   1209   }
   1210 }
   1211 
   1212 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
   1213   CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimInt);
   1214 }
   1215 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
   1216   CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong);
   1217 }
   1218 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
   1219   // The only read barrier implementation supporting the
   1220   // UnsafeCASObject intrinsic is the Baker-style read barrier.
   1221   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
   1222     return;
   1223   }
   1224 
   1225   CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimNot);
   1226 }
   1227 
   1228 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
   1229   GenCas(invoke, Primitive::kPrimInt, codegen_);
   1230 }
   1231 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
   1232   GenCas(invoke, Primitive::kPrimLong, codegen_);
   1233 }
   1234 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
   1235   // The only read barrier implementation supporting the
   1236   // UnsafeCASObject intrinsic is the Baker-style read barrier.
   1237   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   1238 
   1239   GenCas(invoke, Primitive::kPrimNot, codegen_);
   1240 }
   1241 
   1242 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
   1243   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1244                                                             invoke->InputAt(1)->CanBeNull()
   1245                                                                 ? LocationSummary::kCallOnSlowPath
   1246                                                                 : LocationSummary::kNoCall,
   1247                                                             kIntrinsified);
   1248   locations->SetInAt(0, Location::RequiresRegister());
   1249   locations->SetInAt(1, Location::RequiresRegister());
   1250   locations->AddTemp(Location::RequiresRegister());
   1251   locations->AddTemp(Location::RequiresRegister());
   1252   locations->AddTemp(Location::RequiresRegister());
   1253   // Need a temporary register for the String compression feature.
   1254   if (mirror::kUseStringCompression) {
   1255     locations->AddTemp(Location::RequiresRegister());
   1256   }
   1257   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   1258 }
   1259 
   1260 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
   1261   MacroAssembler* masm = GetVIXLAssembler();
   1262   LocationSummary* locations = invoke->GetLocations();
   1263 
   1264   Register str = InputRegisterAt(invoke, 0);
   1265   Register arg = InputRegisterAt(invoke, 1);
   1266   DCHECK(str.IsW());
   1267   DCHECK(arg.IsW());
   1268   Register out = OutputRegister(invoke);
   1269 
   1270   Register temp0 = WRegisterFrom(locations->GetTemp(0));
   1271   Register temp1 = WRegisterFrom(locations->GetTemp(1));
   1272   Register temp2 = WRegisterFrom(locations->GetTemp(2));
   1273   Register temp3;
   1274   if (mirror::kUseStringCompression) {
   1275     temp3 = WRegisterFrom(locations->GetTemp(3));
   1276   }
   1277 
   1278   vixl::aarch64::Label loop;
   1279   vixl::aarch64::Label find_char_diff;
   1280   vixl::aarch64::Label end;
   1281   vixl::aarch64::Label different_compression;
   1282 
   1283   // Get offsets of count and value fields within a string object.
   1284   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
   1285   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
   1286 
   1287   // Note that the null check must have been done earlier.
   1288   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1289 
   1290   // Take the slow path and throw if the argument can be null and actually is null.
   1291   SlowPathCodeARM64* slow_path = nullptr;
   1292   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
   1293   if (can_slow_path) {
   1294     slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
   1295     codegen_->AddSlowPath(slow_path);
   1296     __ Cbz(arg, slow_path->GetEntryLabel());
   1297   }
   1298 
   1299   // Reference equality check, return 0 if same reference.
   1300   __ Subs(out, str, arg);
   1301   __ B(&end, eq);
   1302 
   1303   if (mirror::kUseStringCompression) {
   1304     // Load `count` fields of this and argument strings.
   1305     __ Ldr(temp3, HeapOperand(str, count_offset));
   1306     __ Ldr(temp2, HeapOperand(arg, count_offset));
   1307     // Clean out compression flag from lengths.
   1308     __ Lsr(temp0, temp3, 1u);
   1309     __ Lsr(temp1, temp2, 1u);
   1310   } else {
   1311     // Load lengths of this and argument strings.
   1312     __ Ldr(temp0, HeapOperand(str, count_offset));
   1313     __ Ldr(temp1, HeapOperand(arg, count_offset));
   1314   }
   1315   // out = length diff.
   1316   __ Subs(out, temp0, temp1);
   1317   // temp0 = min(len(str), len(arg)).
   1318   __ Csel(temp0, temp1, temp0, ge);
   1319   // Shorter string is empty?
   1320   __ Cbz(temp0, &end);
   1321 
   1322   if (mirror::kUseStringCompression) {
   1323     // Check that both strings use the same compression style; only then can this loop be used.
   1324     __ Eor(temp2, temp2, Operand(temp3));
   1325     // Interleave with the compression flag extraction, which is needed for both paths,
   1326     // and also set flags, which are needed only for the different-compression path.
   1327     __ Ands(temp3.W(), temp3.W(), Operand(1));
   1328     __ Tbnz(temp2, 0, &different_compression);  // Does not use flags.
   1329   }
   1330   // Store offset of string value in preparation for comparison loop.
   1331   __ Mov(temp1, value_offset);
   1332   if (mirror::kUseStringCompression) {
   1333     // For string compression, calculate the number of bytes to compare (not chars).
   1334     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
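            // temp3 holds the compression flag (0 = compressed, 1 = uncompressed), so the shift
            // leaves the count unchanged for compressed strings (one byte per char) and doubles it
            // for uncompressed strings (two bytes per char).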
   1335     __ Lsl(temp0, temp0, temp3);
   1336   }
   1337 
   1338   UseScratchRegisterScope scratch_scope(masm);
   1339   Register temp4 = scratch_scope.AcquireX();
   1340 
   1341   // Assertions that must hold in order to compare strings 8 bytes at a time.
   1342   DCHECK_ALIGNED(value_offset, 8);
   1343   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
   1344 
   1345   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
   1346   DCHECK_EQ(char_size, 2u);
   1347 
   1348   // Promote temp2 to an X reg, ready for LDR.
   1349   temp2 = temp2.X();
   1350 
   1351   // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
   1352   __ Bind(&loop);
   1353   __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
   1354   __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
   1355   __ Cmp(temp4, temp2);
   1356   __ B(ne, &find_char_diff);
   1357   __ Add(temp1, temp1, char_size * 4);
   1358   // With string compression, we have compared 8 bytes, otherwise 4 chars.
   1359   __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
   1360   __ B(&loop, hi);
   1361   __ B(&end);
   1362 
   1363   // Promote temp1 to an X reg, ready for EOR.
   1364   temp1 = temp1.X();
   1365 
   1366   // Find the single character difference.
   1367   __ Bind(&find_char_diff);
   1368   // Get the bit position of the first character that differs.
   1369   __ Eor(temp1, temp2, temp4);
   1370   __ Rbit(temp1, temp1);
   1371   __ Clz(temp1, temp1);
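          // temp1 now holds the bit position of the lowest differing bit: Rbit followed by Clz
          // computes the number of trailing zeros of the XOR.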
   1372 
   1373   // If the number of chars remaining <= the index where the difference occurs (0-3), then
   1374   // the difference occurs outside the remaining string data, so just return length diff (out).
   1375   // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
   1376   // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
   1377   // unsigned when string compression is disabled.
   1378   // When it's enabled, the comparison must be unsigned.
   1379   __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
   1380   __ B(ls, &end);
   1381 
   1382   // Extract the characters and calculate the difference.
   1383   if (mirror::kUseStringCompression) {
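            // Round the bit position down to the start of the differing character: to a byte
            // boundary for compressed strings, or to a 16-bit boundary otherwise (the second Bic
            // clears bit 3 when temp3 == 1).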
   1384     __ Bic(temp1, temp1, 0x7);
   1385     __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
   1386   } else {
   1387     __ Bic(temp1, temp1, 0xf);
   1388   }
   1389   __ Lsr(temp2, temp2, temp1);
   1390   __ Lsr(temp4, temp4, temp1);
   1391   if (mirror::kUseStringCompression) {
   1392     // Prioritize the compressed-string case and calculate that result first.
   1393     __ Uxtb(temp1, temp4);
   1394     __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
   1395     __ Tbz(temp3, 0u, &end);  // If actually compressed, we're done.
   1396   }
   1397   __ Uxth(temp4, temp4);
   1398   __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
   1399 
   1400   if (mirror::kUseStringCompression) {
   1401     __ B(&end);
   1402     __ Bind(&different_compression);
   1403 
   1404     // Comparison for strings with different compression styles.
   1405     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
   1406     DCHECK_EQ(c_char_size, 1u);
   1407     temp1 = temp1.W();
   1408     temp2 = temp2.W();
   1409     temp4 = temp4.W();
   1410 
   1411     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
   1412     // Note that flags have been set by the `str` compression flag extraction to `temp3`
   1413     // before branching to the `different_compression` label.
   1414     __ Csel(temp1, str, arg, eq);   // Pointer to the compressed string.
   1415     __ Csel(temp2, str, arg, ne);   // Pointer to the uncompressed string.
   1416 
   1417     // We want to free up temp3, currently holding the `str` compression flag, for the comparison.
   1418     // So we move it into the bottom bit of the iteration count `temp0`, which we then need to treat
   1419     // as unsigned. Start by freeing the bit with an LSL and finish further down with a SUB, which
   1420     // allows `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
   1421     __ Lsl(temp0, temp0, 1u);
   1422 
   1423     // Adjust temp1 and temp2 from string pointers to data pointers.
   1424     __ Add(temp1, temp1, Operand(value_offset));
   1425     __ Add(temp2, temp2, Operand(value_offset));
   1426 
   1427     // Complete the move of the compression flag.
   1428     __ Sub(temp0, temp0, Operand(temp3));
   1429 
   1430     vixl::aarch64::Label different_compression_loop;
   1431     vixl::aarch64::Label different_compression_diff;
   1432 
   1433     __ Bind(&different_compression_loop);
   1434     __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
   1435     __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
   1436     __ Subs(temp4, temp4, Operand(temp3));
   1437     __ B(&different_compression_diff, ne);
   1438     __ Subs(temp0, temp0, 2);
   1439     __ B(&different_compression_loop, hi);
   1440     __ B(&end);
   1441 
   1442     // Calculate the difference.
   1443     __ Bind(&different_compression_diff);
   1444     __ Tst(temp0, Operand(1));
   1445     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1446                   "Expecting 0=compressed, 1=uncompressed");
   1447     __ Cneg(out, temp4, ne);
   1448   }
   1449 
   1450   __ Bind(&end);
   1451 
   1452   if (can_slow_path) {
   1453     __ Bind(slow_path->GetExitLabel());
   1454   }
   1455 }
   1456 
   1457 // The cutoff for unrolling the loop in the String.equals() intrinsic for const strings.
   1458 // The normal loop plus the pre-header is 9 instructions without string compression and 12
   1459 // instructions with string compression. We can compare up to 8 bytes in 4 instructions
   1460 // (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up
   1461 // to 10 instructions for the unrolled loop.
   1462 constexpr size_t kShortConstStringEqualsCutoffInBytes = 32;
   1463 
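        // If `candidate` is a HLoadString, returns a pointer to the referenced string constant's
        // data in the dex file and stores its UTF-16 length in `*utf16_length`; otherwise returns
        // nullptr.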
   1464 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
   1465   if (candidate->IsLoadString()) {
   1466     HLoadString* load_string = candidate->AsLoadString();
   1467     const DexFile& dex_file = load_string->GetDexFile();
   1468     return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
   1469   }
   1470   return nullptr;
   1471 }
   1472 
   1473 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
   1474   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1475                                                             LocationSummary::kNoCall,
   1476                                                             kIntrinsified);
   1477   locations->SetInAt(0, Location::RequiresRegister());
   1478   locations->SetInAt(1, Location::RequiresRegister());
   1479 
   1480   // For the generic implementation and for long const strings we need a temporary.
   1481   // We do not need it for short const strings, up to 8 bytes, see code generation below.
   1482   uint32_t const_string_length = 0u;
   1483   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
   1484   if (const_string == nullptr) {
   1485     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
   1486   }
   1487   bool is_compressed =
   1488       mirror::kUseStringCompression &&
   1489       const_string != nullptr &&
   1490       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
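          // A compressed string stores one byte per character and an uncompressed one two bytes,
          // so up to 8 (compressed) or 4 (uncompressed) characters fit in a single 8-byte load.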
   1491   if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) {
   1492     locations->AddTemp(Location::RequiresRegister());
   1493   }
   1494 
   1495   // TODO: If the String.equals() is used only for an immediately following HIf, we can
   1496   // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
   1497   // Then we shall need an extra temporary register instead of the output register.
   1498   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   1499 }
   1500 
   1501 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
   1502   MacroAssembler* masm = GetVIXLAssembler();
   1503   LocationSummary* locations = invoke->GetLocations();
   1504 
   1505   Register str = WRegisterFrom(locations->InAt(0));
   1506   Register arg = WRegisterFrom(locations->InAt(1));
   1507   Register out = XRegisterFrom(locations->Out());
   1508 
   1509   UseScratchRegisterScope scratch_scope(masm);
   1510   Register temp = scratch_scope.AcquireW();
   1511   Register temp1 = scratch_scope.AcquireW();
   1512 
   1513   vixl::aarch64::Label loop;
   1514   vixl::aarch64::Label end;
   1515   vixl::aarch64::Label return_true;
   1516   vixl::aarch64::Label return_false;
   1517 
   1518   // Get offsets of count, value, and class fields within a string object.
   1519   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
   1520   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
   1521   const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   1522 
   1523   // Note that the null check must have been done earlier.
   1524   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1525 
   1526   StringEqualsOptimizations optimizations(invoke);
   1527   if (!optimizations.GetArgumentNotNull()) {
   1528     // Check if input is null, return false if it is.
   1529     __ Cbz(arg, &return_false);
   1530   }
   1531 
   1532   // Reference equality check, return true if same reference.
   1533   __ Cmp(str, arg);
   1534   __ B(&return_true, eq);
   1535 
   1536   if (!optimizations.GetArgumentIsString()) {
   1537     // Instanceof check for the argument by comparing class fields.
   1538     // All string objects must have the same type since String cannot be subclassed.
   1539     // Receiver must be a string object, so its class field is equal to all strings' class fields.
   1540     // If the argument is a string object, its class field must be equal to receiver's class field.
   1541     __ Ldr(temp, MemOperand(str.X(), class_offset));
   1542     __ Ldr(temp1, MemOperand(arg.X(), class_offset));
   1543     __ Cmp(temp, temp1);
   1544     __ B(&return_false, ne);
   1545   }
   1546 
   1547   // Check if one of the inputs is a const string. Do not special-case both strings
   1548   // being const; such cases should be handled by constant folding if needed.
   1549   uint32_t const_string_length = 0u;
   1550   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
   1551   if (const_string == nullptr) {
   1552     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
   1553     if (const_string != nullptr) {
   1554       std::swap(str, arg);  // Make sure the const string is in `str`.
   1555     }
   1556   }
   1557   bool is_compressed =
   1558       mirror::kUseStringCompression &&
   1559       const_string != nullptr &&
   1560       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
   1561 
   1562   if (const_string != nullptr) {
   1563     // Load `count` field of the argument string and check if it matches the const string.
   1564     // Also compare the compression style; if it differs, return false.
   1565     __ Ldr(temp, MemOperand(arg.X(), count_offset));
   1566     // Temporarily release temp1 as we may not be able to embed the flagged count in CMP immediate.
   1567     scratch_scope.Release(temp1);
   1568     __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
   1569     temp1 = scratch_scope.AcquireW();
   1570     __ B(&return_false, ne);
   1571   } else {
   1572     // Load `count` fields of this and argument strings.
   1573     __ Ldr(temp, MemOperand(str.X(), count_offset));
   1574     __ Ldr(temp1, MemOperand(arg.X(), count_offset));
   1575     // Check if the `count` fields are equal; return false if they're not.
   1576     // Also compare the compression style; if it differs, return false.
   1577     __ Cmp(temp, temp1);
   1578     __ B(&return_false, ne);
   1579   }
   1580 
   1581   // Assertions that must hold in order to compare strings 8 bytes at a time.
   1582   DCHECK_ALIGNED(value_offset, 8);
   1583   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
   1584 
   1585   if (const_string != nullptr &&
   1586       const_string_length < (is_compressed ? kShortConstStringEqualsCutoffInBytes
   1587                                            : kShortConstStringEqualsCutoffInBytes / 2u)) {
   1588     // Load and compare the contents. Though we know the contents of the short const string
   1589     // at compile time, materializing the constants may take more code than loading from memory.
   1590     int32_t offset = value_offset;
   1591     size_t remaining_bytes =
   1592         RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
   1593     temp = temp.X();
   1594     temp1 = temp1.X();
   1595     while (remaining_bytes > 8u) {
   1596       Register temp2 = XRegisterFrom(locations->GetTemp(0));
   1597       __ Ldp(temp, temp1, MemOperand(str.X(), offset));
   1598       __ Ldp(temp2, out, MemOperand(arg.X(), offset));
   1599       __ Cmp(temp, temp2);
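              // If the first pair matched (eq), compare the second pair; otherwise NoFlag forces
              // the flags to 'ne' so that the branch below goes to return_false.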
   1600       __ Ccmp(temp1, out, NoFlag, eq);
   1601       __ B(&return_false, ne);
   1602       offset += 2u * sizeof(uint64_t);
   1603       remaining_bytes -= 2u * sizeof(uint64_t);
   1604     }
   1605     if (remaining_bytes != 0u) {
   1606       __ Ldr(temp, MemOperand(str.X(), offset));
   1607       __ Ldr(temp1, MemOperand(arg.X(), offset));
   1608       __ Cmp(temp, temp1);
   1609       __ B(&return_false, ne);
   1610     }
   1611   } else {
   1612     // Return true if both strings are empty. Even with string compression, `count == 0` means empty.
   1613     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1614                   "Expecting 0=compressed, 1=uncompressed");
   1615     __ Cbz(temp, &return_true);
   1616 
   1617     if (mirror::kUseStringCompression) {
   1618       // For string compression, calculate the number of bytes to compare (not chars).
   1619       // This could in theory exceed INT32_MAX, so treat temp as unsigned.
   1620       __ And(temp1, temp, Operand(1));    // Extract compression flag.
   1621       __ Lsr(temp, temp, 1u);             // Extract length.
   1622       __ Lsl(temp, temp, temp1);          // Calculate number of bytes to compare.
   1623     }
   1624 
   1625     // Store offset of string value in preparation for comparison loop.
   1626     __ Mov(temp1, value_offset);
   1627 
   1628     temp1 = temp1.X();
   1629     Register temp2 = XRegisterFrom(locations->GetTemp(0));
   1630     // Loop to compare strings 8 bytes at a time starting at the front of the string.
   1631     // Ok to do this because strings are zero-padded to kObjectAlignment.
   1632     __ Bind(&loop);
   1633     __ Ldr(out, MemOperand(str.X(), temp1));
   1634     __ Ldr(temp2, MemOperand(arg.X(), temp1));
   1635     __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
   1636     __ Cmp(out, temp2);
   1637     __ B(&return_false, ne);
   1638     // With string compression, we have compared 8 bytes, otherwise 4 chars.
   1639     __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
   1640     __ B(&loop, hi);
   1641   }
   1642 
   1643   // Return true and exit the function.
   1644   // If the loop did not branch to return_false, we fall through and return true.
   1645   __ Bind(&return_true);
   1646   __ Mov(out, 1);
   1647   __ B(&end);
   1648 
   1649   // Return false and exit the function.
   1650   __ Bind(&return_false);
   1651   __ Mov(out, 0);
   1652   __ Bind(&end);
   1653 }
   1654 
   1655 static void GenerateVisitStringIndexOf(HInvoke* invoke,
   1656                                        MacroAssembler* masm,
   1657                                        CodeGeneratorARM64* codegen,
   1658                                        ArenaAllocator* allocator,
   1659                                        bool start_at_zero) {
   1660   LocationSummary* locations = invoke->GetLocations();
   1661 
   1662   // Note that the null check must have been done earlier.
   1663   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1664 
   1665   // Check for code points > 0xFFFF. Either emit a slow-path check when we don't know statically,
   1666   // dispatch directly for a large constant, or omit the slow path for a small constant or a char.
   1667   SlowPathCodeARM64* slow_path = nullptr;
   1668   HInstruction* code_point = invoke->InputAt(1);
   1669   if (code_point->IsIntConstant()) {
   1670     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
   1671       // Always needs the slow-path. We could directly dispatch to it, but this case should be
   1672       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
   1673       slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
   1674       codegen->AddSlowPath(slow_path);
   1675       __ B(slow_path->GetEntryLabel());
   1676       __ Bind(slow_path->GetExitLabel());
   1677       return;
   1678     }
   1679   } else if (code_point->GetType() != Primitive::kPrimChar) {
   1680     Register char_reg = WRegisterFrom(locations->InAt(1));
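            // A valid char fits in 16 bits; if any of the upper 16 bits of the code point are set,
            // defer to the slow path.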
   1681     __ Tst(char_reg, 0xFFFF0000);
   1682     slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
   1683     codegen->AddSlowPath(slow_path);
   1684     __ B(ne, slow_path->GetEntryLabel());
   1685   }
   1686 
   1687   if (start_at_zero) {
   1688     // Start-index = 0.
   1689     Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
   1690     __ Mov(tmp_reg, 0);
   1691   }
   1692 
   1693   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
   1694   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
   1695 
   1696   if (slow_path != nullptr) {
   1697     __ Bind(slow_path->GetExitLabel());
   1698   }
   1699 }
   1700 
   1701 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
   1702   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1703                                                             LocationSummary::kCallOnMainAndSlowPath,
   1704                                                             kIntrinsified);
   1705   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   1706   // best to align the inputs accordingly.
   1707   InvokeRuntimeCallingConvention calling_convention;
   1708   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1709   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   1710   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
   1711 
   1712   // Need to send start_index=0.
   1713   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
   1714 }
   1715 
   1716 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
   1717   GenerateVisitStringIndexOf(
   1718       invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
   1719 }
   1720 
   1721 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
   1722   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1723                                                             LocationSummary::kCallOnMainAndSlowPath,
   1724                                                             kIntrinsified);
   1725   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   1726   // best to align the inputs accordingly.
   1727   InvokeRuntimeCallingConvention calling_convention;
   1728   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1729   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   1730   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   1731   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
   1732 }
   1733 
   1734 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
   1735   GenerateVisitStringIndexOf(
   1736       invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
   1737 }
   1738 
   1739 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   1740   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1741                                                             LocationSummary::kCallOnMainAndSlowPath,
   1742                                                             kIntrinsified);
   1743   InvokeRuntimeCallingConvention calling_convention;
   1744   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1745   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   1746   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   1747   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
   1748   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
   1749 }
   1750 
   1751 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   1752   MacroAssembler* masm = GetVIXLAssembler();
   1753   LocationSummary* locations = invoke->GetLocations();
   1754 
   1755   Register byte_array = WRegisterFrom(locations->InAt(0));
   1756   __ Cmp(byte_array, 0);
   1757   SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
   1758   codegen_->AddSlowPath(slow_path);
   1759   __ B(eq, slow_path->GetEntryLabel());
   1760 
   1761   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
   1762   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   1763   __ Bind(slow_path->GetExitLabel());
   1764 }
   1765 
   1766 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
   1767   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1768                                                             LocationSummary::kCallOnMainOnly,
   1769                                                             kIntrinsified);
   1770   InvokeRuntimeCallingConvention calling_convention;
   1771   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1772   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   1773   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   1774   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
   1775 }
   1776 
   1777 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
   1778   // No need to emit code checking whether `locations->InAt(2)` is a null
   1779   // pointer, as callers of the native method
   1780   //
   1781   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
   1782   //
   1783   // all include a null check on `data` before calling that method.
   1784   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
   1785   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   1786 }
   1787 
   1788 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
   1789   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1790                                                             LocationSummary::kCallOnMainAndSlowPath,
   1791                                                             kIntrinsified);
   1792   InvokeRuntimeCallingConvention calling_convention;
   1793   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1794   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
   1795 }
   1796 
   1797 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
   1798   MacroAssembler* masm = GetVIXLAssembler();
   1799   LocationSummary* locations = invoke->GetLocations();
   1800 
   1801   Register string_to_copy = WRegisterFrom(locations->InAt(0));
   1802   __ Cmp(string_to_copy, 0);
   1803   SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
   1804   codegen_->AddSlowPath(slow_path);
   1805   __ B(eq, slow_path->GetEntryLabel());
   1806 
   1807   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
   1808   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   1809   __ Bind(slow_path->GetExitLabel());
   1810 }
   1811 
   1812 static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
   1813   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
   1814   DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
   1815   DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
   1816 
   1817   LocationSummary* const locations = new (arena) LocationSummary(invoke,
   1818                                                                  LocationSummary::kCallOnMainOnly,
   1819                                                                  kIntrinsified);
   1820   InvokeRuntimeCallingConvention calling_convention;
   1821 
   1822   locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
   1823   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
   1824 }
   1825 
   1826 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
   1827   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
   1828   DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
   1829   DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(1)->GetType()));
   1830   DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
   1831 
   1832   LocationSummary* const locations = new (arena) LocationSummary(invoke,
   1833                                                                  LocationSummary::kCallOnMainOnly,
   1834                                                                  kIntrinsified);
   1835   InvokeRuntimeCallingConvention calling_convention;
   1836 
   1837   locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
   1838   locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
   1839   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
   1840 }
   1841 
   1842 static void GenFPToFPCall(HInvoke* invoke,
   1843                           CodeGeneratorARM64* codegen,
   1844                           QuickEntrypointEnum entry) {
   1845   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
   1846 }
   1847 
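        // The Math intrinsics below have no inline ARM64 implementation; they only marshal their
        // floating-point argument(s) per the runtime calling convention and call the corresponding
        // quick entrypoint (typically a thin wrapper around the libm routine).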
   1848 void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
   1849   CreateFPToFPCallLocations(arena_, invoke);
   1850 }
   1851 
   1852 void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
   1853   GenFPToFPCall(invoke, codegen_, kQuickCos);
   1854 }
   1855 
   1856 void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
   1857   CreateFPToFPCallLocations(arena_, invoke);
   1858 }
   1859 
   1860 void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
   1861   GenFPToFPCall(invoke, codegen_, kQuickSin);
   1862 }
   1863 
   1864 void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
   1865   CreateFPToFPCallLocations(arena_, invoke);
   1866 }
   1867 
   1868 void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
   1869   GenFPToFPCall(invoke, codegen_, kQuickAcos);
   1870 }
   1871 
   1872 void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
   1873   CreateFPToFPCallLocations(arena_, invoke);
   1874 }
   1875 
   1876 void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
   1877   GenFPToFPCall(invoke, codegen_, kQuickAsin);
   1878 }
   1879 
   1880 void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
   1881   CreateFPToFPCallLocations(arena_, invoke);
   1882 }
   1883 
   1884 void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
   1885   GenFPToFPCall(invoke, codegen_, kQuickAtan);
   1886 }
   1887 
   1888 void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
   1889   CreateFPToFPCallLocations(arena_, invoke);
   1890 }
   1891 
   1892 void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
   1893   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
   1894 }
   1895 
   1896 void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
   1897   CreateFPToFPCallLocations(arena_, invoke);
   1898 }
   1899 
   1900 void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
   1901   GenFPToFPCall(invoke, codegen_, kQuickCosh);
   1902 }
   1903 
   1904 void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
   1905   CreateFPToFPCallLocations(arena_, invoke);
   1906 }
   1907 
   1908 void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
   1909   GenFPToFPCall(invoke, codegen_, kQuickExp);
   1910 }
   1911 
   1912 void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
   1913   CreateFPToFPCallLocations(arena_, invoke);
   1914 }
   1915 
   1916 void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
   1917   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
   1918 }
   1919 
   1920 void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
   1921   CreateFPToFPCallLocations(arena_, invoke);
   1922 }
   1923 
   1924 void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
   1925   GenFPToFPCall(invoke, codegen_, kQuickLog);
   1926 }
   1927 
   1928 void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
   1929   CreateFPToFPCallLocations(arena_, invoke);
   1930 }
   1931 
   1932 void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
   1933   GenFPToFPCall(invoke, codegen_, kQuickLog10);
   1934 }
   1935 
   1936 void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
   1937   CreateFPToFPCallLocations(arena_, invoke);
   1938 }
   1939 
   1940 void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
   1941   GenFPToFPCall(invoke, codegen_, kQuickSinh);
   1942 }
   1943 
   1944 void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
   1945   CreateFPToFPCallLocations(arena_, invoke);
   1946 }
   1947 
   1948 void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
   1949   GenFPToFPCall(invoke, codegen_, kQuickTan);
   1950 }
   1951 
   1952 void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
   1953   CreateFPToFPCallLocations(arena_, invoke);
   1954 }
   1955 
   1956 void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
   1957   GenFPToFPCall(invoke, codegen_, kQuickTanh);
   1958 }
   1959 
   1960 void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
   1961   CreateFPFPToFPCallLocations(arena_, invoke);
   1962 }
   1963 
   1964 void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
   1965   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
   1966 }
   1967 
   1968 void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
   1969   CreateFPFPToFPCallLocations(arena_, invoke);
   1970 }
   1971 
   1972 void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
   1973   GenFPToFPCall(invoke, codegen_, kQuickHypot);
   1974 }
   1975 
   1976 void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
   1977   CreateFPFPToFPCallLocations(arena_, invoke);
   1978 }
   1979 
   1980 void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
   1981   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
   1982 }
   1983 
   1984 void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   1985   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1986                                                             LocationSummary::kNoCall,
   1987                                                             kIntrinsified);
   1988   locations->SetInAt(0, Location::RequiresRegister());
   1989   locations->SetInAt(1, Location::RequiresRegister());
   1990   locations->SetInAt(2, Location::RequiresRegister());
   1991   locations->SetInAt(3, Location::RequiresRegister());
   1992   locations->SetInAt(4, Location::RequiresRegister());
   1993 
   1994   locations->AddTemp(Location::RequiresRegister());
   1995   locations->AddTemp(Location::RequiresRegister());
   1996   locations->AddTemp(Location::RequiresRegister());
   1997 }
   1998 
   1999 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   2000   MacroAssembler* masm = GetVIXLAssembler();
   2001   LocationSummary* locations = invoke->GetLocations();
   2002 
   2003   // Check assumption that sizeof(Char) is 2 (used in scaling below).
   2004   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
   2005   DCHECK_EQ(char_size, 2u);
   2006 
   2007   // Location of data in char array buffer.
   2008   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
   2009 
   2010   // Location of char array data in string.
   2011   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
   2012 
   2013   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
   2014   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
   2015   Register srcObj = XRegisterFrom(locations->InAt(0));
   2016   Register srcBegin = XRegisterFrom(locations->InAt(1));
   2017   Register srcEnd = XRegisterFrom(locations->InAt(2));
   2018   Register dstObj = XRegisterFrom(locations->InAt(3));
   2019   Register dstBegin = XRegisterFrom(locations->InAt(4));
   2020 
   2021   Register src_ptr = XRegisterFrom(locations->GetTemp(0));
   2022   Register num_chr = XRegisterFrom(locations->GetTemp(1));
   2023   Register tmp1 = XRegisterFrom(locations->GetTemp(2));
   2024 
   2025   UseScratchRegisterScope temps(masm);
   2026   Register dst_ptr = temps.AcquireX();
   2027   Register tmp2 = temps.AcquireX();
   2028 
   2029   vixl::aarch64::Label done;
   2030   vixl::aarch64::Label compressed_string_loop;
   2031   __ Sub(num_chr, srcEnd, srcBegin);
   2032   // Early out for valid zero-length retrievals.
   2033   __ Cbz(num_chr, &done);
   2034 
   2035   // dst address start to copy to.
   2036   __ Add(dst_ptr, dstObj, Operand(data_offset));
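          // dstBegin is a char index; scale it by 2 (LSL #1) to get a byte offset.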
   2037   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
   2038 
   2039   // src address to copy from.
   2040   __ Add(src_ptr, srcObj, Operand(value_offset));
   2041   vixl::aarch64::Label compressed_string_preloop;
   2042   if (mirror::kUseStringCompression) {
   2043     // Location of count in string.
   2044     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
   2045     // String's length.
   2046     __ Ldr(tmp2, MemOperand(srcObj, count_offset));
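            // Bit 0 of `count` is the compression flag; it is clear for a compressed string, in
            // which case we take the byte-per-character copy path below.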
   2047     __ Tbz(tmp2, 0, &compressed_string_preloop);
   2048   }
   2049   __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
   2050 
   2051   // Do the copy.
   2052   vixl::aarch64::Label loop;
   2053   vixl::aarch64::Label remainder;
   2054 
   2055   // Avoid having to repair the value of num_chr on the < 8 character path.
   2056   __ Subs(tmp1, num_chr, 8);
   2057   __ B(lt, &remainder);
   2058 
   2059   // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
   2060   __ Mov(num_chr, tmp1);
   2061 
   2062   // The main loop, used for longer fetches, loads and stores 8 x 16-bit characters at a time.
   2063   // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
   2064   __ Bind(&loop);
   2065   __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
   2066   __ Subs(num_chr, num_chr, 8);
   2067   __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
   2068   __ B(ge, &loop);
   2069 
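          // num_chr went negative by the loop's final subtraction; add 8 back to recover the
          // number of characters (0..7) still to copy.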
   2070   __ Adds(num_chr, num_chr, 8);
   2071   __ B(eq, &done);
   2072 
   2073   // Main loop for < 8 character case and remainder handling. Loads and stores one
   2074   // 16-bit Java character at a time.
   2075   __ Bind(&remainder);
   2076   __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
   2077   __ Subs(num_chr, num_chr, 1);
   2078   __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
   2079   __ B(gt, &remainder);
   2080   __ B(&done);
   2081 
   2082   if (mirror::kUseStringCompression) {
   2083     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
   2084     DCHECK_EQ(c_char_size, 1u);
   2085     __ Bind(&compressed_string_preloop);
   2086     __ Add(src_ptr, src_ptr, Operand(srcBegin));
   2087     // Copy loop for a compressed src, widening one 8-bit character to 16 bits at a time.
   2088     __ Bind(&compressed_string_loop);
   2089     __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
   2090     __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
   2091     __ Subs(num_chr, num_chr, Operand(1));
   2092     __ B(gt, &compressed_string_loop);
   2093   }
   2094 
   2095   __ Bind(&done);
   2096 }
   2097 
   2098 // Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
   2099 // implementation there for longer copy lengths.
   2100 static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
   2101 
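        // Use a register for a constant position/length that cannot be encoded as an Add/Sub
        // immediate; otherwise allow either a register or a constant.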
   2102 static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
   2103                                                uint32_t at,
   2104                                                HInstruction* input) {
   2105   HIntConstant* const_input = input->AsIntConstant();
   2106   if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
   2107     locations->SetInAt(at, Location::RequiresRegister());
   2108   } else {
   2109     locations->SetInAt(at, Location::RegisterOrConstant(input));
   2110   }
   2111 }
   2112 
   2113 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
   2114   // Check to see if we have known failures that would force us to bail out to the
   2115   // runtime; if so, just generate the runtime call directly.
   2116   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
   2117   HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
   2118 
   2119   // The positions must be non-negative.
   2120   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
   2121       (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
   2122     // We will have to fail anyway.
   2123     return;
   2124   }
   2125 
   2126   // The length must be >= 0 and not so long that we would (currently) prefer libcore's
   2127   // native implementation.
   2128   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
   2129   if (length != nullptr) {
   2130     int32_t len = length->GetValue();
   2131     if (len < 0 || len > kSystemArrayCopyCharThreshold) {
   2132       // Just call as normal.
   2133       return;
   2134     }
   2135   }
   2136 
   2137   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
   2138   LocationSummary* locations = new (allocator) LocationSummary(invoke,
   2139                                                                LocationSummary::kCallOnSlowPath,
   2140                                                                kIntrinsified);
   2141   // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
   2142   locations->SetInAt(0, Location::RequiresRegister());
   2143   SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
   2144   locations->SetInAt(2, Location::RequiresRegister());
   2145   SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
   2146   SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
   2147 
   2148   locations->AddTemp(Location::RequiresRegister());
   2149   locations->AddTemp(Location::RequiresRegister());
   2150   locations->AddTemp(Location::RequiresRegister());
   2151 }
   2152 
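        // Checks that `pos` is a valid start index into `input` and that at least `length` elements
        // remain starting at `pos`, branching to `slow_path` otherwise. When `length_is_input_length`
        // is true the copy covers the whole input, so the only acceptable `pos` is zero.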
   2153 static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
   2154                                          const Location& pos,
   2155                                          const Register& input,
   2156                                          const Location& length,
   2157                                          SlowPathCodeARM64* slow_path,
   2158                                          const Register& temp,
   2159                                          bool length_is_input_length = false) {
   2160   const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
   2161   if (pos.IsConstant()) {
   2162     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
   2163     if (pos_const == 0) {
   2164       if (!length_is_input_length) {
   2165         // Check that length(input) >= length.
   2166         __ Ldr(temp, MemOperand(input, length_offset));
   2167         __ Cmp(temp, OperandFrom(length, Primitive::kPrimInt));
   2168         __ B(slow_path->GetEntryLabel(), lt);
   2169       }
   2170     } else {
   2171       // Check that length(input) >= pos.
   2172       __ Ldr(temp, MemOperand(input, length_offset));
   2173       __ Subs(temp, temp, pos_const);
   2174       __ B(slow_path->GetEntryLabel(), lt);
   2175 
   2176       // Check that (length(input) - pos) >= length.
   2177       __ Cmp(temp, OperandFrom(length, Primitive::kPrimInt));
   2178       __ B(slow_path->GetEntryLabel(), lt);
   2179     }
   2180   } else if (length_is_input_length) {
   2181     // The only way the copy can succeed is if pos is zero.
   2182     __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
   2183   } else {
   2184     // Check that pos >= 0.
   2185     Register pos_reg = WRegisterFrom(pos);
   2186     __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
   2187 
   2188     // Check that pos <= length(input) && (length(input) - pos) >= length.
   2189     __ Ldr(temp, MemOperand(input, length_offset));
   2190     __ Subs(temp, temp, pos_reg);
   2191     // If length(input) >= pos, Ccmp against `length`; else NFlag forces 'lt' so we bail to the slow path.
   2192     __ Ccmp(temp, OperandFrom(length, Primitive::kPrimInt), NFlag, ge);
   2193     __ B(slow_path->GetEntryLabel(), lt);
   2194   }
   2195 }
   2196 
   2197 // Compute base source address, base destination address, and end
   2198 // source address for System.arraycopy* intrinsics in `src_base`,
   2199 // `dst_base` and `src_end` respectively.
   2200 static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
   2201                                         Primitive::Type type,
   2202                                         const Register& src,
   2203                                         const Location& src_pos,
   2204                                         const Register& dst,
   2205                                         const Location& dst_pos,
   2206                                         const Location& copy_length,
   2207                                         const Register& src_base,
   2208                                         const Register& dst_base,
   2209                                         const Register& src_end) {
   2210   // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
   2211   DCHECK(type == Primitive::kPrimNot || type == Primitive::kPrimChar)
   2212       << "Unexpected element type: " << type;
   2213   const int32_t element_size = Primitive::ComponentSize(type);
   2214   const int32_t element_size_shift = Primitive::ComponentSizeShift(type);
   2215   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
   2216 
   2217   if (src_pos.IsConstant()) {
   2218     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
   2219     __ Add(src_base, src, element_size * constant + data_offset);
   2220   } else {
   2221     __ Add(src_base, src, data_offset);
   2222     __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
   2223   }
   2224 
   2225   if (dst_pos.IsConstant()) {
   2226     int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
   2227     __ Add(dst_base, dst, element_size * constant + data_offset);
   2228   } else {
   2229     __ Add(dst_base, dst, data_offset);
   2230     __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
   2231   }
   2232 
   2233   if (copy_length.IsConstant()) {
   2234     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
   2235     __ Add(src_end, src_base, element_size * constant);
   2236   } else {
   2237     __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
   2238   }
   2239 }
   2240 
   2241 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
   2242   MacroAssembler* masm = GetVIXLAssembler();
   2243   LocationSummary* locations = invoke->GetLocations();
   2244   Register src = XRegisterFrom(locations->InAt(0));
   2245   Location src_pos = locations->InAt(1);
   2246   Register dst = XRegisterFrom(locations->InAt(2));
   2247   Location dst_pos = locations->InAt(3);
   2248   Location length = locations->InAt(4);
   2249 
   2250   SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
   2251   codegen_->AddSlowPath(slow_path);
   2252 
   2253   // copied in reverse, and we can't always tell whether that is needed.
   2254   // copied in reverse and we can't know in all cases if it's needed.
   2255   __ Cmp(src, dst);
   2256   __ B(slow_path->GetEntryLabel(), eq);
   2257 
   2258   // Bail out if the source is null.
   2259   __ Cbz(src, slow_path->GetEntryLabel());
   2260 
   2261   // Bail out if the destination is null.
   2262   __ Cbz(dst, slow_path->GetEntryLabel());
   2263 
   2264   if (!length.IsConstant()) {
   2265     // Merge the following two comparisons into one:
   2266     //   If the length is negative, bail out (delegate to libcore's native implementation).
   2267     //   If the length > 32 then (currently) prefer libcore's native implementation.
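            // The unsigned 'hi' condition treats a negative length as a very large value, so a
            // single comparison-and-branch covers both checks.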
   2268     __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
   2269     __ B(slow_path->GetEntryLabel(), hi);
   2270   } else {
   2271     // We have already checked in the LocationsBuilder for the constant case.
   2272     DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
   2273     DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
   2274   }
   2275 
   2276   Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
   2277   Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
   2278   Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
   2279 
   2280   CheckSystemArrayCopyPosition(masm,
   2281                                src_pos,
   2282                                src,
   2283                                length,
   2284                                slow_path,
   2285                                src_curr_addr,
   2286                                false);
   2287 
   2288   CheckSystemArrayCopyPosition(masm,
   2289                                dst_pos,
   2290                                dst,
   2291                                length,
   2292                                slow_path,
   2293                                src_curr_addr,
   2294                                false);
   2295 
   2296   src_curr_addr = src_curr_addr.X();
   2297   dst_curr_addr = dst_curr_addr.X();
   2298   src_stop_addr = src_stop_addr.X();
   2299 
   2300   GenSystemArrayCopyAddresses(masm,
   2301                               Primitive::kPrimChar,
   2302                               src,
   2303                               src_pos,
   2304                               dst,
   2305                               dst_pos,
   2306                               length,
   2307                               src_curr_addr,
   2308                               dst_curr_addr,
   2309                               src_stop_addr);
   2310 
   2311   // Iterate over the arrays and do a raw copy of the chars.
   2312   const int32_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
   2313   UseScratchRegisterScope temps(masm);
   2314   Register tmp = temps.AcquireW();
   2315   vixl::aarch64::Label loop, done;
   2316   __ Bind(&loop);
   2317   __ Cmp(src_curr_addr, src_stop_addr);
   2318   __ B(&done, eq);
   2319   __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
   2320   __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
   2321   __ B(&loop);
   2322   __ Bind(&done);
   2323 
   2324   __ Bind(slow_path->GetExitLabel());
   2325 }
   2326 
    2327 // We choose to use the native implementation for longer copy lengths.
   2328 static constexpr int32_t kSystemArrayCopyThreshold = 128;
   2329 
    2330 // CodeGenerator::CreateSystemArrayCopyLocationSummary uses three temporary registers.
    2331 // We want to use only two temporary registers here, in order to reduce register pressure on arm64,
    2332 // so we do not use CodeGenerator::CreateSystemArrayCopyLocationSummary.
   2333 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
   2334   // The only read barrier implementation supporting the
    2335   // SystemArrayCopy intrinsic is the Baker-style read barrier.
   2336   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
   2337     return;
   2338   }
   2339 
    2340   // Check for known failures that will force us to bail out to the runtime;
    2341   // in that case, just generate the runtime call directly.
   2342   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
   2343   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
   2344 
   2345   // The positions must be non-negative.
   2346   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
   2347       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    2348     // We will have to fail anyway.
   2349     return;
   2350   }
   2351 
   2352   // The length must be >= 0.
   2353   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
   2354   if (length != nullptr) {
   2355     int32_t len = length->GetValue();
   2356     if (len < 0 || len >= kSystemArrayCopyThreshold) {
   2357       // Just call as normal.
   2358       return;
   2359     }
   2360   }
   2361 
   2362   SystemArrayCopyOptimizations optimizations(invoke);
   2363 
   2364   if (optimizations.GetDestinationIsSource()) {
   2365     if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
   2366       // We only support backward copying if source and destination are the same.
   2367       return;
   2368     }
   2369   }
   2370 
   2371   if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
   2372     // We currently don't intrinsify primitive copying.
   2373     return;
   2374   }
   2375 
   2376   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
   2377   LocationSummary* locations = new (allocator) LocationSummary(invoke,
   2378                                                                LocationSummary::kCallOnSlowPath,
   2379                                                                kIntrinsified);
   2380   // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
   2381   locations->SetInAt(0, Location::RequiresRegister());
   2382   SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
   2383   locations->SetInAt(2, Location::RequiresRegister());
   2384   SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
   2385   SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
   2386 
   2387   locations->AddTemp(Location::RequiresRegister());
   2388   locations->AddTemp(Location::RequiresRegister());
   2389   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2390     // Temporary register IP0, obtained from the VIXL scratch register
   2391     // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
   2392     // (because that register is clobbered by ReadBarrierMarkRegX
   2393     // entry points). It cannot be used in calls to
   2394     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
   2395     // either. For these reasons, get a third extra temporary register
   2396     // from the register allocator.
   2397     locations->AddTemp(Location::RequiresRegister());
   2398   } else {
   2399     // Cases other than Baker read barriers: the third temporary will
   2400     // be acquired from the VIXL scratch register pool.
   2401   }
   2402 }
   2403 
   2404 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
   2405   // The only read barrier implementation supporting the
    2406   // SystemArrayCopy intrinsic is the Baker-style read barrier.
   2407   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   2408 
   2409   MacroAssembler* masm = GetVIXLAssembler();
   2410   LocationSummary* locations = invoke->GetLocations();
   2411 
   2412   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   2413   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   2414   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   2415   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   2416   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
   2417 
   2418   Register src = XRegisterFrom(locations->InAt(0));
   2419   Location src_pos = locations->InAt(1);
   2420   Register dest = XRegisterFrom(locations->InAt(2));
   2421   Location dest_pos = locations->InAt(3);
   2422   Location length = locations->InAt(4);
   2423   Register temp1 = WRegisterFrom(locations->GetTemp(0));
   2424   Location temp1_loc = LocationFrom(temp1);
   2425   Register temp2 = WRegisterFrom(locations->GetTemp(1));
   2426   Location temp2_loc = LocationFrom(temp2);
   2427 
   2428   SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
   2429   codegen_->AddSlowPath(intrinsic_slow_path);
   2430 
   2431   vixl::aarch64::Label conditions_on_positions_validated;
   2432   SystemArrayCopyOptimizations optimizations(invoke);
   2433 
   2434   // If source and destination are the same, we go to slow path if we need to do
   2435   // forward copying.
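           // In effect (sketch only): if (src == dest && src_pos < dest_pos) take the
           // slow path, since a forward copy of overlapping regions would corrupt the
           // data; the constant/register cases below merely specialize this comparison.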
   2436   if (src_pos.IsConstant()) {
   2437     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
   2438     if (dest_pos.IsConstant()) {
   2439       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
   2440       if (optimizations.GetDestinationIsSource()) {
   2441         // Checked when building locations.
   2442         DCHECK_GE(src_pos_constant, dest_pos_constant);
   2443       } else if (src_pos_constant < dest_pos_constant) {
   2444         __ Cmp(src, dest);
   2445         __ B(intrinsic_slow_path->GetEntryLabel(), eq);
   2446       }
   2447       // Checked when building locations.
   2448       DCHECK(!optimizations.GetDestinationIsSource()
   2449              || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
   2450     } else {
   2451       if (!optimizations.GetDestinationIsSource()) {
   2452         __ Cmp(src, dest);
   2453         __ B(&conditions_on_positions_validated, ne);
   2454       }
   2455       __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
   2456       __ B(intrinsic_slow_path->GetEntryLabel(), gt);
   2457     }
   2458   } else {
   2459     if (!optimizations.GetDestinationIsSource()) {
   2460       __ Cmp(src, dest);
   2461       __ B(&conditions_on_positions_validated, ne);
   2462     }
   2463     __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
   2464            OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
   2465     __ B(intrinsic_slow_path->GetEntryLabel(), lt);
   2466   }
   2467 
   2468   __ Bind(&conditions_on_positions_validated);
   2469 
   2470   if (!optimizations.GetSourceIsNotNull()) {
   2471     // Bail out if the source is null.
   2472     __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
   2473   }
   2474 
   2475   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
   2476     // Bail out if the destination is null.
   2477     __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
   2478   }
   2479 
   2480   // We have already checked in the LocationsBuilder for the constant case.
   2481   if (!length.IsConstant() &&
   2482       !optimizations.GetCountIsSourceLength() &&
   2483       !optimizations.GetCountIsDestinationLength()) {
   2484     // Merge the following two comparisons into one:
   2485     //   If the length is negative, bail out (delegate to libcore's native implementation).
    2486     //   If the length is >= 128, then (currently) prefer the native implementation.
   2487     __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
   2488     __ B(intrinsic_slow_path->GetEntryLabel(), hs);
   2489   }
   2490   // Validity checks: source.
   2491   CheckSystemArrayCopyPosition(masm,
   2492                                src_pos,
   2493                                src,
   2494                                length,
   2495                                intrinsic_slow_path,
   2496                                temp1,
   2497                                optimizations.GetCountIsSourceLength());
   2498 
   2499   // Validity checks: dest.
   2500   CheckSystemArrayCopyPosition(masm,
   2501                                dest_pos,
   2502                                dest,
   2503                                length,
   2504                                intrinsic_slow_path,
   2505                                temp1,
   2506                                optimizations.GetCountIsDestinationLength());
   2507   {
   2508     // We use a block to end the scratch scope before the write barrier, thus
   2509     // freeing the temporary registers so they can be used in `MarkGCCard`.
   2510     UseScratchRegisterScope temps(masm);
   2511     Location temp3_loc;  // Used only for Baker read barrier.
   2512     Register temp3;
   2513     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2514       temp3_loc = locations->GetTemp(2);
   2515       temp3 = WRegisterFrom(temp3_loc);
   2516     } else {
   2517       temp3 = temps.AcquireW();
   2518     }
   2519 
   2520     if (!optimizations.GetDoesNotNeedTypeCheck()) {
   2521       // Check whether all elements of the source array are assignable to the component
   2522       // type of the destination array. We do two checks: the classes are the same,
   2523       // or the destination is Object[]. If none of these checks succeed, we go to the
   2524       // slow path.
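               // Rough Java-level sketch of the check below (ignoring the additional
               // primitive-array checks):
               //   if (dest.getClass() != src.getClass()
               //       && dest.getClass().getComponentType() != Object.class) {
               //     // slow path
               //   }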
   2525 
   2526       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2527         if (!optimizations.GetSourceIsNonPrimitiveArray()) {
   2528           // /* HeapReference<Class> */ temp1 = src->klass_
   2529           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2530                                                           temp1_loc,
   2531                                                           src.W(),
   2532                                                           class_offset,
   2533                                                           temp3_loc,
   2534                                                           /* needs_null_check */ false,
   2535                                                           /* use_load_acquire */ false);
    2536           // Bail out if the source is not a non-primitive array.
   2537           // /* HeapReference<Class> */ temp1 = temp1->component_type_
   2538           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2539                                                           temp1_loc,
   2540                                                           temp1,
   2541                                                           component_offset,
   2542                                                           temp3_loc,
   2543                                                           /* needs_null_check */ false,
   2544                                                           /* use_load_acquire */ false);
   2545           __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
   2546           // If heap poisoning is enabled, `temp1` has been unpoisoned
    2547           // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
    2548           // /* uint16_t */ temp1 = static_cast<uint16_t>(temp1->primitive_type_);
   2549           __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
   2550           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2551           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
   2552         }
   2553 
   2554         // /* HeapReference<Class> */ temp1 = dest->klass_
   2555         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2556                                                         temp1_loc,
   2557                                                         dest.W(),
   2558                                                         class_offset,
   2559                                                         temp3_loc,
   2560                                                         /* needs_null_check */ false,
   2561                                                         /* use_load_acquire */ false);
   2562 
   2563         if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
    2564           // Bail out if the destination is not a non-primitive array.
   2565           //
   2566           // Register `temp1` is not trashed by the read barrier emitted
   2567           // by GenerateFieldLoadWithBakerReadBarrier below, as that
   2568           // method produces a call to a ReadBarrierMarkRegX entry point,
   2569           // which saves all potentially live registers, including
    2570           // temporaries such as `temp1`.
   2571           // /* HeapReference<Class> */ temp2 = temp1->component_type_
   2572           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2573                                                           temp2_loc,
   2574                                                           temp1,
   2575                                                           component_offset,
   2576                                                           temp3_loc,
   2577                                                           /* needs_null_check */ false,
   2578                                                           /* use_load_acquire */ false);
   2579           __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
   2580           // If heap poisoning is enabled, `temp2` has been unpoisoned
    2581           // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
    2582           // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
   2583           __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
   2584           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2585           __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
   2586         }
   2587 
   2588         // For the same reason given earlier, `temp1` is not trashed by the
   2589         // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
   2590         // /* HeapReference<Class> */ temp2 = src->klass_
   2591         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2592                                                         temp2_loc,
   2593                                                         src.W(),
   2594                                                         class_offset,
   2595                                                         temp3_loc,
   2596                                                         /* needs_null_check */ false,
   2597                                                         /* use_load_acquire */ false);
   2598         // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
   2599         __ Cmp(temp1, temp2);
   2600 
   2601         if (optimizations.GetDestinationIsTypedObjectArray()) {
   2602           vixl::aarch64::Label do_copy;
   2603           __ B(&do_copy, eq);
   2604           // /* HeapReference<Class> */ temp1 = temp1->component_type_
   2605           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2606                                                           temp1_loc,
   2607                                                           temp1,
   2608                                                           component_offset,
   2609                                                           temp3_loc,
   2610                                                           /* needs_null_check */ false,
   2611                                                           /* use_load_acquire */ false);
   2612           // /* HeapReference<Class> */ temp1 = temp1->super_class_
   2613           // We do not need to emit a read barrier for the following
   2614           // heap reference load, as `temp1` is only used in a
   2615           // comparison with null below, and this reference is not
   2616           // kept afterwards.
   2617           __ Ldr(temp1, HeapOperand(temp1, super_offset));
   2618           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
   2619           __ Bind(&do_copy);
   2620         } else {
   2621           __ B(intrinsic_slow_path->GetEntryLabel(), ne);
   2622         }
   2623       } else {
   2624         // Non read barrier code.
   2625 
   2626         // /* HeapReference<Class> */ temp1 = dest->klass_
   2627         __ Ldr(temp1, MemOperand(dest, class_offset));
   2628         // /* HeapReference<Class> */ temp2 = src->klass_
   2629         __ Ldr(temp2, MemOperand(src, class_offset));
   2630         bool did_unpoison = false;
   2631         if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
   2632             !optimizations.GetSourceIsNonPrimitiveArray()) {
   2633           // One or two of the references need to be unpoisoned. Unpoison them
   2634           // both to make the identity check valid.
   2635           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
   2636           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
   2637           did_unpoison = true;
   2638         }
   2639 
   2640         if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
    2641           // Bail out if the destination is not a non-primitive array.
   2642           // /* HeapReference<Class> */ temp3 = temp1->component_type_
   2643           __ Ldr(temp3, HeapOperand(temp1, component_offset));
   2644           __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
   2645           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
    2646           // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
   2647           __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
   2648           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2649           __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
   2650         }
   2651 
   2652         if (!optimizations.GetSourceIsNonPrimitiveArray()) {
    2653           // Bail out if the source is not a non-primitive array.
   2654           // /* HeapReference<Class> */ temp3 = temp2->component_type_
   2655           __ Ldr(temp3, HeapOperand(temp2, component_offset));
   2656           __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
   2657           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
    2658           // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
   2659           __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
   2660           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2661           __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
   2662         }
   2663 
   2664         __ Cmp(temp1, temp2);
   2665 
   2666         if (optimizations.GetDestinationIsTypedObjectArray()) {
   2667           vixl::aarch64::Label do_copy;
   2668           __ B(&do_copy, eq);
   2669           if (!did_unpoison) {
   2670             codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
   2671           }
   2672           // /* HeapReference<Class> */ temp1 = temp1->component_type_
   2673           __ Ldr(temp1, HeapOperand(temp1, component_offset));
   2674           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
   2675           // /* HeapReference<Class> */ temp1 = temp1->super_class_
   2676           __ Ldr(temp1, HeapOperand(temp1, super_offset));
   2677           // No need to unpoison the result, we're comparing against null.
   2678           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
   2679           __ Bind(&do_copy);
   2680         } else {
   2681           __ B(intrinsic_slow_path->GetEntryLabel(), ne);
   2682         }
   2683       }
   2684     } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
   2685       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    2686       // Bail out if the source is not a non-primitive array.
   2687       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2688         // /* HeapReference<Class> */ temp1 = src->klass_
   2689         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2690                                                         temp1_loc,
   2691                                                         src.W(),
   2692                                                         class_offset,
   2693                                                         temp3_loc,
   2694                                                         /* needs_null_check */ false,
   2695                                                         /* use_load_acquire */ false);
   2696         // /* HeapReference<Class> */ temp2 = temp1->component_type_
   2697         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
   2698                                                         temp2_loc,
   2699                                                         temp1,
   2700                                                         component_offset,
   2701                                                         temp3_loc,
   2702                                                         /* needs_null_check */ false,
   2703                                                         /* use_load_acquire */ false);
   2704         __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
   2705         // If heap poisoning is enabled, `temp2` has been unpoisoned
    2706         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
   2707       } else {
   2708         // /* HeapReference<Class> */ temp1 = src->klass_
   2709         __ Ldr(temp1, HeapOperand(src.W(), class_offset));
   2710         codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
   2711         // /* HeapReference<Class> */ temp2 = temp1->component_type_
   2712         __ Ldr(temp2, HeapOperand(temp1, component_offset));
   2713         __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
   2714         codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
   2715       }
    2716       // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
   2717       __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
   2718       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2719       __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
   2720     }
   2721 
   2722     if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
    2723       // Constant zero length: no need to emit the loop code at all.
   2724     } else {
   2725       Register src_curr_addr = temp1.X();
   2726       Register dst_curr_addr = temp2.X();
   2727       Register src_stop_addr = temp3.X();
   2728       vixl::aarch64::Label done;
   2729       const Primitive::Type type = Primitive::kPrimNot;
   2730       const int32_t element_size = Primitive::ComponentSize(type);
   2731 
   2732       if (length.IsRegister()) {
    2733         // Don't enter the copy loop if the length is zero.
   2734         __ Cbz(WRegisterFrom(length), &done);
   2735       }
   2736 
   2737       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2738         // TODO: Also convert this intrinsic to the IsGcMarking strategy?
   2739 
   2740         // SystemArrayCopy implementation for Baker read barriers (see
   2741         // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
   2742         //
    2743         //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
   2744         //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   2745         //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   2746         //   if (is_gray) {
   2747         //     // Slow-path copy.
   2748         //     do {
   2749         //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
   2750         //     } while (src_ptr != end_ptr)
   2751         //   } else {
   2752         //     // Fast-path copy.
   2753         //     do {
   2754         //       *dest_ptr++ = *src_ptr++;
   2755         //     } while (src_ptr != end_ptr)
   2756         //   }
   2757 
   2758         // Make sure `tmp` is not IP0, as it is clobbered by
   2759         // ReadBarrierMarkRegX entry points in
   2760         // ReadBarrierSystemArrayCopySlowPathARM64.
   2761         DCHECK(temps.IsAvailable(ip0));
   2762         temps.Exclude(ip0);
   2763         Register tmp = temps.AcquireW();
   2764         DCHECK_NE(LocationFrom(tmp).reg(), IP0);
   2765         // Put IP0 back in the pool so that VIXL has at least one
   2766         // scratch register available to emit macro-instructions (note
    2767         // that IP1 is already used for `tmp`). Some of the
    2768         // macro-instructions used in GenSystemArrayCopyAddresses
    2769         // (invoked below) may require a scratch register (for
    2770         // instance, to emit a load with a large constant offset).
   2771         temps.Include(ip0);
   2772 
   2773         // /* int32_t */ monitor = src->monitor_
   2774         __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
   2775         // /* LockWord */ lock_word = LockWord(monitor)
   2776         static_assert(sizeof(LockWord) == sizeof(int32_t),
   2777                       "art::LockWord and int32_t have different sizes.");
   2778 
   2779         // Introduce a dependency on the lock_word including rb_state,
   2780         // to prevent load-load reordering, and without using
   2781         // a memory barrier (which would be more expensive).
   2782         // `src` is unchanged by this operation, but its value now depends
   2783         // on `tmp`.
   2784         __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
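                 // Note (sketch): `tmp` was loaded as a W register, so the upper 32 bits
                 // of tmp.X() are zero and the added operand (tmp.X() >> 32) is always 0.
                 // `src` keeps its value but now has a register dependency on the lock
                 // word load, which is what orders the subsequent array element loads.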
   2785 
    2786         // Compute the base source address, base destination address, and end
    2787         // source address for the System.arraycopy intrinsic in `src_curr_addr`,
    2788         // `dst_curr_addr` and `src_stop_addr` respectively.
    2789         // Note that `src_curr_addr` is computed from `src` (and
    2790         // `src_pos`) here, and thus honors the artificial dependency
    2791         // of `src` on `tmp`.
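                 // Roughly (sketch; `data_offset` stands for the array payload offset
                 // of the element type):
                 //   src_curr_addr = src  + data_offset + src_pos  * element_size;
                 //   dst_curr_addr = dest + data_offset + dest_pos * element_size;
                 //   src_stop_addr = src_curr_addr + length * element_size;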
   2792         GenSystemArrayCopyAddresses(masm,
   2793                                     type,
   2794                                     src,
   2795                                     src_pos,
   2796                                     dest,
   2797                                     dest_pos,
   2798                                     length,
   2799                                     src_curr_addr,
   2800                                     dst_curr_addr,
   2801                                     src_stop_addr);
   2802 
   2803         // Slow path used to copy array when `src` is gray.
   2804         SlowPathCodeARM64* read_barrier_slow_path =
   2805             new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
   2806         codegen_->AddSlowPath(read_barrier_slow_path);
   2807 
   2808         // Given the numeric representation, it's enough to check the low bit of the rb_state.
   2809         static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
   2810         static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   2811         __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
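                 // That is, roughly: if (((monitor >> kReadBarrierStateShift) & 1) != 0)
                 // the source is gray, and the slow path marks each reference as it is
                 // copied; otherwise the fast-path raw copy below is taken.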
   2812 
   2813         // Fast-path copy.
   2814         // Iterate over the arrays and do a raw copy of the objects. We don't need to
   2815         // poison/unpoison.
   2816         vixl::aarch64::Label loop;
   2817         __ Bind(&loop);
   2818         __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
   2819         __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
   2820         __ Cmp(src_curr_addr, src_stop_addr);
   2821         __ B(&loop, ne);
   2822 
   2823         __ Bind(read_barrier_slow_path->GetExitLabel());
   2824       } else {
   2825         // Non read barrier code.
    2826         // Compute the base source address, base destination address, and end
    2827         // source address for the System.arraycopy intrinsic in `src_curr_addr`,
    2828         // `dst_curr_addr` and `src_stop_addr` respectively.
   2829         GenSystemArrayCopyAddresses(masm,
   2830                                     type,
   2831                                     src,
   2832                                     src_pos,
   2833                                     dest,
   2834                                     dest_pos,
   2835                                     length,
   2836                                     src_curr_addr,
   2837                                     dst_curr_addr,
   2838                                     src_stop_addr);
   2839         // Iterate over the arrays and do a raw copy of the objects. We don't need to
   2840         // poison/unpoison.
   2841         vixl::aarch64::Label loop;
   2842         __ Bind(&loop);
   2843         {
   2844           Register tmp = temps.AcquireW();
   2845           __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
   2846           __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
   2847         }
   2848         __ Cmp(src_curr_addr, src_stop_addr);
   2849         __ B(&loop, ne);
   2850       }
   2851       __ Bind(&done);
   2852     }
   2853   }
   2854 
   2855   // We only need one card marking on the destination array.
   2856   codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
   2857 
   2858   __ Bind(intrinsic_slow_path->GetExitLabel());
   2859 }
   2860 
   2861 static void GenIsInfinite(LocationSummary* locations,
   2862                           bool is64bit,
   2863                           MacroAssembler* masm) {
   2864   Operand infinity;
   2865   Register out;
   2866 
   2867   if (is64bit) {
   2868     infinity = kPositiveInfinityDouble;
   2869     out = XRegisterFrom(locations->Out());
   2870   } else {
   2871     infinity = kPositiveInfinityFloat;
   2872     out = WRegisterFrom(locations->Out());
   2873   }
   2874 
   2875   const Register zero = vixl::aarch64::Assembler::AppropriateZeroRegFor(out);
   2876 
   2877   MoveFPToInt(locations, is64bit, masm);
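           // Sketch of the bit trick below, using the 32-bit case for illustration:
           // +Inf is 0x7F800000 and -Inf is 0xFF800000, so (bits ^ 0x7F800000) is either
           // 0 or 0x80000000 exactly when the input is infinite; shifting left by one
           // drops the sign bit, so the result compares equal to zero iff the input
           // was +/- infinity.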
   2878   __ Eor(out, out, infinity);
   2879   // We don't care about the sign bit, so shift left.
   2880   __ Cmp(zero, Operand(out, LSL, 1));
   2881   __ Cset(out, eq);
   2882 }
   2883 
   2884 void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
   2885   CreateFPToIntLocations(arena_, invoke);
   2886 }
   2887 
   2888 void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
   2889   GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
   2890 }
   2891 
   2892 void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
   2893   CreateFPToIntLocations(arena_, invoke);
   2894 }
   2895 
   2896 void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
   2897   GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
   2898 }
   2899 
   2900 void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
   2901   InvokeRuntimeCallingConvention calling_convention;
   2902   IntrinsicVisitor::ComputeIntegerValueOfLocations(
   2903       invoke,
   2904       codegen_,
   2905       calling_convention.GetReturnLocation(Primitive::kPrimNot),
   2906       Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
   2907 }
   2908 
   2909 void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
   2910   IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
   2911   LocationSummary* locations = invoke->GetLocations();
   2912   MacroAssembler* masm = GetVIXLAssembler();
   2913 
   2914   Register out = RegisterFrom(locations->Out(), Primitive::kPrimNot);
   2915   UseScratchRegisterScope temps(masm);
   2916   Register temp = temps.AcquireW();
   2917   InvokeRuntimeCallingConvention calling_convention;
   2918   Register argument = calling_convention.GetRegisterAt(0);
   2919   if (invoke->InputAt(0)->IsConstant()) {
   2920     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
   2921     if (value >= info.low && value <= info.high) {
   2922       // Just embed the j.l.Integer in the code.
   2923       ScopedObjectAccess soa(Thread::Current());
   2924       mirror::Object* boxed = info.cache->Get(value + (-info.low));
   2925       DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
   2926       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
   2927       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
   2928     } else {
   2929       // Allocate and initialize a new j.l.Integer.
   2930       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
   2931       // JIT object table.
   2932       uint32_t address =
   2933           dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
   2934       __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
   2935       codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
   2936       CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   2937       __ Mov(temp.W(), value);
   2938       __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
   2939       // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
   2940       // one.
   2941       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
   2942     }
   2943   } else {
   2944     Register in = RegisterFrom(locations->InAt(0), Primitive::kPrimInt);
   2945     // Check bounds of our cache.
   2946     __ Add(out.W(), in.W(), -info.low);
   2947     __ Cmp(out.W(), info.high - info.low + 1);
   2948     vixl::aarch64::Label allocate, done;
   2949     __ B(&allocate, hs);
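             // Sketch of the bounds check above: `out = in - info.low` is compared as an
             // unsigned value against the cache length (info.high - info.low + 1), so any
             // `in` outside [info.low, info.high] wraps to a large unsigned index and
             // takes the `hs` branch to the allocation path.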
   2950     // If the value is within the bounds, load the j.l.Integer directly from the array.
   2951     uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
   2952     uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
   2953     __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
   2954     MemOperand source = HeapOperand(
   2955         temp, out.X(), LSL, Primitive::ComponentSizeShift(Primitive::kPrimNot));
   2956     codegen_->Load(Primitive::kPrimNot, out, source);
   2957     codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
   2958     __ B(&done);
   2959     __ Bind(&allocate);
   2960     // Otherwise allocate and initialize a new j.l.Integer.
   2961     address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
   2962     __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
   2963     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
   2964     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   2965     __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
   2966     // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
   2967     // one.
   2968     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
   2969     __ Bind(&done);
   2970   }
   2971 }
   2972 
   2973 void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
   2974   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   2975                                                             LocationSummary::kNoCall,
   2976                                                             kIntrinsified);
   2977   locations->SetOut(Location::RequiresRegister());
   2978 }
   2979 
   2980 void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
   2981   MacroAssembler* masm = GetVIXLAssembler();
   2982   Register out = RegisterFrom(invoke->GetLocations()->Out(), Primitive::kPrimInt);
   2983   UseScratchRegisterScope temps(masm);
   2984   Register temp = temps.AcquireX();
   2985 
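           // Rough sketch of the semantics implemented below (mirroring
           // Thread.interrupted()): read the interrupted flag with acquire ordering and,
           // only if it was set, clear it with release ordering; the old value is
           // returned in `out`.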
   2986   __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
   2987   __ Ldar(out.W(), MemOperand(temp));
   2988 
   2989   vixl::aarch64::Label done;
   2990   __ Cbz(out.W(), &done);
   2991   __ Stlr(wzr, MemOperand(temp));
   2992   __ Bind(&done);
   2993 }
   2994 
   2995 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
   2996 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
   2997 UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
   2998 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
   2999 UNIMPLEMENTED_INTRINSIC(ARM64, LongLowestOneBit)
   3000 
   3001 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
   3002 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
   3003 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
   3004 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
   3005 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
   3006 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppend);
   3007 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
   3008 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
   3009 
   3010 // 1.8.
   3011 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
   3012 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
   3013 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
   3014 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
   3015 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
   3016 
   3017 UNREACHABLE_INTRINSICS(ARM64)
   3018 
   3019 #undef __
   3020 
   3021 }  // namespace arm64
   3022 }  // namespace art
   3023