      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "intrinsics_arm_vixl.h"
     18 
     19 #include "arch/arm/instruction_set_features_arm.h"
     20 #include "art_method.h"
     21 #include "code_generator_arm_vixl.h"
     22 #include "common_arm.h"
     23 #include "lock_word.h"
     24 #include "mirror/array-inl.h"
     25 #include "mirror/object_array-inl.h"
     26 #include "mirror/reference.h"
     27 #include "mirror/string.h"
     28 #include "scoped_thread_state_change-inl.h"
     29 #include "thread-current-inl.h"
     30 
     31 #include "aarch32/constants-aarch32.h"
     32 
     33 namespace art {
     34 namespace arm {
     35 
     36 #define __ assembler->GetVIXLAssembler()->
     37 
     38 using helpers::DRegisterFrom;
     39 using helpers::HighRegisterFrom;
     40 using helpers::InputDRegisterAt;
     41 using helpers::InputRegisterAt;
     42 using helpers::InputSRegisterAt;
     43 using helpers::InputVRegisterAt;
     44 using helpers::Int32ConstantFrom;
     45 using helpers::LocationFrom;
     46 using helpers::LowRegisterFrom;
     47 using helpers::LowSRegisterFrom;
     48 using helpers::HighSRegisterFrom;
     49 using helpers::OutputDRegister;
     50 using helpers::OutputSRegister;
     51 using helpers::OutputRegister;
     52 using helpers::OutputVRegister;
     53 using helpers::RegisterFrom;
     54 using helpers::SRegisterFrom;
     55 using helpers::DRegisterFromS;
     56 
     57 using namespace vixl::aarch32;  // NOLINT(build/namespaces)
     58 
     59 using vixl::ExactAssemblyScope;
     60 using vixl::CodeBufferCheckScope;
     61 
     62 ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
     63   return codegen_->GetAssembler();
     64 }
     65 
     66 ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
     67   return codegen_->GetGraph()->GetArena();
     68 }
     69 
     70 // Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
     71 // intrinsified call. This will copy the arguments into the positions for a regular call.
     72 //
     73 // Note: The actual parameters are required to be in the locations given by the invoke's location
     74 //       summary. If an intrinsic modifies those locations before a slowpath call, they must be
     75 //       restored!
     76 //
     77 // Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
     78 //       sub-optimal (compared to a direct pointer call), but this is a slow-path.
     79 
     80 class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
     81  public:
     82   explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
     83       : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}
     84 
     85   Location MoveArguments(CodeGenerator* codegen) {
     86     InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
     87     IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
     88     return calling_convention_visitor.GetMethodLocation();
     89   }
     90 
     91   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     92     ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
     93     __ Bind(GetEntryLabel());
     94 
     95     SaveLiveRegisters(codegen, invoke_->GetLocations());
     96 
     97     Location method_loc = MoveArguments(codegen);
     98 
     99     if (invoke_->IsInvokeStaticOrDirect()) {
    100       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this);
    101     } else {
    102       codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this);
    103     }
    104 
    105     // Copy the result back to the expected output.
    106     Location out = invoke_->GetLocations()->Out();
    107     if (out.IsValid()) {
    108       DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
    109       DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
    110       codegen->MoveFromReturnRegister(out, invoke_->GetType());
    111     }
    112 
    113     RestoreLiveRegisters(codegen, invoke_->GetLocations());
    114     __ B(GetExitLabel());
    115   }
    116 
    117   const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }
    118 
    119  private:
    120   // The instruction where this slow path is happening.
    121   HInvoke* const invoke_;
    122 
    123   DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
    124 };
    125 
    126 // Compute base address for the System.arraycopy intrinsic in `base`.
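        // The computed address is `base = array + data_offset + pos * element_size`,
        // i.e. the address of the element at index `pos` in `array`.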
    127 static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler,
    128                                           Primitive::Type type,
    129                                           const vixl32::Register& array,
    130                                           const Location& pos,
    131                                           const vixl32::Register& base) {
    132   // This routine is only used by the SystemArrayCopy intrinsic at the
    133   // moment. It could be extended to support Primitive::kPrimChar as `type`
    134   // in order to implement the SystemArrayCopyChar intrinsic.
    135   DCHECK_EQ(type, Primitive::kPrimNot);
    136   const int32_t element_size = Primitive::ComponentSize(type);
    137   const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
    138   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
    139 
    140   if (pos.IsConstant()) {
    141     int32_t constant = Int32ConstantFrom(pos);
    142     __ Add(base, array, element_size * constant + data_offset);
    143   } else {
    144     __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift));
    145     __ Add(base, base, data_offset);
    146   }
    147 }
    148 
    149 // Compute end address for the System.arraycopy intrinsic in `end`.
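        // The computed address is `end = base + copy_length * element_size`, i.e. the
        // first address past the last element to be copied.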
    150 static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler,
    151                                          Primitive::Type type,
    152                                          const Location& copy_length,
    153                                          const vixl32::Register& base,
    154                                          const vixl32::Register& end) {
    155   // This routine is only used by the SystemArrayCopy intrinsic at the
    156   // moment. It could be extended to support Primitive::kPrimChar as `type`
    157   // in order to implement the SystemArrayCopyChar intrinsic.
    158   DCHECK_EQ(type, Primitive::kPrimNot);
    159   const int32_t element_size = Primitive::ComponentSize(type);
    160   const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
    161 
    162   if (copy_length.IsConstant()) {
    163     int32_t constant = Int32ConstantFrom(copy_length);
    164     __ Add(end, base, element_size * constant);
    165   } else {
    166     __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift));
    167   }
    168 }
    169 
    170 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
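        // Each reference loaded from the source is passed through the ReadBarrierMarkRegX
        // entry point before being stored into the destination; the loop runs until
        // `src_curr_addr` reaches `src_stop_addr`.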
    171 class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
    172  public:
    173   explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
    174       : SlowPathCodeARMVIXL(instruction) {
    175     DCHECK(kEmitCompilerReadBarrier);
    176     DCHECK(kUseBakerReadBarrier);
    177   }
    178 
    179   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    180     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    181     ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    182     LocationSummary* locations = instruction_->GetLocations();
    183     DCHECK(locations->CanCall());
    184     DCHECK(instruction_->IsInvokeStaticOrDirect())
    185         << "Unexpected instruction in read barrier arraycopy slow path: "
    186         << instruction_->DebugName();
    187     DCHECK(instruction_->GetLocations()->Intrinsified());
    188     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
    189 
    190     Primitive::Type type = Primitive::kPrimNot;
    191     const int32_t element_size = Primitive::ComponentSize(type);
    192 
    193     vixl32::Register dest = InputRegisterAt(instruction_, 2);
    194     Location dest_pos = locations->InAt(3);
    195     vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    196     vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    197     vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    198     vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));
    199 
    200     __ Bind(GetEntryLabel());
    201     // Compute the base destination address in `dst_curr_addr`.
    202     GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);
    203 
    204     vixl32::Label loop;
    205     __ Bind(&loop);
    206     __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    207     assembler->MaybeUnpoisonHeapReference(tmp);
    208     // TODO: Inline the mark bit check before calling the runtime?
    209     // tmp = ReadBarrier::Mark(tmp);
    210     // No need to save live registers; it's taken care of by the
    211     // entrypoint. Also, there is no need to update the stack mask,
    212     // as this runtime call will not trigger a garbage collection.
    213     // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    214     // explanations.)
    215     DCHECK(!tmp.IsSP());
    216     DCHECK(!tmp.IsLR());
    217     DCHECK(!tmp.IsPC());
    218     // IP is used internally by the ReadBarrierMarkRegX entry point
    219     // as a temporary (and not preserved).  It thus cannot be used by
    220     // any live register in this slow path.
    221     DCHECK(!src_curr_addr.Is(ip));
    222     DCHECK(!dst_curr_addr.Is(ip));
    223     DCHECK(!src_stop_addr.Is(ip));
    224     DCHECK(!tmp.Is(ip));
    225     DCHECK(tmp.IsRegister()) << tmp;
    226     // TODO: Load the entrypoint once before the loop, instead of
    227     // loading it at every iteration.
    228     int32_t entry_point_offset =
    229         Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    230     // This runtime call does not require a stack map.
    231     arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    232     assembler->MaybePoisonHeapReference(tmp);
    233     __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    234     __ Cmp(src_curr_addr, src_stop_addr);
    235     __ B(ne, &loop, /* far_target */ false);
    236     __ B(GetExitLabel());
    237   }
    238 
    239   const char* GetDescription() const OVERRIDE {
    240     return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
    241   }
    242 
    243  private:
    244   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
    245 };
    246 
    247 IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    248     : arena_(codegen->GetGraph()->GetArena()),
    249       codegen_(codegen),
    250       assembler_(codegen->GetAssembler()),
    251       features_(codegen->GetInstructionSetFeatures()) {}
    252 
    253 bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
    254   Dispatch(invoke);
    255   LocationSummary* res = invoke->GetLocations();
    256   if (res == nullptr) {
    257     return false;
    258   }
    259   return res->Intrinsified();
    260 }
    261 
    262 static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
    263   LocationSummary* locations = new (arena) LocationSummary(invoke,
    264                                                            LocationSummary::kNoCall,
    265                                                            kIntrinsified);
    266   locations->SetInAt(0, Location::RequiresFpuRegister());
    267   locations->SetOut(Location::RequiresRegister());
    268 }
    269 
    270 static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
    271   LocationSummary* locations = new (arena) LocationSummary(invoke,
    272                                                            LocationSummary::kNoCall,
    273                                                            kIntrinsified);
    274   locations->SetInAt(0, Location::RequiresRegister());
    275   locations->SetOut(Location::RequiresFpuRegister());
    276 }
    277 
    278 static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
    279   Location input = locations->InAt(0);
    280   Location output = locations->Out();
    281   if (is64bit) {
    282     __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
    283   } else {
    284     __ Vmov(RegisterFrom(output), SRegisterFrom(input));
    285   }
    286 }
    287 
    288 static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
    289   Location input = locations->InAt(0);
    290   Location output = locations->Out();
    291   if (is64bit) {
    292     __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
    293   } else {
    294     __ Vmov(SRegisterFrom(output), RegisterFrom(input));
    295   }
    296 }
    297 
    298 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
    299   CreateFPToIntLocations(arena_, invoke);
    300 }
    301 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
    302   CreateIntToFPLocations(arena_, invoke);
    303 }
    304 
    305 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
    306   MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
    307 }
    308 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
    309   MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
    310 }
    311 
    312 void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
    313   CreateFPToIntLocations(arena_, invoke);
    314 }
    315 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
    316   CreateIntToFPLocations(arena_, invoke);
    317 }
    318 
    319 void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
    320   MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
    321 }
    322 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
    323   MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
    324 }
    325 
    326 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
    327   LocationSummary* locations = new (arena) LocationSummary(invoke,
    328                                                            LocationSummary::kNoCall,
    329                                                            kIntrinsified);
    330   locations->SetInAt(0, Location::RequiresRegister());
    331   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
    332 }
    333 
    334 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
    335   LocationSummary* locations = new (arena) LocationSummary(invoke,
    336                                                            LocationSummary::kNoCall,
    337                                                            kIntrinsified);
    338   locations->SetInAt(0, Location::RequiresFpuRegister());
    339   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
    340 }
    341 
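        // Generates code for {Integer,Long}.numberOfLeadingZeros using the CLZ instruction.
        // For longs: CLZ of the high word, or CLZ of the low word plus 32 when the high
        // word is zero.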
    342 static void GenNumberOfLeadingZeros(HInvoke* invoke,
    343                                     Primitive::Type type,
    344                                     CodeGeneratorARMVIXL* codegen) {
    345   ArmVIXLAssembler* assembler = codegen->GetAssembler();
    346   LocationSummary* locations = invoke->GetLocations();
    347   Location in = locations->InAt(0);
    348   vixl32::Register out = RegisterFrom(locations->Out());
    349 
    350   DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
    351 
    352   if (type == Primitive::kPrimLong) {
    353     vixl32::Register in_reg_lo = LowRegisterFrom(in);
    354     vixl32::Register in_reg_hi = HighRegisterFrom(in);
    355     vixl32::Label end;
    356     vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    357     __ Clz(out, in_reg_hi);
    358     __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false);
    359     __ Clz(out, in_reg_lo);
    360     __ Add(out, out, 32);
    361     if (end.IsReferenced()) {
    362       __ Bind(&end);
    363     }
    364   } else {
    365     __ Clz(out, RegisterFrom(in));
    366   }
    367 }
    368 
    369 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
    370   CreateIntToIntLocations(arena_, invoke);
    371 }
    372 
    373 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
    374   GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_);
    375 }
    376 
    377 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
    378   LocationSummary* locations = new (arena_) LocationSummary(invoke,
    379                                                            LocationSummary::kNoCall,
    380                                                            kIntrinsified);
    381   locations->SetInAt(0, Location::RequiresRegister());
    382   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
    383 }
    384 
    385 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
    386   GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_);
    387 }
    388 
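        // Generates code for {Integer,Long}.numberOfTrailingZeros. ARM has no CTZ
        // instruction, so the count is computed as CLZ(RBIT(x)). For longs, the low word
        // is handled first; if it is zero, the result is CLZ(RBIT(hi)) + 32.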
    389 static void GenNumberOfTrailingZeros(HInvoke* invoke,
    390                                      Primitive::Type type,
    391                                      CodeGeneratorARMVIXL* codegen) {
    392   DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
    393 
    394   ArmVIXLAssembler* assembler = codegen->GetAssembler();
    395   LocationSummary* locations = invoke->GetLocations();
    396   vixl32::Register out = RegisterFrom(locations->Out());
    397 
    398   if (type == Primitive::kPrimLong) {
    399     vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
    400     vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
    401     vixl32::Label end;
    402     vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    403     __ Rbit(out, in_reg_lo);
    404     __ Clz(out, out);
    405     __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false);
    406     __ Rbit(out, in_reg_hi);
    407     __ Clz(out, out);
    408     __ Add(out, out, 32);
    409     if (end.IsReferenced()) {
    410       __ Bind(&end);
    411     }
    412   } else {
    413     vixl32::Register in = RegisterFrom(locations->InAt(0));
    414     __ Rbit(out, in);
    415     __ Clz(out, out);
    416   }
    417 }
    418 
    419 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
    420   LocationSummary* locations = new (arena_) LocationSummary(invoke,
    421                                                             LocationSummary::kNoCall,
    422                                                             kIntrinsified);
    423   locations->SetInAt(0, Location::RequiresRegister());
    424   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
    425 }
    426 
    427 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
    428   GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_);
    429 }
    430 
    431 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
    432   LocationSummary* locations = new (arena_) LocationSummary(invoke,
    433                                                             LocationSummary::kNoCall,
    434                                                             kIntrinsified);
    435   locations->SetInAt(0, Location::RequiresRegister());
    436   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
    437 }
    438 
    439 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
    440   GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_);
    441 }
    442 
    443 static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
    444   __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
    445 }
    446 
    447 void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
    448   CreateFPToFPLocations(arena_, invoke);
    449 }
    450 
    451 void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
    452   MathAbsFP(invoke, GetAssembler());
    453 }
    454 
    455 void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
    456   CreateFPToFPLocations(arena_, invoke);
    457 }
    458 
    459 void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
    460   MathAbsFP(invoke, GetAssembler());
    461 }
    462 
    463 static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
    464   LocationSummary* locations = new (arena) LocationSummary(invoke,
    465                                                            LocationSummary::kNoCall,
    466                                                            kIntrinsified);
    467   locations->SetInAt(0, Location::RequiresRegister());
    468   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
    469 
    470   locations->AddTemp(Location::RequiresRegister());
    471 }
    472 
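        // Generates a branch-free abs: with mask = x >> 31 (all ones for negative x, zero
        // otherwise), abs(x) = (x + mask) ^ mask. The 64-bit variant propagates the carry
        // of the add with ADDS/ADC. As with Math.abs, abs(MIN_VALUE) == MIN_VALUE.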
    473 static void GenAbsInteger(LocationSummary* locations,
    474                           bool is64bit,
    475                           ArmVIXLAssembler* assembler) {
    476   Location in = locations->InAt(0);
    477   Location output = locations->Out();
    478 
    479   vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
    480 
    481   if (is64bit) {
    482     vixl32::Register in_reg_lo = LowRegisterFrom(in);
    483     vixl32::Register in_reg_hi = HighRegisterFrom(in);
    484     vixl32::Register out_reg_lo = LowRegisterFrom(output);
    485     vixl32::Register out_reg_hi = HighRegisterFrom(output);
    486 
    487     DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
    488 
    489     __ Asr(mask, in_reg_hi, 31);
    490     __ Adds(out_reg_lo, in_reg_lo, mask);
    491     __ Adc(out_reg_hi, in_reg_hi, mask);
    492     __ Eor(out_reg_lo, mask, out_reg_lo);
    493     __ Eor(out_reg_hi, mask, out_reg_hi);
    494   } else {
    495     vixl32::Register in_reg = RegisterFrom(in);
    496     vixl32::Register out_reg = RegisterFrom(output);
    497 
    498     __ Asr(mask, in_reg, 31);
    499     __ Add(out_reg, in_reg, mask);
    500     __ Eor(out_reg, mask, out_reg);
    501   }
    502 }
    503 
    504 void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
    505   CreateIntToIntPlusTemp(arena_, invoke);
    506 }
    507 
    508 void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
    509   GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
    510 }
    511 
    512 
    513 void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
    514   CreateIntToIntPlusTemp(arena_, invoke);
    515 }
    516 
    517 void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
    518   GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
    519 }
    520 
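        // Generates Math.min/max for floats with Java semantics: if either input is NaN
        // the result is NaN (built below as 0x7FC0xxxx), and signed zeros are ordered so
        // that min(+0.0f, -0.0f) == -0.0f (integer OR of the bit patterns) and
        // max(+0.0f, -0.0f) == +0.0f (integer AND of the bit patterns).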
    521 static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
    522   ArmVIXLAssembler* assembler = codegen->GetAssembler();
    523   Location op1_loc = invoke->GetLocations()->InAt(0);
    524   Location op2_loc = invoke->GetLocations()->InAt(1);
    525   Location out_loc = invoke->GetLocations()->Out();
    526 
    527   // Optimization: don't generate any code if inputs are the same.
    528   if (op1_loc.Equals(op2_loc)) {
    529     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    530     return;
    531   }
    532 
    533   vixl32::SRegister op1 = SRegisterFrom(op1_loc);
    534   vixl32::SRegister op2 = SRegisterFrom(op2_loc);
    535   vixl32::SRegister out = OutputSRegister(invoke);
    536   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
    537   const vixl32::Register temp1 = temps.Acquire();
    538   vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
    539   vixl32::Label nan, done;
    540   vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
    541 
    542   DCHECK(op1.Is(out));
    543 
    544   __ Vcmp(op1, op2);
    545   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
    546   __ B(vs, &nan, /* far_target */ false);  // if un-ordered, go to NaN handling.
    547 
    548   // op1 <> op2
    549   vixl32::ConditionType cond = is_min ? gt : lt;
    550   {
    551     ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
    552                                 2 * kMaxInstructionSizeInBytes,
    553                                 CodeBufferCheckScope::kMaximumSize);
    554     __ it(cond);
    555     __ vmov(cond, F32, out, op2);
    556   }
    557   // For <> (not equal), the min/max calculation is already done.
    558   __ B(ne, final_label, /* far_target */ false);
    559 
    560   // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
    561   __ Vmov(temp1, op1);
    562   __ Vmov(temp2, op2);
    563   if (is_min) {
    564     __ Orr(temp1, temp1, temp2);
    565   } else {
    566     __ And(temp1, temp1, temp2);
    567   }
    568   __ Vmov(out, temp1);
    569   __ B(final_label);
    570 
    571   // handle NaN input.
    572   __ Bind(&nan);
    573   __ Movt(temp1, High16Bits(kNanFloat));  // 0x7FC0xxxx is a NaN.
    574   __ Vmov(out, temp1);
    575 
    576   if (done.IsReferenced()) {
    577     __ Bind(&done);
    578   }
    579 }
    580 
    581 static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
    582   LocationSummary* locations = new (arena) LocationSummary(invoke,
    583                                                            LocationSummary::kNoCall,
    584                                                            kIntrinsified);
    585   locations->SetInAt(0, Location::RequiresFpuRegister());
    586   locations->SetInAt(1, Location::RequiresFpuRegister());
    587   locations->SetOut(Location::SameAsFirstInput());
    588 }
    589 
    590 void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
    591   CreateFPFPToFPLocations(arena_, invoke);
    592   invoke->GetLocations()->AddTemp(Location::RequiresRegister());
    593 }
    594 
    595 void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
    596   GenMinMaxFloat(invoke, /* is_min */ true, codegen_);
    597 }
    598 
    599 void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
    600   CreateFPFPToFPLocations(arena_, invoke);
    601   invoke->GetLocations()->AddTemp(Location::RequiresRegister());
    602 }
    603 
    604 void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
    605   GenMinMaxFloat(invoke, /* is_min */ false, codegen_);
    606 }
    607 
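        // Generates Math.min/max for doubles, mirroring the float version above: VAND of
        // the bit patterns handles max(+0.0, -0.0), while the shared VORR path produces
        // -0.0 for min(+0.0, -0.0) and propagates a NaN if either input is NaN.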
    608 static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
    609   ArmVIXLAssembler* assembler = codegen->GetAssembler();
    610   Location op1_loc = invoke->GetLocations()->InAt(0);
    611   Location op2_loc = invoke->GetLocations()->InAt(1);
    612   Location out_loc = invoke->GetLocations()->Out();
    613 
    614   // Optimization: don't generate any code if inputs are the same.
    615   if (op1_loc.Equals(op2_loc)) {
    616     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    617     return;
    618   }
    619 
    620   vixl32::DRegister op1 = DRegisterFrom(op1_loc);
    621   vixl32::DRegister op2 = DRegisterFrom(op2_loc);
    622   vixl32::DRegister out = OutputDRegister(invoke);
    623   vixl32::Label handle_nan_eq, done;
    624   vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
    625 
    626   DCHECK(op1.Is(out));
    627 
    628   __ Vcmp(op1, op2);
    629   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
    630   __ B(vs, &handle_nan_eq, /* far_target */ false);  // if un-ordered, go to NaN handling.
    631 
    632   // op1 <> op2
    633   vixl32::ConditionType cond = is_min ? gt : lt;
    634   {
    635     ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
    636                                 2 * kMaxInstructionSizeInBytes,
    637                                 CodeBufferCheckScope::kMaximumSize);
    638     __ it(cond);
    639     __ vmov(cond, F64, out, op2);
    640   }
    641   // For <> (not equal), the min/max calculation is already done.
    642   __ B(ne, final_label, /* far_target */ false);
    643 
    644   // handle op1 == op2, max(+0.0,-0.0).
    645   if (!is_min) {
    646     __ Vand(F64, out, op1, op2);
    647     __ B(final_label);
    648   }
    649 
    650   // handle op1 == op2, min(+0.0,-0.0), NaN input.
    651   __ Bind(&handle_nan_eq);
    652   __ Vorr(F64, out, op1, op2);  // assemble op1/-0.0/NaN.
    653 
    654   if (done.IsReferenced()) {
    655     __ Bind(&done);
    656   }
    657 }
    658 
    659 void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
    660   CreateFPFPToFPLocations(arena_, invoke);
    661 }
    662 
    663 void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
    664   GenMinMaxDouble(invoke, /* is_min */ true , codegen_);
    665 }
    666 
    667 void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
    668   CreateFPFPToFPLocations(arena_, invoke);
    669 }
    670 
    671 void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
    672   GenMinMaxDouble(invoke, /* is_min */ false, codegen_);
    673 }
    674 
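        // Generates Math.min/max for longs. CMP on the low words followed by SBCS on the
        // high words performs a full 64-bit signed compare, after which GE/LT are valid
        // and the ITT block conditionally moves op2 into the output.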
    675 static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
    676   Location op1_loc = invoke->GetLocations()->InAt(0);
    677   Location op2_loc = invoke->GetLocations()->InAt(1);
    678   Location out_loc = invoke->GetLocations()->Out();
    679 
    680   // Optimization: don't generate any code if inputs are the same.
    681   if (op1_loc.Equals(op2_loc)) {
    682     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    683     return;
    684   }
    685 
    686   vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
    687   vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
    688   vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
    689   vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
    690   vixl32::Register out_lo = LowRegisterFrom(out_loc);
    691   vixl32::Register out_hi = HighRegisterFrom(out_loc);
    692   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
    693   const vixl32::Register temp = temps.Acquire();
    694 
    695   DCHECK(op1_lo.Is(out_lo));
    696   DCHECK(op1_hi.Is(out_hi));
    697 
    698   // Compare op1 >= op2, or op1 < op2.
    699   __ Cmp(out_lo, op2_lo);
    700   __ Sbcs(temp, out_hi, op2_hi);
    701 
    702   // Now GE/LT condition code is correct for the long comparison.
    703   {
    704     vixl32::ConditionType cond = is_min ? ge : lt;
    705     ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
    706                                 3 * kMaxInstructionSizeInBytes,
    707                                 CodeBufferCheckScope::kMaximumSize);
    708     __ itt(cond);
    709     __ mov(cond, out_lo, op2_lo);
    710     __ mov(cond, out_hi, op2_hi);
    711   }
    712 }
    713 
    714 static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
    715   LocationSummary* locations = new (arena) LocationSummary(invoke,
    716                                                            LocationSummary::kNoCall,
    717                                                            kIntrinsified);
    718   locations->SetInAt(0, Location::RequiresRegister());
    719   locations->SetInAt(1, Location::RequiresRegister());
    720   locations->SetOut(Location::SameAsFirstInput());
    721 }
    722 
    723 void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
    724   CreateLongLongToLongLocations(arena_, invoke);
    725 }
    726 
    727 void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
    728   GenMinMaxLong(invoke, /* is_min */ true, GetAssembler());
    729 }
    730 
    731 void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
    732   CreateLongLongToLongLocations(arena_, invoke);
    733 }
    734 
    735 void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
    736   GenMinMaxLong(invoke, /* is_min */ false, GetAssembler());
    737 }
    738 
    739 static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
    740   vixl32::Register op1 = InputRegisterAt(invoke, 0);
    741   vixl32::Register op2 = InputRegisterAt(invoke, 1);
    742   vixl32::Register out = OutputRegister(invoke);
    743 
    744   __ Cmp(op1, op2);
    745 
    746   {
    747     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
    748                            3 * kMaxInstructionSizeInBytes,
    749                            CodeBufferCheckScope::kMaximumSize);
    750 
    751     __ ite(is_min ? lt : gt);
    752     __ mov(is_min ? lt : gt, out, op1);
    753     __ mov(is_min ? ge : le, out, op2);
    754   }
    755 }
    756 
    757 static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
    758   LocationSummary* locations = new (arena) LocationSummary(invoke,
    759                                                            LocationSummary::kNoCall,
    760                                                            kIntrinsified);
    761   locations->SetInAt(0, Location::RequiresRegister());
    762   locations->SetInAt(1, Location::RequiresRegister());
    763   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
    764 }
    765 
    766 void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
    767   CreateIntIntToIntLocations(arena_, invoke);
    768 }
    769 
    770 void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
    771   GenMinMax(invoke, /* is_min */ true, GetAssembler());
    772 }
    773 
    774 void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
    775   CreateIntIntToIntLocations(arena_, invoke);
    776 }
    777 
    778 void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
    779   GenMinMax(invoke, /* is_min */ false, GetAssembler());
    780 }
    781 
    782 void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
    783   CreateFPToFPLocations(arena_, invoke);
    784 }
    785 
    786 void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
    787   ArmVIXLAssembler* assembler = GetAssembler();
    788   __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
    789 }
    790 
    791 void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
    792   if (features_.HasARMv8AInstructions()) {
    793     CreateFPToFPLocations(arena_, invoke);
    794   }
    795 }
    796 
    797 void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
    798   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
    799   ArmVIXLAssembler* assembler = GetAssembler();
    800   __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
    801 }
    802 
    803 void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
    804   if (features_.HasARMv8AInstructions()) {
    805     LocationSummary* locations = new (arena_) LocationSummary(invoke,
    806                                                               LocationSummary::kNoCall,
    807                                                               kIntrinsified);
    808     locations->SetInAt(0, Location::RequiresFpuRegister());
    809     locations->SetOut(Location::RequiresRegister());
    810     locations->AddTemp(Location::RequiresFpuRegister());
    811   }
    812 }
    813 
    814 void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
    815   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
    816 
    817   ArmVIXLAssembler* assembler = GetAssembler();
    818   vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
    819   vixl32::Register out_reg = OutputRegister(invoke);
    820   vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
    821   vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
    822   vixl32::Label done;
    823   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
    824 
    825   // Round to nearest integer, ties away from zero.
    826   __ Vcvta(S32, F32, temp1, in_reg);
    827   __ Vmov(out_reg, temp1);
    828 
    829   // For positive, zero or NaN inputs, rounding is done.
    830   __ Cmp(out_reg, 0);
    831   __ B(ge, final_label, /* far_target */ false);
    832 
    833   // Handle input < 0 cases.
    834   // If input is negative but not a tie, previous result (round to nearest) is valid.
    835   // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
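          // For example, Math.round(-2.5f) == -2: VCVTA yields -3 (ties away from zero),
          // in_reg - Vrinta(in_reg) == 0.5 detects the tie, and the ADD below corrects to -2.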
    836   __ Vrinta(F32, F32, temp1, in_reg);
    837   __ Vmov(temp2, 0.5);
    838   __ Vsub(F32, temp1, in_reg, temp1);
    839   __ Vcmp(F32, temp1, temp2);
    840   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
    841   {
    842     // Use ExactAssemblyScope here because we are using IT.
    843     ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
    844                                 2 * kMaxInstructionSizeInBytes,
    845                                 CodeBufferCheckScope::kMaximumSize);
    846     __ it(eq);
    847     __ add(eq, out_reg, out_reg, 1);
    848   }
    849 
    850   if (done.IsReferenced()) {
    851     __ Bind(&done);
    852   }
    853 }
    854 
    855 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
    856   CreateIntToIntLocations(arena_, invoke);
    857 }
    858 
    859 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
    860   ArmVIXLAssembler* assembler = GetAssembler();
    861   // Ignore upper 4B of long address.
    862   __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
    863 }
    864 
    865 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
    866   CreateIntToIntLocations(arena_, invoke);
    867 }
    868 
    869 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
    870   ArmVIXLAssembler* assembler = GetAssembler();
    871   // Ignore upper 4B of long address.
    872   __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
    873 }
    874 
    875 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
    876   CreateIntToIntLocations(arena_, invoke);
    877 }
    878 
    879 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
    880   ArmVIXLAssembler* assembler = GetAssembler();
    881   // Ignore upper 4B of long address.
    882   vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
    883   // The address may be unaligned, and LDRD faults on unaligned addresses even when
    884   // SCTLR.A = 0, so load the two words with separate LDR instructions instead.
    885   vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
    886   vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
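          // If the address register aliases the low half of the output, load the high half
          // first so the address is not clobbered before the second load.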
    887   if (addr.Is(lo)) {
    888     __ Ldr(hi, MemOperand(addr, 4));
    889     __ Ldr(lo, MemOperand(addr));
    890   } else {
    891     __ Ldr(lo, MemOperand(addr));
    892     __ Ldr(hi, MemOperand(addr, 4));
    893   }
    894 }
    895 
    896 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
    897   CreateIntToIntLocations(arena_, invoke);
    898 }
    899 
    900 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
    901   ArmVIXLAssembler* assembler = GetAssembler();
    902   // Ignore upper 4B of long address.
    903   __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
    904 }
    905 
    906 static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
    907   LocationSummary* locations = new (arena) LocationSummary(invoke,
    908                                                            LocationSummary::kNoCall,
    909                                                            kIntrinsified);
    910   locations->SetInAt(0, Location::RequiresRegister());
    911   locations->SetInAt(1, Location::RequiresRegister());
    912 }
    913 
    914 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
    915   CreateIntIntToVoidLocations(arena_, invoke);
    916 }
    917 
    918 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
    919   ArmVIXLAssembler* assembler = GetAssembler();
    920   __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
    921 }
    922 
    923 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
    924   CreateIntIntToVoidLocations(arena_, invoke);
    925 }
    926 
    927 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
    928   ArmVIXLAssembler* assembler = GetAssembler();
    929   __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
    930 }
    931 
    932 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
    933   CreateIntIntToVoidLocations(arena_, invoke);
    934 }
    935 
    936 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
    937   ArmVIXLAssembler* assembler = GetAssembler();
    938   // Ignore upper 4B of long address.
    939   vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
    940   // The address may be unaligned, and STRD faults on unaligned addresses even when
    941   // SCTLR.A = 0, so store the two words with separate STR instructions instead.
    942   __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
    943   __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
    944 }
    945 
    946 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
    947   CreateIntIntToVoidLocations(arena_, invoke);
    948 }
    949 
    950 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
    951   ArmVIXLAssembler* assembler = GetAssembler();
    952   __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
    953 }
    954 
    955 void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
    956   LocationSummary* locations = new (arena_) LocationSummary(invoke,
    957                                                             LocationSummary::kNoCall,
    958                                                             kIntrinsified);
    959   locations->SetOut(Location::RequiresRegister());
    960 }
    961 
    962 void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
    963   ArmVIXLAssembler* assembler = GetAssembler();
    964   __ Ldr(OutputRegister(invoke),
    965          MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
    966 }
    967 
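        // Generates Unsafe.get{Int,Long,Object}[Volatile]. Volatile loads are followed by
        // a DMB ISH to give acquire semantics; volatile long loads fall back to LDREXD
        // when the CPU has no single-copy atomic LDRD, and object loads go through the
        // read barrier when compiling with read barriers.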
    968 static void GenUnsafeGet(HInvoke* invoke,
    969                          Primitive::Type type,
    970                          bool is_volatile,
    971                          CodeGeneratorARMVIXL* codegen) {
    972   LocationSummary* locations = invoke->GetLocations();
    973   ArmVIXLAssembler* assembler = codegen->GetAssembler();
    974   Location base_loc = locations->InAt(1);
    975   vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
    976   Location offset_loc = locations->InAt(2);
    977   vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
    978   Location trg_loc = locations->Out();
    979 
    980   switch (type) {
    981     case Primitive::kPrimInt: {
    982       vixl32::Register trg = RegisterFrom(trg_loc);
    983       __ Ldr(trg, MemOperand(base, offset));
    984       if (is_volatile) {
    985         __ Dmb(vixl32::ISH);
    986       }
    987       break;
    988     }
    989 
    990     case Primitive::kPrimNot: {
    991       vixl32::Register trg = RegisterFrom(trg_loc);
    992       if (kEmitCompilerReadBarrier) {
    993         if (kUseBakerReadBarrier) {
    994           Location temp = locations->GetTemp(0);
    995           codegen->GenerateReferenceLoadWithBakerReadBarrier(
    996               invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
    997           if (is_volatile) {
    998             __ Dmb(vixl32::ISH);
    999           }
   1000         } else {
   1001           __ Ldr(trg, MemOperand(base, offset));
   1002           if (is_volatile) {
   1003             __ Dmb(vixl32::ISH);
   1004           }
   1005           codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
   1006         }
   1007       } else {
   1008         __ Ldr(trg, MemOperand(base, offset));
   1009         if (is_volatile) {
   1010           __ Dmb(vixl32::ISH);
   1011         }
   1012         assembler->MaybeUnpoisonHeapReference(trg);
   1013       }
   1014       break;
   1015     }
   1016 
   1017     case Primitive::kPrimLong: {
   1018       vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
   1019       vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
   1020       if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
   1021         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
   1022         const vixl32::Register temp_reg = temps.Acquire();
   1023         __ Add(temp_reg, base, offset);
   1024         __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
   1025       } else {
   1026         __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
   1027       }
   1028       if (is_volatile) {
   1029         __ Dmb(vixl32::ISH);
   1030       }
   1031       break;
   1032     }
   1033 
   1034     default:
   1035       LOG(FATAL) << "Unexpected type " << type;
   1036       UNREACHABLE();
   1037   }
   1038 }
   1039 
   1040 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
   1041                                           HInvoke* invoke,
   1042                                           Primitive::Type type) {
   1043   bool can_call = kEmitCompilerReadBarrier &&
   1044       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
   1045        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   1046   LocationSummary* locations = new (arena) LocationSummary(invoke,
   1047                                                            (can_call
   1048                                                                 ? LocationSummary::kCallOnSlowPath
   1049                                                                 : LocationSummary::kNoCall),
   1050                                                            kIntrinsified);
   1051   if (can_call && kUseBakerReadBarrier) {
   1052     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   1053   }
   1054   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   1055   locations->SetInAt(1, Location::RequiresRegister());
   1056   locations->SetInAt(2, Location::RequiresRegister());
   1057   locations->SetOut(Location::RequiresRegister(),
   1058                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
   1059   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   1060     // We need a temporary register for the read barrier marking slow
   1061     // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier.
   1062     locations->AddTemp(Location::RequiresRegister());
   1063   }
   1064 }
   1065 
   1066 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
   1067   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
   1068 }
   1069 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
   1070   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
   1071 }
   1072 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
   1073   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
   1074 }
   1075 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
   1076   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
   1077 }
   1078 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
   1079   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
   1080 }
   1081 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
   1082   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
   1083 }
   1084 
   1085 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
   1086   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
   1087 }
   1088 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
   1089   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
   1090 }
   1091 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
   1092   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
   1093 }
   1094 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
   1095   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
   1096 }
   1097 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
   1098   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
   1099 }
   1100 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
   1101   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
   1102 }
   1103 
   1104 static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
   1105                                      const ArmInstructionSetFeatures& features,
   1106                                      Primitive::Type type,
   1107                                      bool is_volatile,
   1108                                      HInvoke* invoke) {
   1109   LocationSummary* locations = new (arena) LocationSummary(invoke,
   1110                                                            LocationSummary::kNoCall,
   1111                                                            kIntrinsified);
   1112   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   1113   locations->SetInAt(1, Location::RequiresRegister());
   1114   locations->SetInAt(2, Location::RequiresRegister());
   1115   locations->SetInAt(3, Location::RequiresRegister());
   1116 
   1117   if (type == Primitive::kPrimLong) {
   1118     // Potentially need temps for ldrexd-strexd loop.
   1119     if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
   1120       locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
   1121       locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
   1122     }
   1123   } else if (type == Primitive::kPrimNot) {
   1124     // Temps for card-marking.
   1125     locations->AddTemp(Location::RequiresRegister());  // Temp.
   1126     locations->AddTemp(Location::RequiresRegister());  // Card.
   1127   }
   1128 }
   1129 
   1130 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
   1131   CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
   1132 }
   1133 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
   1134   CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
   1135 }
   1136 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
   1137   CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
   1138 }
   1139 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
   1140   CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
   1141 }
   1142 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
   1143   CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
   1144 }
   1145 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
   1146   CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
   1147 }
   1148 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
   1149   CreateIntIntIntIntToVoid(
   1150       arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
   1151 }
   1152 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
   1153   CreateIntIntIntIntToVoid(
   1154       arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
   1155 }
   1156 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
   1157   CreateIntIntIntIntToVoid(
   1158       arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
   1159 }
   1160 
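        // Generates Unsafe.put{Int,Long,Object}[Ordered|Volatile]. A DMB ISH is emitted
        // before the store for ordered and volatile puts, and after it as well for
        // volatile puts. Volatile long stores without atomic STRD use an LDREXD/STREXD
        // retry loop, and reference stores poison the value if heap poisoning is enabled
        // and then mark the GC card.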
   1161 static void GenUnsafePut(LocationSummary* locations,
   1162                          Primitive::Type type,
   1163                          bool is_volatile,
   1164                          bool is_ordered,
   1165                          CodeGeneratorARMVIXL* codegen) {
   1166   ArmVIXLAssembler* assembler = codegen->GetAssembler();
   1167 
   1168   vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
   1169   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
   1170   vixl32::Register value;
   1171 
   1172   if (is_volatile || is_ordered) {
   1173     __ Dmb(vixl32::ISH);
   1174   }
   1175 
   1176   if (type == Primitive::kPrimLong) {
   1177     vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
   1178     vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
   1179     value = value_lo;
   1180     if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
   1181       vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
   1182       vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
   1183       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
   1184       const vixl32::Register temp_reg = temps.Acquire();
   1185 
   1186       __ Add(temp_reg, base, offset);
   1187       vixl32::Label loop_head;
   1188       __ Bind(&loop_head);
   1189       __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
   1190       __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
   1191       __ Cmp(temp_lo, 0);
   1192       __ B(ne, &loop_head, /* far_target */ false);
   1193     } else {
   1194       __ Strd(value_lo, value_hi, MemOperand(base, offset));
   1195     }
   1196   } else {
   1197     value = RegisterFrom(locations->InAt(3));
   1198     vixl32::Register source = value;
   1199     if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
   1200       vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
   1201       __ Mov(temp, value);
   1202       assembler->PoisonHeapReference(temp);
   1203       source = temp;
   1204     }
   1205     __ Str(source, MemOperand(base, offset));
   1206   }
   1207 
   1208   if (is_volatile) {
   1209     __ Dmb(vixl32::ISH);
   1210   }
   1211 
   1212   if (type == Primitive::kPrimNot) {
   1213     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
   1214     vixl32::Register card = RegisterFrom(locations->GetTemp(1));
   1215     bool value_can_be_null = true;  // TODO: Worth finding out this information?
   1216     codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
   1217   }
   1218 }
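
         // A rough sketch of the sequences GenUnsafePut above emits (illustrative, not verbatim
         // disassembly). For an int/object put:
         //     dmb ish                          (only if ordered or volatile)
         //     str   value, [base, offset]      (the value is poisoned first for object puts)
         //     dmb ish                          (only if volatile)
         //     <mark GC card>                   (only for object puts)
         // Long puts use a STRD of the register pair instead, except for a volatile long put
         // without HasAtomicLdrdAndStrd(), which uses an LDREXD/STREXD retry loop:
         //     add    tmp, base, offset
         //   loop:
         //     ldrexd temp_lo, temp_hi, [tmp]
         //     strexd temp_lo, value_lo, value_hi, [tmp]
         //     cmp    temp_lo, #0
         //     bne    loop
         // so that the 64-bit store is single-copy atomic even without LPAE.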
   1219 
   1220 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
   1221   GenUnsafePut(invoke->GetLocations(),
   1222                Primitive::kPrimInt,
   1223                /* is_volatile */ false,
   1224                /* is_ordered */ false,
   1225                codegen_);
   1226 }
   1227 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
   1228   GenUnsafePut(invoke->GetLocations(),
   1229                Primitive::kPrimInt,
   1230                /* is_volatile */ false,
   1231                /* is_ordered */ true,
   1232                codegen_);
   1233 }
   1234 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
   1235   GenUnsafePut(invoke->GetLocations(),
   1236                Primitive::kPrimInt,
   1237                /* is_volatile */ true,
   1238                /* is_ordered */ false,
   1239                codegen_);
   1240 }
   1241 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
   1242   GenUnsafePut(invoke->GetLocations(),
   1243                Primitive::kPrimNot,
   1244                /* is_volatile */ false,
   1245                /* is_ordered */ false,
   1246                codegen_);
   1247 }
   1248 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
   1249   GenUnsafePut(invoke->GetLocations(),
   1250                Primitive::kPrimNot,
   1251                /* is_volatile */ false,
   1252                /* is_ordered */ true,
   1253                codegen_);
   1254 }
   1255 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
   1256   GenUnsafePut(invoke->GetLocations(),
   1257                Primitive::kPrimNot,
   1258                /* is_volatile */ true,
   1259                /* is_ordered */ false,
   1260                codegen_);
   1261 }
   1262 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
   1263   GenUnsafePut(invoke->GetLocations(),
   1264                Primitive::kPrimLong,
   1265                /* is_volatile */ false,
   1266                /* is_ordered */ false,
   1267                codegen_);
   1268 }
   1269 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
   1270   GenUnsafePut(invoke->GetLocations(),
   1271                Primitive::kPrimLong,
   1272                /* is_volatile */ false,
   1273                /* is_ordered */ true,
   1274                codegen_);
   1275 }
   1276 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
   1277   GenUnsafePut(invoke->GetLocations(),
   1278                Primitive::kPrimLong,
   1279                /* is_volatile */ true,
   1280                /* is_ordered */ false,
   1281                codegen_);
   1282 }
   1283 
   1284 static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
   1285                                                 HInvoke* invoke,
   1286                                                 Primitive::Type type) {
   1287   bool can_call = kEmitCompilerReadBarrier &&
   1288       kUseBakerReadBarrier &&
   1289       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   1290   LocationSummary* locations = new (arena) LocationSummary(invoke,
   1291                                                            (can_call
   1292                                                                 ? LocationSummary::kCallOnSlowPath
   1293                                                                 : LocationSummary::kNoCall),
   1294                                                            kIntrinsified);
   1295   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   1296   locations->SetInAt(1, Location::RequiresRegister());
   1297   locations->SetInAt(2, Location::RequiresRegister());
   1298   locations->SetInAt(3, Location::RequiresRegister());
   1299   locations->SetInAt(4, Location::RequiresRegister());
   1300 
   1301   // If heap poisoning is enabled, we don't want the unpoisoning
   1302   // operations to potentially clobber the output. Likewise when
    1303   // emitting a (Baker) read barrier, which may make a runtime call.
   1304   Location::OutputOverlap overlaps =
   1305       ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
   1306       ? Location::kOutputOverlap
   1307       : Location::kNoOutputOverlap;
   1308   locations->SetOut(Location::RequiresRegister(), overlaps);
   1309 
   1310   // Temporary registers used in CAS. In the object case
   1311   // (UnsafeCASObject intrinsic), these are also used for
   1312   // card-marking, and possibly for (Baker) read barrier.
   1313   locations->AddTemp(Location::RequiresRegister());  // Pointer.
   1314   locations->AddTemp(Location::RequiresRegister());  // Temp 1.
   1315 }
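
         // (Illustrative note.) The five inputs correspond to the signature of
         // sun.misc.Unsafe.compareAndSwapX(Object o, long offset, x expected, x newValue):
         // the unused Unsafe receiver, the object, the long offset, the expected value and the
         // new value; the boolean success result goes in the output register.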
   1316 
   1317 static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) {
   1318   DCHECK_NE(type, Primitive::kPrimLong);
   1319 
   1320   ArmVIXLAssembler* assembler = codegen->GetAssembler();
   1321   LocationSummary* locations = invoke->GetLocations();
   1322 
   1323   Location out_loc = locations->Out();
   1324   vixl32::Register out = OutputRegister(invoke);                      // Boolean result.
   1325 
   1326   vixl32::Register base = InputRegisterAt(invoke, 1);                 // Object pointer.
   1327   Location offset_loc = locations->InAt(2);
   1328   vixl32::Register offset = LowRegisterFrom(offset_loc);              // Offset (discard high 4B).
   1329   vixl32::Register expected = InputRegisterAt(invoke, 3);             // Expected.
   1330   vixl32::Register value = InputRegisterAt(invoke, 4);                // Value.
   1331 
   1332   Location tmp_ptr_loc = locations->GetTemp(0);
   1333   vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc);               // Pointer to actual memory.
   1334   vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));         // Value in memory.
   1335 
   1336   if (type == Primitive::kPrimNot) {
   1337     // The only read barrier implementation supporting the
    1338     // UnsafeCASObject intrinsic is the Baker-style read barrier.
   1339     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   1340 
   1341     // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
   1342     // object and scan the receiver at the next GC for nothing.
   1343     bool value_can_be_null = true;  // TODO: Worth finding out this information?
   1344     codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
   1345 
   1346     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   1347       // Need to make sure the reference stored in the field is a to-space
   1348       // one before attempting the CAS or the CAS could fail incorrectly.
   1349       codegen->UpdateReferenceFieldWithBakerReadBarrier(
   1350           invoke,
   1351           out_loc,  // Unused, used only as a "temporary" within the read barrier.
   1352           base,
   1353           /* field_offset */ offset_loc,
   1354           tmp_ptr_loc,
   1355           /* needs_null_check */ false,
   1356           tmp);
   1357     }
   1358   }
   1359 
   1360   // Prevent reordering with prior memory operations.
    1361   // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
   1362   // latter allows a preceding load to be delayed past the STXR
   1363   // instruction below.
   1364   __ Dmb(vixl32::ISH);
   1365 
   1366   __ Add(tmp_ptr, base, offset);
   1367 
   1368   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
   1369     codegen->GetAssembler()->PoisonHeapReference(expected);
   1370     if (value.Is(expected)) {
   1371       // Do not poison `value`, as it is the same register as
   1372       // `expected`, which has just been poisoned.
   1373     } else {
   1374       codegen->GetAssembler()->PoisonHeapReference(value);
   1375     }
   1376   }
   1377 
   1378   // do {
   1379   //   tmp = [r_ptr] - expected;
   1380   // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
    1381   // result = (tmp == 0);
   1382 
   1383   vixl32::Label loop_head;
   1384   __ Bind(&loop_head);
   1385 
   1386   __ Ldrex(tmp, MemOperand(tmp_ptr));
   1387 
   1388   __ Subs(tmp, tmp, expected);
   1389 
   1390   {
   1391     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
   1392                            3 * kMaxInstructionSizeInBytes,
   1393                            CodeBufferCheckScope::kMaximumSize);
   1394 
   1395     __ itt(eq);
   1396     __ strex(eq, tmp, value, MemOperand(tmp_ptr));
   1397     __ cmp(eq, tmp, 1);
   1398   }
   1399 
   1400   __ B(eq, &loop_head, /* far_target */ false);
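           // (Illustrative note.) STREX writes 0 to `tmp` on success and 1 on failure, so the
           // predicated CMP above leaves "eq" set only when the values matched but the
           // store-exclusive lost its reservation, which is exactly when we retry the loop.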
   1401 
   1402   __ Dmb(vixl32::ISH);
   1403 
   1404   __ Rsbs(out, tmp, 1);
   1405 
   1406   {
   1407     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
   1408                            2 * kMaxInstructionSizeInBytes,
   1409                            CodeBufferCheckScope::kMaximumSize);
   1410 
   1411     __ it(cc);
   1412     __ mov(cc, out, 0);
   1413   }
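           // (Illustrative note.) RSBS computes out = 1 - tmp and sets the carry flag iff no
           // borrow occurred, i.e. iff tmp <= 1 (unsigned). So:
           //   tmp == 0  ->  out = 1, carry set    (kept: the CAS succeeded)
           //   tmp == 1  ->  out = 0, carry set    (kept)
           //   tmp  > 1  ->  carry clear           (the predicated MOV forces out = 0)
           // which yields out = (tmp == 0) without a branch.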
   1414 
   1415   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
   1416     codegen->GetAssembler()->UnpoisonHeapReference(expected);
   1417     if (value.Is(expected)) {
   1418       // Do not unpoison `value`, as it is the same register as
   1419       // `expected`, which has just been unpoisoned.
   1420     } else {
   1421       codegen->GetAssembler()->UnpoisonHeapReference(value);
   1422     }
   1423   }
   1424 }
   1425 
   1426 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
   1427   CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
   1428 }
   1429 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
   1430   // The only read barrier implementation supporting the
    1431   // UnsafeCASObject intrinsic is the Baker-style read barrier.
   1432   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
   1433     return;
   1434   }
   1435 
   1436   CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
   1437 }
   1438 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
   1439   GenCas(invoke, Primitive::kPrimInt, codegen_);
   1440 }
   1441 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
   1442   // The only read barrier implementation supporting the
    1443   // UnsafeCASObject intrinsic is the Baker-style read barrier.
   1444   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   1445 
   1446   GenCas(invoke, Primitive::kPrimNot, codegen_);
   1447 }
   1448 
   1449 void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
   1450   // The inputs plus one temp.
   1451   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1452                                                             invoke->InputAt(1)->CanBeNull()
   1453                                                                 ? LocationSummary::kCallOnSlowPath
   1454                                                                 : LocationSummary::kNoCall,
   1455                                                             kIntrinsified);
   1456   locations->SetInAt(0, Location::RequiresRegister());
   1457   locations->SetInAt(1, Location::RequiresRegister());
   1458   locations->AddTemp(Location::RequiresRegister());
   1459   locations->AddTemp(Location::RequiresRegister());
   1460   locations->AddTemp(Location::RequiresRegister());
    1461   // Need a temporary register for the String compression feature.
   1462   if (mirror::kUseStringCompression) {
   1463     locations->AddTemp(Location::RequiresRegister());
   1464   }
   1465   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   1466 }
   1467 
   1468 void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
   1469   ArmVIXLAssembler* assembler = GetAssembler();
   1470   LocationSummary* locations = invoke->GetLocations();
   1471 
   1472   vixl32::Register str = InputRegisterAt(invoke, 0);
   1473   vixl32::Register arg = InputRegisterAt(invoke, 1);
   1474   vixl32::Register out = OutputRegister(invoke);
   1475 
   1476   vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
   1477   vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
   1478   vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
   1479   vixl32::Register temp3;
   1480   if (mirror::kUseStringCompression) {
   1481     temp3 = RegisterFrom(locations->GetTemp(3));
   1482   }
   1483 
   1484   vixl32::Label loop;
   1485   vixl32::Label find_char_diff;
   1486   vixl32::Label end;
   1487   vixl32::Label different_compression;
   1488 
   1489   // Get offsets of count and value fields within a string object.
   1490   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
   1491   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
   1492 
   1493   // Note that the null check must have been done earlier.
   1494   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1495 
   1496   // Take slow path and throw if input can be and is null.
   1497   SlowPathCodeARMVIXL* slow_path = nullptr;
   1498   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
   1499   if (can_slow_path) {
   1500     slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
   1501     codegen_->AddSlowPath(slow_path);
   1502     __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
   1503   }
   1504 
   1505   // Reference equality check, return 0 if same reference.
   1506   __ Subs(out, str, arg);
   1507   __ B(eq, &end);
   1508 
   1509   if (mirror::kUseStringCompression) {
   1510     // Load `count` fields of this and argument strings.
   1511     __ Ldr(temp3, MemOperand(str, count_offset));
   1512     __ Ldr(temp2, MemOperand(arg, count_offset));
   1513     // Extract lengths from the `count` fields.
   1514     __ Lsr(temp0, temp3, 1u);
   1515     __ Lsr(temp1, temp2, 1u);
   1516   } else {
   1517     // Load lengths of this and argument strings.
   1518     __ Ldr(temp0, MemOperand(str, count_offset));
   1519     __ Ldr(temp1, MemOperand(arg, count_offset));
   1520   }
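           // (Illustrative note.) With string compression enabled, `count` packs
           // (length << 1) | flag, where flag 0 means compressed (8-bit chars) and 1 means
           // uncompressed (16-bit chars), as the static_asserts in this file state. E.g.
           // count == 0x15 encodes a 10-character uncompressed string and count == 0x14 a
           // 10-character compressed one; the LSR #1 above recovers the character length.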
   1521   // out = length diff.
   1522   __ Subs(out, temp0, temp1);
   1523   // temp0 = min(len(str), len(arg)).
   1524 
   1525   {
   1526     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
   1527                            2 * kMaxInstructionSizeInBytes,
   1528                            CodeBufferCheckScope::kMaximumSize);
   1529 
   1530     __ it(gt);
   1531     __ mov(gt, temp0, temp1);
   1532   }
   1533 
   1534   // Shorter string is empty?
   1535   // Note that mirror::kUseStringCompression==true introduces lots of instructions,
    1536   // which makes the &end label far away from this branch, so it is not 'CBZ-encodable'.
   1537   __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
   1538 
   1539   if (mirror::kUseStringCompression) {
    1540     // Check that both strings use the same compression style before using this comparison loop.
   1541     __ Eors(temp2, temp2, temp3);
   1542     __ Lsrs(temp2, temp2, 1u);
   1543     __ B(cs, &different_compression);
   1544     // For string compression, calculate the number of bytes to compare (not chars).
   1545     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
   1546     __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.
   1547 
   1548     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
   1549                            2 * kMaxInstructionSizeInBytes,
   1550                            CodeBufferCheckScope::kMaximumSize);
   1551 
   1552     __ it(ne);
   1553     __ add(ne, temp0, temp0, temp0);
   1554   }
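           // (Illustrative recap of the block above.) EORS computes str.count ^ arg.count and
           // LSRS #1 drops the XOR of the two compression flags into the carry, so B(cs)
           // branches away exactly when the compression styles differ. On the fall-through
           // path both flags are equal; LSLS #31 moves that shared flag into bit 31, setting Z
           // exactly when the flag is 0 (compressed), so the predicated ADD doubles temp0 only
           // for uncompressed strings and temp0 ends up counting bytes in either case.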
   1555 
   1556   // Store offset of string value in preparation for comparison loop.
   1557   __ Mov(temp1, value_offset);
   1558 
   1559   // Assertions that must hold in order to compare multiple characters at a time.
   1560   CHECK_ALIGNED(value_offset, 8);
   1561   static_assert(IsAligned<8>(kObjectAlignment),
   1562                 "String data must be 8-byte aligned for unrolled CompareTo loop.");
   1563 
   1564   const unsigned char_size = Primitive::ComponentSize(Primitive::kPrimChar);
   1565   DCHECK_EQ(char_size, 2u);
   1566 
   1567   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
   1568 
   1569   vixl32::Label find_char_diff_2nd_cmp;
   1570   // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
   1571   __ Bind(&loop);
   1572   vixl32::Register temp_reg = temps.Acquire();
   1573   __ Ldr(temp_reg, MemOperand(str, temp1));
   1574   __ Ldr(temp2, MemOperand(arg, temp1));
   1575   __ Cmp(temp_reg, temp2);
   1576   __ B(ne, &find_char_diff, /* far_target */ false);
   1577   __ Add(temp1, temp1, char_size * 2);
   1578 
   1579   __ Ldr(temp_reg, MemOperand(str, temp1));
   1580   __ Ldr(temp2, MemOperand(arg, temp1));
   1581   __ Cmp(temp_reg, temp2);
   1582   __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false);
   1583   __ Add(temp1, temp1, char_size * 2);
   1584   // With string compression, we have compared 8 bytes, otherwise 4 chars.
   1585   __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
   1586   __ B(hi, &loop, /* far_target */ false);
   1587   __ B(&end);
   1588 
   1589   __ Bind(&find_char_diff_2nd_cmp);
   1590   if (mirror::kUseStringCompression) {
   1591     __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
   1592     __ B(ls, &end, /* far_target */ false);  // Was the second comparison fully beyond the end?
   1593   } else {
   1594     // Without string compression, we can start treating temp0 as signed
   1595     // and rely on the signed comparison below.
   1596     __ Sub(temp0, temp0, 2);
   1597   }
   1598 
   1599   // Find the single character difference.
   1600   __ Bind(&find_char_diff);
   1601   // Get the bit position of the first character that differs.
   1602   __ Eor(temp1, temp2, temp_reg);
   1603   __ Rbit(temp1, temp1);
   1604   __ Clz(temp1, temp1);
   1605 
   1606   // temp0 = number of characters remaining to compare.
   1607   // (Without string compression, it could be < 1 if a difference is found by the second CMP
   1608   // in the comparison loop, and after the end of the shorter string data).
   1609 
    1610   // Without string compression, (temp1 >> 4) = character where the difference occurs between the last
    1611   // two words compared, in the interval [0,1].
    1612   // (0 for low half-word different, 1 for high half-word different).
    1613   // With string compression, (temp1 >> 3) = byte where the difference occurs,
    1614   // in the interval [0,3].
   1615 
   1616   // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
   1617   // the remaining string data, so just return length diff (out).
   1618   // The comparison is unsigned for string compression, otherwise signed.
   1619   __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
   1620   __ B((mirror::kUseStringCompression ? ls : le), &end, /* far_target */ false);
   1621 
   1622   // Extract the characters and calculate the difference.
   1623   if (mirror::kUseStringCompression) {
    1624     // For compressed strings we need to clear 0x7 from temp1; for uncompressed we need to clear
    1625     // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
    1626     // The compression flag is now in the highest bit of temp3, so let's play some tricks.
    1627     __ Orr(temp3, temp3, 0xffu << 23);                  // uncompressed ? 0xff800000u : 0x7f800000u
   1628     __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
   1629     __ Asr(temp3, temp3, 7u);                           // uncompressed ? 0xffff0000u : 0xff0000u.
   1630     __ Lsr(temp2, temp2, temp1);                        // Extract second character.
   1631     __ Lsr(temp3, temp3, 16u);                          // uncompressed ? 0xffffu : 0xffu
   1632     __ Lsr(out, temp_reg, temp1);                       // Extract first character.
   1633     __ And(temp2, temp2, temp3);
   1634     __ And(out, out, temp3);
   1635   } else {
   1636     __ Bic(temp1, temp1, 0xf);
   1637     __ Lsr(temp2, temp2, temp1);
   1638     __ Lsr(out, temp_reg, temp1);
   1639     __ Movt(temp2, 0);
   1640     __ Movt(out, 0);
   1641   }
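           // (Illustrative worked example of the mask juggling in the kUseStringCompression
           // branch above.) Uncompressed case: temp3 held the flag in bit 31, i.e. 0x80000000,
           // so ORR with (0xff << 23) gives 0xff800000; LSR #28 of that is 0xf, the BIC mask
           // rounding the bit index down to a half-word boundary; ASR #7 sign-extends to
           // 0xffff0000 and LSR #16 leaves the 0xffff character mask. Compressed case: the
           // same chain gives 0x7f800000 -> 0x7 and 0x00ff0000 -> 0xff.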
   1642 
   1643   __ Sub(out, out, temp2);
   1644   temps.Release(temp_reg);
   1645 
   1646   if (mirror::kUseStringCompression) {
   1647     __ B(&end);
   1648     __ Bind(&different_compression);
   1649 
   1650     // Comparison for different compression style.
   1651     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
   1652     DCHECK_EQ(c_char_size, 1u);
   1653 
    1654     // We want to free up temp3, currently holding `str.count`, for comparison.
    1655     // So, we move it to the bottom bit of the iteration count `temp0`, which we then
    1656     // need to treat as unsigned. Start by freeing the bit with an ADD and continue
    1657     // further down with an LSRS+SBC which will flip the meaning of the flag but allow
   1658     // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
   1659     __ Add(temp0, temp0, temp0);              // Unlike LSL, this ADD is always 16-bit.
   1660     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
   1661     __ Mov(temp1, str);
   1662     __ Mov(temp2, arg);
   1663     __ Lsrs(temp3, temp3, 1u);                // Continue the move of the compression flag.
   1664     {
   1665       ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
   1666                              3 * kMaxInstructionSizeInBytes,
   1667                              CodeBufferCheckScope::kMaximumSize);
   1668       __ itt(cs);                             // Interleave with selection of temp1 and temp2.
   1669       __ mov(cs, temp1, arg);                 // Preserves flags.
   1670       __ mov(cs, temp2, str);                 // Preserves flags.
   1671     }
   1672     __ Sbc(temp0, temp0, 0);                  // Complete the move of the compression flag.
   1673 
   1674     // Adjust temp1 and temp2 from string pointers to data pointers.
   1675     __ Add(temp1, temp1, value_offset);
   1676     __ Add(temp2, temp2, value_offset);
   1677 
   1678     vixl32::Label different_compression_loop;
   1679     vixl32::Label different_compression_diff;
   1680 
   1681     // Main loop for different compression.
   1682     temp_reg = temps.Acquire();
   1683     __ Bind(&different_compression_loop);
   1684     __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
   1685     __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
   1686     __ Cmp(temp_reg, temp3);
   1687     __ B(ne, &different_compression_diff, /* far_target */ false);
   1688     __ Subs(temp0, temp0, 2);
   1689     __ B(hi, &different_compression_loop, /* far_target */ false);
   1690     __ B(&end);
   1691 
   1692     // Calculate the difference.
   1693     __ Bind(&different_compression_diff);
   1694     __ Sub(out, temp_reg, temp3);
   1695     temps.Release(temp_reg);
   1696     // Flip the difference if the `arg` is compressed.
    1697     // `temp0` contains the inverted `str` compression flag, i.e. the same as the `arg` compression flag.
   1698     __ Lsrs(temp0, temp0, 1u);
   1699     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1700                   "Expecting 0=compressed, 1=uncompressed");
   1701 
   1702     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
   1703                            2 * kMaxInstructionSizeInBytes,
   1704                            CodeBufferCheckScope::kMaximumSize);
   1705     __ it(cc);
   1706     __ rsb(cc, out, out, 0);
   1707   }
   1708 
   1709   __ Bind(&end);
   1710 
   1711   if (can_slow_path) {
   1712     __ Bind(slow_path->GetExitLabel());
   1713   }
   1714 }
   1715 
   1716 void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
   1717   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1718                                                             LocationSummary::kNoCall,
   1719                                                             kIntrinsified);
   1720   InvokeRuntimeCallingConventionARMVIXL calling_convention;
   1721   locations->SetInAt(0, Location::RequiresRegister());
   1722   locations->SetInAt(1, Location::RequiresRegister());
   1723   // Temporary registers to store lengths of strings and for calculations.
    1724   // Using the cbz instruction requires a low register, so explicitly set a temp to be R0.
   1725   locations->AddTemp(LocationFrom(r0));
   1726   locations->AddTemp(Location::RequiresRegister());
   1727   locations->AddTemp(Location::RequiresRegister());
   1728 
   1729   locations->SetOut(Location::RequiresRegister());
   1730 }
   1731 
   1732 void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
   1733   ArmVIXLAssembler* assembler = GetAssembler();
   1734   LocationSummary* locations = invoke->GetLocations();
   1735 
   1736   vixl32::Register str = InputRegisterAt(invoke, 0);
   1737   vixl32::Register arg = InputRegisterAt(invoke, 1);
   1738   vixl32::Register out = OutputRegister(invoke);
   1739 
   1740   vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
   1741   vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
   1742   vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
   1743 
   1744   vixl32::Label loop;
   1745   vixl32::Label end;
   1746   vixl32::Label return_true;
   1747   vixl32::Label return_false;
   1748   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
   1749 
   1750   // Get offsets of count, value, and class fields within a string object.
   1751   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
   1752   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
   1753   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
   1754 
   1755   // Note that the null check must have been done earlier.
   1756   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1757 
   1758   StringEqualsOptimizations optimizations(invoke);
   1759   if (!optimizations.GetArgumentNotNull()) {
   1760     // Check if input is null, return false if it is.
   1761     __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false);
   1762   }
   1763 
   1764   // Reference equality check, return true if same reference.
   1765   __ Cmp(str, arg);
   1766   __ B(eq, &return_true, /* far_target */ false);
   1767 
   1768   if (!optimizations.GetArgumentIsString()) {
   1769     // Instanceof check for the argument by comparing class fields.
   1770     // All string objects must have the same type since String cannot be subclassed.
   1771     // Receiver must be a string object, so its class field is equal to all strings' class fields.
   1772     // If the argument is a string object, its class field must be equal to receiver's class field.
   1773     __ Ldr(temp, MemOperand(str, class_offset));
   1774     __ Ldr(temp1, MemOperand(arg, class_offset));
   1775     __ Cmp(temp, temp1);
   1776     __ B(ne, &return_false, /* far_target */ false);
   1777   }
   1778 
   1779   // Load `count` fields of this and argument strings.
   1780   __ Ldr(temp, MemOperand(str, count_offset));
   1781   __ Ldr(temp1, MemOperand(arg, count_offset));
   1782   // Check if `count` fields are equal, return false if they're not.
    1783   // This also compares the compression style: if it differs, we return false.
   1784   __ Cmp(temp, temp1);
   1785   __ B(ne, &return_false, /* far_target */ false);
   1786   // Return true if both strings are empty. Even with string compression `count == 0` means empty.
   1787   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1788                 "Expecting 0=compressed, 1=uncompressed");
   1789   __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
   1790 
   1791   // Assertions that must hold in order to compare strings 4 bytes at a time.
   1792   DCHECK_ALIGNED(value_offset, 4);
   1793   static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
   1794 
   1795   if (mirror::kUseStringCompression) {
   1796     // For string compression, calculate the number of bytes to compare (not chars).
   1797     // This could in theory exceed INT32_MAX, so treat temp as unsigned.
   1798     __ Lsrs(temp, temp, 1u);                        // Extract length and check compression flag.
   1799     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
   1800                            2 * kMaxInstructionSizeInBytes,
   1801                            CodeBufferCheckScope::kMaximumSize);
   1802     __ it(cs);                                      // If uncompressed,
   1803     __ add(cs, temp, temp, temp);                   //   double the byte count.
   1804   }
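           // (Illustrative note on the block above.) LSRS #1 leaves the character count in
           // `temp` and the compression flag in the carry; carry set means uncompressed, so the
           // predicated ADD doubles `temp`. When compression is enabled, `temp` therefore
           // counts bytes and the loop below subtracts 4 per 4-byte comparison.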
   1805 
   1806   // Store offset of string value in preparation for comparison loop.
   1807   __ Mov(temp1, value_offset);
   1808 
   1809   // Loop to compare strings 4 bytes at a time starting at the front of the string.
   1810   // Ok to do this because strings are zero-padded to kObjectAlignment.
   1811   __ Bind(&loop);
   1812   __ Ldr(out, MemOperand(str, temp1));
   1813   __ Ldr(temp2, MemOperand(arg, temp1));
   1814   __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
   1815   __ Cmp(out, temp2);
   1816   __ B(ne, &return_false, /* far_target */ false);
   1817   // With string compression, we have compared 4 bytes, otherwise 2 chars.
   1818   __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
   1819   __ B(hi, &loop, /* far_target */ false);
   1820 
   1821   // Return true and exit the function.
   1822   // If loop does not result in returning false, we return true.
   1823   __ Bind(&return_true);
   1824   __ Mov(out, 1);
   1825   __ B(final_label);
   1826 
   1827   // Return false and exit the function.
   1828   __ Bind(&return_false);
   1829   __ Mov(out, 0);
   1830 
   1831   if (end.IsReferenced()) {
   1832     __ Bind(&end);
   1833   }
   1834 }
   1835 
   1836 static void GenerateVisitStringIndexOf(HInvoke* invoke,
   1837                                        ArmVIXLAssembler* assembler,
   1838                                        CodeGeneratorARMVIXL* codegen,
   1839                                        ArenaAllocator* allocator,
   1840                                        bool start_at_zero) {
   1841   LocationSummary* locations = invoke->GetLocations();
   1842 
   1843   // Note that the null check must have been done earlier.
   1844   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1845 
   1846   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
   1847   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   1848   SlowPathCodeARMVIXL* slow_path = nullptr;
   1849   HInstruction* code_point = invoke->InputAt(1);
   1850   if (code_point->IsIntConstant()) {
   1851     if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
   1852         std::numeric_limits<uint16_t>::max()) {
   1853       // Always needs the slow-path. We could directly dispatch to it, but this case should be
   1854       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
   1855       slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
   1856       codegen->AddSlowPath(slow_path);
   1857       __ B(slow_path->GetEntryLabel());
   1858       __ Bind(slow_path->GetExitLabel());
   1859       return;
   1860     }
   1861   } else if (code_point->GetType() != Primitive::kPrimChar) {
   1862     vixl32::Register char_reg = InputRegisterAt(invoke, 1);
    1863     // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
   1864     __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
   1865     slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
   1866     codegen->AddSlowPath(slow_path);
   1867     __ B(hs, slow_path->GetEntryLabel());
   1868   }
   1869 
   1870   if (start_at_zero) {
   1871     vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
   1872     DCHECK(tmp_reg.Is(r2));
   1873     // Start-index = 0.
   1874     __ Mov(tmp_reg, 0);
   1875   }
   1876 
   1877   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
   1878   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
   1879 
   1880   if (slow_path != nullptr) {
   1881     __ Bind(slow_path->GetExitLabel());
   1882   }
   1883 }
   1884 
   1885 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
   1886   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1887                                                             LocationSummary::kCallOnMainAndSlowPath,
   1888                                                             kIntrinsified);
   1889   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   1890   // best to align the inputs accordingly.
   1891   InvokeRuntimeCallingConventionARMVIXL calling_convention;
   1892   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1893   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   1894   locations->SetOut(LocationFrom(r0));
   1895 
   1896   // Need to send start-index=0.
   1897   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
   1898 }
   1899 
   1900 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
   1901   GenerateVisitStringIndexOf(
   1902       invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
   1903 }
   1904 
   1905 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
   1906   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1907                                                             LocationSummary::kCallOnMainAndSlowPath,
   1908                                                             kIntrinsified);
   1909   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   1910   // best to align the inputs accordingly.
   1911   InvokeRuntimeCallingConventionARMVIXL calling_convention;
   1912   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1913   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   1914   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   1915   locations->SetOut(LocationFrom(r0));
   1916 }
   1917 
   1918 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
   1919   GenerateVisitStringIndexOf(
   1920       invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
   1921 }
   1922 
   1923 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
   1924   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1925                                                             LocationSummary::kCallOnMainAndSlowPath,
   1926                                                             kIntrinsified);
   1927   InvokeRuntimeCallingConventionARMVIXL calling_convention;
   1928   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1929   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   1930   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   1931   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
   1932   locations->SetOut(LocationFrom(r0));
   1933 }
   1934 
   1935 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
   1936   ArmVIXLAssembler* assembler = GetAssembler();
   1937   vixl32::Register byte_array = InputRegisterAt(invoke, 0);
   1938   __ Cmp(byte_array, 0);
   1939   SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
   1940   codegen_->AddSlowPath(slow_path);
   1941   __ B(eq, slow_path->GetEntryLabel());
   1942 
   1943   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
   1944   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   1945   __ Bind(slow_path->GetExitLabel());
   1946 }
   1947 
   1948 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
   1949   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1950                                                             LocationSummary::kCallOnMainOnly,
   1951                                                             kIntrinsified);
   1952   InvokeRuntimeCallingConventionARMVIXL calling_convention;
   1953   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1954   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   1955   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   1956   locations->SetOut(LocationFrom(r0));
   1957 }
   1958 
   1959 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
   1960   // No need to emit code checking whether `locations->InAt(2)` is a null
   1961   // pointer, as callers of the native method
   1962   //
   1963   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
   1964   //
   1965   // all include a null check on `data` before calling that method.
   1966   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
   1967   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   1968 }
   1969 
   1970 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
   1971   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1972                                                             LocationSummary::kCallOnMainAndSlowPath,
   1973                                                             kIntrinsified);
   1974   InvokeRuntimeCallingConventionARMVIXL calling_convention;
   1975   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   1976   locations->SetOut(LocationFrom(r0));
   1977 }
   1978 
   1979 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
   1980   ArmVIXLAssembler* assembler = GetAssembler();
   1981   vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
   1982   __ Cmp(string_to_copy, 0);
   1983   SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
   1984   codegen_->AddSlowPath(slow_path);
   1985   __ B(eq, slow_path->GetEntryLabel());
   1986 
   1987   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
   1988   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   1989 
   1990   __ Bind(slow_path->GetExitLabel());
   1991 }
   1992 
   1993 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
   1994   // The only read barrier implementation supporting the
    1995   // SystemArrayCopy intrinsic is the Baker-style read barrier.
   1996   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
   1997     return;
   1998   }
   1999 
   2000   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
   2001   LocationSummary* locations = invoke->GetLocations();
   2002   if (locations == nullptr) {
   2003     return;
   2004   }
   2005 
   2006   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
   2007   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
   2008   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
   2009 
   2010   if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
   2011     locations->SetInAt(1, Location::RequiresRegister());
   2012   }
   2013   if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
   2014     locations->SetInAt(3, Location::RequiresRegister());
   2015   }
   2016   if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
   2017     locations->SetInAt(4, Location::RequiresRegister());
   2018   }
   2019   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2020     // Temporary register IP cannot be used in
   2021     // ReadBarrierSystemArrayCopySlowPathARM (because that register
   2022     // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
   2023     // temporary register from the register allocator.
   2024     locations->AddTemp(Location::RequiresRegister());
   2025     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_);
   2026     arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
   2027   }
   2028 }
   2029 
   2030 static void CheckPosition(ArmVIXLAssembler* assembler,
   2031                           Location pos,
   2032                           vixl32::Register input,
   2033                           Location length,
   2034                           SlowPathCodeARMVIXL* slow_path,
   2035                           vixl32::Register temp,
   2036                           bool length_is_input_length = false) {
   2037   // Where is the length in the Array?
   2038   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
   2039 
   2040   if (pos.IsConstant()) {
   2041     int32_t pos_const = Int32ConstantFrom(pos);
   2042     if (pos_const == 0) {
   2043       if (!length_is_input_length) {
   2044         // Check that length(input) >= length.
   2045         __ Ldr(temp, MemOperand(input, length_offset));
   2046         if (length.IsConstant()) {
   2047           __ Cmp(temp, Int32ConstantFrom(length));
   2048         } else {
   2049           __ Cmp(temp, RegisterFrom(length));
   2050         }
   2051         __ B(lt, slow_path->GetEntryLabel());
   2052       }
   2053     } else {
   2054       // Check that length(input) >= pos.
   2055       __ Ldr(temp, MemOperand(input, length_offset));
   2056       __ Subs(temp, temp, pos_const);
   2057       __ B(lt, slow_path->GetEntryLabel());
   2058 
   2059       // Check that (length(input) - pos) >= length.
   2060       if (length.IsConstant()) {
   2061         __ Cmp(temp, Int32ConstantFrom(length));
   2062       } else {
   2063         __ Cmp(temp, RegisterFrom(length));
   2064       }
   2065       __ B(lt, slow_path->GetEntryLabel());
   2066     }
   2067   } else if (length_is_input_length) {
   2068     // The only way the copy can succeed is if pos is zero.
   2069     vixl32::Register pos_reg = RegisterFrom(pos);
   2070     __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
   2071   } else {
   2072     // Check that pos >= 0.
   2073     vixl32::Register pos_reg = RegisterFrom(pos);
   2074     __ Cmp(pos_reg, 0);
   2075     __ B(lt, slow_path->GetEntryLabel());
   2076 
   2077     // Check that pos <= length(input).
   2078     __ Ldr(temp, MemOperand(input, length_offset));
   2079     __ Subs(temp, temp, pos_reg);
   2080     __ B(lt, slow_path->GetEntryLabel());
   2081 
   2082     // Check that (length(input) - pos) >= length.
   2083     if (length.IsConstant()) {
   2084       __ Cmp(temp, Int32ConstantFrom(length));
   2085     } else {
   2086       __ Cmp(temp, RegisterFrom(length));
   2087     }
   2088     __ B(lt, slow_path->GetEntryLabel());
   2089   }
   2090 }
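
         // In effect, CheckPosition above enforces (illustrative pseudocode, not emitted as-is):
         //   if (pos is a constant) {
         //     if (pos == 0) { if (!length_is_input_length) require(input.length >= length); }
         //     else { require(input.length >= pos && input.length - pos >= length); }
         //   } else if (length_is_input_length) {
         //     require(pos == 0);
         //   } else {
         //     require(pos >= 0 && input.length >= pos && input.length - pos >= length);
         //   }
         // jumping to `slow_path` whenever a requirement fails.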
   2091 
   2092 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
   2093   // The only read barrier implementation supporting the
    2094   // SystemArrayCopy intrinsic is the Baker-style read barrier.
   2095   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   2096 
   2097   ArmVIXLAssembler* assembler = GetAssembler();
   2098   LocationSummary* locations = invoke->GetLocations();
   2099 
   2100   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   2101   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   2102   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   2103   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   2104   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
   2105 
   2106   vixl32::Register src = InputRegisterAt(invoke, 0);
   2107   Location src_pos = locations->InAt(1);
   2108   vixl32::Register dest = InputRegisterAt(invoke, 2);
   2109   Location dest_pos = locations->InAt(3);
   2110   Location length = locations->InAt(4);
   2111   Location temp1_loc = locations->GetTemp(0);
   2112   vixl32::Register temp1 = RegisterFrom(temp1_loc);
   2113   Location temp2_loc = locations->GetTemp(1);
   2114   vixl32::Register temp2 = RegisterFrom(temp2_loc);
   2115   Location temp3_loc = locations->GetTemp(2);
   2116   vixl32::Register temp3 = RegisterFrom(temp3_loc);
   2117 
   2118   SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
   2119   codegen_->AddSlowPath(intrinsic_slow_path);
   2120 
   2121   vixl32::Label conditions_on_positions_validated;
   2122   SystemArrayCopyOptimizations optimizations(invoke);
   2123 
    2124   // If source and destination are the same, we go to the slow path when dest_pos > src_pos,
    2125   // since the forward copy performed here cannot handle that overlap.
   2126   if (src_pos.IsConstant()) {
   2127     int32_t src_pos_constant = Int32ConstantFrom(src_pos);
   2128     if (dest_pos.IsConstant()) {
   2129       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
   2130       if (optimizations.GetDestinationIsSource()) {
   2131         // Checked when building locations.
   2132         DCHECK_GE(src_pos_constant, dest_pos_constant);
   2133       } else if (src_pos_constant < dest_pos_constant) {
   2134         __ Cmp(src, dest);
   2135         __ B(eq, intrinsic_slow_path->GetEntryLabel());
   2136       }
   2137 
   2138       // Checked when building locations.
   2139       DCHECK(!optimizations.GetDestinationIsSource()
   2140              || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
   2141     } else {
   2142       if (!optimizations.GetDestinationIsSource()) {
   2143         __ Cmp(src, dest);
   2144         __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
   2145       }
   2146       __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
   2147       __ B(gt, intrinsic_slow_path->GetEntryLabel());
   2148     }
   2149   } else {
   2150     if (!optimizations.GetDestinationIsSource()) {
   2151       __ Cmp(src, dest);
   2152       __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
   2153     }
   2154     if (dest_pos.IsConstant()) {
   2155       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
   2156       __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
   2157     } else {
   2158       __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
   2159     }
   2160     __ B(lt, intrinsic_slow_path->GetEntryLabel());
   2161   }
   2162 
   2163   __ Bind(&conditions_on_positions_validated);
   2164 
   2165   if (!optimizations.GetSourceIsNotNull()) {
   2166     // Bail out if the source is null.
   2167     __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
   2168   }
   2169 
   2170   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
   2171     // Bail out if the destination is null.
   2172     __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
   2173   }
   2174 
   2175   // If the length is negative, bail out.
   2176   // We have already checked in the LocationsBuilder for the constant case.
   2177   if (!length.IsConstant() &&
   2178       !optimizations.GetCountIsSourceLength() &&
   2179       !optimizations.GetCountIsDestinationLength()) {
   2180     __ Cmp(RegisterFrom(length), 0);
   2181     __ B(lt, intrinsic_slow_path->GetEntryLabel());
   2182   }
   2183 
   2184   // Validity checks: source.
   2185   CheckPosition(assembler,
   2186                 src_pos,
   2187                 src,
   2188                 length,
   2189                 intrinsic_slow_path,
   2190                 temp1,
   2191                 optimizations.GetCountIsSourceLength());
   2192 
   2193   // Validity checks: dest.
   2194   CheckPosition(assembler,
   2195                 dest_pos,
   2196                 dest,
   2197                 length,
   2198                 intrinsic_slow_path,
   2199                 temp1,
   2200                 optimizations.GetCountIsDestinationLength());
   2201 
   2202   if (!optimizations.GetDoesNotNeedTypeCheck()) {
   2203     // Check whether all elements of the source array are assignable to the component
   2204     // type of the destination array. We do two checks: the classes are the same,
   2205     // or the destination is Object[]. If none of these checks succeed, we go to the
   2206     // slow path.
   2207 
   2208     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2209       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
   2210         // /* HeapReference<Class> */ temp1 = src->klass_
   2211         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   2212             invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
   2213         // Bail out if the source is not a non primitive array.
   2214         // /* HeapReference<Class> */ temp1 = temp1->component_type_
   2215         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   2216             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
   2217         __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
   2218         // If heap poisoning is enabled, `temp1` has been unpoisoned
    2219         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
   2220         // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
   2221         __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
   2222         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2223         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
   2224       }
   2225 
   2226       // /* HeapReference<Class> */ temp1 = dest->klass_
   2227       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   2228           invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
   2229 
   2230       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
   2231         // Bail out if the destination is not a non primitive array.
   2232         //
   2233         // Register `temp1` is not trashed by the read barrier emitted
   2234         // by GenerateFieldLoadWithBakerReadBarrier below, as that
   2235         // method produces a call to a ReadBarrierMarkRegX entry point,
   2236         // which saves all potentially live registers, including
    2237         // temporaries such as `temp1`.
   2238         // /* HeapReference<Class> */ temp2 = temp1->component_type_
   2239         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   2240             invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
   2241         __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
   2242         // If heap poisoning is enabled, `temp2` has been unpoisoned
    2243         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
   2244         // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
   2245         __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
   2246         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2247         __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
   2248       }
   2249 
   2250       // For the same reason given earlier, `temp1` is not trashed by the
   2251       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
   2252       // /* HeapReference<Class> */ temp2 = src->klass_
   2253       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   2254           invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
   2255       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
   2256       __ Cmp(temp1, temp2);
   2257 
   2258       if (optimizations.GetDestinationIsTypedObjectArray()) {
   2259         vixl32::Label do_copy;
   2260         __ B(eq, &do_copy, /* far_target */ false);
   2261         // /* HeapReference<Class> */ temp1 = temp1->component_type_
   2262         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   2263             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
   2264         // /* HeapReference<Class> */ temp1 = temp1->super_class_
   2265         // We do not need to emit a read barrier for the following
   2266         // heap reference load, as `temp1` is only used in a
   2267         // comparison with null below, and this reference is not
   2268         // kept afterwards.
   2269         __ Ldr(temp1, MemOperand(temp1, super_offset));
   2270         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
   2271         __ Bind(&do_copy);
   2272       } else {
   2273         __ B(ne, intrinsic_slow_path->GetEntryLabel());
   2274       }
   2275     } else {
   2276       // Non read barrier code.
   2277 
   2278       // /* HeapReference<Class> */ temp1 = dest->klass_
   2279       __ Ldr(temp1, MemOperand(dest, class_offset));
   2280       // /* HeapReference<Class> */ temp2 = src->klass_
   2281       __ Ldr(temp2, MemOperand(src, class_offset));
   2282       bool did_unpoison = false;
   2283       if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
   2284           !optimizations.GetSourceIsNonPrimitiveArray()) {
   2285         // One or two of the references need to be unpoisoned. Unpoison them
   2286         // both to make the identity check valid.
   2287         assembler->MaybeUnpoisonHeapReference(temp1);
   2288         assembler->MaybeUnpoisonHeapReference(temp2);
   2289         did_unpoison = true;
   2290       }
   2291 
   2292       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
    2293         // Bail out if the destination is not a non-primitive array.
   2294         // /* HeapReference<Class> */ temp3 = temp1->component_type_
   2295         __ Ldr(temp3, MemOperand(temp1, component_offset));
   2296         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
   2297         assembler->MaybeUnpoisonHeapReference(temp3);
    2298         // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
   2299         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
   2300         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2301         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
   2302       }
   2303 
   2304       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
    2305         // Bail out if the source is not a non-primitive array.
   2306         // /* HeapReference<Class> */ temp3 = temp2->component_type_
   2307         __ Ldr(temp3, MemOperand(temp2, component_offset));
   2308         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
   2309         assembler->MaybeUnpoisonHeapReference(temp3);
    2310         // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
   2311         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
   2312         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2313         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
   2314       }
   2315 
   2316       __ Cmp(temp1, temp2);
   2317 
   2318       if (optimizations.GetDestinationIsTypedObjectArray()) {
   2319         vixl32::Label do_copy;
   2320         __ B(eq, &do_copy, /* far_target */ false);
   2321         if (!did_unpoison) {
   2322           assembler->MaybeUnpoisonHeapReference(temp1);
   2323         }
   2324         // /* HeapReference<Class> */ temp1 = temp1->component_type_
   2325         __ Ldr(temp1, MemOperand(temp1, component_offset));
   2326         assembler->MaybeUnpoisonHeapReference(temp1);
   2327         // /* HeapReference<Class> */ temp1 = temp1->super_class_
   2328         __ Ldr(temp1, MemOperand(temp1, super_offset));
   2329         // No need to unpoison the result, we're comparing against null.
   2330         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
   2331         __ Bind(&do_copy);
   2332       } else {
   2333         __ B(ne, intrinsic_slow_path->GetEntryLabel());
   2334       }
   2335     }
   2336   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
   2337     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    2338     // Bail out if the source is not a non-primitive array.
   2339     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2340       // /* HeapReference<Class> */ temp1 = src->klass_
   2341       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   2342           invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
   2343       // /* HeapReference<Class> */ temp3 = temp1->component_type_
   2344       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   2345           invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
   2346       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
   2347       // If heap poisoning is enabled, `temp3` has been unpoisoned
    2348       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
   2349     } else {
   2350       // /* HeapReference<Class> */ temp1 = src->klass_
   2351       __ Ldr(temp1, MemOperand(src, class_offset));
   2352       assembler->MaybeUnpoisonHeapReference(temp1);
   2353       // /* HeapReference<Class> */ temp3 = temp1->component_type_
   2354       __ Ldr(temp3, MemOperand(temp1, component_offset));
   2355       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
   2356       assembler->MaybeUnpoisonHeapReference(temp3);
   2357     }
    2358     // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
   2359     __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
   2360     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   2361     __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
   2362   }
   2363 
   2364   if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
    2365     // Zero constant length: no need to emit the loop code at all.
   2366   } else {
   2367     vixl32::Label done;
   2368     const Primitive::Type type = Primitive::kPrimNot;
   2369     const int32_t element_size = Primitive::ComponentSize(type);
   2370 
   2371     if (length.IsRegister()) {
    2372       // Don't enter the copy loop if the length is zero.
   2373       __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target */ false);
   2374     }
   2375 
   2376     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2377       // TODO: Also convert this intrinsic to the IsGcMarking strategy?
   2378 
   2379       // SystemArrayCopy implementation for Baker read barriers (see
   2380       // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
   2381       //
    2382       //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
   2383       //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   2384       //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   2385       //   if (is_gray) {
   2386       //     // Slow-path copy.
   2387       //     do {
   2388       //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
   2389       //     } while (src_ptr != end_ptr)
   2390       //   } else {
   2391       //     // Fast-path copy.
   2392       //     do {
   2393       //       *dest_ptr++ = *src_ptr++;
   2394       //     } while (src_ptr != end_ptr)
   2395       //   }
   2396 
   2397       // /* int32_t */ monitor = src->monitor_
   2398       __ Ldr(temp2, MemOperand(src, monitor_offset));
   2399       // /* LockWord */ lock_word = LockWord(monitor)
   2400       static_assert(sizeof(LockWord) == sizeof(int32_t),
   2401                     "art::LockWord and int32_t have different sizes.");
   2402 
    2403       // Introduce a dependency on the lock_word (which includes the rb_state),
    2404       // which prevents load-load reordering without using
    2405       // a memory barrier (which would be more expensive).
   2406       // `src` is unchanged by this operation, but its value now depends
   2407       // on `temp2`.
   2408       __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
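               // Note: a logical shift right by 32 yields zero, so this addition leaves `src`
               // numerically unchanged; it only adds the register dependency on `temp2`.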
   2409 
   2410       // Compute the base source address in `temp1`.
   2411       // Note that `temp1` (the base source address) is computed from
   2412       // `src` (and `src_pos`) here, and thus honors the artificial
   2413       // dependency of `src` on `temp2`.
   2414       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
   2415       // Compute the end source address in `temp3`.
   2416       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
   2417       // The base destination address is computed later, as `temp2` is
   2418       // used for intermediate computations.
   2419 
   2420       // Slow path used to copy array when `src` is gray.
   2421       // Note that the base destination address is computed in `temp2`
   2422       // by the slow path code.
   2423       SlowPathCodeARMVIXL* read_barrier_slow_path =
   2424           new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
   2425       codegen_->AddSlowPath(read_barrier_slow_path);
   2426 
   2427       // Given the numeric representation, it's enough to check the low bit of the
    2428       // rb_state. We do that by shifting the bit out of the lock word with LSRS,
    2429       // which can be a 16-bit instruction, unlike the TST immediate form.
   2430       static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
   2431       static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   2432       __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
   2433       // Carry flag is the last bit shifted out by LSRS.
   2434       __ B(cs, read_barrier_slow_path->GetEntryLabel());
   2435 
   2436       // Fast-path copy.
   2437       // Compute the base destination address in `temp2`.
   2438       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
   2439       // Iterate over the arrays and do a raw copy of the objects. We don't need to
   2440       // poison/unpoison.
   2441       vixl32::Label loop;
   2442       __ Bind(&loop);
   2443       {
   2444         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
   2445         const vixl32::Register temp_reg = temps.Acquire();
   2446         __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
   2447         __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
   2448       }
   2449       __ Cmp(temp1, temp3);
   2450       __ B(ne, &loop, /* far_target */ false);
   2451 
   2452       __ Bind(read_barrier_slow_path->GetExitLabel());
   2453     } else {
   2454       // Non read barrier code.
   2455       // Compute the base source address in `temp1`.
   2456       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
   2457       // Compute the base destination address in `temp2`.
   2458       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
   2459       // Compute the end source address in `temp3`.
   2460       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
   2461       // Iterate over the arrays and do a raw copy of the objects. We don't need to
   2462       // poison/unpoison.
   2463       vixl32::Label loop;
   2464       __ Bind(&loop);
   2465       {
   2466         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
   2467         const vixl32::Register temp_reg = temps.Acquire();
   2468         __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
   2469         __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
   2470       }
   2471       __ Cmp(temp1, temp3);
   2472       __ B(ne, &loop, /* far_target */ false);
   2473     }
   2474     __ Bind(&done);
   2475   }
   2476 
   2477   // We only need one card marking on the destination array.
   2478   codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
   2479 
   2480   __ Bind(intrinsic_slow_path->GetExitLabel());
   2481 }
   2482 
   2483 static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
   2484   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
   2485   // the code generator. Furthermore, the register allocator creates fixed live intervals
   2486   // for all caller-saved registers because we are doing a function call. As a result, if
   2487   // the input and output locations are unallocated, the register allocator runs out of
   2488   // registers and fails; however, a debuggable graph is not the common case.
   2489   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
   2490     return;
   2491   }
   2492 
   2493   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
   2494   DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
   2495   DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
   2496 
   2497   LocationSummary* const locations = new (arena) LocationSummary(invoke,
   2498                                                                  LocationSummary::kCallOnMainOnly,
   2499                                                                  kIntrinsified);
   2500   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
   2501 
   2502   locations->SetInAt(0, Location::RequiresFpuRegister());
   2503   locations->SetOut(Location::RequiresFpuRegister());
   2504   // Native code uses the soft float ABI.
   2505   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
   2506   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
   2507 }
   2508 
   2509 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
   2510   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
   2511   // the code generator. Furthermore, the register allocator creates fixed live intervals
   2512   // for all caller-saved registers because we are doing a function call. As a result, if
   2513   // the input and output locations are unallocated, the register allocator runs out of
   2514   // registers and fails; however, a debuggable graph is not the common case.
   2515   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
   2516     return;
   2517   }
   2518 
   2519   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
   2520   DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
   2521   DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble);
   2522   DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
   2523 
   2524   LocationSummary* const locations = new (arena) LocationSummary(invoke,
   2525                                                                  LocationSummary::kCallOnMainOnly,
   2526                                                                  kIntrinsified);
   2527   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
   2528 
   2529   locations->SetInAt(0, Location::RequiresFpuRegister());
   2530   locations->SetInAt(1, Location::RequiresFpuRegister());
   2531   locations->SetOut(Location::RequiresFpuRegister());
   2532   // Native code uses the soft float ABI.
   2533   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
   2534   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
   2535   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
   2536   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
   2537 }
   2538 
   2539 static void GenFPToFPCall(HInvoke* invoke,
   2540                           ArmVIXLAssembler* assembler,
   2541                           CodeGeneratorARMVIXL* codegen,
   2542                           QuickEntrypointEnum entry) {
   2543   LocationSummary* const locations = invoke->GetLocations();
   2544 
   2545   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
   2546   DCHECK(locations->WillCall() && locations->Intrinsified());
   2547 
   2548   // Native code uses the soft float ABI.
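           // (The double argument is therefore split into the two reserved core-register temps
           // below, and the 64-bit result comes back in the same pair before being recombined
           // into the output D register.)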
   2549   __ Vmov(RegisterFrom(locations->GetTemp(0)),
   2550           RegisterFrom(locations->GetTemp(1)),
   2551           InputDRegisterAt(invoke, 0));
   2552   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
   2553   __ Vmov(OutputDRegister(invoke),
   2554           RegisterFrom(locations->GetTemp(0)),
   2555           RegisterFrom(locations->GetTemp(1)));
   2556 }
   2557 
   2558 static void GenFPFPToFPCall(HInvoke* invoke,
   2559                             ArmVIXLAssembler* assembler,
   2560                             CodeGeneratorARMVIXL* codegen,
   2561                             QuickEntrypointEnum entry) {
   2562   LocationSummary* const locations = invoke->GetLocations();
   2563 
   2564   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
   2565   DCHECK(locations->WillCall() && locations->Intrinsified());
   2566 
   2567   // Native code uses the soft float ABI.
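           // (Each double argument occupies one reserved core-register pair; the result is
           // returned in the first pair.)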
   2568   __ Vmov(RegisterFrom(locations->GetTemp(0)),
   2569           RegisterFrom(locations->GetTemp(1)),
   2570           InputDRegisterAt(invoke, 0));
   2571   __ Vmov(RegisterFrom(locations->GetTemp(2)),
   2572           RegisterFrom(locations->GetTemp(3)),
   2573           InputDRegisterAt(invoke, 1));
   2574   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
   2575   __ Vmov(OutputDRegister(invoke),
   2576           RegisterFrom(locations->GetTemp(0)),
   2577           RegisterFrom(locations->GetTemp(1)));
   2578 }
   2579 
   2580 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
   2581   CreateFPToFPCallLocations(arena_, invoke);
   2582 }
   2583 
   2584 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
   2585   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
   2586 }
   2587 
   2588 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
   2589   CreateFPToFPCallLocations(arena_, invoke);
   2590 }
   2591 
   2592 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
   2593   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
   2594 }
   2595 
   2596 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
   2597   CreateFPToFPCallLocations(arena_, invoke);
   2598 }
   2599 
   2600 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
   2601   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
   2602 }
   2603 
   2604 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
   2605   CreateFPToFPCallLocations(arena_, invoke);
   2606 }
   2607 
   2608 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
   2609   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
   2610 }
   2611 
   2612 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
   2613   CreateFPToFPCallLocations(arena_, invoke);
   2614 }
   2615 
   2616 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
   2617   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
   2618 }
   2619 
   2620 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
   2621   CreateFPToFPCallLocations(arena_, invoke);
   2622 }
   2623 
   2624 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
   2625   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
   2626 }
   2627 
   2628 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
   2629   CreateFPToFPCallLocations(arena_, invoke);
   2630 }
   2631 
   2632 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
   2633   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
   2634 }
   2635 
   2636 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
   2637   CreateFPToFPCallLocations(arena_, invoke);
   2638 }
   2639 
   2640 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
   2641   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
   2642 }
   2643 
   2644 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
   2645   CreateFPToFPCallLocations(arena_, invoke);
   2646 }
   2647 
   2648 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
   2649   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
   2650 }
   2651 
   2652 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
   2653   CreateFPToFPCallLocations(arena_, invoke);
   2654 }
   2655 
   2656 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
   2657   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
   2658 }
   2659 
   2660 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
   2661   CreateFPToFPCallLocations(arena_, invoke);
   2662 }
   2663 
   2664 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
   2665   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
   2666 }
   2667 
   2668 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
   2669   CreateFPToFPCallLocations(arena_, invoke);
   2670 }
   2671 
   2672 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
   2673   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
   2674 }
   2675 
   2676 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
   2677   CreateFPToFPCallLocations(arena_, invoke);
   2678 }
   2679 
   2680 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
   2681   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
   2682 }
   2683 
   2684 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
   2685   CreateFPToFPCallLocations(arena_, invoke);
   2686 }
   2687 
   2688 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
   2689   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
   2690 }
   2691 
   2692 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
   2693   CreateFPFPToFPCallLocations(arena_, invoke);
   2694 }
   2695 
   2696 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
   2697   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
   2698 }
   2699 
   2700 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
   2701   CreateFPFPToFPCallLocations(arena_, invoke);
   2702 }
   2703 
   2704 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
   2705   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
   2706 }
   2707 
   2708 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
   2709   CreateFPFPToFPCallLocations(arena_, invoke);
   2710 }
   2711 
   2712 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
   2713   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
   2714 }
   2715 
   2716 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
   2717   CreateIntToIntLocations(arena_, invoke);
   2718 }
   2719 
   2720 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
   2721   ArmVIXLAssembler* assembler = GetAssembler();
   2722   __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
   2723 }
   2724 
   2725 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
   2726   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   2727                                                             LocationSummary::kNoCall,
   2728                                                             kIntrinsified);
   2729   locations->SetInAt(0, Location::RequiresRegister());
   2730   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   2731 }
   2732 
   2733 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
   2734   ArmVIXLAssembler* assembler = GetAssembler();
   2735   LocationSummary* locations = invoke->GetLocations();
   2736 
   2737   vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
   2738   vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
   2739   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
   2740   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
   2741 
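           // A 64-bit bit reversal reverses each 32-bit half and swaps the halves. The output must
           // not alias the input (presumably why kOutputOverlap is requested above), since
           // in_reg_lo is still live after the first Rbit has written out_reg_lo.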
   2742   __ Rbit(out_reg_lo, in_reg_hi);
   2743   __ Rbit(out_reg_hi, in_reg_lo);
   2744 }
   2745 
   2746 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
   2747   CreateIntToIntLocations(arena_, invoke);
   2748 }
   2749 
   2750 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
   2751   ArmVIXLAssembler* assembler = GetAssembler();
   2752   __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
   2753 }
   2754 
   2755 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
   2756   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   2757                                                             LocationSummary::kNoCall,
   2758                                                             kIntrinsified);
   2759   locations->SetInAt(0, Location::RequiresRegister());
   2760   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   2761 }
   2762 
   2763 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
   2764   ArmVIXLAssembler* assembler = GetAssembler();
   2765   LocationSummary* locations = invoke->GetLocations();
   2766 
   2767   vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
   2768   vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
   2769   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
   2770   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
   2771 
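           // Same idea as VisitLongReverse: byte-swap each 32-bit half and swap the halves, again
           // with a non-aliasing output.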
   2772   __ Rev(out_reg_lo, in_reg_hi);
   2773   __ Rev(out_reg_hi, in_reg_lo);
   2774 }
   2775 
   2776 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
   2777   CreateIntToIntLocations(arena_, invoke);
   2778 }
   2779 
   2780 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
   2781   ArmVIXLAssembler* assembler = GetAssembler();
   2782   __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
   2783 }
   2784 
   2785 static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) {
   2786   DCHECK(Primitive::IsIntOrLongType(type)) << type;
   2787   DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
   2788   DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
   2789 
   2790   bool is_long = type == Primitive::kPrimLong;
   2791   LocationSummary* locations = instr->GetLocations();
   2792   Location in = locations->InAt(0);
   2793   vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
   2794   vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
   2795   vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
   2796   vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
   2797   vixl32::Register  out_r = OutputRegister(instr);
   2798 
   2799   // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
    2800   // According to the Cortex-A57 and Cortex-A72 optimization guides, transferring data from a core
    2801   // reg to the upper or lower half of a VFP D-reg has extra latency compared to a full D-reg move.
    2802   // That's why for the integer bit count we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
   2803   __ Vmov(tmp_d, src_1, src_0);     // Temp DReg |--src_1|--src_0|
   2804   __ Vcnt(Untyped8, tmp_d, tmp_d);  // Temp DReg |c|c|c|c|c|c|c|c|
   2805   __ Vpaddl(U8, tmp_d, tmp_d);      // Temp DReg |--c|--c|--c|--c|
   2806   __ Vpaddl(U16, tmp_d, tmp_d);     // Temp DReg |------c|------c|
   2807   if (is_long) {
   2808     __ Vpaddl(U32, tmp_d, tmp_d);   // Temp DReg |--------------c|
   2809   }
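           // In both cases the count fits in the low 32 bits of the D register (for the int case
           // both 32-bit halves hold the same count), so reading the low S register is sufficient.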
   2810   __ Vmov(out_r, tmp_s);
   2811 }
   2812 
   2813 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
   2814   CreateIntToIntLocations(arena_, invoke);
   2815   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
   2816 }
   2817 
   2818 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
   2819   GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
   2820 }
   2821 
   2822 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
   2823   VisitIntegerBitCount(invoke);
   2824 }
   2825 
   2826 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
   2827   GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
   2828 }
   2829 
   2830 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   2831   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   2832                                                             LocationSummary::kNoCall,
   2833                                                             kIntrinsified);
   2834   locations->SetInAt(0, Location::RequiresRegister());
   2835   locations->SetInAt(1, Location::RequiresRegister());
   2836   locations->SetInAt(2, Location::RequiresRegister());
   2837   locations->SetInAt(3, Location::RequiresRegister());
   2838   locations->SetInAt(4, Location::RequiresRegister());
   2839 
   2840   // Temporary registers to store lengths of strings and for calculations.
   2841   locations->AddTemp(Location::RequiresRegister());
   2842   locations->AddTemp(Location::RequiresRegister());
   2843   locations->AddTemp(Location::RequiresRegister());
   2844 }
   2845 
   2846 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   2847   ArmVIXLAssembler* assembler = GetAssembler();
   2848   LocationSummary* locations = invoke->GetLocations();
   2849 
   2850   // Check assumption that sizeof(Char) is 2 (used in scaling below).
   2851   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
   2852   DCHECK_EQ(char_size, 2u);
   2853 
   2854   // Location of data in char array buffer.
   2855   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
   2856 
   2857   // Location of char array data in string.
   2858   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
   2859 
   2860   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
    2861   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
   2862   vixl32::Register srcObj = InputRegisterAt(invoke, 0);
   2863   vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
   2864   vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
   2865   vixl32::Register dstObj = InputRegisterAt(invoke, 3);
   2866   vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
   2867 
   2868   vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
   2869   vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
   2870   vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
   2871 
   2872   vixl32::Label done, compressed_string_loop;
   2873   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
    2874   // Compute the start of the destination region to copy to.
   2875   __ Add(dst_ptr, dstObj, data_offset);
   2876   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
   2877 
   2878   __ Subs(num_chr, srcEnd, srcBegin);
   2879   // Early out for valid zero-length retrievals.
   2880   __ B(eq, final_label, /* far_target */ false);
   2881 
   2882   // src range to copy.
   2883   __ Add(src_ptr, srcObj, value_offset);
   2884 
   2885   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
   2886   vixl32::Register temp;
   2887   vixl32::Label compressed_string_preloop;
   2888   if (mirror::kUseStringCompression) {
   2889     // Location of count in string.
   2890     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
   2891     temp = temps.Acquire();
   2892     // String's length.
   2893     __ Ldr(temp, MemOperand(srcObj, count_offset));
   2894     __ Tst(temp, 1);
   2895     temps.Release(temp);
   2896     __ B(eq, &compressed_string_preloop, /* far_target */ false);
   2897   }
   2898   __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
   2899 
   2900   // Do the copy.
   2901   vixl32::Label loop, remainder;
   2902 
   2903   temp = temps.Acquire();
    2904   // Subtract into a temp to save repairing the value of num_chr on the < 4 character path.
   2905   __ Subs(temp, num_chr, 4);
   2906   __ B(lt, &remainder, /* far_target */ false);
   2907 
    2908   // Keep the result of the earlier subs; we are going to fetch at least 4 characters.
   2909   __ Mov(num_chr, temp);
   2910 
    2911   // The main loop, used for longer fetches, loads and stores 4x16-bit characters at a time.
   2912   // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
   2913   // to rectify these everywhere this intrinsic applies.)
   2914   __ Bind(&loop);
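           // Each iteration copies four chars with two 32-bit load/store pairs: chars 2-3 at
           // offset 4 first, then chars 0-1 at offset 0 with an 8-byte post-increment of both
           // pointers.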
   2915   __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
   2916   __ Subs(num_chr, num_chr, 4);
   2917   __ Str(temp, MemOperand(dst_ptr, char_size * 2));
   2918   __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
   2919   __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
   2920   temps.Release(temp);
   2921   __ B(ge, &loop, /* far_target */ false);
   2922 
   2923   __ Adds(num_chr, num_chr, 4);
   2924   __ B(eq, final_label, /* far_target */ false);
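           // The subs in the loop left num_chr 4 below the number of chars still to copy; the adds
           // restores it, so 1-3 chars remain here (the eq branch above handled 0).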
   2925 
    2926   // Loop for the < 4 character case and remainder handling: loads and stores one
    2927   // 16-bit Java character at a time.
   2928   __ Bind(&remainder);
   2929   temp = temps.Acquire();
   2930   __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
   2931   __ Subs(num_chr, num_chr, 1);
   2932   __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
   2933   temps.Release(temp);
   2934   __ B(gt, &remainder, /* far_target */ false);
   2935 
   2936   if (mirror::kUseStringCompression) {
   2937     __ B(final_label);
   2938 
   2939     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
   2940     DCHECK_EQ(c_char_size, 1u);
    2941     // Copy loop for a compressed src, widening one 8-bit character to 16 bits at a time.
   2942     __ Bind(&compressed_string_preloop);
   2943     __ Add(src_ptr, src_ptr, srcBegin);
   2944     __ Bind(&compressed_string_loop);
   2945     temp = temps.Acquire();
   2946     __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
   2947     __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
   2948     temps.Release(temp);
   2949     __ Subs(num_chr, num_chr, 1);
   2950     __ B(gt, &compressed_string_loop, /* far_target */ false);
   2951   }
   2952 
   2953   if (done.IsReferenced()) {
   2954     __ Bind(&done);
   2955   }
   2956 }
   2957 
   2958 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
   2959   CreateFPToIntLocations(arena_, invoke);
   2960 }
   2961 
   2962 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
   2963   ArmVIXLAssembler* const assembler = GetAssembler();
   2964   const vixl32::Register out = OutputRegister(invoke);
   2965   // Shifting left by 1 bit makes the value encodable as an immediate operand;
   2966   // we don't care about the sign bit anyway.
   2967   constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
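           // (0x7f800000 << 1 == 0xff000000, which is representable as an ARM modified immediate.)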
   2968 
   2969   __ Vmov(out, InputSRegisterAt(invoke, 0));
   2970   // We don't care about the sign bit, so shift left.
   2971   __ Lsl(out, out, 1);
   2972   __ Eor(out, out, infinity);
   2973   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
   2974 }
   2975 
   2976 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
   2977   CreateFPToIntLocations(arena_, invoke);
   2978 }
   2979 
   2980 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
   2981   ArmVIXLAssembler* const assembler = GetAssembler();
   2982   const vixl32::Register out = OutputRegister(invoke);
   2983   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
   2984   const vixl32::Register temp = temps.Acquire();
    2985   // The highest 32 bits of double-precision positive infinity, separated into
   2986   // two constants encodable as immediate operands.
   2987   constexpr uint32_t infinity_high  = 0x7f000000U;
   2988   constexpr uint32_t infinity_high2 = 0x00f00000U;
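           // (0x7f000000 | 0x00f00000 == 0x7ff00000, the high word of positive infinity, as the
           // static_assert below checks; 0x7ff00000 itself is not a valid modified immediate,
           // hence the two separate EORs.)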
   2989 
   2990   static_assert((infinity_high | infinity_high2) ==
   2991                     static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
   2992                 "The constants do not add up to the high 32 bits of double "
   2993                 "precision positive infinity.");
   2994   __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
   2995   __ Eor(out, out, infinity_high);
   2996   __ Eor(out, out, infinity_high2);
   2997   // We don't care about the sign bit, so shift left.
   2998   __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
   2999   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
   3000 }
   3001 
   3002 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
   3003   if (features_.HasARMv8AInstructions()) {
   3004     CreateFPToFPLocations(arena_, invoke);
   3005   }
   3006 }
   3007 
   3008 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
   3009   ArmVIXLAssembler* assembler = GetAssembler();
   3010   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
   3011   __ Vrintp(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
   3012 }
   3013 
   3014 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
   3015   if (features_.HasARMv8AInstructions()) {
   3016     CreateFPToFPLocations(arena_, invoke);
   3017   }
   3018 }
   3019 
   3020 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
   3021   ArmVIXLAssembler* assembler = GetAssembler();
   3022   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
   3023   __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
   3024 }
   3025 
   3026 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
   3027   InvokeRuntimeCallingConventionARMVIXL calling_convention;
   3028   IntrinsicVisitor::ComputeIntegerValueOfLocations(
   3029       invoke,
   3030       codegen_,
   3031       LocationFrom(r0),
   3032       LocationFrom(calling_convention.GetRegisterAt(0)));
   3033 }
   3034 
   3035 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
   3036   IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
   3037   LocationSummary* locations = invoke->GetLocations();
   3038   ArmVIXLAssembler* const assembler = GetAssembler();
   3039 
   3040   vixl32::Register out = RegisterFrom(locations->Out());
   3041   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
   3042   vixl32::Register temp = temps.Acquire();
   3043   InvokeRuntimeCallingConventionARMVIXL calling_convention;
   3044   vixl32::Register argument = calling_convention.GetRegisterAt(0);
   3045   if (invoke->InputAt(0)->IsConstant()) {
   3046     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
   3047     if (value >= info.low && value <= info.high) {
   3048       // Just embed the j.l.Integer in the code.
   3049       ScopedObjectAccess soa(Thread::Current());
   3050       mirror::Object* boxed = info.cache->Get(value + (-info.low));
   3051       DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
   3052       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
   3053       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
   3054     } else {
   3055       // Allocate and initialize a new j.l.Integer.
   3056       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
   3057       // JIT object table.
   3058       uint32_t address =
   3059           dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
   3060       __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
   3061       codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
   3062       CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   3063       __ Mov(temp, value);
   3064       assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
   3065       // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
   3066       // one.
   3067       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
   3068     }
   3069   } else {
   3070     vixl32::Register in = RegisterFrom(locations->InAt(0));
   3071     // Check bounds of our cache.
   3072     __ Add(out, in, -info.low);
   3073     __ Cmp(out, info.high - info.low + 1);
   3074     vixl32::Label allocate, done;
   3075     __ B(hs, &allocate, /* is_far_target */ false);
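             // The single unsigned comparison covers both bounds: values below info.low wrap
             // around to large unsigned values after the subtraction above.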
   3076     // If the value is within the bounds, load the j.l.Integer directly from the array.
   3077     uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
   3078     uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
   3079     __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
   3080     codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), temp, out);
   3081     assembler->MaybeUnpoisonHeapReference(out);
   3082     __ B(&done);
   3083     __ Bind(&allocate);
   3084     // Otherwise allocate and initialize a new j.l.Integer.
   3085     address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
   3086     __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
   3087     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
   3088     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   3089     assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
   3090     // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
   3091     // one.
   3092     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
   3093     __ Bind(&done);
   3094   }
   3095 }
   3096 
   3097 void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
   3098   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   3099                                                             LocationSummary::kNoCall,
   3100                                                             kIntrinsified);
   3101   locations->SetOut(Location::RequiresRegister());
   3102 }
   3103 
   3104 void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
   3105   ArmVIXLAssembler* assembler = GetAssembler();
   3106   vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
   3107   int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
   3108   __ Ldr(out, MemOperand(tr, offset));
   3109   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
   3110   vixl32::Register temp = temps.Acquire();
   3111   vixl32::Label done;
   3112   vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
   3113   __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
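           // The interrupted flag was set: return it and clear it, with the barriers below keeping
           // the read-and-clear ordered with respect to surrounding memory accesses.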
   3114   __ Dmb(vixl32::ISH);
   3115   __ Mov(temp, 0);
   3116   assembler->StoreToOffset(kStoreWord, temp, tr, offset);
   3117   __ Dmb(vixl32::ISH);
   3118   if (done.IsReferenced()) {
   3119     __ Bind(&done);
   3120   }
   3121 }
   3122 
   3123 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
   3124 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
   3125 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
   3126 UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
   3127 UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
   3128 UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
   3129 UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
   3130 UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit)
   3131 
   3132 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
   3133 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
   3134 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
   3135 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
   3136 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
   3137 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppend);
   3138 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
   3139 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
   3140 
   3141 // 1.8.
   3142 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
   3143 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
   3144 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
   3145 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
   3146 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
   3147 
   3148 UNREACHABLE_INTRINSICS(ARMVIXL)
   3149 
   3150 #undef __
   3151 
   3152 }  // namespace arm
   3153 }  // namespace art
   3154