Home | History | Annotate | Download | only in optimizing
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "code_generator_arm64.h"
     18 
     19 #include "arch/arm64/instruction_set_features_arm64.h"
     20 #include "mirror/array-inl.h"
     21 #include "mirror/string.h"
     22 
     23 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
     24 
     25 namespace art {
     26 namespace arm64 {
     27 
     28 using helpers::ARM64EncodableConstantOrRegister;
     29 using helpers::Arm64CanEncodeConstantAsImmediate;
     30 using helpers::DRegisterFrom;
     31 using helpers::HeapOperand;
     32 using helpers::InputRegisterAt;
     33 using helpers::Int64FromLocation;
     34 using helpers::OutputRegister;
     35 using helpers::VRegisterFrom;
     36 using helpers::WRegisterFrom;
     37 using helpers::XRegisterFrom;
     38 
     39 #define __ GetVIXLAssembler()->
     40 
     41 // Build-time switch for Armv8.4-a dot product instructions.
     42 // TODO: Enable dot product when there is a device to test it on.
     43 static constexpr bool kArm64EmitDotProdInstructions = false;
     44 
     45 // Returns whether dot product instructions should be emitted.
     46 static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
     47   return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd();
     48 }
     49 
     50 void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
     51   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
     52   HInstruction* input = instruction->InputAt(0);
     53   switch (instruction->GetPackedType()) {
     54     case DataType::Type::kBool:
     55     case DataType::Type::kUint8:
     56     case DataType::Type::kInt8:
     57     case DataType::Type::kUint16:
     58     case DataType::Type::kInt16:
     59     case DataType::Type::kInt32:
     60     case DataType::Type::kInt64:
     61       locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
     62       locations->SetOut(Location::RequiresFpuRegister());
     63       break;
     64     case DataType::Type::kFloat32:
     65     case DataType::Type::kFloat64:
     66       if (input->IsConstant() &&
     67           Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
     68         locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
     69         locations->SetOut(Location::RequiresFpuRegister());
     70       } else {
     71         locations->SetInAt(0, Location::RequiresFpuRegister());
     72         locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
     73       }
     74       break;
     75     default:
     76       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
     77       UNREACHABLE();
     78   }
     79 }
     80 
     81 void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
     82   LocationSummary* locations = instruction->GetLocations();
     83   Location src_loc = locations->InAt(0);
     84   VRegister dst = VRegisterFrom(locations->Out());
     85   switch (instruction->GetPackedType()) {
     86     case DataType::Type::kBool:
     87     case DataType::Type::kUint8:
     88     case DataType::Type::kInt8:
     89       DCHECK_EQ(16u, instruction->GetVectorLength());
     90       if (src_loc.IsConstant()) {
     91         __ Movi(dst.V16B(), Int64FromLocation(src_loc));
     92       } else {
     93         __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
     94       }
     95       break;
     96     case DataType::Type::kUint16:
     97     case DataType::Type::kInt16:
     98       DCHECK_EQ(8u, instruction->GetVectorLength());
     99       if (src_loc.IsConstant()) {
    100         __ Movi(dst.V8H(), Int64FromLocation(src_loc));
    101       } else {
    102         __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
    103       }
    104       break;
    105     case DataType::Type::kInt32:
    106       DCHECK_EQ(4u, instruction->GetVectorLength());
    107       if (src_loc.IsConstant()) {
    108         __ Movi(dst.V4S(), Int64FromLocation(src_loc));
    109       } else {
    110         __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
    111       }
    112       break;
    113     case DataType::Type::kInt64:
    114       DCHECK_EQ(2u, instruction->GetVectorLength());
    115       if (src_loc.IsConstant()) {
    116         __ Movi(dst.V2D(), Int64FromLocation(src_loc));
    117       } else {
    118         __ Dup(dst.V2D(), XRegisterFrom(src_loc));
    119       }
    120       break;
    121     case DataType::Type::kFloat32:
    122       DCHECK_EQ(4u, instruction->GetVectorLength());
    123       if (src_loc.IsConstant()) {
    124         __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
    125       } else {
    126         __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
    127       }
    128       break;
    129     case DataType::Type::kFloat64:
    130       DCHECK_EQ(2u, instruction->GetVectorLength());
    131       if (src_loc.IsConstant()) {
    132         __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
    133       } else {
    134         __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
    135       }
    136       break;
    137     default:
    138       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    139       UNREACHABLE();
    140   }
    141 }
    142 
    143 void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
    144   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
    145   switch (instruction->GetPackedType()) {
    146     case DataType::Type::kBool:
    147     case DataType::Type::kUint8:
    148     case DataType::Type::kInt8:
    149     case DataType::Type::kUint16:
    150     case DataType::Type::kInt16:
    151     case DataType::Type::kInt32:
    152     case DataType::Type::kInt64:
    153       locations->SetInAt(0, Location::RequiresFpuRegister());
    154       locations->SetOut(Location::RequiresRegister());
    155       break;
    156     case DataType::Type::kFloat32:
    157     case DataType::Type::kFloat64:
    158       locations->SetInAt(0, Location::RequiresFpuRegister());
    159       locations->SetOut(Location::SameAsFirstInput());
    160       break;
    161     default:
    162       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    163       UNREACHABLE();
    164   }
    165 }
    166 
    167 void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
    168   LocationSummary* locations = instruction->GetLocations();
    169   VRegister src = VRegisterFrom(locations->InAt(0));
    170   switch (instruction->GetPackedType()) {
    171     case DataType::Type::kInt32:
    172       DCHECK_EQ(4u, instruction->GetVectorLength());
    173       __ Umov(OutputRegister(instruction), src.V4S(), 0);
    174       break;
    175     case DataType::Type::kInt64:
    176       DCHECK_EQ(2u, instruction->GetVectorLength());
    177       __ Umov(OutputRegister(instruction), src.V2D(), 0);
    178       break;
    179     case DataType::Type::kFloat32:
    180     case DataType::Type::kFloat64:
    181       DCHECK_LE(2u, instruction->GetVectorLength());
    182       DCHECK_LE(instruction->GetVectorLength(), 4u);
    183       DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
    184       break;
    185     default:
    186       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    187       UNREACHABLE();
    188   }
    189 }
    190 
    191 // Helper to set up locations for vector unary operations.
    192 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
    193   LocationSummary* locations = new (allocator) LocationSummary(instruction);
    194   switch (instruction->GetPackedType()) {
    195     case DataType::Type::kBool:
    196       locations->SetInAt(0, Location::RequiresFpuRegister());
    197       locations->SetOut(Location::RequiresFpuRegister(),
    198                         instruction->IsVecNot() ? Location::kOutputOverlap
    199                                                 : Location::kNoOutputOverlap);
    200       break;
    201     case DataType::Type::kUint8:
    202     case DataType::Type::kInt8:
    203     case DataType::Type::kUint16:
    204     case DataType::Type::kInt16:
    205     case DataType::Type::kInt32:
    206     case DataType::Type::kInt64:
    207     case DataType::Type::kFloat32:
    208     case DataType::Type::kFloat64:
    209       locations->SetInAt(0, Location::RequiresFpuRegister());
    210       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
    211       break;
    212     default:
    213       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    214       UNREACHABLE();
    215   }
    216 }
    217 
    218 void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
    219   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    220 }
    221 
    222 void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
    223   LocationSummary* locations = instruction->GetLocations();
    224   VRegister src = VRegisterFrom(locations->InAt(0));
    225   VRegister dst = DRegisterFrom(locations->Out());
    226   switch (instruction->GetPackedType()) {
    227     case DataType::Type::kInt32:
    228       DCHECK_EQ(4u, instruction->GetVectorLength());
    229       switch (instruction->GetReductionKind()) {
    230         case HVecReduce::kSum:
    231           __ Addv(dst.S(), src.V4S());
    232           break;
    233         case HVecReduce::kMin:
    234           __ Sminv(dst.S(), src.V4S());
    235           break;
    236         case HVecReduce::kMax:
    237           __ Smaxv(dst.S(), src.V4S());
    238           break;
    239       }
    240       break;
    241     case DataType::Type::kInt64:
    242       DCHECK_EQ(2u, instruction->GetVectorLength());
    243       switch (instruction->GetReductionKind()) {
    244         case HVecReduce::kSum:
    245           __ Addp(dst.D(), src.V2D());
    246           break;
    247         default:
    248           LOG(FATAL) << "Unsupported SIMD min/max";
    249           UNREACHABLE();
    250       }
    251       break;
    252     default:
    253       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    254       UNREACHABLE();
    255   }
    256 }
    257 
    258 void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
    259   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    260 }
    261 
    262 void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
    263   LocationSummary* locations = instruction->GetLocations();
    264   VRegister src = VRegisterFrom(locations->InAt(0));
    265   VRegister dst = VRegisterFrom(locations->Out());
    266   DataType::Type from = instruction->GetInputType();
    267   DataType::Type to = instruction->GetResultType();
    268   if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    269     DCHECK_EQ(4u, instruction->GetVectorLength());
    270     __ Scvtf(dst.V4S(), src.V4S());
    271   } else {
    272     LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    273   }
    274 }
    275 
    276 void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
    277   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    278 }
    279 
    280 void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
    281   LocationSummary* locations = instruction->GetLocations();
    282   VRegister src = VRegisterFrom(locations->InAt(0));
    283   VRegister dst = VRegisterFrom(locations->Out());
    284   switch (instruction->GetPackedType()) {
    285     case DataType::Type::kUint8:
    286     case DataType::Type::kInt8:
    287       DCHECK_EQ(16u, instruction->GetVectorLength());
    288       __ Neg(dst.V16B(), src.V16B());
    289       break;
    290     case DataType::Type::kUint16:
    291     case DataType::Type::kInt16:
    292       DCHECK_EQ(8u, instruction->GetVectorLength());
    293       __ Neg(dst.V8H(), src.V8H());
    294       break;
    295     case DataType::Type::kInt32:
    296       DCHECK_EQ(4u, instruction->GetVectorLength());
    297       __ Neg(dst.V4S(), src.V4S());
    298       break;
    299     case DataType::Type::kInt64:
    300       DCHECK_EQ(2u, instruction->GetVectorLength());
    301       __ Neg(dst.V2D(), src.V2D());
    302       break;
    303     case DataType::Type::kFloat32:
    304       DCHECK_EQ(4u, instruction->GetVectorLength());
    305       __ Fneg(dst.V4S(), src.V4S());
    306       break;
    307     case DataType::Type::kFloat64:
    308       DCHECK_EQ(2u, instruction->GetVectorLength());
    309       __ Fneg(dst.V2D(), src.V2D());
    310       break;
    311     default:
    312       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    313       UNREACHABLE();
    314   }
    315 }
    316 
    317 void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
    318   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    319 }
    320 
    321 void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
    322   LocationSummary* locations = instruction->GetLocations();
    323   VRegister src = VRegisterFrom(locations->InAt(0));
    324   VRegister dst = VRegisterFrom(locations->Out());
    325   switch (instruction->GetPackedType()) {
    326     case DataType::Type::kInt8:
    327       DCHECK_EQ(16u, instruction->GetVectorLength());
    328       __ Abs(dst.V16B(), src.V16B());
    329       break;
    330     case DataType::Type::kInt16:
    331       DCHECK_EQ(8u, instruction->GetVectorLength());
    332       __ Abs(dst.V8H(), src.V8H());
    333       break;
    334     case DataType::Type::kInt32:
    335       DCHECK_EQ(4u, instruction->GetVectorLength());
    336       __ Abs(dst.V4S(), src.V4S());
    337       break;
    338     case DataType::Type::kInt64:
    339       DCHECK_EQ(2u, instruction->GetVectorLength());
    340       __ Abs(dst.V2D(), src.V2D());
    341       break;
    342     case DataType::Type::kFloat32:
    343       DCHECK_EQ(4u, instruction->GetVectorLength());
    344       __ Fabs(dst.V4S(), src.V4S());
    345       break;
    346     case DataType::Type::kFloat64:
    347       DCHECK_EQ(2u, instruction->GetVectorLength());
    348       __ Fabs(dst.V2D(), src.V2D());
    349       break;
    350     default:
    351       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    352       UNREACHABLE();
    353   }
    354 }
    355 
    356 void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
    357   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    358 }
    359 
    360 void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
    361   LocationSummary* locations = instruction->GetLocations();
    362   VRegister src = VRegisterFrom(locations->InAt(0));
    363   VRegister dst = VRegisterFrom(locations->Out());
    364   switch (instruction->GetPackedType()) {
    365     case DataType::Type::kBool:  // special case boolean-not
    366       DCHECK_EQ(16u, instruction->GetVectorLength());
    367       __ Movi(dst.V16B(), 1);
    368       __ Eor(dst.V16B(), dst.V16B(), src.V16B());
    369       break;
    370     case DataType::Type::kUint8:
    371     case DataType::Type::kInt8:
    372     case DataType::Type::kUint16:
    373     case DataType::Type::kInt16:
    374     case DataType::Type::kInt32:
    375     case DataType::Type::kInt64:
    376       __ Not(dst.V16B(), src.V16B());  // lanes do not matter
    377       break;
    378     default:
    379       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    380       UNREACHABLE();
    381   }
    382 }
    383 
    384 // Helper to set up locations for vector binary operations.
    385 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
    386   LocationSummary* locations = new (allocator) LocationSummary(instruction);
    387   switch (instruction->GetPackedType()) {
    388     case DataType::Type::kBool:
    389     case DataType::Type::kUint8:
    390     case DataType::Type::kInt8:
    391     case DataType::Type::kUint16:
    392     case DataType::Type::kInt16:
    393     case DataType::Type::kInt32:
    394     case DataType::Type::kInt64:
    395     case DataType::Type::kFloat32:
    396     case DataType::Type::kFloat64:
    397       locations->SetInAt(0, Location::RequiresFpuRegister());
    398       locations->SetInAt(1, Location::RequiresFpuRegister());
    399       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
    400       break;
    401     default:
    402       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    403       UNREACHABLE();
    404   }
    405 }
    406 
    407 void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
    408   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    409 }
    410 
    411 void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
    412   LocationSummary* locations = instruction->GetLocations();
    413   VRegister lhs = VRegisterFrom(locations->InAt(0));
    414   VRegister rhs = VRegisterFrom(locations->InAt(1));
    415   VRegister dst = VRegisterFrom(locations->Out());
    416   switch (instruction->GetPackedType()) {
    417     case DataType::Type::kUint8:
    418     case DataType::Type::kInt8:
    419       DCHECK_EQ(16u, instruction->GetVectorLength());
    420       __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
    421       break;
    422     case DataType::Type::kUint16:
    423     case DataType::Type::kInt16:
    424       DCHECK_EQ(8u, instruction->GetVectorLength());
    425       __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
    426       break;
    427     case DataType::Type::kInt32:
    428       DCHECK_EQ(4u, instruction->GetVectorLength());
    429       __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
    430       break;
    431     case DataType::Type::kInt64:
    432       DCHECK_EQ(2u, instruction->GetVectorLength());
    433       __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
    434       break;
    435     case DataType::Type::kFloat32:
    436       DCHECK_EQ(4u, instruction->GetVectorLength());
    437       __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
    438       break;
    439     case DataType::Type::kFloat64:
    440       DCHECK_EQ(2u, instruction->GetVectorLength());
    441       __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
    442       break;
    443     default:
    444       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    445       UNREACHABLE();
    446   }
    447 }
    448 
    449 void LocationsBuilderARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
    450   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    451 }
    452 
    453 void InstructionCodeGeneratorARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
    454   LocationSummary* locations = instruction->GetLocations();
    455   VRegister lhs = VRegisterFrom(locations->InAt(0));
    456   VRegister rhs = VRegisterFrom(locations->InAt(1));
    457   VRegister dst = VRegisterFrom(locations->Out());
    458   switch (instruction->GetPackedType()) {
    459     case DataType::Type::kUint8:
    460       DCHECK_EQ(16u, instruction->GetVectorLength());
    461       __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
    462       break;
    463     case DataType::Type::kInt8:
    464       DCHECK_EQ(16u, instruction->GetVectorLength());
    465       __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
    466       break;
    467     case DataType::Type::kUint16:
    468       DCHECK_EQ(8u, instruction->GetVectorLength());
    469       __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
    470       break;
    471     case DataType::Type::kInt16:
    472       DCHECK_EQ(8u, instruction->GetVectorLength());
    473       __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
    474       break;
    475     default:
    476       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    477       UNREACHABLE();
    478   }
    479 }
    480 
    481 void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
    482   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    483 }
    484 
    485 void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
    486   LocationSummary* locations = instruction->GetLocations();
    487   VRegister lhs = VRegisterFrom(locations->InAt(0));
    488   VRegister rhs = VRegisterFrom(locations->InAt(1));
    489   VRegister dst = VRegisterFrom(locations->Out());
    490   switch (instruction->GetPackedType()) {
    491     case DataType::Type::kUint8:
    492       DCHECK_EQ(16u, instruction->GetVectorLength());
    493       instruction->IsRounded()
    494           ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
    495           : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
    496       break;
    497     case DataType::Type::kInt8:
    498       DCHECK_EQ(16u, instruction->GetVectorLength());
    499       instruction->IsRounded()
    500           ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
    501           : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
    502       break;
    503     case DataType::Type::kUint16:
    504       DCHECK_EQ(8u, instruction->GetVectorLength());
    505       instruction->IsRounded()
    506           ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
    507           : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
    508       break;
    509     case DataType::Type::kInt16:
    510       DCHECK_EQ(8u, instruction->GetVectorLength());
    511       instruction->IsRounded()
    512           ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
    513           : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
    514       break;
    515     default:
    516       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    517       UNREACHABLE();
    518   }
    519 }
    520 
    521 void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
    522   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    523 }
    524 
    525 void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
    526   LocationSummary* locations = instruction->GetLocations();
    527   VRegister lhs = VRegisterFrom(locations->InAt(0));
    528   VRegister rhs = VRegisterFrom(locations->InAt(1));
    529   VRegister dst = VRegisterFrom(locations->Out());
    530   switch (instruction->GetPackedType()) {
    531     case DataType::Type::kUint8:
    532     case DataType::Type::kInt8:
    533       DCHECK_EQ(16u, instruction->GetVectorLength());
    534       __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
    535       break;
    536     case DataType::Type::kUint16:
    537     case DataType::Type::kInt16:
    538       DCHECK_EQ(8u, instruction->GetVectorLength());
    539       __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
    540       break;
    541     case DataType::Type::kInt32:
    542       DCHECK_EQ(4u, instruction->GetVectorLength());
    543       __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
    544       break;
    545     case DataType::Type::kInt64:
    546       DCHECK_EQ(2u, instruction->GetVectorLength());
    547       __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
    548       break;
    549     case DataType::Type::kFloat32:
    550       DCHECK_EQ(4u, instruction->GetVectorLength());
    551       __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
    552       break;
    553     case DataType::Type::kFloat64:
    554       DCHECK_EQ(2u, instruction->GetVectorLength());
    555       __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
    556       break;
    557     default:
    558       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    559       UNREACHABLE();
    560   }
    561 }
    562 
    563 void LocationsBuilderARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
    564   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    565 }
    566 
    567 void InstructionCodeGeneratorARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
    568   LocationSummary* locations = instruction->GetLocations();
    569   VRegister lhs = VRegisterFrom(locations->InAt(0));
    570   VRegister rhs = VRegisterFrom(locations->InAt(1));
    571   VRegister dst = VRegisterFrom(locations->Out());
    572   switch (instruction->GetPackedType()) {
    573     case DataType::Type::kUint8:
    574       DCHECK_EQ(16u, instruction->GetVectorLength());
    575       __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
    576       break;
    577     case DataType::Type::kInt8:
    578       DCHECK_EQ(16u, instruction->GetVectorLength());
    579       __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
    580       break;
    581     case DataType::Type::kUint16:
    582       DCHECK_EQ(8u, instruction->GetVectorLength());
    583       __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
    584       break;
    585     case DataType::Type::kInt16:
    586       DCHECK_EQ(8u, instruction->GetVectorLength());
    587       __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
    588       break;
    589     default:
    590       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    591       UNREACHABLE();
    592   }
    593 }
    594 
    595 void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
    596   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    597 }
    598 
    599 void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
    600   LocationSummary* locations = instruction->GetLocations();
    601   VRegister lhs = VRegisterFrom(locations->InAt(0));
    602   VRegister rhs = VRegisterFrom(locations->InAt(1));
    603   VRegister dst = VRegisterFrom(locations->Out());
    604   switch (instruction->GetPackedType()) {
    605     case DataType::Type::kUint8:
    606     case DataType::Type::kInt8:
    607       DCHECK_EQ(16u, instruction->GetVectorLength());
    608       __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
    609       break;
    610     case DataType::Type::kUint16:
    611     case DataType::Type::kInt16:
    612       DCHECK_EQ(8u, instruction->GetVectorLength());
    613       __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
    614       break;
    615     case DataType::Type::kInt32:
    616       DCHECK_EQ(4u, instruction->GetVectorLength());
    617       __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
    618       break;
    619     case DataType::Type::kFloat32:
    620       DCHECK_EQ(4u, instruction->GetVectorLength());
    621       __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
    622       break;
    623     case DataType::Type::kFloat64:
    624       DCHECK_EQ(2u, instruction->GetVectorLength());
    625       __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
    626       break;
    627     default:
    628       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    629       UNREACHABLE();
    630   }
    631 }
    632 
    633 void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
    634   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    635 }
    636 
    637 void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
    638   LocationSummary* locations = instruction->GetLocations();
    639   VRegister lhs = VRegisterFrom(locations->InAt(0));
    640   VRegister rhs = VRegisterFrom(locations->InAt(1));
    641   VRegister dst = VRegisterFrom(locations->Out());
    642   switch (instruction->GetPackedType()) {
    643     case DataType::Type::kFloat32:
    644       DCHECK_EQ(4u, instruction->GetVectorLength());
    645       __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
    646       break;
    647     case DataType::Type::kFloat64:
    648       DCHECK_EQ(2u, instruction->GetVectorLength());
    649       __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
    650       break;
    651     default:
    652       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    653       UNREACHABLE();
    654   }
    655 }
    656 
    657 void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
    658   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    659 }
    660 
    661 void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
    662   LocationSummary* locations = instruction->GetLocations();
    663   VRegister lhs = VRegisterFrom(locations->InAt(0));
    664   VRegister rhs = VRegisterFrom(locations->InAt(1));
    665   VRegister dst = VRegisterFrom(locations->Out());
    666   switch (instruction->GetPackedType()) {
    667     case DataType::Type::kUint8:
    668       DCHECK_EQ(16u, instruction->GetVectorLength());
    669       __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
    670       break;
    671     case DataType::Type::kInt8:
    672       DCHECK_EQ(16u, instruction->GetVectorLength());
    673       __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
    674       break;
    675     case DataType::Type::kUint16:
    676       DCHECK_EQ(8u, instruction->GetVectorLength());
    677       __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
    678       break;
    679     case DataType::Type::kInt16:
    680       DCHECK_EQ(8u, instruction->GetVectorLength());
    681       __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
    682       break;
    683     case DataType::Type::kUint32:
    684       DCHECK_EQ(4u, instruction->GetVectorLength());
    685       __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
    686       break;
    687     case DataType::Type::kInt32:
    688       DCHECK_EQ(4u, instruction->GetVectorLength());
    689       __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
    690       break;
    691     case DataType::Type::kFloat32:
    692       DCHECK_EQ(4u, instruction->GetVectorLength());
    693       __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
    694       break;
    695     case DataType::Type::kFloat64:
    696       DCHECK_EQ(2u, instruction->GetVectorLength());
    697       __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
    698       break;
    699     default:
    700       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    701       UNREACHABLE();
    702   }
    703 }
    704 
    705 void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
    706   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    707 }
    708 
    709 void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
    710   LocationSummary* locations = instruction->GetLocations();
    711   VRegister lhs = VRegisterFrom(locations->InAt(0));
    712   VRegister rhs = VRegisterFrom(locations->InAt(1));
    713   VRegister dst = VRegisterFrom(locations->Out());
    714   switch (instruction->GetPackedType()) {
    715     case DataType::Type::kUint8:
    716       DCHECK_EQ(16u, instruction->GetVectorLength());
    717       __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
    718       break;
    719     case DataType::Type::kInt8:
    720       DCHECK_EQ(16u, instruction->GetVectorLength());
    721       __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
    722       break;
    723     case DataType::Type::kUint16:
    724       DCHECK_EQ(8u, instruction->GetVectorLength());
    725       __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
    726       break;
    727     case DataType::Type::kInt16:
    728       DCHECK_EQ(8u, instruction->GetVectorLength());
    729       __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
    730       break;
    731     case DataType::Type::kUint32:
    732       DCHECK_EQ(4u, instruction->GetVectorLength());
    733       __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
    734       break;
    735     case DataType::Type::kInt32:
    736       DCHECK_EQ(4u, instruction->GetVectorLength());
    737       __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
    738       break;
    739     case DataType::Type::kFloat32:
    740       DCHECK_EQ(4u, instruction->GetVectorLength());
    741       __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
    742       break;
    743     case DataType::Type::kFloat64:
    744       DCHECK_EQ(2u, instruction->GetVectorLength());
    745       __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
    746       break;
    747     default:
    748       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    749       UNREACHABLE();
    750   }
    751 }
    752 
    753 void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
    754   // TODO: Allow constants supported by BIC (vector, immediate).
    755   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    756 }
    757 
    758 void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
    759   LocationSummary* locations = instruction->GetLocations();
    760   VRegister lhs = VRegisterFrom(locations->InAt(0));
    761   VRegister rhs = VRegisterFrom(locations->InAt(1));
    762   VRegister dst = VRegisterFrom(locations->Out());
    763   switch (instruction->GetPackedType()) {
    764     case DataType::Type::kBool:
    765     case DataType::Type::kUint8:
    766     case DataType::Type::kInt8:
    767     case DataType::Type::kUint16:
    768     case DataType::Type::kInt16:
    769     case DataType::Type::kInt32:
    770     case DataType::Type::kInt64:
    771     case DataType::Type::kFloat32:
    772     case DataType::Type::kFloat64:
    773       __ And(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
    774       break;
    775     default:
    776       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    777       UNREACHABLE();
    778   }
    779 }
    780 
    781 void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) {
    782   LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
    783 }
    784 
    785 void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
    786   // TODO: Use BIC (vector, register).
    787   LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
    788 }
    789 
    790 void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
    791   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    792 }
    793 
    794 void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
    795   LocationSummary* locations = instruction->GetLocations();
    796   VRegister lhs = VRegisterFrom(locations->InAt(0));
    797   VRegister rhs = VRegisterFrom(locations->InAt(1));
    798   VRegister dst = VRegisterFrom(locations->Out());
    799   switch (instruction->GetPackedType()) {
    800     case DataType::Type::kBool:
    801     case DataType::Type::kUint8:
    802     case DataType::Type::kInt8:
    803     case DataType::Type::kUint16:
    804     case DataType::Type::kInt16:
    805     case DataType::Type::kInt32:
    806     case DataType::Type::kInt64:
    807     case DataType::Type::kFloat32:
    808     case DataType::Type::kFloat64:
    809       __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
    810       break;
    811     default:
    812       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    813       UNREACHABLE();
    814   }
    815 }
    816 
    817 void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
    818   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    819 }
    820 
    821 void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
    822   LocationSummary* locations = instruction->GetLocations();
    823   VRegister lhs = VRegisterFrom(locations->InAt(0));
    824   VRegister rhs = VRegisterFrom(locations->InAt(1));
    825   VRegister dst = VRegisterFrom(locations->Out());
    826   switch (instruction->GetPackedType()) {
    827     case DataType::Type::kBool:
    828     case DataType::Type::kUint8:
    829     case DataType::Type::kInt8:
    830     case DataType::Type::kUint16:
    831     case DataType::Type::kInt16:
    832     case DataType::Type::kInt32:
    833     case DataType::Type::kInt64:
    834     case DataType::Type::kFloat32:
    835     case DataType::Type::kFloat64:
    836       __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
    837       break;
    838     default:
    839       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    840       UNREACHABLE();
    841   }
    842 }
    843 
    844 // Helper to set up locations for vector shift operations.
    845 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
    846   LocationSummary* locations = new (allocator) LocationSummary(instruction);
    847   switch (instruction->GetPackedType()) {
    848     case DataType::Type::kUint8:
    849     case DataType::Type::kInt8:
    850     case DataType::Type::kUint16:
    851     case DataType::Type::kInt16:
    852     case DataType::Type::kInt32:
    853     case DataType::Type::kInt64:
    854       locations->SetInAt(0, Location::RequiresFpuRegister());
    855       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
    856       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
    857       break;
    858     default:
    859       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    860       UNREACHABLE();
    861   }
    862 }
    863 
    864 void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
    865   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
    866 }
    867 
    868 void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
    869   LocationSummary* locations = instruction->GetLocations();
    870   VRegister lhs = VRegisterFrom(locations->InAt(0));
    871   VRegister dst = VRegisterFrom(locations->Out());
    872   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    873   switch (instruction->GetPackedType()) {
    874     case DataType::Type::kUint8:
    875     case DataType::Type::kInt8:
    876       DCHECK_EQ(16u, instruction->GetVectorLength());
    877       __ Shl(dst.V16B(), lhs.V16B(), value);
    878       break;
    879     case DataType::Type::kUint16:
    880     case DataType::Type::kInt16:
    881       DCHECK_EQ(8u, instruction->GetVectorLength());
    882       __ Shl(dst.V8H(), lhs.V8H(), value);
    883       break;
    884     case DataType::Type::kInt32:
    885       DCHECK_EQ(4u, instruction->GetVectorLength());
    886       __ Shl(dst.V4S(), lhs.V4S(), value);
    887       break;
    888     case DataType::Type::kInt64:
    889       DCHECK_EQ(2u, instruction->GetVectorLength());
    890       __ Shl(dst.V2D(), lhs.V2D(), value);
    891       break;
    892     default:
    893       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    894       UNREACHABLE();
    895   }
    896 }
    897 
    898 void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
    899   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
    900 }
    901 
    902 void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
    903   LocationSummary* locations = instruction->GetLocations();
    904   VRegister lhs = VRegisterFrom(locations->InAt(0));
    905   VRegister dst = VRegisterFrom(locations->Out());
    906   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    907   switch (instruction->GetPackedType()) {
    908     case DataType::Type::kUint8:
    909     case DataType::Type::kInt8:
    910       DCHECK_EQ(16u, instruction->GetVectorLength());
    911       __ Sshr(dst.V16B(), lhs.V16B(), value);
    912       break;
    913     case DataType::Type::kUint16:
    914     case DataType::Type::kInt16:
    915       DCHECK_EQ(8u, instruction->GetVectorLength());
    916       __ Sshr(dst.V8H(), lhs.V8H(), value);
    917       break;
    918     case DataType::Type::kInt32:
    919       DCHECK_EQ(4u, instruction->GetVectorLength());
    920       __ Sshr(dst.V4S(), lhs.V4S(), value);
    921       break;
    922     case DataType::Type::kInt64:
    923       DCHECK_EQ(2u, instruction->GetVectorLength());
    924       __ Sshr(dst.V2D(), lhs.V2D(), value);
    925       break;
    926     default:
    927       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    928       UNREACHABLE();
    929   }
    930 }
    931 
    932 void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
    933   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
    934 }
    935 
    936 void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
    937   LocationSummary* locations = instruction->GetLocations();
    938   VRegister lhs = VRegisterFrom(locations->InAt(0));
    939   VRegister dst = VRegisterFrom(locations->Out());
    940   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    941   switch (instruction->GetPackedType()) {
    942     case DataType::Type::kUint8:
    943     case DataType::Type::kInt8:
    944       DCHECK_EQ(16u, instruction->GetVectorLength());
    945       __ Ushr(dst.V16B(), lhs.V16B(), value);
    946       break;
    947     case DataType::Type::kUint16:
    948     case DataType::Type::kInt16:
    949       DCHECK_EQ(8u, instruction->GetVectorLength());
    950       __ Ushr(dst.V8H(), lhs.V8H(), value);
    951       break;
    952     case DataType::Type::kInt32:
    953       DCHECK_EQ(4u, instruction->GetVectorLength());
    954       __ Ushr(dst.V4S(), lhs.V4S(), value);
    955       break;
    956     case DataType::Type::kInt64:
    957       DCHECK_EQ(2u, instruction->GetVectorLength());
    958       __ Ushr(dst.V2D(), lhs.V2D(), value);
    959       break;
    960     default:
    961       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    962       UNREACHABLE();
    963   }
    964 }
    965 
    966 void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
    967   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
    968 
    969   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
    970 
    971   HInstruction* input = instruction->InputAt(0);
    972   bool is_zero = IsZeroBitPattern(input);
    973 
    974   switch (instruction->GetPackedType()) {
    975     case DataType::Type::kBool:
    976     case DataType::Type::kUint8:
    977     case DataType::Type::kInt8:
    978     case DataType::Type::kUint16:
    979     case DataType::Type::kInt16:
    980     case DataType::Type::kInt32:
    981     case DataType::Type::kInt64:
    982       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
    983                                     : Location::RequiresRegister());
    984       locations->SetOut(Location::RequiresFpuRegister());
    985       break;
    986     case DataType::Type::kFloat32:
    987     case DataType::Type::kFloat64:
    988       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
    989                                     : Location::RequiresFpuRegister());
    990       locations->SetOut(Location::RequiresFpuRegister());
    991       break;
    992     default:
    993       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
    994       UNREACHABLE();
    995   }
    996 }
    997 
    998 void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
    999   LocationSummary* locations = instruction->GetLocations();
   1000   VRegister dst = VRegisterFrom(locations->Out());
   1001 
   1002   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
   1003 
   1004   // Zero out all other elements first.
   1005   __ Movi(dst.V16B(), 0);
   1006 
   1007   // Shorthand for any type of zero.
   1008   if (IsZeroBitPattern(instruction->InputAt(0))) {
   1009     return;
   1010   }
   1011 
   1012   // Set required elements.
   1013   switch (instruction->GetPackedType()) {
   1014     case DataType::Type::kBool:
   1015     case DataType::Type::kUint8:
   1016     case DataType::Type::kInt8:
   1017       DCHECK_EQ(16u, instruction->GetVectorLength());
   1018       __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
   1019       break;
   1020     case DataType::Type::kUint16:
   1021     case DataType::Type::kInt16:
   1022       DCHECK_EQ(8u, instruction->GetVectorLength());
   1023       __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
   1024       break;
   1025     case DataType::Type::kInt32:
   1026       DCHECK_EQ(4u, instruction->GetVectorLength());
   1027       __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
   1028       break;
   1029     case DataType::Type::kInt64:
   1030       DCHECK_EQ(2u, instruction->GetVectorLength());
   1031       __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
   1032       break;
   1033     default:
   1034       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
   1035       UNREACHABLE();
   1036   }
   1037 }
   1038 
   1039 // Helper to set up locations for vector accumulations.
   1040 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
   1041   LocationSummary* locations = new (allocator) LocationSummary(instruction);
   1042   switch (instruction->GetPackedType()) {
   1043     case DataType::Type::kUint8:
   1044     case DataType::Type::kInt8:
   1045     case DataType::Type::kUint16:
   1046     case DataType::Type::kInt16:
   1047     case DataType::Type::kInt32:
   1048     case DataType::Type::kInt64:
   1049       locations->SetInAt(0, Location::RequiresFpuRegister());
   1050       locations->SetInAt(1, Location::RequiresFpuRegister());
   1051       locations->SetInAt(2, Location::RequiresFpuRegister());
   1052       locations->SetOut(Location::SameAsFirstInput());
   1053       break;
   1054     default:
   1055       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
   1056       UNREACHABLE();
   1057   }
   1058 }
   1059 
   1060 void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
   1061   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
   1062 }
   1063 
   1064 // Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
   1065 // 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
   1066 // However vector MultiplyAccumulate instruction is not affected.
   1067 void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
   1068   LocationSummary* locations = instruction->GetLocations();
   1069   VRegister acc = VRegisterFrom(locations->InAt(0));
   1070   VRegister left = VRegisterFrom(locations->InAt(1));
   1071   VRegister right = VRegisterFrom(locations->InAt(2));
   1072 
   1073   DCHECK(locations->InAt(0).Equals(locations->Out()));
   1074 
   1075   switch (instruction->GetPackedType()) {
   1076     case DataType::Type::kUint8:
   1077     case DataType::Type::kInt8:
   1078       DCHECK_EQ(16u, instruction->GetVectorLength());
   1079       if (instruction->GetOpKind() == HInstruction::kAdd) {
   1080         __ Mla(acc.V16B(), left.V16B(), right.V16B());
   1081       } else {
   1082         __ Mls(acc.V16B(), left.V16B(), right.V16B());
   1083       }
   1084       break;
   1085     case DataType::Type::kUint16:
   1086     case DataType::Type::kInt16:
   1087       DCHECK_EQ(8u, instruction->GetVectorLength());
   1088       if (instruction->GetOpKind() == HInstruction::kAdd) {
   1089         __ Mla(acc.V8H(), left.V8H(), right.V8H());
   1090       } else {
   1091         __ Mls(acc.V8H(), left.V8H(), right.V8H());
   1092       }
   1093       break;
   1094     case DataType::Type::kInt32:
   1095       DCHECK_EQ(4u, instruction->GetVectorLength());
   1096       if (instruction->GetOpKind() == HInstruction::kAdd) {
   1097         __ Mla(acc.V4S(), left.V4S(), right.V4S());
   1098       } else {
   1099         __ Mls(acc.V4S(), left.V4S(), right.V4S());
   1100       }
   1101       break;
   1102     default:
   1103       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
   1104       UNREACHABLE();
   1105   }
   1106 }
   1107 
   1108 void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
   1109   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
   1110   // Some conversions require temporary registers.
   1111   LocationSummary* locations = instruction->GetLocations();
   1112   HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
   1113   HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
   1114   DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
   1115             HVecOperation::ToSignedType(b->GetPackedType()));
   1116   switch (a->GetPackedType()) {
   1117     case DataType::Type::kUint8:
   1118     case DataType::Type::kInt8:
   1119       switch (instruction->GetPackedType()) {
   1120         case DataType::Type::kInt64:
   1121           locations->AddTemp(Location::RequiresFpuRegister());
   1122           locations->AddTemp(Location::RequiresFpuRegister());
   1123           FALLTHROUGH_INTENDED;
   1124         case DataType::Type::kInt32:
   1125           locations->AddTemp(Location::RequiresFpuRegister());
   1126           locations->AddTemp(Location::RequiresFpuRegister());
   1127           break;
   1128         default:
   1129           break;
   1130       }
   1131       break;
   1132     case DataType::Type::kUint16:
   1133     case DataType::Type::kInt16:
   1134       if (instruction->GetPackedType() == DataType::Type::kInt64) {
   1135         locations->AddTemp(Location::RequiresFpuRegister());
   1136         locations->AddTemp(Location::RequiresFpuRegister());
   1137       }
   1138       break;
   1139     case DataType::Type::kInt32:
   1140     case DataType::Type::kInt64:
   1141       if (instruction->GetPackedType() == a->GetPackedType()) {
   1142         locations->AddTemp(Location::RequiresFpuRegister());
   1143       }
   1144       break;
   1145     default:
   1146       break;
   1147   }
   1148 }
   1149 
   1150 void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
   1151   LocationSummary* locations = instruction->GetLocations();
   1152   VRegister acc = VRegisterFrom(locations->InAt(0));
   1153   VRegister left = VRegisterFrom(locations->InAt(1));
   1154   VRegister right = VRegisterFrom(locations->InAt(2));
   1155 
   1156   DCHECK(locations->InAt(0).Equals(locations->Out()));
   1157 
   1158   // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
   1159   HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
   1160   HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
   1161   DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
   1162             HVecOperation::ToSignedType(b->GetPackedType()));
   1163   switch (a->GetPackedType()) {
   1164     case DataType::Type::kUint8:
   1165     case DataType::Type::kInt8:
   1166       DCHECK_EQ(16u, a->GetVectorLength());
   1167       switch (instruction->GetPackedType()) {
   1168         case DataType::Type::kInt16:
   1169           DCHECK_EQ(8u, instruction->GetVectorLength());
   1170           __ Sabal(acc.V8H(), left.V8B(), right.V8B());
   1171           __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
   1172           break;
   1173         case DataType::Type::kInt32: {
   1174           DCHECK_EQ(4u, instruction->GetVectorLength());
   1175           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
   1176           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
   1177           __ Sxtl(tmp1.V8H(), left.V8B());
   1178           __ Sxtl(tmp2.V8H(), right.V8B());
   1179           __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
   1180           __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
   1181           __ Sxtl2(tmp1.V8H(), left.V16B());
   1182           __ Sxtl2(tmp2.V8H(), right.V16B());
   1183           __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
   1184           __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
   1185           break;
   1186         }
   1187         case DataType::Type::kInt64: {
   1188           DCHECK_EQ(2u, instruction->GetVectorLength());
   1189           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
   1190           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
   1191           VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
   1192           VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
   1193           __ Sxtl(tmp1.V8H(), left.V8B());
   1194           __ Sxtl(tmp2.V8H(), right.V8B());
   1195           __ Sxtl(tmp3.V4S(), tmp1.V4H());
   1196           __ Sxtl(tmp4.V4S(), tmp2.V4H());
   1197           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
   1198           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
   1199           __ Sxtl2(tmp3.V4S(), tmp1.V8H());
   1200           __ Sxtl2(tmp4.V4S(), tmp2.V8H());
   1201           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
   1202           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
   1203           __ Sxtl2(tmp1.V8H(), left.V16B());
   1204           __ Sxtl2(tmp2.V8H(), right.V16B());
   1205           __ Sxtl(tmp3.V4S(), tmp1.V4H());
   1206           __ Sxtl(tmp4.V4S(), tmp2.V4H());
   1207           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
   1208           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
   1209           __ Sxtl2(tmp3.V4S(), tmp1.V8H());
   1210           __ Sxtl2(tmp4.V4S(), tmp2.V8H());
   1211           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
   1212           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
   1213           break;
   1214         }
   1215         default:
   1216           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
   1217           UNREACHABLE();
   1218       }
   1219       break;
   1220     case DataType::Type::kUint16:
   1221     case DataType::Type::kInt16:
   1222       DCHECK_EQ(8u, a->GetVectorLength());
   1223       switch (instruction->GetPackedType()) {
   1224         case DataType::Type::kInt32:
   1225           DCHECK_EQ(4u, instruction->GetVectorLength());
   1226           __ Sabal(acc.V4S(), left.V4H(), right.V4H());
   1227           __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
   1228           break;
   1229         case DataType::Type::kInt64: {
   1230           DCHECK_EQ(2u, instruction->GetVectorLength());
   1231           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
   1232           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
   1233           __ Sxtl(tmp1.V4S(), left.V4H());
   1234           __ Sxtl(tmp2.V4S(), right.V4H());
   1235           __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
   1236           __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
   1237           __ Sxtl2(tmp1.V4S(), left.V8H());
   1238           __ Sxtl2(tmp2.V4S(), right.V8H());
   1239           __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
   1240           __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
   1241           break;
   1242         }
   1243         default:
   1244           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
   1245           UNREACHABLE();
   1246       }
   1247       break;
   1248     case DataType::Type::kInt32:
   1249       DCHECK_EQ(4u, a->GetVectorLength());
   1250       switch (instruction->GetPackedType()) {
   1251         case DataType::Type::kInt32: {
   1252           DCHECK_EQ(4u, instruction->GetVectorLength());
   1253           VRegister tmp = VRegisterFrom(locations->GetTemp(0));
   1254           __ Sub(tmp.V4S(), left.V4S(), right.V4S());
   1255           __ Abs(tmp.V4S(), tmp.V4S());
   1256           __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
   1257           break;
   1258         }
   1259         case DataType::Type::kInt64:
   1260           DCHECK_EQ(2u, instruction->GetVectorLength());
   1261           __ Sabal(acc.V2D(), left.V2S(), right.V2S());
   1262           __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
   1263           break;
   1264         default:
   1265           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
   1266           UNREACHABLE();
   1267       }
   1268       break;
   1269     case DataType::Type::kInt64:
   1270       DCHECK_EQ(2u, a->GetVectorLength());
   1271       switch (instruction->GetPackedType()) {
   1272         case DataType::Type::kInt64: {
   1273           DCHECK_EQ(2u, instruction->GetVectorLength());
   1274           VRegister tmp = VRegisterFrom(locations->GetTemp(0));
   1275           __ Sub(tmp.V2D(), left.V2D(), right.V2D());
   1276           __ Abs(tmp.V2D(), tmp.V2D());
   1277           __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
   1278           break;
   1279         }
   1280         default:
   1281           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
   1282           UNREACHABLE();
   1283       }
   1284       break;
   1285     default:
   1286       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
   1287   }
   1288 }
   1289 
   1290 void LocationsBuilderARM64::VisitVecDotProd(HVecDotProd* instruction) {
   1291   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
   1292   DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
   1293   locations->SetInAt(0, Location::RequiresFpuRegister());
   1294   locations->SetInAt(1, Location::RequiresFpuRegister());
   1295   locations->SetInAt(2, Location::RequiresFpuRegister());
   1296   locations->SetOut(Location::SameAsFirstInput());
   1297 
   1298   // For Int8 and Uint8 general case we need a temp register.
   1299   if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) &&
   1300       !ShouldEmitDotProductInstructions(codegen_)) {
   1301     locations->AddTemp(Location::RequiresFpuRegister());
   1302   }
   1303 }
   1304 
   1305 void InstructionCodeGeneratorARM64::VisitVecDotProd(HVecDotProd* instruction) {
   1306   LocationSummary* locations = instruction->GetLocations();
   1307   DCHECK(locations->InAt(0).Equals(locations->Out()));
   1308   VRegister acc = VRegisterFrom(locations->InAt(0));
   1309   VRegister left = VRegisterFrom(locations->InAt(1));
   1310   VRegister right = VRegisterFrom(locations->InAt(2));
   1311   HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
   1312   HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
   1313   DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
   1314             HVecOperation::ToSignedType(b->GetPackedType()));
   1315   DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
   1316   DCHECK_EQ(4u, instruction->GetVectorLength());
   1317 
   1318   size_t inputs_data_size = DataType::Size(a->GetPackedType());
   1319   switch (inputs_data_size) {
   1320     case 1u: {
   1321       DCHECK_EQ(16u, a->GetVectorLength());
   1322       if (instruction->IsZeroExtending()) {
   1323         if (ShouldEmitDotProductInstructions(codegen_)) {
   1324           __ Udot(acc.V4S(), left.V16B(), right.V16B());
   1325         } else {
   1326           VRegister tmp = VRegisterFrom(locations->GetTemp(0));
   1327           __ Umull(tmp.V8H(), left.V8B(), right.V8B());
   1328           __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
   1329           __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
   1330 
   1331           __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
   1332           __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
   1333           __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
   1334         }
   1335       } else {
   1336         if (ShouldEmitDotProductInstructions(codegen_)) {
   1337           __ Sdot(acc.V4S(), left.V16B(), right.V16B());
   1338         } else {
   1339           VRegister tmp = VRegisterFrom(locations->GetTemp(0));
   1340           __ Smull(tmp.V8H(), left.V8B(), right.V8B());
   1341           __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
   1342           __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
   1343 
   1344           __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
   1345           __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
   1346           __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
   1347         }
   1348       }
   1349       break;
   1350     }
   1351     case 2u:
   1352       DCHECK_EQ(8u, a->GetVectorLength());
   1353       if (instruction->IsZeroExtending()) {
   1354         __ Umlal(acc.V4S(), left.V4H(), right.V4H());
   1355         __ Umlal2(acc.V4S(), left.V8H(), right.V8H());
   1356       } else {
   1357         __ Smlal(acc.V4S(), left.V4H(), right.V4H());
   1358         __ Smlal2(acc.V4S(), left.V8H(), right.V8H());
   1359       }
   1360       break;
   1361     default:
   1362       LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
   1363   }
   1364 }
   1365 
   1366 // Helper to set up locations for vector memory operations.
   1367 static void CreateVecMemLocations(ArenaAllocator* allocator,
   1368                                   HVecMemoryOperation* instruction,
   1369                                   bool is_load) {
   1370   LocationSummary* locations = new (allocator) LocationSummary(instruction);
   1371   switch (instruction->GetPackedType()) {
   1372     case DataType::Type::kBool:
   1373     case DataType::Type::kUint8:
   1374     case DataType::Type::kInt8:
   1375     case DataType::Type::kUint16:
   1376     case DataType::Type::kInt16:
   1377     case DataType::Type::kInt32:
   1378     case DataType::Type::kInt64:
   1379     case DataType::Type::kFloat32:
   1380     case DataType::Type::kFloat64:
   1381       locations->SetInAt(0, Location::RequiresRegister());
   1382       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   1383       if (is_load) {
   1384         locations->SetOut(Location::RequiresFpuRegister());
   1385       } else {
   1386         locations->SetInAt(2, Location::RequiresFpuRegister());
   1387       }
   1388       break;
   1389     default:
   1390       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
   1391       UNREACHABLE();
   1392   }
   1393 }
   1394 
   1395 // Helper to set up locations for vector memory operations. Returns the memory operand and,
   1396 // if used, sets the output parameter scratch to a temporary register used in this operand,
   1397 // so that the client can release it right after the memory operand use.
   1398 MemOperand InstructionCodeGeneratorARM64::VecAddress(
   1399     HVecMemoryOperation* instruction,
   1400     UseScratchRegisterScope* temps_scope,
   1401     size_t size,
   1402     bool is_string_char_at,
   1403     /*out*/ Register* scratch) {
   1404   LocationSummary* locations = instruction->GetLocations();
   1405   Register base = InputRegisterAt(instruction, 0);
   1406 
   1407   if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
   1408     DCHECK(!is_string_char_at);
   1409     return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
   1410   }
   1411 
   1412   Location index = locations->InAt(1);
   1413   uint32_t offset = is_string_char_at
   1414       ? mirror::String::ValueOffset().Uint32Value()
   1415       : mirror::Array::DataOffset(size).Uint32Value();
   1416   size_t shift = ComponentSizeShiftWidth(size);
   1417 
   1418   // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
   1419   DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
   1420 
   1421   if (index.IsConstant()) {
   1422     offset += Int64FromLocation(index) << shift;
   1423     return HeapOperand(base, offset);
   1424   } else {
   1425     *scratch = temps_scope->AcquireSameSizeAs(base);
   1426     __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
   1427     return HeapOperand(*scratch, offset);
   1428   }
   1429 }
   1430 
   1431 void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
   1432   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
   1433 }
   1434 
   1435 void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
   1436   LocationSummary* locations = instruction->GetLocations();
   1437   size_t size = DataType::Size(instruction->GetPackedType());
   1438   VRegister reg = VRegisterFrom(locations->Out());
   1439   UseScratchRegisterScope temps(GetVIXLAssembler());
   1440   Register scratch;
   1441 
   1442   switch (instruction->GetPackedType()) {
   1443     case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
   1444     case DataType::Type::kUint16:
   1445       DCHECK_EQ(8u, instruction->GetVectorLength());
   1446       // Special handling of compressed/uncompressed string load.
   1447       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
   1448         vixl::aarch64::Label uncompressed_load, done;
   1449         // Test compression bit.
   1450         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1451                       "Expecting 0=compressed, 1=uncompressed");
   1452         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
   1453         Register length = temps.AcquireW();
   1454         __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
   1455         __ Tbnz(length.W(), 0, &uncompressed_load);
   1456         temps.Release(length);  // no longer needed
   1457         // Zero extend 8 compressed bytes into 8 chars.
   1458         __ Ldr(DRegisterFrom(locations->Out()).V8B(),
   1459                VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
   1460         __ Uxtl(reg.V8H(), reg.V8B());
   1461         __ B(&done);
   1462         if (scratch.IsValid()) {
   1463           temps.Release(scratch);  // if used, no longer needed
   1464         }
   1465         // Load 8 direct uncompressed chars.
   1466         __ Bind(&uncompressed_load);
   1467         __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
   1468         __ Bind(&done);
   1469         return;
   1470       }
   1471       FALLTHROUGH_INTENDED;
   1472     case DataType::Type::kBool:
   1473     case DataType::Type::kUint8:
   1474     case DataType::Type::kInt8:
   1475     case DataType::Type::kInt32:
   1476     case DataType::Type::kFloat32:
   1477     case DataType::Type::kInt64:
   1478     case DataType::Type::kFloat64:
   1479       DCHECK_LE(2u, instruction->GetVectorLength());
   1480       DCHECK_LE(instruction->GetVectorLength(), 16u);
   1481       __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
   1482       break;
   1483     default:
   1484       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
   1485       UNREACHABLE();
   1486   }
   1487 }
   1488 
   1489 void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
   1490   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
   1491 }
   1492 
   1493 void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
   1494   LocationSummary* locations = instruction->GetLocations();
   1495   size_t size = DataType::Size(instruction->GetPackedType());
   1496   VRegister reg = VRegisterFrom(locations->InAt(2));
   1497   UseScratchRegisterScope temps(GetVIXLAssembler());
   1498   Register scratch;
   1499 
   1500   switch (instruction->GetPackedType()) {
   1501     case DataType::Type::kBool:
   1502     case DataType::Type::kUint8:
   1503     case DataType::Type::kInt8:
   1504     case DataType::Type::kUint16:
   1505     case DataType::Type::kInt16:
   1506     case DataType::Type::kInt32:
   1507     case DataType::Type::kFloat32:
   1508     case DataType::Type::kInt64:
   1509     case DataType::Type::kFloat64:
   1510       DCHECK_LE(2u, instruction->GetVectorLength());
   1511       DCHECK_LE(instruction->GetVectorLength(), 16u);
   1512       __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
   1513       break;
   1514     default:
   1515       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
   1516       UNREACHABLE();
   1517   }
   1518 }
   1519 
   1520 #undef __
   1521 
   1522 }  // namespace arm64
   1523 }  // namespace art
   1524