Home | History | Annotate | Download | only in optimizing
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "code_generator_arm64.h"
     18 
     19 #include "mirror/array-inl.h"
     20 #include "mirror/string.h"
     21 
     22 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
     23 
     24 namespace art {
     25 namespace arm64 {
     26 
     27 using helpers::ARM64EncodableConstantOrRegister;
     28 using helpers::Arm64CanEncodeConstantAsImmediate;
     29 using helpers::DRegisterFrom;
     30 using helpers::HeapOperand;
     31 using helpers::InputRegisterAt;
     32 using helpers::Int64ConstantFrom;
     33 using helpers::OutputRegister;
     34 using helpers::VRegisterFrom;
     35 using helpers::WRegisterFrom;
     36 using helpers::XRegisterFrom;
     37 
     38 #define __ GetVIXLAssembler()->
     39 
     40 void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
     41   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
     42   HInstruction* input = instruction->InputAt(0);
     43   switch (instruction->GetPackedType()) {
     44     case DataType::Type::kBool:
     45     case DataType::Type::kUint8:
     46     case DataType::Type::kInt8:
     47     case DataType::Type::kUint16:
     48     case DataType::Type::kInt16:
     49     case DataType::Type::kInt32:
     50     case DataType::Type::kInt64:
     51       locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
     52       locations->SetOut(Location::RequiresFpuRegister());
     53       break;
     54     case DataType::Type::kFloat32:
     55     case DataType::Type::kFloat64:
     56       if (input->IsConstant() &&
     57           Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
     58         locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
     59         locations->SetOut(Location::RequiresFpuRegister());
     60       } else {
     61         locations->SetInAt(0, Location::RequiresFpuRegister());
     62         locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
     63       }
     64       break;
     65     default:
     66       LOG(FATAL) << "Unsupported SIMD type";
     67       UNREACHABLE();
     68   }
     69 }
     70 
     71 void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
     72   LocationSummary* locations = instruction->GetLocations();
     73   Location src_loc = locations->InAt(0);
     74   VRegister dst = VRegisterFrom(locations->Out());
     75   switch (instruction->GetPackedType()) {
     76     case DataType::Type::kBool:
     77     case DataType::Type::kUint8:
     78     case DataType::Type::kInt8:
     79       DCHECK_EQ(16u, instruction->GetVectorLength());
     80       if (src_loc.IsConstant()) {
     81         __ Movi(dst.V16B(), Int64ConstantFrom(src_loc));
     82       } else {
     83         __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
     84       }
     85       break;
     86     case DataType::Type::kUint16:
     87     case DataType::Type::kInt16:
     88       DCHECK_EQ(8u, instruction->GetVectorLength());
     89       if (src_loc.IsConstant()) {
     90         __ Movi(dst.V8H(), Int64ConstantFrom(src_loc));
     91       } else {
     92         __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
     93       }
     94       break;
     95     case DataType::Type::kInt32:
     96       DCHECK_EQ(4u, instruction->GetVectorLength());
     97       if (src_loc.IsConstant()) {
     98         __ Movi(dst.V4S(), Int64ConstantFrom(src_loc));
     99       } else {
    100         __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
    101       }
    102       break;
    103     case DataType::Type::kInt64:
    104       DCHECK_EQ(2u, instruction->GetVectorLength());
    105       if (src_loc.IsConstant()) {
    106         __ Movi(dst.V2D(), Int64ConstantFrom(src_loc));
    107       } else {
    108         __ Dup(dst.V2D(), XRegisterFrom(src_loc));
    109       }
    110       break;
    111     case DataType::Type::kFloat32:
    112       DCHECK_EQ(4u, instruction->GetVectorLength());
    113       if (src_loc.IsConstant()) {
    114         __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
    115       } else {
    116         __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
    117       }
    118       break;
    119     case DataType::Type::kFloat64:
    120       DCHECK_EQ(2u, instruction->GetVectorLength());
    121       if (src_loc.IsConstant()) {
    122         __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
    123       } else {
    124         __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
    125       }
    126       break;
    127     default:
    128       LOG(FATAL) << "Unsupported SIMD type";
    129       UNREACHABLE();
    130   }
    131 }
    132 
    133 void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
    134   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
    135   switch (instruction->GetPackedType()) {
    136     case DataType::Type::kBool:
    137     case DataType::Type::kUint8:
    138     case DataType::Type::kInt8:
    139     case DataType::Type::kUint16:
    140     case DataType::Type::kInt16:
    141     case DataType::Type::kInt32:
    142     case DataType::Type::kInt64:
    143       locations->SetInAt(0, Location::RequiresFpuRegister());
    144       locations->SetOut(Location::RequiresRegister());
    145       break;
    146     case DataType::Type::kFloat32:
    147     case DataType::Type::kFloat64:
    148       locations->SetInAt(0, Location::RequiresFpuRegister());
    149       locations->SetOut(Location::SameAsFirstInput());
    150       break;
    151     default:
    152       LOG(FATAL) << "Unsupported SIMD type";
    153       UNREACHABLE();
    154   }
    155 }
    156 
    157 void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
    158   LocationSummary* locations = instruction->GetLocations();
    159   VRegister src = VRegisterFrom(locations->InAt(0));
    160   switch (instruction->GetPackedType()) {
    161     case DataType::Type::kInt32:
    162       DCHECK_EQ(4u, instruction->GetVectorLength());
    163       __ Umov(OutputRegister(instruction), src.V4S(), 0);
    164       break;
    165     case DataType::Type::kInt64:
    166       DCHECK_EQ(2u, instruction->GetVectorLength());
    167       __ Umov(OutputRegister(instruction), src.V2D(), 0);
    168       break;
    169     case DataType::Type::kFloat32:
    170     case DataType::Type::kFloat64:
    171       DCHECK_LE(2u, instruction->GetVectorLength());
    172       DCHECK_LE(instruction->GetVectorLength(), 4u);
    173       DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
    174       break;
    175     default:
    176       LOG(FATAL) << "Unsupported SIMD type";
    177       UNREACHABLE();
    178   }
    179 }
    180 
    181 // Helper to set up locations for vector unary operations.
    182 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
    183   LocationSummary* locations = new (allocator) LocationSummary(instruction);
    184   switch (instruction->GetPackedType()) {
    185     case DataType::Type::kBool:
    186       locations->SetInAt(0, Location::RequiresFpuRegister());
    187       locations->SetOut(Location::RequiresFpuRegister(),
    188                         instruction->IsVecNot() ? Location::kOutputOverlap
    189                                                 : Location::kNoOutputOverlap);
    190       break;
    191     case DataType::Type::kUint8:
    192     case DataType::Type::kInt8:
    193     case DataType::Type::kUint16:
    194     case DataType::Type::kInt16:
    195     case DataType::Type::kInt32:
    196     case DataType::Type::kInt64:
    197     case DataType::Type::kFloat32:
    198     case DataType::Type::kFloat64:
    199       locations->SetInAt(0, Location::RequiresFpuRegister());
    200       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
    201       break;
    202     default:
    203       LOG(FATAL) << "Unsupported SIMD type";
    204       UNREACHABLE();
    205   }
    206 }
    207 
    208 void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
    209   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    210 }
    211 
    212 void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
    213   LocationSummary* locations = instruction->GetLocations();
    214   VRegister src = VRegisterFrom(locations->InAt(0));
    215   VRegister dst = DRegisterFrom(locations->Out());
    216   switch (instruction->GetPackedType()) {
    217     case DataType::Type::kInt32:
    218       DCHECK_EQ(4u, instruction->GetVectorLength());
    219       switch (instruction->GetKind()) {
    220         case HVecReduce::kSum:
    221           __ Addv(dst.S(), src.V4S());
    222           break;
    223         case HVecReduce::kMin:
    224           __ Sminv(dst.S(), src.V4S());
    225           break;
    226         case HVecReduce::kMax:
    227           __ Smaxv(dst.S(), src.V4S());
    228           break;
    229       }
    230       break;
    231     case DataType::Type::kInt64:
    232       DCHECK_EQ(2u, instruction->GetVectorLength());
    233       switch (instruction->GetKind()) {
    234         case HVecReduce::kSum:
    235           __ Addp(dst.D(), src.V2D());
    236           break;
    237         default:
    238           LOG(FATAL) << "Unsupported SIMD min/max";
    239           UNREACHABLE();
    240       }
    241       break;
    242     default:
    243       LOG(FATAL) << "Unsupported SIMD type";
    244       UNREACHABLE();
    245   }
    246 }
    247 
    248 void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
    249   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    250 }
    251 
    252 void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
    253   LocationSummary* locations = instruction->GetLocations();
    254   VRegister src = VRegisterFrom(locations->InAt(0));
    255   VRegister dst = VRegisterFrom(locations->Out());
    256   DataType::Type from = instruction->GetInputType();
    257   DataType::Type to = instruction->GetResultType();
    258   if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    259     DCHECK_EQ(4u, instruction->GetVectorLength());
    260     __ Scvtf(dst.V4S(), src.V4S());
    261   } else {
    262     LOG(FATAL) << "Unsupported SIMD type";
    263   }
    264 }
    265 
    266 void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
    267   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    268 }
    269 
    270 void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
    271   LocationSummary* locations = instruction->GetLocations();
    272   VRegister src = VRegisterFrom(locations->InAt(0));
    273   VRegister dst = VRegisterFrom(locations->Out());
    274   switch (instruction->GetPackedType()) {
    275     case DataType::Type::kUint8:
    276     case DataType::Type::kInt8:
    277       DCHECK_EQ(16u, instruction->GetVectorLength());
    278       __ Neg(dst.V16B(), src.V16B());
    279       break;
    280     case DataType::Type::kUint16:
    281     case DataType::Type::kInt16:
    282       DCHECK_EQ(8u, instruction->GetVectorLength());
    283       __ Neg(dst.V8H(), src.V8H());
    284       break;
    285     case DataType::Type::kInt32:
    286       DCHECK_EQ(4u, instruction->GetVectorLength());
    287       __ Neg(dst.V4S(), src.V4S());
    288       break;
    289     case DataType::Type::kInt64:
    290       DCHECK_EQ(2u, instruction->GetVectorLength());
    291       __ Neg(dst.V2D(), src.V2D());
    292       break;
    293     case DataType::Type::kFloat32:
    294       DCHECK_EQ(4u, instruction->GetVectorLength());
    295       __ Fneg(dst.V4S(), src.V4S());
    296       break;
    297     case DataType::Type::kFloat64:
    298       DCHECK_EQ(2u, instruction->GetVectorLength());
    299       __ Fneg(dst.V2D(), src.V2D());
    300       break;
    301     default:
    302       LOG(FATAL) << "Unsupported SIMD type";
    303       UNREACHABLE();
    304   }
    305 }
    306 
    307 void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
    308   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    309 }
    310 
    311 void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
    312   LocationSummary* locations = instruction->GetLocations();
    313   VRegister src = VRegisterFrom(locations->InAt(0));
    314   VRegister dst = VRegisterFrom(locations->Out());
    315   switch (instruction->GetPackedType()) {
    316     case DataType::Type::kInt8:
    317       DCHECK_EQ(16u, instruction->GetVectorLength());
    318       __ Abs(dst.V16B(), src.V16B());
    319       break;
    320     case DataType::Type::kInt16:
    321       DCHECK_EQ(8u, instruction->GetVectorLength());
    322       __ Abs(dst.V8H(), src.V8H());
    323       break;
    324     case DataType::Type::kInt32:
    325       DCHECK_EQ(4u, instruction->GetVectorLength());
    326       __ Abs(dst.V4S(), src.V4S());
    327       break;
    328     case DataType::Type::kInt64:
    329       DCHECK_EQ(2u, instruction->GetVectorLength());
    330       __ Abs(dst.V2D(), src.V2D());
    331       break;
    332     case DataType::Type::kFloat32:
    333       DCHECK_EQ(4u, instruction->GetVectorLength());
    334       __ Fabs(dst.V4S(), src.V4S());
    335       break;
    336     case DataType::Type::kFloat64:
    337       DCHECK_EQ(2u, instruction->GetVectorLength());
    338       __ Fabs(dst.V2D(), src.V2D());
    339       break;
    340     default:
    341       LOG(FATAL) << "Unsupported SIMD type";
    342       UNREACHABLE();
    343   }
    344 }
    345 
    346 void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
    347   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    348 }
    349 
    350 void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
    351   LocationSummary* locations = instruction->GetLocations();
    352   VRegister src = VRegisterFrom(locations->InAt(0));
    353   VRegister dst = VRegisterFrom(locations->Out());
    354   switch (instruction->GetPackedType()) {
    355     case DataType::Type::kBool:  // special case boolean-not
    356       DCHECK_EQ(16u, instruction->GetVectorLength());
    357       __ Movi(dst.V16B(), 1);
    358       __ Eor(dst.V16B(), dst.V16B(), src.V16B());
    359       break;
    360     case DataType::Type::kUint8:
    361     case DataType::Type::kInt8:
    362     case DataType::Type::kUint16:
    363     case DataType::Type::kInt16:
    364     case DataType::Type::kInt32:
    365     case DataType::Type::kInt64:
    366       __ Not(dst.V16B(), src.V16B());  // lanes do not matter
    367       break;
    368     default:
    369       LOG(FATAL) << "Unsupported SIMD type";
    370       UNREACHABLE();
    371   }
    372 }
    373 
    374 // Helper to set up locations for vector binary operations.
    375 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
    376   LocationSummary* locations = new (allocator) LocationSummary(instruction);
    377   switch (instruction->GetPackedType()) {
    378     case DataType::Type::kBool:
    379     case DataType::Type::kUint8:
    380     case DataType::Type::kInt8:
    381     case DataType::Type::kUint16:
    382     case DataType::Type::kInt16:
    383     case DataType::Type::kInt32:
    384     case DataType::Type::kInt64:
    385     case DataType::Type::kFloat32:
    386     case DataType::Type::kFloat64:
    387       locations->SetInAt(0, Location::RequiresFpuRegister());
    388       locations->SetInAt(1, Location::RequiresFpuRegister());
    389       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
    390       break;
    391     default:
    392       LOG(FATAL) << "Unsupported SIMD type";
    393       UNREACHABLE();
    394   }
    395 }
    396 
    397 void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
    398   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    399 }
    400 
    401 void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
    402   LocationSummary* locations = instruction->GetLocations();
    403   VRegister lhs = VRegisterFrom(locations->InAt(0));
    404   VRegister rhs = VRegisterFrom(locations->InAt(1));
    405   VRegister dst = VRegisterFrom(locations->Out());
    406   switch (instruction->GetPackedType()) {
    407     case DataType::Type::kUint8:
    408     case DataType::Type::kInt8:
    409       DCHECK_EQ(16u, instruction->GetVectorLength());
    410       __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
    411       break;
    412     case DataType::Type::kUint16:
    413     case DataType::Type::kInt16:
    414       DCHECK_EQ(8u, instruction->GetVectorLength());
    415       __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
    416       break;
    417     case DataType::Type::kInt32:
    418       DCHECK_EQ(4u, instruction->GetVectorLength());
    419       __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
    420       break;
    421     case DataType::Type::kInt64:
    422       DCHECK_EQ(2u, instruction->GetVectorLength());
    423       __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
    424       break;
    425     case DataType::Type::kFloat32:
    426       DCHECK_EQ(4u, instruction->GetVectorLength());
    427       __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
    428       break;
    429     case DataType::Type::kFloat64:
    430       DCHECK_EQ(2u, instruction->GetVectorLength());
    431       __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
    432       break;
    433     default:
    434       LOG(FATAL) << "Unsupported SIMD type";
    435       UNREACHABLE();
    436   }
    437 }
    438 
    439 void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
    440   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    441 }
    442 
    443 void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
    444   LocationSummary* locations = instruction->GetLocations();
    445   VRegister lhs = VRegisterFrom(locations->InAt(0));
    446   VRegister rhs = VRegisterFrom(locations->InAt(1));
    447   VRegister dst = VRegisterFrom(locations->Out());
    448   switch (instruction->GetPackedType()) {
    449     case DataType::Type::kUint8:
    450       DCHECK_EQ(16u, instruction->GetVectorLength());
    451       instruction->IsRounded()
    452           ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
    453           : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
    454       break;
    455     case DataType::Type::kInt8:
    456       DCHECK_EQ(16u, instruction->GetVectorLength());
    457       instruction->IsRounded()
    458           ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
    459           : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
    460       break;
    461     case DataType::Type::kUint16:
    462       DCHECK_EQ(8u, instruction->GetVectorLength());
    463       instruction->IsRounded()
    464           ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
    465           : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
    466       break;
    467     case DataType::Type::kInt16:
    468       DCHECK_EQ(8u, instruction->GetVectorLength());
    469       instruction->IsRounded()
    470           ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
    471           : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
    472       break;
    473     default:
    474       LOG(FATAL) << "Unsupported SIMD type";
    475       UNREACHABLE();
    476   }
    477 }
    478 
    479 void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
    480   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    481 }
    482 
    483 void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
    484   LocationSummary* locations = instruction->GetLocations();
    485   VRegister lhs = VRegisterFrom(locations->InAt(0));
    486   VRegister rhs = VRegisterFrom(locations->InAt(1));
    487   VRegister dst = VRegisterFrom(locations->Out());
    488   switch (instruction->GetPackedType()) {
    489     case DataType::Type::kUint8:
    490     case DataType::Type::kInt8:
    491       DCHECK_EQ(16u, instruction->GetVectorLength());
    492       __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
    493       break;
    494     case DataType::Type::kUint16:
    495     case DataType::Type::kInt16:
    496       DCHECK_EQ(8u, instruction->GetVectorLength());
    497       __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
    498       break;
    499     case DataType::Type::kInt32:
    500       DCHECK_EQ(4u, instruction->GetVectorLength());
    501       __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
    502       break;
    503     case DataType::Type::kInt64:
    504       DCHECK_EQ(2u, instruction->GetVectorLength());
    505       __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
    506       break;
    507     case DataType::Type::kFloat32:
    508       DCHECK_EQ(4u, instruction->GetVectorLength());
    509       __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
    510       break;
    511     case DataType::Type::kFloat64:
    512       DCHECK_EQ(2u, instruction->GetVectorLength());
    513       __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
    514       break;
    515     default:
    516       LOG(FATAL) << "Unsupported SIMD type";
    517       UNREACHABLE();
    518   }
    519 }
    520 
    521 void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
    522   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    523 }
    524 
    525 void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
    526   LocationSummary* locations = instruction->GetLocations();
    527   VRegister lhs = VRegisterFrom(locations->InAt(0));
    528   VRegister rhs = VRegisterFrom(locations->InAt(1));
    529   VRegister dst = VRegisterFrom(locations->Out());
    530   switch (instruction->GetPackedType()) {
    531     case DataType::Type::kUint8:
    532     case DataType::Type::kInt8:
    533       DCHECK_EQ(16u, instruction->GetVectorLength());
    534       __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
    535       break;
    536     case DataType::Type::kUint16:
    537     case DataType::Type::kInt16:
    538       DCHECK_EQ(8u, instruction->GetVectorLength());
    539       __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
    540       break;
    541     case DataType::Type::kInt32:
    542       DCHECK_EQ(4u, instruction->GetVectorLength());
    543       __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
    544       break;
    545     case DataType::Type::kFloat32:
    546       DCHECK_EQ(4u, instruction->GetVectorLength());
    547       __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
    548       break;
    549     case DataType::Type::kFloat64:
    550       DCHECK_EQ(2u, instruction->GetVectorLength());
    551       __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
    552       break;
    553     default:
    554       LOG(FATAL) << "Unsupported SIMD type";
    555       UNREACHABLE();
    556   }
    557 }
    558 
    559 void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
    560   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    561 }
    562 
    563 void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
    564   LocationSummary* locations = instruction->GetLocations();
    565   VRegister lhs = VRegisterFrom(locations->InAt(0));
    566   VRegister rhs = VRegisterFrom(locations->InAt(1));
    567   VRegister dst = VRegisterFrom(locations->Out());
    568   switch (instruction->GetPackedType()) {
    569     case DataType::Type::kFloat32:
    570       DCHECK_EQ(4u, instruction->GetVectorLength());
    571       __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
    572       break;
    573     case DataType::Type::kFloat64:
    574       DCHECK_EQ(2u, instruction->GetVectorLength());
    575       __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
    576       break;
    577     default:
    578       LOG(FATAL) << "Unsupported SIMD type";
    579       UNREACHABLE();
    580   }
    581 }
    582 
    583 void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
    584   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    585 }
    586 
    587 void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
    588   LocationSummary* locations = instruction->GetLocations();
    589   VRegister lhs = VRegisterFrom(locations->InAt(0));
    590   VRegister rhs = VRegisterFrom(locations->InAt(1));
    591   VRegister dst = VRegisterFrom(locations->Out());
    592   switch (instruction->GetPackedType()) {
    593     case DataType::Type::kUint8:
    594       DCHECK_EQ(16u, instruction->GetVectorLength());
    595       __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
    596       break;
    597     case DataType::Type::kInt8:
    598       DCHECK_EQ(16u, instruction->GetVectorLength());
    599       __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
    600       break;
    601     case DataType::Type::kUint16:
    602       DCHECK_EQ(8u, instruction->GetVectorLength());
    603       __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
    604       break;
    605     case DataType::Type::kInt16:
    606       DCHECK_EQ(8u, instruction->GetVectorLength());
    607       __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
    608       break;
    609     case DataType::Type::kUint32:
    610       DCHECK_EQ(4u, instruction->GetVectorLength());
    611       __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
    612       break;
    613     case DataType::Type::kInt32:
    614       DCHECK_EQ(4u, instruction->GetVectorLength());
    615       __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
    616       break;
    617     case DataType::Type::kFloat32:
    618       DCHECK_EQ(4u, instruction->GetVectorLength());
    619       __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
    620       break;
    621     case DataType::Type::kFloat64:
    622       DCHECK_EQ(2u, instruction->GetVectorLength());
    623       __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
    624       break;
    625     default:
    626       LOG(FATAL) << "Unsupported SIMD type";
    627       UNREACHABLE();
    628   }
    629 }
    630 
    631 void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
    632   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    633 }
    634 
    635 void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
    636   LocationSummary* locations = instruction->GetLocations();
    637   VRegister lhs = VRegisterFrom(locations->InAt(0));
    638   VRegister rhs = VRegisterFrom(locations->InAt(1));
    639   VRegister dst = VRegisterFrom(locations->Out());
    640   switch (instruction->GetPackedType()) {
    641     case DataType::Type::kUint8:
    642       DCHECK_EQ(16u, instruction->GetVectorLength());
    643       __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
    644       break;
    645     case DataType::Type::kInt8:
    646       DCHECK_EQ(16u, instruction->GetVectorLength());
    647       __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
    648       break;
    649     case DataType::Type::kUint16:
    650       DCHECK_EQ(8u, instruction->GetVectorLength());
    651       __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
    652       break;
    653     case DataType::Type::kInt16:
    654       DCHECK_EQ(8u, instruction->GetVectorLength());
    655       __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
    656       break;
    657     case DataType::Type::kUint32:
    658       DCHECK_EQ(4u, instruction->GetVectorLength());
    659       __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
    660       break;
    661     case DataType::Type::kInt32:
    662       DCHECK_EQ(4u, instruction->GetVectorLength());
    663       __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
    664       break;
    665     case DataType::Type::kFloat32:
    666       DCHECK_EQ(4u, instruction->GetVectorLength());
    667       __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
    668       break;
    669     case DataType::Type::kFloat64:
    670       DCHECK_EQ(2u, instruction->GetVectorLength());
    671       __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
    672       break;
    673     default:
    674       LOG(FATAL) << "Unsupported SIMD type";
    675       UNREACHABLE();
    676   }
    677 }
    678 
    679 void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
    680   // TODO: Allow constants supported by BIC (vector, immediate).
    681   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    682 }
    683 
    684 void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
    685   LocationSummary* locations = instruction->GetLocations();
    686   VRegister lhs = VRegisterFrom(locations->InAt(0));
    687   VRegister rhs = VRegisterFrom(locations->InAt(1));
    688   VRegister dst = VRegisterFrom(locations->Out());
    689   switch (instruction->GetPackedType()) {
    690     case DataType::Type::kBool:
    691     case DataType::Type::kUint8:
    692     case DataType::Type::kInt8:
    693     case DataType::Type::kUint16:
    694     case DataType::Type::kInt16:
    695     case DataType::Type::kInt32:
    696     case DataType::Type::kInt64:
    697     case DataType::Type::kFloat32:
    698     case DataType::Type::kFloat64:
    699       __ And(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
    700       break;
    701     default:
    702       LOG(FATAL) << "Unsupported SIMD type";
    703       UNREACHABLE();
    704   }
    705 }
    706 
    707 void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) {
    708   LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
    709 }
    710 
    711 void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
    712   // TODO: Use BIC (vector, register).
    713   LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
    714 }
    715 
    716 void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
    717   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    718 }
    719 
    720 void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
    721   LocationSummary* locations = instruction->GetLocations();
    722   VRegister lhs = VRegisterFrom(locations->InAt(0));
    723   VRegister rhs = VRegisterFrom(locations->InAt(1));
    724   VRegister dst = VRegisterFrom(locations->Out());
    725   switch (instruction->GetPackedType()) {
    726     case DataType::Type::kBool:
    727     case DataType::Type::kUint8:
    728     case DataType::Type::kInt8:
    729     case DataType::Type::kUint16:
    730     case DataType::Type::kInt16:
    731     case DataType::Type::kInt32:
    732     case DataType::Type::kInt64:
    733     case DataType::Type::kFloat32:
    734     case DataType::Type::kFloat64:
    735       __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
    736       break;
    737     default:
    738       LOG(FATAL) << "Unsupported SIMD type";
    739       UNREACHABLE();
    740   }
    741 }
    742 
    743 void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
    744   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    745 }
    746 
    747 void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
    748   LocationSummary* locations = instruction->GetLocations();
    749   VRegister lhs = VRegisterFrom(locations->InAt(0));
    750   VRegister rhs = VRegisterFrom(locations->InAt(1));
    751   VRegister dst = VRegisterFrom(locations->Out());
    752   switch (instruction->GetPackedType()) {
    753     case DataType::Type::kBool:
    754     case DataType::Type::kUint8:
    755     case DataType::Type::kInt8:
    756     case DataType::Type::kUint16:
    757     case DataType::Type::kInt16:
    758     case DataType::Type::kInt32:
    759     case DataType::Type::kInt64:
    760     case DataType::Type::kFloat32:
    761     case DataType::Type::kFloat64:
    762       __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
    763       break;
    764     default:
    765       LOG(FATAL) << "Unsupported SIMD type";
    766       UNREACHABLE();
    767   }
    768 }
    769 
    770 // Helper to set up locations for vector shift operations.
    771 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
    772   LocationSummary* locations = new (allocator) LocationSummary(instruction);
    773   switch (instruction->GetPackedType()) {
    774     case DataType::Type::kUint8:
    775     case DataType::Type::kInt8:
    776     case DataType::Type::kUint16:
    777     case DataType::Type::kInt16:
    778     case DataType::Type::kInt32:
    779     case DataType::Type::kInt64:
    780       locations->SetInAt(0, Location::RequiresFpuRegister());
    781       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
    782       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
    783       break;
    784     default:
    785       LOG(FATAL) << "Unsupported SIMD type";
    786       UNREACHABLE();
    787   }
    788 }
    789 
    790 void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
    791   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
    792 }
    793 
    794 void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
    795   LocationSummary* locations = instruction->GetLocations();
    796   VRegister lhs = VRegisterFrom(locations->InAt(0));
    797   VRegister dst = VRegisterFrom(locations->Out());
    798   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    799   switch (instruction->GetPackedType()) {
    800     case DataType::Type::kUint8:
    801     case DataType::Type::kInt8:
    802       DCHECK_EQ(16u, instruction->GetVectorLength());
    803       __ Shl(dst.V16B(), lhs.V16B(), value);
    804       break;
    805     case DataType::Type::kUint16:
    806     case DataType::Type::kInt16:
    807       DCHECK_EQ(8u, instruction->GetVectorLength());
    808       __ Shl(dst.V8H(), lhs.V8H(), value);
    809       break;
    810     case DataType::Type::kInt32:
    811       DCHECK_EQ(4u, instruction->GetVectorLength());
    812       __ Shl(dst.V4S(), lhs.V4S(), value);
    813       break;
    814     case DataType::Type::kInt64:
    815       DCHECK_EQ(2u, instruction->GetVectorLength());
    816       __ Shl(dst.V2D(), lhs.V2D(), value);
    817       break;
    818     default:
    819       LOG(FATAL) << "Unsupported SIMD type";
    820       UNREACHABLE();
    821   }
    822 }
    823 
    824 void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
    825   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
    826 }
    827 
    828 void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
    829   LocationSummary* locations = instruction->GetLocations();
    830   VRegister lhs = VRegisterFrom(locations->InAt(0));
    831   VRegister dst = VRegisterFrom(locations->Out());
    832   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    833   switch (instruction->GetPackedType()) {
    834     case DataType::Type::kUint8:
    835     case DataType::Type::kInt8:
    836       DCHECK_EQ(16u, instruction->GetVectorLength());
    837       __ Sshr(dst.V16B(), lhs.V16B(), value);
    838       break;
    839     case DataType::Type::kUint16:
    840     case DataType::Type::kInt16:
    841       DCHECK_EQ(8u, instruction->GetVectorLength());
    842       __ Sshr(dst.V8H(), lhs.V8H(), value);
    843       break;
    844     case DataType::Type::kInt32:
    845       DCHECK_EQ(4u, instruction->GetVectorLength());
    846       __ Sshr(dst.V4S(), lhs.V4S(), value);
    847       break;
    848     case DataType::Type::kInt64:
    849       DCHECK_EQ(2u, instruction->GetVectorLength());
    850       __ Sshr(dst.V2D(), lhs.V2D(), value);
    851       break;
    852     default:
    853       LOG(FATAL) << "Unsupported SIMD type";
    854       UNREACHABLE();
    855   }
    856 }
    857 
    858 void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
    859   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
    860 }
    861 
    862 void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
    863   LocationSummary* locations = instruction->GetLocations();
    864   VRegister lhs = VRegisterFrom(locations->InAt(0));
    865   VRegister dst = VRegisterFrom(locations->Out());
    866   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    867   switch (instruction->GetPackedType()) {
    868     case DataType::Type::kUint8:
    869     case DataType::Type::kInt8:
    870       DCHECK_EQ(16u, instruction->GetVectorLength());
    871       __ Ushr(dst.V16B(), lhs.V16B(), value);
    872       break;
    873     case DataType::Type::kUint16:
    874     case DataType::Type::kInt16:
    875       DCHECK_EQ(8u, instruction->GetVectorLength());
    876       __ Ushr(dst.V8H(), lhs.V8H(), value);
    877       break;
    878     case DataType::Type::kInt32:
    879       DCHECK_EQ(4u, instruction->GetVectorLength());
    880       __ Ushr(dst.V4S(), lhs.V4S(), value);
    881       break;
    882     case DataType::Type::kInt64:
    883       DCHECK_EQ(2u, instruction->GetVectorLength());
    884       __ Ushr(dst.V2D(), lhs.V2D(), value);
    885       break;
    886     default:
    887       LOG(FATAL) << "Unsupported SIMD type";
    888       UNREACHABLE();
    889   }
    890 }
    891 
    892 void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
    893   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
    894 
    895   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
    896 
    897   HInstruction* input = instruction->InputAt(0);
    898   bool is_zero = IsZeroBitPattern(input);
    899 
    900   switch (instruction->GetPackedType()) {
    901     case DataType::Type::kBool:
    902     case DataType::Type::kUint8:
    903     case DataType::Type::kInt8:
    904     case DataType::Type::kUint16:
    905     case DataType::Type::kInt16:
    906     case DataType::Type::kInt32:
    907     case DataType::Type::kInt64:
    908       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
    909                                     : Location::RequiresRegister());
    910       locations->SetOut(Location::RequiresFpuRegister());
    911       break;
    912     case DataType::Type::kFloat32:
    913     case DataType::Type::kFloat64:
    914       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
    915                                     : Location::RequiresFpuRegister());
    916       locations->SetOut(Location::RequiresFpuRegister());
    917       break;
    918     default:
    919       LOG(FATAL) << "Unsupported SIMD type";
    920       UNREACHABLE();
    921   }
    922 }
    923 
    924 void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
    925   LocationSummary* locations = instruction->GetLocations();
    926   VRegister dst = VRegisterFrom(locations->Out());
    927 
    928   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
    929 
    930   // Zero out all other elements first.
    931   __ Movi(dst.V16B(), 0);
    932 
    933   // Shorthand for any type of zero.
    934   if (IsZeroBitPattern(instruction->InputAt(0))) {
    935     return;
    936   }
    937 
    938   // Set required elements.
    939   switch (instruction->GetPackedType()) {
    940     case DataType::Type::kBool:
    941     case DataType::Type::kUint8:
    942     case DataType::Type::kInt8:
    943       DCHECK_EQ(16u, instruction->GetVectorLength());
    944       __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
    945       break;
    946     case DataType::Type::kUint16:
    947     case DataType::Type::kInt16:
    948       DCHECK_EQ(8u, instruction->GetVectorLength());
    949       __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
    950       break;
    951     case DataType::Type::kInt32:
    952       DCHECK_EQ(4u, instruction->GetVectorLength());
    953       __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
    954       break;
    955     case DataType::Type::kInt64:
    956       DCHECK_EQ(2u, instruction->GetVectorLength());
    957       __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
    958       break;
    959     default:
    960       LOG(FATAL) << "Unsupported SIMD type";
    961       UNREACHABLE();
    962   }
    963 }
    964 
    965 // Helper to set up locations for vector accumulations.
    966 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
    967   LocationSummary* locations = new (allocator) LocationSummary(instruction);
    968   switch (instruction->GetPackedType()) {
    969     case DataType::Type::kUint8:
    970     case DataType::Type::kInt8:
    971     case DataType::Type::kUint16:
    972     case DataType::Type::kInt16:
    973     case DataType::Type::kInt32:
    974     case DataType::Type::kInt64:
    975       locations->SetInAt(0, Location::RequiresFpuRegister());
    976       locations->SetInAt(1, Location::RequiresFpuRegister());
    977       locations->SetInAt(2, Location::RequiresFpuRegister());
    978       locations->SetOut(Location::SameAsFirstInput());
    979       break;
    980     default:
    981       LOG(FATAL) << "Unsupported SIMD type";
    982       UNREACHABLE();
    983   }
    984 }
    985 
    986 void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
    987   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
    988 }
    989 
    990 // Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
    991 // 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
    992 // However vector MultiplyAccumulate instruction is not affected.
    993 void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
    994   LocationSummary* locations = instruction->GetLocations();
    995   VRegister acc = VRegisterFrom(locations->InAt(0));
    996   VRegister left = VRegisterFrom(locations->InAt(1));
    997   VRegister right = VRegisterFrom(locations->InAt(2));
    998 
    999   DCHECK(locations->InAt(0).Equals(locations->Out()));
   1000 
   1001   switch (instruction->GetPackedType()) {
   1002     case DataType::Type::kUint8:
   1003     case DataType::Type::kInt8:
   1004       DCHECK_EQ(16u, instruction->GetVectorLength());
   1005       if (instruction->GetOpKind() == HInstruction::kAdd) {
   1006         __ Mla(acc.V16B(), left.V16B(), right.V16B());
   1007       } else {
   1008         __ Mls(acc.V16B(), left.V16B(), right.V16B());
   1009       }
   1010       break;
   1011     case DataType::Type::kUint16:
   1012     case DataType::Type::kInt16:
   1013       DCHECK_EQ(8u, instruction->GetVectorLength());
   1014       if (instruction->GetOpKind() == HInstruction::kAdd) {
   1015         __ Mla(acc.V8H(), left.V8H(), right.V8H());
   1016       } else {
   1017         __ Mls(acc.V8H(), left.V8H(), right.V8H());
   1018       }
   1019       break;
   1020     case DataType::Type::kInt32:
   1021       DCHECK_EQ(4u, instruction->GetVectorLength());
   1022       if (instruction->GetOpKind() == HInstruction::kAdd) {
   1023         __ Mla(acc.V4S(), left.V4S(), right.V4S());
   1024       } else {
   1025         __ Mls(acc.V4S(), left.V4S(), right.V4S());
   1026       }
   1027       break;
   1028     default:
   1029       LOG(FATAL) << "Unsupported SIMD type";
   1030       UNREACHABLE();
   1031   }
   1032 }
   1033 
   1034 void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
   1035   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
   1036   // Some conversions require temporary registers.
   1037   LocationSummary* locations = instruction->GetLocations();
   1038   HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
   1039   HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
   1040   DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
   1041             HVecOperation::ToSignedType(b->GetPackedType()));
   1042   switch (a->GetPackedType()) {
   1043     case DataType::Type::kUint8:
   1044     case DataType::Type::kInt8:
   1045       switch (instruction->GetPackedType()) {
   1046         case DataType::Type::kInt64:
   1047           locations->AddTemp(Location::RequiresFpuRegister());
   1048           locations->AddTemp(Location::RequiresFpuRegister());
   1049           FALLTHROUGH_INTENDED;
   1050         case DataType::Type::kInt32:
   1051           locations->AddTemp(Location::RequiresFpuRegister());
   1052           locations->AddTemp(Location::RequiresFpuRegister());
   1053           break;
   1054         default:
   1055           break;
   1056       }
   1057       break;
   1058     case DataType::Type::kUint16:
   1059     case DataType::Type::kInt16:
   1060       if (instruction->GetPackedType() == DataType::Type::kInt64) {
   1061         locations->AddTemp(Location::RequiresFpuRegister());
   1062         locations->AddTemp(Location::RequiresFpuRegister());
   1063       }
   1064       break;
   1065     case DataType::Type::kInt32:
   1066     case DataType::Type::kInt64:
   1067       if (instruction->GetPackedType() == a->GetPackedType()) {
   1068         locations->AddTemp(Location::RequiresFpuRegister());
   1069       }
   1070       break;
   1071     default:
   1072       break;
   1073   }
   1074 }
   1075 
   1076 void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
   1077   LocationSummary* locations = instruction->GetLocations();
   1078   VRegister acc = VRegisterFrom(locations->InAt(0));
   1079   VRegister left = VRegisterFrom(locations->InAt(1));
   1080   VRegister right = VRegisterFrom(locations->InAt(2));
   1081 
   1082   DCHECK(locations->InAt(0).Equals(locations->Out()));
   1083 
   1084   // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
   1085   HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
   1086   HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
   1087   DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
   1088             HVecOperation::ToSignedType(b->GetPackedType()));
   1089   switch (a->GetPackedType()) {
   1090     case DataType::Type::kUint8:
   1091     case DataType::Type::kInt8:
   1092       DCHECK_EQ(16u, a->GetVectorLength());
   1093       switch (instruction->GetPackedType()) {
   1094         case DataType::Type::kInt16:
   1095           DCHECK_EQ(8u, instruction->GetVectorLength());
   1096           __ Sabal(acc.V8H(), left.V8B(), right.V8B());
   1097           __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
   1098           break;
   1099         case DataType::Type::kInt32: {
   1100           DCHECK_EQ(4u, instruction->GetVectorLength());
   1101           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
   1102           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
   1103           __ Sxtl(tmp1.V8H(), left.V8B());
   1104           __ Sxtl(tmp2.V8H(), right.V8B());
   1105           __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
   1106           __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
   1107           __ Sxtl2(tmp1.V8H(), left.V16B());
   1108           __ Sxtl2(tmp2.V8H(), right.V16B());
   1109           __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
   1110           __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
   1111           break;
   1112         }
   1113         case DataType::Type::kInt64: {
   1114           DCHECK_EQ(2u, instruction->GetVectorLength());
   1115           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
   1116           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
   1117           VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
   1118           VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
   1119           __ Sxtl(tmp1.V8H(), left.V8B());
   1120           __ Sxtl(tmp2.V8H(), right.V8B());
   1121           __ Sxtl(tmp3.V4S(), tmp1.V4H());
   1122           __ Sxtl(tmp4.V4S(), tmp2.V4H());
   1123           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
   1124           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
   1125           __ Sxtl2(tmp3.V4S(), tmp1.V8H());
   1126           __ Sxtl2(tmp4.V4S(), tmp2.V8H());
   1127           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
   1128           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
   1129           __ Sxtl2(tmp1.V8H(), left.V16B());
   1130           __ Sxtl2(tmp2.V8H(), right.V16B());
   1131           __ Sxtl(tmp3.V4S(), tmp1.V4H());
   1132           __ Sxtl(tmp4.V4S(), tmp2.V4H());
   1133           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
   1134           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
   1135           __ Sxtl2(tmp3.V4S(), tmp1.V8H());
   1136           __ Sxtl2(tmp4.V4S(), tmp2.V8H());
   1137           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
   1138           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
   1139           break;
   1140         }
   1141         default:
   1142           LOG(FATAL) << "Unsupported SIMD type";
   1143           UNREACHABLE();
   1144       }
   1145       break;
   1146     case DataType::Type::kUint16:
   1147     case DataType::Type::kInt16:
   1148       DCHECK_EQ(8u, a->GetVectorLength());
   1149       switch (instruction->GetPackedType()) {
   1150         case DataType::Type::kInt32:
   1151           DCHECK_EQ(4u, instruction->GetVectorLength());
   1152           __ Sabal(acc.V4S(), left.V4H(), right.V4H());
   1153           __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
   1154           break;
   1155         case DataType::Type::kInt64: {
   1156           DCHECK_EQ(2u, instruction->GetVectorLength());
   1157           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
   1158           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
   1159           __ Sxtl(tmp1.V4S(), left.V4H());
   1160           __ Sxtl(tmp2.V4S(), right.V4H());
   1161           __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
   1162           __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
   1163           __ Sxtl2(tmp1.V4S(), left.V8H());
   1164           __ Sxtl2(tmp2.V4S(), right.V8H());
   1165           __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
   1166           __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
   1167           break;
   1168         }
   1169         default:
   1170           LOG(FATAL) << "Unsupported SIMD type";
   1171           UNREACHABLE();
   1172       }
   1173       break;
   1174     case DataType::Type::kInt32:
   1175       DCHECK_EQ(4u, a->GetVectorLength());
   1176       switch (instruction->GetPackedType()) {
   1177         case DataType::Type::kInt32: {
   1178           DCHECK_EQ(4u, instruction->GetVectorLength());
   1179           VRegister tmp = VRegisterFrom(locations->GetTemp(0));
   1180           __ Sub(tmp.V4S(), left.V4S(), right.V4S());
   1181           __ Abs(tmp.V4S(), tmp.V4S());
   1182           __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
   1183           break;
   1184         }
   1185         case DataType::Type::kInt64:
   1186           DCHECK_EQ(2u, instruction->GetVectorLength());
   1187           __ Sabal(acc.V2D(), left.V2S(), right.V2S());
   1188           __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
   1189           break;
   1190         default:
   1191           LOG(FATAL) << "Unsupported SIMD type";
   1192           UNREACHABLE();
   1193       }
   1194       break;
   1195     case DataType::Type::kInt64:
   1196       DCHECK_EQ(2u, a->GetVectorLength());
   1197       switch (instruction->GetPackedType()) {
   1198         case DataType::Type::kInt64: {
   1199           DCHECK_EQ(2u, instruction->GetVectorLength());
   1200           VRegister tmp = VRegisterFrom(locations->GetTemp(0));
   1201           __ Sub(tmp.V2D(), left.V2D(), right.V2D());
   1202           __ Abs(tmp.V2D(), tmp.V2D());
   1203           __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
   1204           break;
   1205         }
   1206         default:
   1207           LOG(FATAL) << "Unsupported SIMD type";
   1208           UNREACHABLE();
   1209       }
   1210       break;
   1211     default:
   1212       LOG(FATAL) << "Unsupported SIMD type";
   1213   }
   1214 }
   1215 
   1216 // Helper to set up locations for vector memory operations.
   1217 static void CreateVecMemLocations(ArenaAllocator* allocator,
   1218                                   HVecMemoryOperation* instruction,
   1219                                   bool is_load) {
   1220   LocationSummary* locations = new (allocator) LocationSummary(instruction);
   1221   switch (instruction->GetPackedType()) {
   1222     case DataType::Type::kBool:
   1223     case DataType::Type::kUint8:
   1224     case DataType::Type::kInt8:
   1225     case DataType::Type::kUint16:
   1226     case DataType::Type::kInt16:
   1227     case DataType::Type::kInt32:
   1228     case DataType::Type::kInt64:
   1229     case DataType::Type::kFloat32:
   1230     case DataType::Type::kFloat64:
   1231       locations->SetInAt(0, Location::RequiresRegister());
   1232       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   1233       if (is_load) {
   1234         locations->SetOut(Location::RequiresFpuRegister());
   1235       } else {
   1236         locations->SetInAt(2, Location::RequiresFpuRegister());
   1237       }
   1238       break;
   1239     default:
   1240       LOG(FATAL) << "Unsupported SIMD type";
   1241       UNREACHABLE();
   1242   }
   1243 }
   1244 
   1245 // Helper to set up locations for vector memory operations. Returns the memory operand and,
   1246 // if used, sets the output parameter scratch to a temporary register used in this operand,
   1247 // so that the client can release it right after the memory operand use.
   1248 MemOperand InstructionCodeGeneratorARM64::VecAddress(
   1249     HVecMemoryOperation* instruction,
   1250     UseScratchRegisterScope* temps_scope,
   1251     size_t size,
   1252     bool is_string_char_at,
   1253     /*out*/ Register* scratch) {
   1254   LocationSummary* locations = instruction->GetLocations();
   1255   Register base = InputRegisterAt(instruction, 0);
   1256 
   1257   if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
   1258     DCHECK(!is_string_char_at);
   1259     return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
   1260   }
   1261 
   1262   Location index = locations->InAt(1);
   1263   uint32_t offset = is_string_char_at
   1264       ? mirror::String::ValueOffset().Uint32Value()
   1265       : mirror::Array::DataOffset(size).Uint32Value();
   1266   size_t shift = ComponentSizeShiftWidth(size);
   1267 
   1268   // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
   1269   DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
   1270 
   1271   if (index.IsConstant()) {
   1272     offset += Int64ConstantFrom(index) << shift;
   1273     return HeapOperand(base, offset);
   1274   } else {
   1275     *scratch = temps_scope->AcquireSameSizeAs(base);
   1276     __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
   1277     return HeapOperand(*scratch, offset);
   1278   }
   1279 }
   1280 
   1281 void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
   1282   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
   1283 }
   1284 
   1285 void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
   1286   LocationSummary* locations = instruction->GetLocations();
   1287   size_t size = DataType::Size(instruction->GetPackedType());
   1288   VRegister reg = VRegisterFrom(locations->Out());
   1289   UseScratchRegisterScope temps(GetVIXLAssembler());
   1290   Register scratch;
   1291 
   1292   switch (instruction->GetPackedType()) {
   1293     case DataType::Type::kUint16:
   1294       DCHECK_EQ(8u, instruction->GetVectorLength());
   1295       // Special handling of compressed/uncompressed string load.
   1296       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
   1297         vixl::aarch64::Label uncompressed_load, done;
   1298         // Test compression bit.
   1299         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1300                       "Expecting 0=compressed, 1=uncompressed");
   1301         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
   1302         Register length = temps.AcquireW();
   1303         __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
   1304         __ Tbnz(length.W(), 0, &uncompressed_load);
   1305         temps.Release(length);  // no longer needed
   1306         // Zero extend 8 compressed bytes into 8 chars.
   1307         __ Ldr(DRegisterFrom(locations->Out()).V8B(),
   1308                VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
   1309         __ Uxtl(reg.V8H(), reg.V8B());
   1310         __ B(&done);
   1311         if (scratch.IsValid()) {
   1312           temps.Release(scratch);  // if used, no longer needed
   1313         }
   1314         // Load 8 direct uncompressed chars.
   1315         __ Bind(&uncompressed_load);
   1316         __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
   1317         __ Bind(&done);
   1318         return;
   1319       }
   1320       FALLTHROUGH_INTENDED;
   1321     case DataType::Type::kBool:
   1322     case DataType::Type::kUint8:
   1323     case DataType::Type::kInt8:
   1324     case DataType::Type::kInt16:
   1325     case DataType::Type::kInt32:
   1326     case DataType::Type::kFloat32:
   1327     case DataType::Type::kInt64:
   1328     case DataType::Type::kFloat64:
   1329       DCHECK_LE(2u, instruction->GetVectorLength());
   1330       DCHECK_LE(instruction->GetVectorLength(), 16u);
   1331       __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
   1332       break;
   1333     default:
   1334       LOG(FATAL) << "Unsupported SIMD type";
   1335       UNREACHABLE();
   1336   }
   1337 }
   1338 
   1339 void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
   1340   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
   1341 }
   1342 
   1343 void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
   1344   LocationSummary* locations = instruction->GetLocations();
   1345   size_t size = DataType::Size(instruction->GetPackedType());
   1346   VRegister reg = VRegisterFrom(locations->InAt(2));
   1347   UseScratchRegisterScope temps(GetVIXLAssembler());
   1348   Register scratch;
   1349 
   1350   switch (instruction->GetPackedType()) {
   1351     case DataType::Type::kBool:
   1352     case DataType::Type::kUint8:
   1353     case DataType::Type::kInt8:
   1354     case DataType::Type::kUint16:
   1355     case DataType::Type::kInt16:
   1356     case DataType::Type::kInt32:
   1357     case DataType::Type::kFloat32:
   1358     case DataType::Type::kInt64:
   1359     case DataType::Type::kFloat64:
   1360       DCHECK_LE(2u, instruction->GetVectorLength());
   1361       DCHECK_LE(instruction->GetVectorLength(), 16u);
   1362       __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
   1363       break;
   1364     default:
   1365       LOG(FATAL) << "Unsupported SIMD type";
   1366       UNREACHABLE();
   1367   }
   1368 }
   1369 
   1370 #undef __
   1371 
   1372 }  // namespace arm64
   1373 }  // namespace art
   1374