Home | History | Annotate | Download | only in optimizing
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "code_generator_x86_64.h"
     18 
     19 #include "mirror/array-inl.h"
     20 #include "mirror/string.h"
     21 
     22 namespace art {
     23 namespace x86_64 {
     24 
// Shorthand used by the code generators in this file: `__ insn(...)` expands to a call of
// `insn(...)` on the object returned by GetAssembler(), down-cast to X86_64Assembler*.
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
     27 
     28 void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
     29   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
     30   HInstruction* input = instruction->InputAt(0);
     31   bool is_zero = IsZeroBitPattern(input);
     32   switch (instruction->GetPackedType()) {
     33     case DataType::Type::kBool:
     34     case DataType::Type::kUint8:
     35     case DataType::Type::kInt8:
     36     case DataType::Type::kUint16:
     37     case DataType::Type::kInt16:
     38     case DataType::Type::kInt32:
     39     case DataType::Type::kInt64:
     40       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
     41                                     : Location::RequiresRegister());
     42       locations->SetOut(Location::RequiresFpuRegister());
     43       break;
     44     case DataType::Type::kFloat32:
     45     case DataType::Type::kFloat64:
     46       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
     47                                     : Location::RequiresFpuRegister());
     48       locations->SetOut(is_zero ? Location::RequiresFpuRegister()
     49                                 : Location::SameAsFirstInput());
     50       break;
     51     default:
     52       LOG(FATAL) << "Unsupported SIMD type";
     53       UNREACHABLE();
     54   }
     55 }
     56 
     57 void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
     58   LocationSummary* locations = instruction->GetLocations();
     59   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
     60 
     61   // Shorthand for any type of zero.
     62   if (IsZeroBitPattern(instruction->InputAt(0))) {
     63     __ xorps(dst, dst);
     64     return;
     65   }
     66 
     67   switch (instruction->GetPackedType()) {
     68     case DataType::Type::kBool:
     69     case DataType::Type::kUint8:
     70     case DataType::Type::kInt8:
     71       DCHECK_EQ(16u, instruction->GetVectorLength());
     72       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
     73       __ punpcklbw(dst, dst);
     74       __ punpcklwd(dst, dst);
     75       __ pshufd(dst, dst, Immediate(0));
     76       break;
     77     case DataType::Type::kUint16:
     78     case DataType::Type::kInt16:
     79       DCHECK_EQ(8u, instruction->GetVectorLength());
     80       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
     81       __ punpcklwd(dst, dst);
     82       __ pshufd(dst, dst, Immediate(0));
     83       break;
     84     case DataType::Type::kInt32:
     85       DCHECK_EQ(4u, instruction->GetVectorLength());
     86       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
     87       __ pshufd(dst, dst, Immediate(0));
     88       break;
     89     case DataType::Type::kInt64:
     90       DCHECK_EQ(2u, instruction->GetVectorLength());
     91       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ true);
     92       __ punpcklqdq(dst, dst);
     93       break;
     94     case DataType::Type::kFloat32:
     95       DCHECK_EQ(4u, instruction->GetVectorLength());
     96       DCHECK(locations->InAt(0).Equals(locations->Out()));
     97       __ shufps(dst, dst, Immediate(0));
     98       break;
     99     case DataType::Type::kFloat64:
    100       DCHECK_EQ(2u, instruction->GetVectorLength());
    101       DCHECK(locations->InAt(0).Equals(locations->Out()));
    102       __ shufpd(dst, dst, Immediate(0));
    103       break;
    104     default:
    105       LOG(FATAL) << "Unsupported SIMD type";
    106       UNREACHABLE();
    107   }
    108 }
    109 
    110 void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
    111   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
    112   switch (instruction->GetPackedType()) {
    113     case DataType::Type::kBool:
    114     case DataType::Type::kUint8:
    115     case DataType::Type::kInt8:
    116     case DataType::Type::kUint16:
    117     case DataType::Type::kInt16:
    118     case DataType::Type::kInt32:
    119     case DataType::Type::kInt64:
    120       locations->SetInAt(0, Location::RequiresFpuRegister());
    121       locations->SetOut(Location::RequiresRegister());
    122       break;
    123     case DataType::Type::kFloat32:
    124     case DataType::Type::kFloat64:
    125       locations->SetInAt(0, Location::RequiresFpuRegister());
    126       locations->SetOut(Location::SameAsFirstInput());
    127       break;
    128     default:
    129       LOG(FATAL) << "Unsupported SIMD type";
    130       UNREACHABLE();
    131   }
    132 }
    133 
    134 void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
    135   LocationSummary* locations = instruction->GetLocations();
    136   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    137   switch (instruction->GetPackedType()) {
    138     case DataType::Type::kBool:
    139     case DataType::Type::kUint8:
    140     case DataType::Type::kInt8:
    141     case DataType::Type::kUint16:
    142     case DataType::Type::kInt16:  // TODO: up to here, and?
    143       LOG(FATAL) << "Unsupported SIMD type";
    144       UNREACHABLE();
    145     case DataType::Type::kInt32:
    146       DCHECK_EQ(4u, instruction->GetVectorLength());
    147       __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ false);
    148       break;
    149     case DataType::Type::kInt64:
    150       DCHECK_EQ(2u, instruction->GetVectorLength());
    151       __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ true);
    152       break;
    153     case DataType::Type::kFloat32:
    154     case DataType::Type::kFloat64:
    155       DCHECK_LE(2u, instruction->GetVectorLength());
    156       DCHECK_LE(instruction->GetVectorLength(), 4u);
    157       DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
    158       break;
    159     default:
    160       LOG(FATAL) << "Unsupported SIMD type";
    161       UNREACHABLE();
    162   }
    163 }
    164 
    165 // Helper to set up locations for vector unary operations.
    166 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
    167   LocationSummary* locations = new (allocator) LocationSummary(instruction);
    168   switch (instruction->GetPackedType()) {
    169     case DataType::Type::kBool:
    170     case DataType::Type::kUint8:
    171     case DataType::Type::kInt8:
    172     case DataType::Type::kUint16:
    173     case DataType::Type::kInt16:
    174     case DataType::Type::kInt32:
    175     case DataType::Type::kInt64:
    176     case DataType::Type::kFloat32:
    177     case DataType::Type::kFloat64:
    178       locations->SetInAt(0, Location::RequiresFpuRegister());
    179       locations->SetOut(Location::RequiresFpuRegister());
    180       break;
    181     default:
    182       LOG(FATAL) << "Unsupported SIMD type";
    183       UNREACHABLE();
    184   }
    185 }
    186 
    187 void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) {
    188   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    189   // Long reduction or min/max require a temporary.
    190   if (instruction->GetPackedType() == DataType::Type::kInt64 ||
    191       instruction->GetKind() == HVecReduce::kMin ||
    192       instruction->GetKind() == HVecReduce::kMax) {
    193     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
    194   }
    195 }
    196 
    197 void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
    198   LocationSummary* locations = instruction->GetLocations();
    199   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    200   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    201   switch (instruction->GetPackedType()) {
    202     case DataType::Type::kInt32:
    203       DCHECK_EQ(4u, instruction->GetVectorLength());
    204       switch (instruction->GetKind()) {
    205         case HVecReduce::kSum:
    206           __ movaps(dst, src);
    207           __ phaddd(dst, dst);
    208           __ phaddd(dst, dst);
    209           break;
    210         case HVecReduce::kMin: {
    211           XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    212           __ movaps(tmp, src);
    213           __ movaps(dst, src);
    214           __ psrldq(tmp, Immediate(8));
    215           __ pminsd(dst, tmp);
    216           __ psrldq(tmp, Immediate(4));
    217           __ pminsd(dst, tmp);
    218           break;
    219         }
    220         case HVecReduce::kMax: {
    221           XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    222           __ movaps(tmp, src);
    223           __ movaps(dst, src);
    224           __ psrldq(tmp, Immediate(8));
    225           __ pmaxsd(dst, tmp);
    226           __ psrldq(tmp, Immediate(4));
    227           __ pmaxsd(dst, tmp);
    228           break;
    229         }
    230       }
    231       break;
    232     case DataType::Type::kInt64: {
    233       DCHECK_EQ(2u, instruction->GetVectorLength());
    234       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    235       switch (instruction->GetKind()) {
    236         case HVecReduce::kSum:
    237           __ movaps(tmp, src);
    238           __ movaps(dst, src);
    239           __ punpckhqdq(tmp, tmp);
    240           __ paddq(dst, tmp);
    241           break;
    242         case HVecReduce::kMin:
    243         case HVecReduce::kMax:
    244           LOG(FATAL) << "Unsupported SIMD type";
    245       }
    246       break;
    247     }
    248     default:
    249       LOG(FATAL) << "Unsupported SIMD type";
    250       UNREACHABLE();
    251   }
    252 }
    253 
    254 void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
    255   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    256 }
    257 
    258 void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) {
    259   LocationSummary* locations = instruction->GetLocations();
    260   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    261   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    262   DataType::Type from = instruction->GetInputType();
    263   DataType::Type to = instruction->GetResultType();
    264   if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    265     DCHECK_EQ(4u, instruction->GetVectorLength());
    266     __ cvtdq2ps(dst, src);
    267   } else {
    268     LOG(FATAL) << "Unsupported SIMD type";
    269   }
    270 }
    271 
    272 void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) {
    273   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    274 }
    275 
    276 void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
    277   LocationSummary* locations = instruction->GetLocations();
    278   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    279   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    280   switch (instruction->GetPackedType()) {
    281     case DataType::Type::kUint8:
    282     case DataType::Type::kInt8:
    283       DCHECK_EQ(16u, instruction->GetVectorLength());
    284       __ pxor(dst, dst);
    285       __ psubb(dst, src);
    286       break;
    287     case DataType::Type::kUint16:
    288     case DataType::Type::kInt16:
    289       DCHECK_EQ(8u, instruction->GetVectorLength());
    290       __ pxor(dst, dst);
    291       __ psubw(dst, src);
    292       break;
    293     case DataType::Type::kInt32:
    294       DCHECK_EQ(4u, instruction->GetVectorLength());
    295       __ pxor(dst, dst);
    296       __ psubd(dst, src);
    297       break;
    298     case DataType::Type::kInt64:
    299       DCHECK_EQ(2u, instruction->GetVectorLength());
    300       __ pxor(dst, dst);
    301       __ psubq(dst, src);
    302       break;
    303     case DataType::Type::kFloat32:
    304       DCHECK_EQ(4u, instruction->GetVectorLength());
    305       __ xorps(dst, dst);
    306       __ subps(dst, src);
    307       break;
    308     case DataType::Type::kFloat64:
    309       DCHECK_EQ(2u, instruction->GetVectorLength());
    310       __ xorpd(dst, dst);
    311       __ subpd(dst, src);
    312       break;
    313     default:
    314       LOG(FATAL) << "Unsupported SIMD type";
    315       UNREACHABLE();
    316   }
    317 }
    318 
    319 void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
    320   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    321   // Integral-abs requires a temporary for the comparison.
    322   if (instruction->GetPackedType() == DataType::Type::kInt32) {
    323     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
    324   }
    325 }
    326 
    327 void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) {
    328   LocationSummary* locations = instruction->GetLocations();
    329   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    330   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    331   switch (instruction->GetPackedType()) {
    332     case DataType::Type::kInt32: {
    333       DCHECK_EQ(4u, instruction->GetVectorLength());
    334       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    335       __ movaps(dst, src);
    336       __ pxor(tmp, tmp);
    337       __ pcmpgtd(tmp, dst);
    338       __ pxor(dst, tmp);
    339       __ psubd(dst, tmp);
    340       break;
    341     }
    342     case DataType::Type::kFloat32:
    343       DCHECK_EQ(4u, instruction->GetVectorLength());
    344       __ pcmpeqb(dst, dst);  // all ones
    345       __ psrld(dst, Immediate(1));
    346       __ andps(dst, src);
    347       break;
    348     case DataType::Type::kFloat64:
    349       DCHECK_EQ(2u, instruction->GetVectorLength());
    350       __ pcmpeqb(dst, dst);  // all ones
    351       __ psrlq(dst, Immediate(1));
    352       __ andpd(dst, src);
    353       break;
    354     default:
    355       LOG(FATAL) << "Unsupported SIMD type";
    356       UNREACHABLE();
    357   }
    358 }
    359 
    360 void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) {
    361   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    362   // Boolean-not requires a temporary to construct the 16 x one.
    363   if (instruction->GetPackedType() == DataType::Type::kBool) {
    364     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
    365   }
    366 }
    367 
    368 void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) {
    369   LocationSummary* locations = instruction->GetLocations();
    370   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    371   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    372   switch (instruction->GetPackedType()) {
    373     case DataType::Type::kBool: {  // special case boolean-not
    374       DCHECK_EQ(16u, instruction->GetVectorLength());
    375       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    376       __ pxor(dst, dst);
    377       __ pcmpeqb(tmp, tmp);  // all ones
    378       __ psubb(dst, tmp);  // 16 x one
    379       __ pxor(dst, src);
    380       break;
    381     }
    382     case DataType::Type::kUint8:
    383     case DataType::Type::kInt8:
    384     case DataType::Type::kUint16:
    385     case DataType::Type::kInt16:
    386     case DataType::Type::kInt32:
    387     case DataType::Type::kInt64:
    388       DCHECK_LE(2u, instruction->GetVectorLength());
    389       DCHECK_LE(instruction->GetVectorLength(), 16u);
    390       __ pcmpeqb(dst, dst);  // all ones
    391       __ pxor(dst, src);
    392       break;
    393     case DataType::Type::kFloat32:
    394       DCHECK_EQ(4u, instruction->GetVectorLength());
    395       __ pcmpeqb(dst, dst);  // all ones
    396       __ xorps(dst, src);
    397       break;
    398     case DataType::Type::kFloat64:
    399       DCHECK_EQ(2u, instruction->GetVectorLength());
    400       __ pcmpeqb(dst, dst);  // all ones
    401       __ xorpd(dst, src);
    402       break;
    403     default:
    404       LOG(FATAL) << "Unsupported SIMD type";
    405       UNREACHABLE();
    406   }
    407 }
    408 
    409 // Helper to set up locations for vector binary operations.
    410 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
    411   LocationSummary* locations = new (allocator) LocationSummary(instruction);
    412   switch (instruction->GetPackedType()) {
    413     case DataType::Type::kBool:
    414     case DataType::Type::kUint8:
    415     case DataType::Type::kInt8:
    416     case DataType::Type::kUint16:
    417     case DataType::Type::kInt16:
    418     case DataType::Type::kInt32:
    419     case DataType::Type::kInt64:
    420     case DataType::Type::kFloat32:
    421     case DataType::Type::kFloat64:
    422       locations->SetInAt(0, Location::RequiresFpuRegister());
    423       locations->SetInAt(1, Location::RequiresFpuRegister());
    424       locations->SetOut(Location::SameAsFirstInput());
    425       break;
    426     default:
    427       LOG(FATAL) << "Unsupported SIMD type";
    428       UNREACHABLE();
    429   }
    430 }
    431 
    432 void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) {
    433   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    434 }
    435 
    436 void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
    437   LocationSummary* locations = instruction->GetLocations();
    438   DCHECK(locations->InAt(0).Equals(locations->Out()));
    439   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    440   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    441   switch (instruction->GetPackedType()) {
    442     case DataType::Type::kUint8:
    443     case DataType::Type::kInt8:
    444       DCHECK_EQ(16u, instruction->GetVectorLength());
    445       __ paddb(dst, src);
    446       break;
    447     case DataType::Type::kUint16:
    448     case DataType::Type::kInt16:
    449       DCHECK_EQ(8u, instruction->GetVectorLength());
    450       __ paddw(dst, src);
    451       break;
    452     case DataType::Type::kInt32:
    453       DCHECK_EQ(4u, instruction->GetVectorLength());
    454       __ paddd(dst, src);
    455       break;
    456     case DataType::Type::kInt64:
    457       DCHECK_EQ(2u, instruction->GetVectorLength());
    458       __ paddq(dst, src);
    459       break;
    460     case DataType::Type::kFloat32:
    461       DCHECK_EQ(4u, instruction->GetVectorLength());
    462       __ addps(dst, src);
    463       break;
    464     case DataType::Type::kFloat64:
    465       DCHECK_EQ(2u, instruction->GetVectorLength());
    466       __ addpd(dst, src);
    467       break;
    468     default:
    469       LOG(FATAL) << "Unsupported SIMD type";
    470       UNREACHABLE();
    471   }
    472 }
    473 
    474 void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
    475   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    476 }
    477 
    478 void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
    479   LocationSummary* locations = instruction->GetLocations();
    480   DCHECK(locations->InAt(0).Equals(locations->Out()));
    481   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    482   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    483 
    484   DCHECK(instruction->IsRounded());
    485 
    486   switch (instruction->GetPackedType()) {
    487     case DataType::Type::kUint8:
    488       DCHECK_EQ(16u, instruction->GetVectorLength());
    489      __ pavgb(dst, src);
    490      return;
    491     case DataType::Type::kUint16:
    492       DCHECK_EQ(8u, instruction->GetVectorLength());
    493       __ pavgw(dst, src);
    494       return;
    495     default:
    496       LOG(FATAL) << "Unsupported SIMD type";
    497       UNREACHABLE();
    498   }
    499 }
    500 
    501 void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
    502   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    503 }
    504 
    505 void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
    506   LocationSummary* locations = instruction->GetLocations();
    507   DCHECK(locations->InAt(0).Equals(locations->Out()));
    508   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    509   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    510   switch (instruction->GetPackedType()) {
    511     case DataType::Type::kUint8:
    512     case DataType::Type::kInt8:
    513       DCHECK_EQ(16u, instruction->GetVectorLength());
    514       __ psubb(dst, src);
    515       break;
    516     case DataType::Type::kUint16:
    517     case DataType::Type::kInt16:
    518       DCHECK_EQ(8u, instruction->GetVectorLength());
    519       __ psubw(dst, src);
    520       break;
    521     case DataType::Type::kInt32:
    522       DCHECK_EQ(4u, instruction->GetVectorLength());
    523       __ psubd(dst, src);
    524       break;
    525     case DataType::Type::kInt64:
    526       DCHECK_EQ(2u, instruction->GetVectorLength());
    527       __ psubq(dst, src);
    528       break;
    529     case DataType::Type::kFloat32:
    530       DCHECK_EQ(4u, instruction->GetVectorLength());
    531       __ subps(dst, src);
    532       break;
    533     case DataType::Type::kFloat64:
    534       DCHECK_EQ(2u, instruction->GetVectorLength());
    535       __ subpd(dst, src);
    536       break;
    537     default:
    538       LOG(FATAL) << "Unsupported SIMD type";
    539       UNREACHABLE();
    540   }
    541 }
    542 
    543 void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) {
    544   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    545 }
    546 
    547 void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
    548   LocationSummary* locations = instruction->GetLocations();
    549   DCHECK(locations->InAt(0).Equals(locations->Out()));
    550   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    551   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    552   switch (instruction->GetPackedType()) {
    553     case DataType::Type::kUint16:
    554     case DataType::Type::kInt16:
    555       DCHECK_EQ(8u, instruction->GetVectorLength());
    556       __ pmullw(dst, src);
    557       break;
    558     case DataType::Type::kInt32:
    559       DCHECK_EQ(4u, instruction->GetVectorLength());
    560       __ pmulld(dst, src);
    561       break;
    562     case DataType::Type::kFloat32:
    563       DCHECK_EQ(4u, instruction->GetVectorLength());
    564       __ mulps(dst, src);
    565       break;
    566     case DataType::Type::kFloat64:
    567       DCHECK_EQ(2u, instruction->GetVectorLength());
    568       __ mulpd(dst, src);
    569       break;
    570     default:
    571       LOG(FATAL) << "Unsupported SIMD type";
    572       UNREACHABLE();
    573   }
    574 }
    575 
    576 void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) {
    577   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    578 }
    579 
    580 void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
    581   LocationSummary* locations = instruction->GetLocations();
    582   DCHECK(locations->InAt(0).Equals(locations->Out()));
    583   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    584   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    585   switch (instruction->GetPackedType()) {
    586     case DataType::Type::kFloat32:
    587       DCHECK_EQ(4u, instruction->GetVectorLength());
    588       __ divps(dst, src);
    589       break;
    590     case DataType::Type::kFloat64:
    591       DCHECK_EQ(2u, instruction->GetVectorLength());
    592       __ divpd(dst, src);
    593       break;
    594     default:
    595       LOG(FATAL) << "Unsupported SIMD type";
    596       UNREACHABLE();
    597   }
    598 }
    599 
    600 void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
    601   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    602 }
    603 
    604 void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
    605   LocationSummary* locations = instruction->GetLocations();
    606   DCHECK(locations->InAt(0).Equals(locations->Out()));
    607   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    608   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    609   switch (instruction->GetPackedType()) {
    610     case DataType::Type::kUint8:
    611       DCHECK_EQ(16u, instruction->GetVectorLength());
    612       __ pminub(dst, src);
    613       break;
    614     case DataType::Type::kInt8:
    615       DCHECK_EQ(16u, instruction->GetVectorLength());
    616       __ pminsb(dst, src);
    617       break;
    618     case DataType::Type::kUint16:
    619       DCHECK_EQ(8u, instruction->GetVectorLength());
    620       __ pminuw(dst, src);
    621       break;
    622     case DataType::Type::kInt16:
    623       DCHECK_EQ(8u, instruction->GetVectorLength());
    624       __ pminsw(dst, src);
    625       break;
    626     case DataType::Type::kUint32:
    627       DCHECK_EQ(4u, instruction->GetVectorLength());
    628       __ pminud(dst, src);
    629       break;
    630     case DataType::Type::kInt32:
    631       DCHECK_EQ(4u, instruction->GetVectorLength());
    632       __ pminsd(dst, src);
    633       break;
    634     // Next cases are sloppy wrt 0.0 vs -0.0.
    635     case DataType::Type::kFloat32:
    636       DCHECK_EQ(4u, instruction->GetVectorLength());
    637       __ minps(dst, src);
    638       break;
    639     case DataType::Type::kFloat64:
    640       DCHECK_EQ(2u, instruction->GetVectorLength());
    641       __ minpd(dst, src);
    642       break;
    643     default:
    644       LOG(FATAL) << "Unsupported SIMD type";
    645       UNREACHABLE();
    646   }
    647 }
    648 
    649 void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
    650   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    651 }
    652 
    653 void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
    654   LocationSummary* locations = instruction->GetLocations();
    655   DCHECK(locations->InAt(0).Equals(locations->Out()));
    656   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    657   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    658   switch (instruction->GetPackedType()) {
    659     case DataType::Type::kUint8:
    660       DCHECK_EQ(16u, instruction->GetVectorLength());
    661       __ pmaxub(dst, src);
    662       break;
    663     case DataType::Type::kInt8:
    664       DCHECK_EQ(16u, instruction->GetVectorLength());
    665       __ pmaxsb(dst, src);
    666       break;
    667     case DataType::Type::kUint16:
    668       DCHECK_EQ(8u, instruction->GetVectorLength());
    669       __ pmaxuw(dst, src);
    670       break;
    671     case DataType::Type::kInt16:
    672       DCHECK_EQ(8u, instruction->GetVectorLength());
    673       __ pmaxsw(dst, src);
    674       break;
    675     case DataType::Type::kUint32:
    676       DCHECK_EQ(4u, instruction->GetVectorLength());
    677       __ pmaxud(dst, src);
    678       break;
    679     case DataType::Type::kInt32:
    680       DCHECK_EQ(4u, instruction->GetVectorLength());
    681       __ pmaxsd(dst, src);
    682       break;
    683     // Next cases are sloppy wrt 0.0 vs -0.0.
    684     case DataType::Type::kFloat32:
    685       DCHECK_EQ(4u, instruction->GetVectorLength());
    686       __ maxps(dst, src);
    687       break;
    688     case DataType::Type::kFloat64:
    689       DCHECK_EQ(2u, instruction->GetVectorLength());
    690       __ maxpd(dst, src);
    691       break;
    692     default:
    693       LOG(FATAL) << "Unsupported SIMD type";
    694       UNREACHABLE();
    695   }
    696 }
    697 
    698 void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
    699   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    700 }
    701 
    702 void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) {
    703   LocationSummary* locations = instruction->GetLocations();
    704   DCHECK(locations->InAt(0).Equals(locations->Out()));
    705   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    706   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    707   switch (instruction->GetPackedType()) {
    708     case DataType::Type::kBool:
    709     case DataType::Type::kUint8:
    710     case DataType::Type::kInt8:
    711     case DataType::Type::kUint16:
    712     case DataType::Type::kInt16:
    713     case DataType::Type::kInt32:
    714     case DataType::Type::kInt64:
    715       DCHECK_LE(2u, instruction->GetVectorLength());
    716       DCHECK_LE(instruction->GetVectorLength(), 16u);
    717       __ pand(dst, src);
    718       break;
    719     case DataType::Type::kFloat32:
    720       DCHECK_EQ(4u, instruction->GetVectorLength());
    721       __ andps(dst, src);
    722       break;
    723     case DataType::Type::kFloat64:
    724       DCHECK_EQ(2u, instruction->GetVectorLength());
    725       __ andpd(dst, src);
    726       break;
    727     default:
    728       LOG(FATAL) << "Unsupported SIMD type";
    729       UNREACHABLE();
    730   }
    731 }
    732 
    733 void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) {
    734   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    735 }
    736 
    737 void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) {
          // Emits the packed AND-NOT for `instruction`. The output shares its location with
          // input 0 (checked below), so `dst` is both the first operand and the result;
          // `src` is input 1. The packed type selects pandn, andnps or andnpd; any other
          // type is fatal.
          // NOTE(review): the x86 and-not instructions complement their destination operand
          // (result = ~dst & src). This assumes the assembler keeps the hardware operand
          // order, so it is input 0 that gets inverted; confirm against HVecAndNot's contract.
    738   LocationSummary* locations = instruction->GetLocations();
    739   DCHECK(locations->InAt(0).Equals(locations->Out()));
    740   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    741   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    742   switch (instruction->GetPackedType()) {
    743     case DataType::Type::kBool:
    744     case DataType::Type::kUint8:
    745     case DataType::Type::kInt8:
    746     case DataType::Type::kUint16:
    747     case DataType::Type::kInt16:
    748     case DataType::Type::kInt32:
    749     case DataType::Type::kInt64:
              // All integral types share one instruction; only the lane count range
              // (2 to 16 inclusive) is checked.
    750       DCHECK_LE(2u, instruction->GetVectorLength());
    751       DCHECK_LE(instruction->GetVectorLength(), 16u);
    752       __ pandn(dst, src);
    753       break;
    754     case DataType::Type::kFloat32:
    755       DCHECK_EQ(4u, instruction->GetVectorLength());
    756       __ andnps(dst, src);
    757       break;
    758     case DataType::Type::kFloat64:
    759       DCHECK_EQ(2u, instruction->GetVectorLength());
    760       __ andnpd(dst, src);
    761       break;
    762     default:
    763       LOG(FATAL) << "Unsupported SIMD type";
    764       UNREACHABLE();
    765   }
    766 }
    767 
    768 void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) {
          // Location setup for a vector OR is delegated to CreateVecBinOpLocations
          // (defined outside this excerpt), passing it the graph's allocator.
    769   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    770 }
    771 
    772 void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) {
          // Emits the packed bitwise OR for `instruction`. The output shares its location
          // with input 0 (checked below), so `dst` is both the first operand and the result;
          // `src` is input 1. The packed type selects por, orps or orpd; any other type is
          // fatal.
    773   LocationSummary* locations = instruction->GetLocations();
    774   DCHECK(locations->InAt(0).Equals(locations->Out()));
    775   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    776   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    777   switch (instruction->GetPackedType()) {
    778     case DataType::Type::kBool:
    779     case DataType::Type::kUint8:
    780     case DataType::Type::kInt8:
    781     case DataType::Type::kUint16:
    782     case DataType::Type::kInt16:
    783     case DataType::Type::kInt32:
    784     case DataType::Type::kInt64:
              // All integral types share one instruction; only the lane count range
              // (2 to 16 inclusive) is checked.
    785       DCHECK_LE(2u, instruction->GetVectorLength());
    786       DCHECK_LE(instruction->GetVectorLength(), 16u);
    787       __ por(dst, src);
    788       break;
    789     case DataType::Type::kFloat32:
    790       DCHECK_EQ(4u, instruction->GetVectorLength());
    791       __ orps(dst, src);
    792       break;
    793     case DataType::Type::kFloat64:
    794       DCHECK_EQ(2u, instruction->GetVectorLength());
    795       __ orpd(dst, src);
    796       break;
    797     default:
    798       LOG(FATAL) << "Unsupported SIMD type";
    799       UNREACHABLE();
    800   }
    801 }
    802 
    803 void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) {
          // Location setup for a vector XOR is delegated to CreateVecBinOpLocations
          // (defined outside this excerpt), passing it the graph's allocator.
    804   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    805 }
    806 
    807 void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) {
          // Emits the packed bitwise XOR for `instruction`. The output shares its location
          // with input 0 (checked below), so `dst` is both the first operand and the result;
          // `src` is input 1. The packed type selects pxor, xorps or xorpd; any other type
          // is fatal.
    808   LocationSummary* locations = instruction->GetLocations();
    809   DCHECK(locations->InAt(0).Equals(locations->Out()));
    810   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    811   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    812   switch (instruction->GetPackedType()) {
    813     case DataType::Type::kBool:
    814     case DataType::Type::kUint8:
    815     case DataType::Type::kInt8:
    816     case DataType::Type::kUint16:
    817     case DataType::Type::kInt16:
    818     case DataType::Type::kInt32:
    819     case DataType::Type::kInt64:
              // All integral types share one instruction; only the lane count range
              // (2 to 16 inclusive) is checked.
    820       DCHECK_LE(2u, instruction->GetVectorLength());
    821       DCHECK_LE(instruction->GetVectorLength(), 16u);
    822       __ pxor(dst, src);
    823       break;
    824     case DataType::Type::kFloat32:
    825       DCHECK_EQ(4u, instruction->GetVectorLength());
    826       __ xorps(dst, src);
    827       break;
    828     case DataType::Type::kFloat64:
    829       DCHECK_EQ(2u, instruction->GetVectorLength());
    830       __ xorpd(dst, src);
    831       break;
    832     default:
    833       LOG(FATAL) << "Unsupported SIMD type";
    834       UNREACHABLE();
    835   }
    836 }
    837 
    838 // Helper to set up locations for vector shift operations.
        // Accepts packed types kUint16, kInt16, kInt32 and kInt64; any other type is fatal.
        // Input 0 requires an FPU register, input 1 is recorded as a constant location
        // (InputAt(1)->AsConstant() is taken unconditionally), and the output uses the same
        // location as input 0.
    839 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
    840   LocationSummary* locations = new (allocator) LocationSummary(instruction);
    841   switch (instruction->GetPackedType()) {
    842     case DataType::Type::kUint16:
    843     case DataType::Type::kInt16:
    844     case DataType::Type::kInt32:
    845     case DataType::Type::kInt64:
    846       locations->SetInAt(0, Location::RequiresFpuRegister());
    847       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
    848       locations->SetOut(Location::SameAsFirstInput());
    849       break;
    850     default:
    851       LOG(FATAL) << "Unsupported SIMD type";
    852       UNREACHABLE();
    853   }
    854 }
    855 
    856 void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) {
          // Uses the shared shift helper: vector in an FPU register, constant distance,
          // result in the same register as the vector input.
    857   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
    858 }
    859 
    860 void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) {
          // Emits a packed left shift by a constant distance. The distance is read from the
          // int constant at input 1 and narrowed to int8_t for the immediate operand. The
          // shift is done in place because the output shares input 0's location (checked
          // below). Lane width selects psllw (16-bit), pslld (32-bit) or psllq (64-bit).
          // NOTE(review): a distance that does not fit in int8_t would be truncated by the
          // cast; presumably callers only produce in-range distances, but confirm.
    861   LocationSummary* locations = instruction->GetLocations();
    862   DCHECK(locations->InAt(0).Equals(locations->Out()));
    863   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    864   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    865   switch (instruction->GetPackedType()) {
    866     case DataType::Type::kUint16:
    867     case DataType::Type::kInt16:
    868       DCHECK_EQ(8u, instruction->GetVectorLength());
    869       __ psllw(dst, Immediate(static_cast<int8_t>(value)));
    870       break;
    871     case DataType::Type::kInt32:
    872       DCHECK_EQ(4u, instruction->GetVectorLength());
    873       __ pslld(dst, Immediate(static_cast<int8_t>(value)));
    874       break;
    875     case DataType::Type::kInt64:
    876       DCHECK_EQ(2u, instruction->GetVectorLength());
    877       __ psllq(dst, Immediate(static_cast<int8_t>(value)));
    878       break;
    879     default:
    880       LOG(FATAL) << "Unsupported SIMD type";
    881       UNREACHABLE();
    882   }
    883 }
    884 
    885 void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) {
          // Uses the shared shift helper: vector in an FPU register, constant distance,
          // result in the same register as the vector input.
    886   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
    887 }
    888 
    889 void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) {
          // Emits a packed arithmetic (sign-propagating) right shift by a constant distance,
          // in place on the register shared by input 0 and the output. The distance comes
          // from the int constant at input 1 and is narrowed to int8_t for the immediate.
          // Only 16-bit (psraw) and 32-bit (psrad) lanes are handled here. The location
          // helper also accepts kInt64, but an Int64 shift reaching this visitor falls into
          // the fatal default case because no 64-bit case is emitted.
    890   LocationSummary* locations = instruction->GetLocations();
    891   DCHECK(locations->InAt(0).Equals(locations->Out()));
    892   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    893   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    894   switch (instruction->GetPackedType()) {
    895     case DataType::Type::kUint16:
    896     case DataType::Type::kInt16:
    897       DCHECK_EQ(8u, instruction->GetVectorLength());
    898       __ psraw(dst, Immediate(static_cast<int8_t>(value)));
    899       break;
    900     case DataType::Type::kInt32:
    901       DCHECK_EQ(4u, instruction->GetVectorLength());
    902       __ psrad(dst, Immediate(static_cast<int8_t>(value)));
    903       break;
    904     default:
    905       LOG(FATAL) << "Unsupported SIMD type";
    906       UNREACHABLE();
    907   }
    908 }
    909 
    910 void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) {
          // Uses the shared shift helper: vector in an FPU register, constant distance,
          // result in the same register as the vector input.
    911   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
    912 }
    913 
    914 void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
          // Emits a packed logical (zero-filling) right shift by a constant distance, in
          // place on the register shared by input 0 and the output. The distance comes from
          // the int constant at input 1 and is narrowed to int8_t for the immediate. Lane
          // width selects psrlw (16-bit), psrld (32-bit) or psrlq (64-bit).
    915   LocationSummary* locations = instruction->GetLocations();
    916   DCHECK(locations->InAt(0).Equals(locations->Out()));
    917   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    918   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    919   switch (instruction->GetPackedType()) {
    920     case DataType::Type::kUint16:
    921     case DataType::Type::kInt16:
    922       DCHECK_EQ(8u, instruction->GetVectorLength());
    923       __ psrlw(dst, Immediate(static_cast<int8_t>(value)));
    924       break;
    925     case DataType::Type::kInt32:
    926       DCHECK_EQ(4u, instruction->GetVectorLength());
    927       __ psrld(dst, Immediate(static_cast<int8_t>(value)));
    928       break;
    929     case DataType::Type::kInt64:
    930       DCHECK_EQ(2u, instruction->GetVectorLength());
    931       __ psrlq(dst, Immediate(static_cast<int8_t>(value)));
    932       break;
    933     default:
    934       LOG(FATAL) << "Unsupported SIMD type";
    935       UNREACHABLE();
    936   }
    937 }
    938 
    939 void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
          // Sets up locations for placing a single scalar into a vector register.
          // A scalar that IsZeroBitPattern() reports as zero is kept as a constant location,
          // so it needs no register. Otherwise integral scalars require a core register and
          // floating-point scalars an FPU register. The output is always an FPU register.
    940   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
    941 
    942   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
    943 
    944   HInstruction* input = instruction->InputAt(0);
    945   bool is_zero = IsZeroBitPattern(input);
    946 
    947   switch (instruction->GetPackedType()) {
    948     case DataType::Type::kBool:
    949     case DataType::Type::kUint8:
    950     case DataType::Type::kInt8:
    951     case DataType::Type::kUint16:
    952     case DataType::Type::kInt16:
    953     case DataType::Type::kInt32:
    954     case DataType::Type::kInt64:
    955       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
    956                                     : Location::RequiresRegister());
    957       locations->SetOut(Location::RequiresFpuRegister());
    958       break;
    959     case DataType::Type::kFloat32:
    960     case DataType::Type::kFloat64:
    961       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
    962                                     : Location::RequiresFpuRegister());
    963       locations->SetOut(Location::RequiresFpuRegister());
    964       break;
    965     default:
    966       LOG(FATAL) << "Unsupported SIMD type";
    967       UNREACHABLE();
    968   }
    969 }
    970 
    971 void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
          // Emits code that places one scalar into the output vector register.
          // The register is cleared first; a zero-pattern scalar needs nothing further.
          // Otherwise the scalar is moved in from a core register (kInt32/kInt64) or an FPU
          // register (kFloat32/kFloat64). Packed types narrower than 32 bits are accepted by
          // the locations builder but are fatal here.
    972   LocationSummary* locations = instruction->GetLocations();
    973   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    974 
    975   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
    976 
    977   // Zero out all other elements first.
    978   __ xorps(dst, dst);
    979 
    980   // Shorthand for any type of zero.
    981   if (IsZeroBitPattern(instruction->InputAt(0))) {
    982     return;
    983   }
    984 
    985   // Set required elements.
    986   switch (instruction->GetPackedType()) {
    987     case DataType::Type::kBool:
    988     case DataType::Type::kUint8:
    989     case DataType::Type::kInt8:
    990     case DataType::Type::kUint16:
    991     case DataType::Type::kInt16:  // TODO: up to here, and?
    992       LOG(FATAL) << "Unsupported SIMD type";
    993       UNREACHABLE();
    994     case DataType::Type::kInt32:
    995       DCHECK_EQ(4u, instruction->GetVectorLength());
              // NOTE(review): this is the same two-argument movd call that the kInt64 case
              // below annotates as 64-bit. If that overload always moves 64 bits, the upper
              // half of the source register would land in the second 32-bit lane. Confirm
              // against the assembler's movd overloads.
    996       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
    997       break;
    998     case DataType::Type::kInt64:
    999       DCHECK_EQ(2u, instruction->GetVectorLength());
   1000       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());  // is 64-bit
   1001       break;
   1002     case DataType::Type::kFloat32:
   1003       DCHECK_EQ(4u, instruction->GetVectorLength());
   1004       __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
   1005       break;
   1006     case DataType::Type::kFloat64:
   1007       DCHECK_EQ(2u, instruction->GetVectorLength());
   1008       __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
   1009       break;
   1010     default:
   1011       LOG(FATAL) << "Unsupported SIMD type";
   1012       UNREACHABLE();
   1013   }
   1014 }
   1015 
   1016 // Helper to set up locations for vector accumulations.
        // Accepts the integral packed types kUint8 through kInt64 (kBool and the
        // floating-point types are fatal). All three inputs require FPU registers and the
        // output uses the same location as input 0.
   1017 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
   1018   LocationSummary* locations = new (allocator) LocationSummary(instruction);
   1019   switch (instruction->GetPackedType()) {
   1020     case DataType::Type::kUint8:
   1021     case DataType::Type::kInt8:
   1022     case DataType::Type::kUint16:
   1023     case DataType::Type::kInt16:
   1024     case DataType::Type::kInt32:
   1025     case DataType::Type::kInt64:
   1026       locations->SetInAt(0, Location::RequiresFpuRegister());
   1027       locations->SetInAt(1, Location::RequiresFpuRegister());
   1028       locations->SetInAt(2, Location::RequiresFpuRegister());
   1029       locations->SetOut(Location::SameAsFirstInput());
   1030       break;
   1031     default:
   1032       LOG(FATAL) << "Unsupported SIMD type";
   1033       UNREACHABLE();
   1034   }
   1035 }
   1036 
   1037 void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
          // Uses the shared accumulation helper: three FPU-register inputs, with the result
          // in the first input's register. The matching code generator in this file only
          // logs a fatal error.
   1038   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
   1039 }
   1040 
   1041 void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
          // Not implemented: no instructions are emitted, and reaching this visitor logs a
          // fatal error that includes the instruction id.
   1042   // TODO: pmaddwd?
   1043   LOG(FATAL) << "No SIMD for " << instruction->GetId();
   1044 }
   1045 
   1046 void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
          // Uses the shared accumulation helper: three FPU-register inputs, with the result
          // in the first input's register. The matching code generator in this file only
          // logs a fatal error.
   1047   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
   1048 }
   1049 
   1050 void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
          // Not implemented: no instructions are emitted, and reaching this visitor logs a
          // fatal error that includes the instruction id.
   1051   // TODO: psadbw for unsigned?
   1052   LOG(FATAL) << "No SIMD for " << instruction->GetId();
   1053 }
   1054 
   1055 // Helper to set up locations for vector memory operations.
        // Input 0 (used as the base by VecAddress) requires a core register. Input 1 (used
        // as the index) may be a register or a constant. A load (`is_load` true) produces
        // its result in an FPU register; a store takes the value to write as input 2 in an
        // FPU register and sets no output. All listed packed types are treated alike; any
        // other type is fatal.
   1056 static void CreateVecMemLocations(ArenaAllocator* allocator,
   1057                                   HVecMemoryOperation* instruction,
   1058                                   bool is_load) {
   1059   LocationSummary* locations = new (allocator) LocationSummary(instruction);
   1060   switch (instruction->GetPackedType()) {
   1061     case DataType::Type::kBool:
   1062     case DataType::Type::kUint8:
   1063     case DataType::Type::kInt8:
   1064     case DataType::Type::kUint16:
   1065     case DataType::Type::kInt16:
   1066     case DataType::Type::kInt32:
   1067     case DataType::Type::kInt64:
   1068     case DataType::Type::kFloat32:
   1069     case DataType::Type::kFloat64:
   1070       locations->SetInAt(0, Location::RequiresRegister());
   1071       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   1072       if (is_load) {
   1073         locations->SetOut(Location::RequiresFpuRegister());
   1074       } else {
   1075         locations->SetInAt(2, Location::RequiresFpuRegister());
   1076       }
   1077       break;
   1078     default:
   1079       LOG(FATAL) << "Unsupported SIMD type";
   1080       UNREACHABLE();
   1081   }
   1082 }
   1083 
   1084 // Helper to construct address for vector memory operations.
        // The base is input 0 (a core register) and the index is input 1. `size` is the
        // element size supplied by the caller (DataType::Size of the packed type, or 1 for
        // compressed string bytes). Sizes 2, 4 and 8 select the matching scale factor; any
        // other value, including 1, keeps TIMES_1. The displacement is the string value
        // offset when `is_string_char_at` is set, otherwise the array data offset for `size`.
   1085 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
   1086   Location base = locations->InAt(0);
   1087   Location index = locations->InAt(1);
   1088   ScaleFactor scale = TIMES_1;
   1089   switch (size) {
   1090     case 2: scale = TIMES_2; break;
   1091     case 4: scale = TIMES_4; break;
   1092     case 8: scale = TIMES_8; break;
   1093     default: break;
   1094   }
   1095   // Incorporate the string or array offset in the address computation.
   1096   uint32_t offset = is_string_char_at
   1097       ? mirror::String::ValueOffset().Uint32Value()
   1098       : mirror::Array::DataOffset(size).Uint32Value();
   1099   return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
   1100 }
   1101 
   1102 void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
          // Standard vector-memory locations for a load, plus one extra FPU temporary when
          // string compression is enabled and the load reads string characters. The code
          // generator uses that temporary as the zero vector for widening compressed bytes.
   1103   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
   1104   // String load requires a temporary for the compressed load.
   1105   if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
   1106     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
   1107   }
   1108 }
   1109 
   1110 void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
          // Emits a vector load into the output FPU register from the address built by
          // VecAddress. The aligned move form is used only when the instruction's alignment
          // reports 16-byte alignment; otherwise the unaligned form is used. With string
          // compression enabled, a kUint16 string-character load is handled specially: the
          // emitted code tests the string's compression flag at run time and either widens
          // 8 compressed bytes to 8 chars or loads 8 chars directly.
   1111   LocationSummary* locations = instruction->GetLocations();
   1112   size_t size = DataType::Size(instruction->GetPackedType());
   1113   Address address = VecAddress(locations, size, instruction->IsStringCharAt());
   1114   XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
   1115   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   1116   switch (instruction->GetPackedType()) {
   1117     case DataType::Type::kUint16:
   1118       DCHECK_EQ(8u, instruction->GetVectorLength());
   1119       // Special handling of compressed/uncompressed string load.
   1120       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
   1121         NearLabel done, not_compressed;
                // The temporary was added by the locations builder for exactly this case.
   1122         XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   1123         // Test compression bit.
   1124         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1125                       "Expecting 0=compressed, 1=uncompressed");
   1126         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
   1127         __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
   1128         __ j(kNotZero, &not_compressed);
   1129         // Zero extend 8 compressed bytes into 8 chars.
                // The address is rebuilt with element size 1 because compressed characters
                // are bytes. Interleaving with the zeroed temporary widens each byte to
                // 16 bits.
   1130         __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
   1131         __ pxor(tmp, tmp);
   1132         __ punpcklbw(reg, tmp);
   1133         __ jmp(&done);
   1134         // Load 8 direct uncompressed chars.
   1135         __ Bind(&not_compressed);
   1136         is_aligned16 ?  __ movdqa(reg, address) :  __ movdqu(reg, address);
   1137         __ Bind(&done);
                // Both run-time paths are fully emitted; skip the generic load below.
   1138         return;
   1139       }
              // kUint16 loads that are not compressed-string loads use the generic
              // integral path.
   1140       FALLTHROUGH_INTENDED;
   1141     case DataType::Type::kBool:
   1142     case DataType::Type::kUint8:
   1143     case DataType::Type::kInt8:
   1144     case DataType::Type::kInt16:
   1145     case DataType::Type::kInt32:
   1146     case DataType::Type::kInt64:
   1147       DCHECK_LE(2u, instruction->GetVectorLength());
   1148       DCHECK_LE(instruction->GetVectorLength(), 16u);
   1149       is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
   1150       break;
   1151     case DataType::Type::kFloat32:
   1152       DCHECK_EQ(4u, instruction->GetVectorLength());
   1153       is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
   1154       break;
   1155     case DataType::Type::kFloat64:
   1156       DCHECK_EQ(2u, instruction->GetVectorLength());
   1157       is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
   1158       break;
   1159     default:
   1160       LOG(FATAL) << "Unsupported SIMD type";
   1161       UNREACHABLE();
   1162   }
   1163 }
   1164 
   1165 void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) {
          // Standard vector-memory locations for a store: base and index as inputs 0 and 1,
          // the value to write as input 2 in an FPU register, and no output.
   1166   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
   1167 }
   1168 
   1169 void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
          // Emits a vector store of input 2 (an FPU register) to the address built by
          // VecAddress. Stores always use the array data offset, never the string value
          // offset. The aligned move form is used only when the instruction's alignment
          // reports 16-byte alignment; otherwise the unaligned form is used. The packed type
          // selects the integer, single-precision or double-precision move.
   1170   LocationSummary* locations = instruction->GetLocations();
   1171   size_t size = DataType::Size(instruction->GetPackedType());
   1172   Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
   1173   XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
   1174   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   1175   switch (instruction->GetPackedType()) {
   1176     case DataType::Type::kBool:
   1177     case DataType::Type::kUint8:
   1178     case DataType::Type::kInt8:
   1179     case DataType::Type::kUint16:
   1180     case DataType::Type::kInt16:
   1181     case DataType::Type::kInt32:
   1182     case DataType::Type::kInt64:
   1183       DCHECK_LE(2u, instruction->GetVectorLength());
   1184       DCHECK_LE(instruction->GetVectorLength(), 16u);
   1185       is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
   1186       break;
   1187     case DataType::Type::kFloat32:
   1188       DCHECK_EQ(4u, instruction->GetVectorLength());
   1189       is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
   1190       break;
   1191     case DataType::Type::kFloat64:
   1192       DCHECK_EQ(2u, instruction->GetVectorLength());
   1193       is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
   1194       break;
   1195     default:
   1196       LOG(FATAL) << "Unsupported SIMD type";
   1197       UNREACHABLE();
   1198   }
   1199 }
   1200 
   1201 #undef __
   1202 
   1203 }  // namespace x86_64
   1204 }  // namespace art
   1205