Home | History | Annotate | Download | only in optimizing
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "code_generator_x86.h"
     18 
     19 #include "mirror/array-inl.h"
     20 #include "mirror/string.h"
     21 
     22 namespace art {
     23 namespace x86 {
     24 
     25 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
     26 #define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
     27 
     28 void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
     29   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
     30   HInstruction* input = instruction->InputAt(0);
     31   bool is_zero = IsZeroBitPattern(input);
     32   switch (instruction->GetPackedType()) {
     33     case DataType::Type::kInt64:
     34       // Long needs extra temporary to load from the register pair.
     35       if (!is_zero) {
     36         locations->AddTemp(Location::RequiresFpuRegister());
     37       }
     38       FALLTHROUGH_INTENDED;
     39     case DataType::Type::kBool:
     40     case DataType::Type::kUint8:
     41     case DataType::Type::kInt8:
     42     case DataType::Type::kUint16:
     43     case DataType::Type::kInt16:
     44     case DataType::Type::kInt32:
     45       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
     46                                     : Location::RequiresRegister());
     47       locations->SetOut(Location::RequiresFpuRegister());
     48       break;
     49     case DataType::Type::kFloat32:
     50     case DataType::Type::kFloat64:
     51       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
     52                                     : Location::RequiresFpuRegister());
     53       locations->SetOut(is_zero ? Location::RequiresFpuRegister()
     54                                 : Location::SameAsFirstInput());
     55       break;
     56     default:
     57       LOG(FATAL) << "Unsupported SIMD type";
     58       UNREACHABLE();
     59   }
     60 }
     61 
     62 void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
     63   LocationSummary* locations = instruction->GetLocations();
     64   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
     65 
     66   // Shorthand for any type of zero.
     67   if (IsZeroBitPattern(instruction->InputAt(0))) {
     68     __ xorps(dst, dst);
     69     return;
     70   }
     71 
     72   switch (instruction->GetPackedType()) {
     73     case DataType::Type::kBool:
     74     case DataType::Type::kUint8:
     75     case DataType::Type::kInt8:
     76       DCHECK_EQ(16u, instruction->GetVectorLength());
     77       __ movd(dst, locations->InAt(0).AsRegister<Register>());
     78       __ punpcklbw(dst, dst);
     79       __ punpcklwd(dst, dst);
     80       __ pshufd(dst, dst, Immediate(0));
     81       break;
     82     case DataType::Type::kUint16:
     83     case DataType::Type::kInt16:
     84       DCHECK_EQ(8u, instruction->GetVectorLength());
     85       __ movd(dst, locations->InAt(0).AsRegister<Register>());
     86       __ punpcklwd(dst, dst);
     87       __ pshufd(dst, dst, Immediate(0));
     88       break;
     89     case DataType::Type::kInt32:
     90       DCHECK_EQ(4u, instruction->GetVectorLength());
     91       __ movd(dst, locations->InAt(0).AsRegister<Register>());
     92       __ pshufd(dst, dst, Immediate(0));
     93       break;
     94     case DataType::Type::kInt64: {
     95       DCHECK_EQ(2u, instruction->GetVectorLength());
     96       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
     97       __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
     98       __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
     99       __ punpckldq(dst, tmp);
    100       __ punpcklqdq(dst, dst);
    101       break;
    102     }
    103     case DataType::Type::kFloat32:
    104       DCHECK_EQ(4u, instruction->GetVectorLength());
    105       DCHECK(locations->InAt(0).Equals(locations->Out()));
    106       __ shufps(dst, dst, Immediate(0));
    107       break;
    108     case DataType::Type::kFloat64:
    109       DCHECK_EQ(2u, instruction->GetVectorLength());
    110       DCHECK(locations->InAt(0).Equals(locations->Out()));
    111       __ shufpd(dst, dst, Immediate(0));
    112       break;
    113     default:
    114       LOG(FATAL) << "Unsupported SIMD type";
    115       UNREACHABLE();
    116   }
    117 }
    118 
    119 void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
    120   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
    121   switch (instruction->GetPackedType()) {
    122     case DataType::Type::kInt64:
    123       // Long needs extra temporary to store into the register pair.
    124       locations->AddTemp(Location::RequiresFpuRegister());
    125       FALLTHROUGH_INTENDED;
    126     case DataType::Type::kBool:
    127     case DataType::Type::kUint8:
    128     case DataType::Type::kInt8:
    129     case DataType::Type::kUint16:
    130     case DataType::Type::kInt16:
    131     case DataType::Type::kInt32:
    132       locations->SetInAt(0, Location::RequiresFpuRegister());
    133       locations->SetOut(Location::RequiresRegister());
    134       break;
    135     case DataType::Type::kFloat32:
    136     case DataType::Type::kFloat64:
    137       locations->SetInAt(0, Location::RequiresFpuRegister());
    138       locations->SetOut(Location::SameAsFirstInput());
    139       break;
    140     default:
    141       LOG(FATAL) << "Unsupported SIMD type";
    142       UNREACHABLE();
    143   }
    144 }
    145 
    146 void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
    147   LocationSummary* locations = instruction->GetLocations();
    148   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    149   switch (instruction->GetPackedType()) {
    150     case DataType::Type::kBool:
    151     case DataType::Type::kUint8:
    152     case DataType::Type::kInt8:
    153     case DataType::Type::kUint16:
    154     case DataType::Type::kInt16:  // TODO: up to here, and?
    155       LOG(FATAL) << "Unsupported SIMD type";
    156       UNREACHABLE();
    157     case DataType::Type::kInt32:
    158       DCHECK_LE(4u, instruction->GetVectorLength());
    159       DCHECK_LE(instruction->GetVectorLength(), 16u);
    160       __ movd(locations->Out().AsRegister<Register>(), src);
    161       break;
    162     case DataType::Type::kInt64: {
    163       DCHECK_EQ(2u, instruction->GetVectorLength());
    164       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    165       __ movd(locations->Out().AsRegisterPairLow<Register>(), src);
    166       __ pshufd(tmp, src, Immediate(1));
    167       __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp);
    168       break;
    169     }
    170     case DataType::Type::kFloat32:
    171     case DataType::Type::kFloat64:
    172       DCHECK_LE(2u, instruction->GetVectorLength());
    173       DCHECK_LE(instruction->GetVectorLength(), 4u);
    174       DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
    175       break;
    176     default:
    177       LOG(FATAL) << "Unsupported SIMD type";
    178       UNREACHABLE();
    179   }
    180 }
    181 
    182 // Helper to set up locations for vector unary operations.
    183 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
    184   LocationSummary* locations = new (allocator) LocationSummary(instruction);
    185   switch (instruction->GetPackedType()) {
    186     case DataType::Type::kBool:
    187     case DataType::Type::kUint8:
    188     case DataType::Type::kInt8:
    189     case DataType::Type::kUint16:
    190     case DataType::Type::kInt16:
    191     case DataType::Type::kInt32:
    192     case DataType::Type::kInt64:
    193     case DataType::Type::kFloat32:
    194     case DataType::Type::kFloat64:
    195       locations->SetInAt(0, Location::RequiresFpuRegister());
    196       locations->SetOut(Location::RequiresFpuRegister());
    197       break;
    198     default:
    199       LOG(FATAL) << "Unsupported SIMD type";
    200       UNREACHABLE();
    201   }
    202 }
    203 
    204 void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) {
    205   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    206   // Long reduction or min/max require a temporary.
    207   if (instruction->GetPackedType() == DataType::Type::kInt64 ||
    208       instruction->GetKind() == HVecReduce::kMin ||
    209       instruction->GetKind() == HVecReduce::kMax) {
    210     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
    211   }
    212 }
    213 
    214 void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) {
    215   LocationSummary* locations = instruction->GetLocations();
    216   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    217   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    218   switch (instruction->GetPackedType()) {
    219     case DataType::Type::kInt32:
    220       DCHECK_EQ(4u, instruction->GetVectorLength());
    221       switch (instruction->GetKind()) {
    222         case HVecReduce::kSum:
    223           __ movaps(dst, src);
    224           __ phaddd(dst, dst);
    225           __ phaddd(dst, dst);
    226           break;
    227         case HVecReduce::kMin: {
    228           XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    229           __ movaps(tmp, src);
    230           __ movaps(dst, src);
    231           __ psrldq(tmp, Immediate(8));
    232           __ pminsd(dst, tmp);
    233           __ psrldq(tmp, Immediate(4));
    234           __ pminsd(dst, tmp);
    235           break;
    236         }
    237         case HVecReduce::kMax: {
    238           XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    239           __ movaps(tmp, src);
    240           __ movaps(dst, src);
    241           __ psrldq(tmp, Immediate(8));
    242           __ pmaxsd(dst, tmp);
    243           __ psrldq(tmp, Immediate(4));
    244           __ pmaxsd(dst, tmp);
    245           break;
    246         }
    247       }
    248       break;
    249     case DataType::Type::kInt64: {
    250       DCHECK_EQ(2u, instruction->GetVectorLength());
    251       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    252       switch (instruction->GetKind()) {
    253         case HVecReduce::kSum:
    254           __ movaps(tmp, src);
    255           __ movaps(dst, src);
    256           __ punpckhqdq(tmp, tmp);
    257           __ paddq(dst, tmp);
    258           break;
    259         case HVecReduce::kMin:
    260         case HVecReduce::kMax:
    261           LOG(FATAL) << "Unsupported SIMD type";
    262       }
    263       break;
    264     }
    265     default:
    266       LOG(FATAL) << "Unsupported SIMD type";
    267       UNREACHABLE();
    268   }
    269 }
    270 
    271 void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
    272   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    273 }
    274 
    275 void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) {
    276   LocationSummary* locations = instruction->GetLocations();
    277   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    278   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    279   DataType::Type from = instruction->GetInputType();
    280   DataType::Type to = instruction->GetResultType();
    281   if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    282     DCHECK_EQ(4u, instruction->GetVectorLength());
    283     __ cvtdq2ps(dst, src);
    284   } else {
    285     LOG(FATAL) << "Unsupported SIMD type";
    286   }
    287 }
    288 
    289 void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) {
    290   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    291 }
    292 
    293 void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
    294   LocationSummary* locations = instruction->GetLocations();
    295   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    296   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    297   switch (instruction->GetPackedType()) {
    298     case DataType::Type::kUint8:
    299     case DataType::Type::kInt8:
    300       DCHECK_EQ(16u, instruction->GetVectorLength());
    301       __ pxor(dst, dst);
    302       __ psubb(dst, src);
    303       break;
    304     case DataType::Type::kUint16:
    305     case DataType::Type::kInt16:
    306       DCHECK_EQ(8u, instruction->GetVectorLength());
    307       __ pxor(dst, dst);
    308       __ psubw(dst, src);
    309       break;
    310     case DataType::Type::kInt32:
    311       DCHECK_EQ(4u, instruction->GetVectorLength());
    312       __ pxor(dst, dst);
    313       __ psubd(dst, src);
    314       break;
    315     case DataType::Type::kInt64:
    316       DCHECK_EQ(2u, instruction->GetVectorLength());
    317       __ pxor(dst, dst);
    318       __ psubq(dst, src);
    319       break;
    320     case DataType::Type::kFloat32:
    321       DCHECK_EQ(4u, instruction->GetVectorLength());
    322       __ xorps(dst, dst);
    323       __ subps(dst, src);
    324       break;
    325     case DataType::Type::kFloat64:
    326       DCHECK_EQ(2u, instruction->GetVectorLength());
    327       __ xorpd(dst, dst);
    328       __ subpd(dst, src);
    329       break;
    330     default:
    331       LOG(FATAL) << "Unsupported SIMD type";
    332       UNREACHABLE();
    333   }
    334 }
    335 
    336 void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
    337   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    338   // Integral-abs requires a temporary for the comparison.
    339   if (instruction->GetPackedType() == DataType::Type::kInt32) {
    340     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
    341   }
    342 }
    343 
    344 void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {
    345   LocationSummary* locations = instruction->GetLocations();
    346   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    347   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    348   switch (instruction->GetPackedType()) {
    349     case DataType::Type::kInt32: {
    350       DCHECK_EQ(4u, instruction->GetVectorLength());
    351       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    352       __ movaps(dst, src);
    353       __ pxor(tmp, tmp);
    354       __ pcmpgtd(tmp, dst);
    355       __ pxor(dst, tmp);
    356       __ psubd(dst, tmp);
    357       break;
    358     }
    359     case DataType::Type::kFloat32:
    360       DCHECK_EQ(4u, instruction->GetVectorLength());
    361       __ pcmpeqb(dst, dst);  // all ones
    362       __ psrld(dst, Immediate(1));
    363       __ andps(dst, src);
    364       break;
    365     case DataType::Type::kFloat64:
    366       DCHECK_EQ(2u, instruction->GetVectorLength());
    367       __ pcmpeqb(dst, dst);  // all ones
    368       __ psrlq(dst, Immediate(1));
    369       __ andpd(dst, src);
    370       break;
    371     default:
    372       LOG(FATAL) << "Unsupported SIMD type";
    373       UNREACHABLE();
    374   }
    375 }
    376 
    377 void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) {
    378   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
    379   // Boolean-not requires a temporary to construct the 16 x one.
    380   if (instruction->GetPackedType() == DataType::Type::kBool) {
    381     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
    382   }
    383 }
    384 
    385 void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) {
    386   LocationSummary* locations = instruction->GetLocations();
    387   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    388   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    389   switch (instruction->GetPackedType()) {
    390     case DataType::Type::kBool: {  // special case boolean-not
    391       DCHECK_EQ(16u, instruction->GetVectorLength());
    392       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    393       __ pxor(dst, dst);
    394       __ pcmpeqb(tmp, tmp);  // all ones
    395       __ psubb(dst, tmp);  // 16 x one
    396       __ pxor(dst, src);
    397       break;
    398     }
    399     case DataType::Type::kUint8:
    400     case DataType::Type::kInt8:
    401     case DataType::Type::kUint16:
    402     case DataType::Type::kInt16:
    403     case DataType::Type::kInt32:
    404     case DataType::Type::kInt64:
    405       DCHECK_LE(2u, instruction->GetVectorLength());
    406       DCHECK_LE(instruction->GetVectorLength(), 16u);
    407       __ pcmpeqb(dst, dst);  // all ones
    408       __ pxor(dst, src);
    409       break;
    410     case DataType::Type::kFloat32:
    411       DCHECK_EQ(4u, instruction->GetVectorLength());
    412       __ pcmpeqb(dst, dst);  // all ones
    413       __ xorps(dst, src);
    414       break;
    415     case DataType::Type::kFloat64:
    416       DCHECK_EQ(2u, instruction->GetVectorLength());
    417       __ pcmpeqb(dst, dst);  // all ones
    418       __ xorpd(dst, src);
    419       break;
    420     default:
    421       LOG(FATAL) << "Unsupported SIMD type";
    422       UNREACHABLE();
    423   }
    424 }
    425 
    426 // Helper to set up locations for vector binary operations.
    427 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
    428   LocationSummary* locations = new (allocator) LocationSummary(instruction);
    429   switch (instruction->GetPackedType()) {
    430     case DataType::Type::kBool:
    431     case DataType::Type::kUint8:
    432     case DataType::Type::kInt8:
    433     case DataType::Type::kUint16:
    434     case DataType::Type::kInt16:
    435     case DataType::Type::kInt32:
    436     case DataType::Type::kInt64:
    437     case DataType::Type::kFloat32:
    438     case DataType::Type::kFloat64:
    439       locations->SetInAt(0, Location::RequiresFpuRegister());
    440       locations->SetInAt(1, Location::RequiresFpuRegister());
    441       locations->SetOut(Location::SameAsFirstInput());
    442       break;
    443     default:
    444       LOG(FATAL) << "Unsupported SIMD type";
    445       UNREACHABLE();
    446   }
    447 }
    448 
    449 void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) {
    450   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    451 }
    452 
    453 void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
    454   LocationSummary* locations = instruction->GetLocations();
    455   DCHECK(locations->InAt(0).Equals(locations->Out()));
    456   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    457   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    458   switch (instruction->GetPackedType()) {
    459     case DataType::Type::kUint8:
    460     case DataType::Type::kInt8:
    461       DCHECK_EQ(16u, instruction->GetVectorLength());
    462       __ paddb(dst, src);
    463       break;
    464     case DataType::Type::kUint16:
    465     case DataType::Type::kInt16:
    466       DCHECK_EQ(8u, instruction->GetVectorLength());
    467       __ paddw(dst, src);
    468       break;
    469     case DataType::Type::kInt32:
    470       DCHECK_EQ(4u, instruction->GetVectorLength());
    471       __ paddd(dst, src);
    472       break;
    473     case DataType::Type::kInt64:
    474       DCHECK_EQ(2u, instruction->GetVectorLength());
    475       __ paddq(dst, src);
    476       break;
    477     case DataType::Type::kFloat32:
    478       DCHECK_EQ(4u, instruction->GetVectorLength());
    479       __ addps(dst, src);
    480       break;
    481     case DataType::Type::kFloat64:
    482       DCHECK_EQ(2u, instruction->GetVectorLength());
    483       __ addpd(dst, src);
    484       break;
    485     default:
    486       LOG(FATAL) << "Unsupported SIMD type";
    487       UNREACHABLE();
    488   }
    489 }
    490 
    491 void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
    492   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    493 }
    494 
    495 void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
    496   LocationSummary* locations = instruction->GetLocations();
    497   DCHECK(locations->InAt(0).Equals(locations->Out()));
    498   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    499   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    500 
    501   DCHECK(instruction->IsRounded());
    502 
    503   switch (instruction->GetPackedType()) {
    504     case DataType::Type::kUint8:
    505       DCHECK_EQ(16u, instruction->GetVectorLength());
    506      __ pavgb(dst, src);
    507      return;
    508     case DataType::Type::kUint16:
    509       DCHECK_EQ(8u, instruction->GetVectorLength());
    510       __ pavgw(dst, src);
    511       return;
    512     default:
    513       LOG(FATAL) << "Unsupported SIMD type";
    514       UNREACHABLE();
    515   }
    516 }
    517 
    518 void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
    519   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    520 }
    521 
    522 void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
    523   LocationSummary* locations = instruction->GetLocations();
    524   DCHECK(locations->InAt(0).Equals(locations->Out()));
    525   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    526   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    527   switch (instruction->GetPackedType()) {
    528     case DataType::Type::kUint8:
    529     case DataType::Type::kInt8:
    530       DCHECK_EQ(16u, instruction->GetVectorLength());
    531       __ psubb(dst, src);
    532       break;
    533     case DataType::Type::kUint16:
    534     case DataType::Type::kInt16:
    535       DCHECK_EQ(8u, instruction->GetVectorLength());
    536       __ psubw(dst, src);
    537       break;
    538     case DataType::Type::kInt32:
    539       DCHECK_EQ(4u, instruction->GetVectorLength());
    540       __ psubd(dst, src);
    541       break;
    542     case DataType::Type::kInt64:
    543       DCHECK_EQ(2u, instruction->GetVectorLength());
    544       __ psubq(dst, src);
    545       break;
    546     case DataType::Type::kFloat32:
    547       DCHECK_EQ(4u, instruction->GetVectorLength());
    548       __ subps(dst, src);
    549       break;
    550     case DataType::Type::kFloat64:
    551       DCHECK_EQ(2u, instruction->GetVectorLength());
    552       __ subpd(dst, src);
    553       break;
    554     default:
    555       LOG(FATAL) << "Unsupported SIMD type";
    556       UNREACHABLE();
    557   }
    558 }
    559 
    560 void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) {
    561   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    562 }
    563 
    564 void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
    565   LocationSummary* locations = instruction->GetLocations();
    566   DCHECK(locations->InAt(0).Equals(locations->Out()));
    567   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    568   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    569   switch (instruction->GetPackedType()) {
    570     case DataType::Type::kUint16:
    571     case DataType::Type::kInt16:
    572       DCHECK_EQ(8u, instruction->GetVectorLength());
    573       __ pmullw(dst, src);
    574       break;
    575     case DataType::Type::kInt32:
    576       DCHECK_EQ(4u, instruction->GetVectorLength());
    577       __ pmulld(dst, src);
    578       break;
    579     case DataType::Type::kFloat32:
    580       DCHECK_EQ(4u, instruction->GetVectorLength());
    581       __ mulps(dst, src);
    582       break;
    583     case DataType::Type::kFloat64:
    584       DCHECK_EQ(2u, instruction->GetVectorLength());
    585       __ mulpd(dst, src);
    586       break;
    587     default:
    588       LOG(FATAL) << "Unsupported SIMD type";
    589       UNREACHABLE();
    590   }
    591 }
    592 
    593 void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) {
    594   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    595 }
    596 
    597 void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
    598   LocationSummary* locations = instruction->GetLocations();
    599   DCHECK(locations->InAt(0).Equals(locations->Out()));
    600   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    601   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    602   switch (instruction->GetPackedType()) {
    603     case DataType::Type::kFloat32:
    604       DCHECK_EQ(4u, instruction->GetVectorLength());
    605       __ divps(dst, src);
    606       break;
    607     case DataType::Type::kFloat64:
    608       DCHECK_EQ(2u, instruction->GetVectorLength());
    609       __ divpd(dst, src);
    610       break;
    611     default:
    612       LOG(FATAL) << "Unsupported SIMD type";
    613       UNREACHABLE();
    614   }
    615 }
    616 
    617 void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
    618   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    619 }
    620 
    621 void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
    622   LocationSummary* locations = instruction->GetLocations();
    623   DCHECK(locations->InAt(0).Equals(locations->Out()));
    624   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    625   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    626   switch (instruction->GetPackedType()) {
    627     case DataType::Type::kUint8:
    628       DCHECK_EQ(16u, instruction->GetVectorLength());
    629       __ pminub(dst, src);
    630       break;
    631     case DataType::Type::kInt8:
    632       DCHECK_EQ(16u, instruction->GetVectorLength());
    633       __ pminsb(dst, src);
    634       break;
    635     case DataType::Type::kUint16:
    636       DCHECK_EQ(8u, instruction->GetVectorLength());
    637       __ pminuw(dst, src);
    638       break;
    639     case DataType::Type::kInt16:
    640       DCHECK_EQ(8u, instruction->GetVectorLength());
    641       __ pminsw(dst, src);
    642       break;
    643     case DataType::Type::kUint32:
    644       DCHECK_EQ(4u, instruction->GetVectorLength());
    645       __ pminud(dst, src);
    646       break;
    647     case DataType::Type::kInt32:
    648       DCHECK_EQ(4u, instruction->GetVectorLength());
    649       __ pminsd(dst, src);
    650       break;
    651     // Next cases are sloppy wrt 0.0 vs -0.0.
    652     case DataType::Type::kFloat32:
    653       DCHECK_EQ(4u, instruction->GetVectorLength());
    654       __ minps(dst, src);
    655       break;
    656     case DataType::Type::kFloat64:
    657       DCHECK_EQ(2u, instruction->GetVectorLength());
    658       __ minpd(dst, src);
    659       break;
    660     default:
    661       LOG(FATAL) << "Unsupported SIMD type";
    662       UNREACHABLE();
    663   }
    664 }
    665 
    666 void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
    667   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    668 }
    669 
    670 void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
    671   LocationSummary* locations = instruction->GetLocations();
    672   DCHECK(locations->InAt(0).Equals(locations->Out()));
    673   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    674   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    675   switch (instruction->GetPackedType()) {
    676     case DataType::Type::kUint8:
    677       DCHECK_EQ(16u, instruction->GetVectorLength());
    678       __ pmaxub(dst, src);
    679       break;
    680     case DataType::Type::kInt8:
    681       DCHECK_EQ(16u, instruction->GetVectorLength());
    682       __ pmaxsb(dst, src);
    683       break;
    684     case DataType::Type::kUint16:
    685       DCHECK_EQ(8u, instruction->GetVectorLength());
    686       __ pmaxuw(dst, src);
    687       break;
    688     case DataType::Type::kInt16:
    689       DCHECK_EQ(8u, instruction->GetVectorLength());
    690       __ pmaxsw(dst, src);
    691       break;
    692     case DataType::Type::kUint32:
    693       DCHECK_EQ(4u, instruction->GetVectorLength());
    694       __ pmaxud(dst, src);
    695       break;
    696     case DataType::Type::kInt32:
    697       DCHECK_EQ(4u, instruction->GetVectorLength());
    698       __ pmaxsd(dst, src);
    699       break;
    700     // Next cases are sloppy wrt 0.0 vs -0.0.
    701     case DataType::Type::kFloat32:
    702       DCHECK_EQ(4u, instruction->GetVectorLength());
    703       __ maxps(dst, src);
    704       break;
    705     case DataType::Type::kFloat64:
    706       DCHECK_EQ(2u, instruction->GetVectorLength());
    707       __ maxpd(dst, src);
    708       break;
    709     default:
    710       LOG(FATAL) << "Unsupported SIMD type";
    711       UNREACHABLE();
    712   }
    713 }
    714 
    715 void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
    716   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    717 }
    718 
    719 void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) {
    720   LocationSummary* locations = instruction->GetLocations();
    721   DCHECK(locations->InAt(0).Equals(locations->Out()));
    722   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    723   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    724   switch (instruction->GetPackedType()) {
    725     case DataType::Type::kBool:
    726     case DataType::Type::kUint8:
    727     case DataType::Type::kInt8:
    728     case DataType::Type::kUint16:
    729     case DataType::Type::kInt16:
    730     case DataType::Type::kInt32:
    731     case DataType::Type::kInt64:
    732       DCHECK_LE(2u, instruction->GetVectorLength());
    733       DCHECK_LE(instruction->GetVectorLength(), 16u);
    734       __ pand(dst, src);
    735       break;
    736     case DataType::Type::kFloat32:
    737       DCHECK_EQ(4u, instruction->GetVectorLength());
    738       __ andps(dst, src);
    739       break;
    740     case DataType::Type::kFloat64:
    741       DCHECK_EQ(2u, instruction->GetVectorLength());
    742       __ andpd(dst, src);
    743       break;
    744     default:
    745       LOG(FATAL) << "Unsupported SIMD type";
    746       UNREACHABLE();
    747   }
    748 }
    749 
    750 void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) {
    751   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    752 }
    753 
    754 void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) {
    755   LocationSummary* locations = instruction->GetLocations();
    756   DCHECK(locations->InAt(0).Equals(locations->Out()));
    757   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    758   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    759   switch (instruction->GetPackedType()) {
    760     case DataType::Type::kBool:
    761     case DataType::Type::kUint8:
    762     case DataType::Type::kInt8:
    763     case DataType::Type::kUint16:
    764     case DataType::Type::kInt16:
    765     case DataType::Type::kInt32:
    766     case DataType::Type::kInt64:
    767       DCHECK_LE(2u, instruction->GetVectorLength());
    768       DCHECK_LE(instruction->GetVectorLength(), 16u);
    769       __ pandn(dst, src);
    770       break;
    771     case DataType::Type::kFloat32:
    772       DCHECK_EQ(4u, instruction->GetVectorLength());
    773       __ andnps(dst, src);
    774       break;
    775     case DataType::Type::kFloat64:
    776       DCHECK_EQ(2u, instruction->GetVectorLength());
    777       __ andnpd(dst, src);
    778       break;
    779     default:
    780       LOG(FATAL) << "Unsupported SIMD type";
    781       UNREACHABLE();
    782   }
    783 }
    784 
    785 void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) {
    786   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    787 }
    788 
    789 void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) {
    790   LocationSummary* locations = instruction->GetLocations();
    791   DCHECK(locations->InAt(0).Equals(locations->Out()));
    792   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    793   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    794   switch (instruction->GetPackedType()) {
    795     case DataType::Type::kBool:
    796     case DataType::Type::kUint8:
    797     case DataType::Type::kInt8:
    798     case DataType::Type::kUint16:
    799     case DataType::Type::kInt16:
    800     case DataType::Type::kInt32:
    801     case DataType::Type::kInt64:
    802       DCHECK_LE(2u, instruction->GetVectorLength());
    803       DCHECK_LE(instruction->GetVectorLength(), 16u);
    804       __ por(dst, src);
    805       break;
    806     case DataType::Type::kFloat32:
    807       DCHECK_EQ(4u, instruction->GetVectorLength());
    808       __ orps(dst, src);
    809       break;
    810     case DataType::Type::kFloat64:
    811       DCHECK_EQ(2u, instruction->GetVectorLength());
    812       __ orpd(dst, src);
    813       break;
    814     default:
    815       LOG(FATAL) << "Unsupported SIMD type";
    816       UNREACHABLE();
    817   }
    818 }
    819 
    820 void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) {
    821   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
    822 }
    823 
    824 void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) {
    825   LocationSummary* locations = instruction->GetLocations();
    826   DCHECK(locations->InAt(0).Equals(locations->Out()));
    827   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    828   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    829   switch (instruction->GetPackedType()) {
    830     case DataType::Type::kBool:
    831     case DataType::Type::kUint8:
    832     case DataType::Type::kInt8:
    833     case DataType::Type::kUint16:
    834     case DataType::Type::kInt16:
    835     case DataType::Type::kInt32:
    836     case DataType::Type::kInt64:
    837       DCHECK_LE(2u, instruction->GetVectorLength());
    838       DCHECK_LE(instruction->GetVectorLength(), 16u);
    839       __ pxor(dst, src);
    840       break;
    841     case DataType::Type::kFloat32:
    842       DCHECK_EQ(4u, instruction->GetVectorLength());
    843       __ xorps(dst, src);
    844       break;
    845     case DataType::Type::kFloat64:
    846       DCHECK_EQ(2u, instruction->GetVectorLength());
    847       __ xorpd(dst, src);
    848       break;
    849     default:
    850       LOG(FATAL) << "Unsupported SIMD type";
    851       UNREACHABLE();
    852   }
    853 }
    854 
    855 // Helper to set up locations for vector shift operations.
    856 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
    857   LocationSummary* locations = new (allocator) LocationSummary(instruction);
    858   switch (instruction->GetPackedType()) {
    859     case DataType::Type::kUint16:
    860     case DataType::Type::kInt16:
    861     case DataType::Type::kInt32:
    862     case DataType::Type::kInt64:
    863       locations->SetInAt(0, Location::RequiresFpuRegister());
    864       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
    865       locations->SetOut(Location::SameAsFirstInput());
    866       break;
    867     default:
    868       LOG(FATAL) << "Unsupported SIMD type";
    869       UNREACHABLE();
    870   }
    871 }
    872 
    873 void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) {
    874   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
    875 }
    876 
    877 void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) {
    878   LocationSummary* locations = instruction->GetLocations();
    879   DCHECK(locations->InAt(0).Equals(locations->Out()));
    880   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    881   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    882   switch (instruction->GetPackedType()) {
    883     case DataType::Type::kUint16:
    884     case DataType::Type::kInt16:
    885       DCHECK_EQ(8u, instruction->GetVectorLength());
    886       __ psllw(dst, Immediate(static_cast<uint8_t>(value)));
    887       break;
    888     case DataType::Type::kInt32:
    889       DCHECK_EQ(4u, instruction->GetVectorLength());
    890       __ pslld(dst, Immediate(static_cast<uint8_t>(value)));
    891       break;
    892     case DataType::Type::kInt64:
    893       DCHECK_EQ(2u, instruction->GetVectorLength());
    894       __ psllq(dst, Immediate(static_cast<uint8_t>(value)));
    895       break;
    896     default:
    897       LOG(FATAL) << "Unsupported SIMD type";
    898       UNREACHABLE();
    899   }
    900 }
    901 
    902 void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) {
    903   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
    904 }
    905 
    906 void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) {
    907   LocationSummary* locations = instruction->GetLocations();
    908   DCHECK(locations->InAt(0).Equals(locations->Out()));
    909   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    910   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    911   switch (instruction->GetPackedType()) {
    912     case DataType::Type::kUint16:
    913     case DataType::Type::kInt16:
    914       DCHECK_EQ(8u, instruction->GetVectorLength());
    915       __ psraw(dst, Immediate(static_cast<uint8_t>(value)));
    916       break;
    917     case DataType::Type::kInt32:
    918       DCHECK_EQ(4u, instruction->GetVectorLength());
    919       __ psrad(dst, Immediate(static_cast<uint8_t>(value)));
    920       break;
    921     default:
    922       LOG(FATAL) << "Unsupported SIMD type";
    923       UNREACHABLE();
    924   }
    925 }
    926 
    927 void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) {
    928   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
    929 }
    930 
    931 void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
    932   LocationSummary* locations = instruction->GetLocations();
    933   DCHECK(locations->InAt(0).Equals(locations->Out()));
    934   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    935   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    936   switch (instruction->GetPackedType()) {
    937     case DataType::Type::kUint16:
    938     case DataType::Type::kInt16:
    939       DCHECK_EQ(8u, instruction->GetVectorLength());
    940       __ psrlw(dst, Immediate(static_cast<uint8_t>(value)));
    941       break;
    942     case DataType::Type::kInt32:
    943       DCHECK_EQ(4u, instruction->GetVectorLength());
    944       __ psrld(dst, Immediate(static_cast<uint8_t>(value)));
    945       break;
    946     case DataType::Type::kInt64:
    947       DCHECK_EQ(2u, instruction->GetVectorLength());
    948       __ psrlq(dst, Immediate(static_cast<uint8_t>(value)));
    949       break;
    950     default:
    951       LOG(FATAL) << "Unsupported SIMD type";
    952       UNREACHABLE();
    953   }
    954 }
    955 
    956 void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
    957   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
    958 
    959   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
    960 
    961   HInstruction* input = instruction->InputAt(0);
    962   bool is_zero = IsZeroBitPattern(input);
    963 
    964   switch (instruction->GetPackedType()) {
    965     case DataType::Type::kInt64:
    966       // Long needs extra temporary to load from register pairs.
    967       if (!is_zero) {
    968         locations->AddTemp(Location::RequiresFpuRegister());
    969       }
    970       FALLTHROUGH_INTENDED;
    971     case DataType::Type::kBool:
    972     case DataType::Type::kUint8:
    973     case DataType::Type::kInt8:
    974     case DataType::Type::kUint16:
    975     case DataType::Type::kInt16:
    976     case DataType::Type::kInt32:
    977       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
    978                                     : Location::RequiresRegister());
    979       locations->SetOut(Location::RequiresFpuRegister());
    980       break;
    981     case DataType::Type::kFloat32:
    982     case DataType::Type::kFloat64:
    983       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
    984                                     : Location::RequiresFpuRegister());
    985       locations->SetOut(Location::RequiresFpuRegister());
    986       break;
    987     default:
    988       LOG(FATAL) << "Unsupported SIMD type";
    989       UNREACHABLE();
    990   }
    991 }
    992 
    993 void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
    994   LocationSummary* locations = instruction->GetLocations();
    995   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    996 
    997   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
    998 
    999   // Zero out all other elements first.
   1000   __ xorps(dst, dst);
   1001 
   1002   // Shorthand for any type of zero.
   1003   if (IsZeroBitPattern(instruction->InputAt(0))) {
   1004     return;
   1005   }
   1006 
   1007   // Set required elements.
   1008   switch (instruction->GetPackedType()) {
   1009     case DataType::Type::kBool:
   1010     case DataType::Type::kUint8:
   1011     case DataType::Type::kInt8:
   1012     case DataType::Type::kUint16:
   1013     case DataType::Type::kInt16:  // TODO: up to here, and?
   1014       LOG(FATAL) << "Unsupported SIMD type";
   1015       UNREACHABLE();
   1016     case DataType::Type::kInt32:
   1017       DCHECK_EQ(4u, instruction->GetVectorLength());
   1018       __ movd(dst, locations->InAt(0).AsRegister<Register>());
   1019       break;
   1020     case DataType::Type::kInt64: {
   1021       DCHECK_EQ(2u, instruction->GetVectorLength());
   1022       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   1023       __ xorps(tmp, tmp);
   1024       __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
   1025       __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
   1026       __ punpckldq(dst, tmp);
   1027       break;
   1028     }
   1029     case DataType::Type::kFloat32:
   1030       DCHECK_EQ(4u, instruction->GetVectorLength());
   1031       __ movss(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
   1032       break;
   1033     case DataType::Type::kFloat64:
   1034       DCHECK_EQ(2u, instruction->GetVectorLength());
   1035       __ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
   1036       break;
   1037     default:
   1038       LOG(FATAL) << "Unsupported SIMD type";
   1039       UNREACHABLE();
   1040   }
   1041 }
   1042 
   1043 // Helper to set up locations for vector accumulations.
   1044 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
   1045   LocationSummary* locations = new (allocator) LocationSummary(instruction);
   1046   switch (instruction->GetPackedType()) {
   1047     case DataType::Type::kUint8:
   1048     case DataType::Type::kInt8:
   1049     case DataType::Type::kUint16:
   1050     case DataType::Type::kInt16:
   1051     case DataType::Type::kInt32:
   1052     case DataType::Type::kInt64:
   1053       locations->SetInAt(0, Location::RequiresFpuRegister());
   1054       locations->SetInAt(1, Location::RequiresFpuRegister());
   1055       locations->SetInAt(2, Location::RequiresFpuRegister());
   1056       locations->SetOut(Location::SameAsFirstInput());
   1057       break;
   1058     default:
   1059       LOG(FATAL) << "Unsupported SIMD type";
   1060       UNREACHABLE();
   1061   }
   1062 }
   1063 
   1064 void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
   1065   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
   1066 }
   1067 
   1068 void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
   1069   // TODO: pmaddwd?
   1070   LOG(FATAL) << "No SIMD for " << instruction->GetId();
   1071 }
   1072 
   1073 void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
   1074   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
   1075 }
   1076 
   1077 void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
   1078   // TODO: psadbw for unsigned?
   1079   LOG(FATAL) << "No SIMD for " << instruction->GetId();
   1080 }
   1081 
   1082 // Helper to set up locations for vector memory operations.
   1083 static void CreateVecMemLocations(ArenaAllocator* allocator,
   1084                                   HVecMemoryOperation* instruction,
   1085                                   bool is_load) {
   1086   LocationSummary* locations = new (allocator) LocationSummary(instruction);
   1087   switch (instruction->GetPackedType()) {
   1088     case DataType::Type::kBool:
   1089     case DataType::Type::kUint8:
   1090     case DataType::Type::kInt8:
   1091     case DataType::Type::kUint16:
   1092     case DataType::Type::kInt16:
   1093     case DataType::Type::kInt32:
   1094     case DataType::Type::kInt64:
   1095     case DataType::Type::kFloat32:
   1096     case DataType::Type::kFloat64:
   1097       locations->SetInAt(0, Location::RequiresRegister());
   1098       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   1099       if (is_load) {
   1100         locations->SetOut(Location::RequiresFpuRegister());
   1101       } else {
   1102         locations->SetInAt(2, Location::RequiresFpuRegister());
   1103       }
   1104       break;
   1105     default:
   1106       LOG(FATAL) << "Unsupported SIMD type";
   1107       UNREACHABLE();
   1108   }
   1109 }
   1110 
   1111 // Helper to construct address for vector memory operations.
   1112 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
   1113   Location base = locations->InAt(0);
   1114   Location index = locations->InAt(1);
   1115   ScaleFactor scale = TIMES_1;
   1116   switch (size) {
   1117     case 2: scale = TIMES_2; break;
   1118     case 4: scale = TIMES_4; break;
   1119     case 8: scale = TIMES_8; break;
   1120     default: break;
   1121   }
   1122   // Incorporate the string or array offset in the address computation.
   1123   uint32_t offset = is_string_char_at
   1124       ? mirror::String::ValueOffset().Uint32Value()
   1125       : mirror::Array::DataOffset(size).Uint32Value();
   1126   return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
   1127 }
   1128 
   1129 void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
   1130   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
   1131   // String load requires a temporary for the compressed load.
   1132   if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
   1133     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
   1134   }
   1135 }
   1136 
   1137 void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
   1138   LocationSummary* locations = instruction->GetLocations();
   1139   size_t size = DataType::Size(instruction->GetPackedType());
   1140   Address address = VecAddress(locations, size, instruction->IsStringCharAt());
   1141   XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
   1142   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   1143   switch (instruction->GetPackedType()) {
   1144     case DataType::Type::kUint16:
   1145       DCHECK_EQ(8u, instruction->GetVectorLength());
   1146       // Special handling of compressed/uncompressed string load.
   1147       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
   1148         NearLabel done, not_compressed;
   1149         XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   1150         // Test compression bit.
   1151         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1152                       "Expecting 0=compressed, 1=uncompressed");
   1153         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
   1154         __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
   1155         __ j(kNotZero, &not_compressed);
   1156         // Zero extend 8 compressed bytes into 8 chars.
   1157         __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
   1158         __ pxor(tmp, tmp);
   1159         __ punpcklbw(reg, tmp);
   1160         __ jmp(&done);
   1161         // Load 4 direct uncompressed chars.
   1162         __ Bind(&not_compressed);
   1163         is_aligned16 ?  __ movdqa(reg, address) :  __ movdqu(reg, address);
   1164         __ Bind(&done);
   1165         return;
   1166       }
   1167       FALLTHROUGH_INTENDED;
   1168     case DataType::Type::kBool:
   1169     case DataType::Type::kUint8:
   1170     case DataType::Type::kInt8:
   1171     case DataType::Type::kInt16:
   1172     case DataType::Type::kInt32:
   1173     case DataType::Type::kInt64:
   1174       DCHECK_LE(2u, instruction->GetVectorLength());
   1175       DCHECK_LE(instruction->GetVectorLength(), 16u);
   1176       is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
   1177       break;
   1178     case DataType::Type::kFloat32:
   1179       DCHECK_EQ(4u, instruction->GetVectorLength());
   1180       is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
   1181       break;
   1182     case DataType::Type::kFloat64:
   1183       DCHECK_EQ(2u, instruction->GetVectorLength());
   1184       is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
   1185       break;
   1186     default:
   1187       LOG(FATAL) << "Unsupported SIMD type";
   1188       UNREACHABLE();
   1189   }
   1190 }
   1191 
   1192 void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) {
   1193   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
   1194 }
   1195 
   1196 void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
   1197   LocationSummary* locations = instruction->GetLocations();
   1198   size_t size = DataType::Size(instruction->GetPackedType());
   1199   Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
   1200   XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
   1201   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   1202   switch (instruction->GetPackedType()) {
   1203     case DataType::Type::kBool:
   1204     case DataType::Type::kUint8:
   1205     case DataType::Type::kInt8:
   1206     case DataType::Type::kUint16:
   1207     case DataType::Type::kInt16:
   1208     case DataType::Type::kInt32:
   1209     case DataType::Type::kInt64:
   1210       DCHECK_LE(2u, instruction->GetVectorLength());
   1211       DCHECK_LE(instruction->GetVectorLength(), 16u);
   1212       is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
   1213       break;
   1214     case DataType::Type::kFloat32:
   1215       DCHECK_EQ(4u, instruction->GetVectorLength());
   1216       is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
   1217       break;
   1218     case DataType::Type::kFloat64:
   1219       DCHECK_EQ(2u, instruction->GetVectorLength());
   1220       is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
   1221       break;
   1222     default:
   1223       LOG(FATAL) << "Unsupported SIMD type";
   1224       UNREACHABLE();
   1225   }
   1226 }
   1227 
   1228 #undef __
   1229 
   1230 }  // namespace x86
   1231 }  // namespace art
   1232