Home | History | Annotate | Download | only in optimizing
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "code_generator_x86_64.h"
     18 #include "mirror/array-inl.h"
     19 
     20 namespace art {
     21 namespace x86_64 {
     22 
     23 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
     24 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
     25 
     26 void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
     27   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
     28   switch (instruction->GetPackedType()) {
     29     case Primitive::kPrimBoolean:
     30     case Primitive::kPrimByte:
     31     case Primitive::kPrimChar:
     32     case Primitive::kPrimShort:
     33     case Primitive::kPrimInt:
     34     case Primitive::kPrimLong:
     35       locations->SetInAt(0, Location::RequiresRegister());
     36       locations->SetOut(Location::RequiresFpuRegister());
     37       break;
     38     case Primitive::kPrimFloat:
     39     case Primitive::kPrimDouble:
     40       locations->SetInAt(0, Location::RequiresFpuRegister());
     41       locations->SetOut(Location::SameAsFirstInput());
     42       break;
     43     default:
     44       LOG(FATAL) << "Unsupported SIMD type";
     45       UNREACHABLE();
     46   }
     47 }
     48 
     49 void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
     50   LocationSummary* locations = instruction->GetLocations();
     51   XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
     52   switch (instruction->GetPackedType()) {
     53     case Primitive::kPrimBoolean:
     54     case Primitive::kPrimByte:
     55       DCHECK_EQ(16u, instruction->GetVectorLength());
     56       __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
     57       __ punpcklbw(reg, reg);
     58       __ punpcklwd(reg, reg);
     59       __ pshufd(reg, reg, Immediate(0));
     60       break;
     61     case Primitive::kPrimChar:
     62     case Primitive::kPrimShort:
     63       DCHECK_EQ(8u, instruction->GetVectorLength());
     64       __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
     65       __ punpcklwd(reg, reg);
     66       __ pshufd(reg, reg, Immediate(0));
     67       break;
     68     case Primitive::kPrimInt:
     69       DCHECK_EQ(4u, instruction->GetVectorLength());
     70       __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
     71       __ pshufd(reg, reg, Immediate(0));
     72       break;
     73     case Primitive::kPrimLong:
     74       DCHECK_EQ(2u, instruction->GetVectorLength());
     75       __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());  // is 64-bit
     76       __ punpcklqdq(reg, reg);
     77       break;
     78     case Primitive::kPrimFloat:
     79       DCHECK(locations->InAt(0).Equals(locations->Out()));
     80       DCHECK_EQ(4u, instruction->GetVectorLength());
     81       __ shufps(reg, reg, Immediate(0));
     82       break;
     83     case Primitive::kPrimDouble:
     84       DCHECK(locations->InAt(0).Equals(locations->Out()));
     85       DCHECK_EQ(2u, instruction->GetVectorLength());
     86       __ shufpd(reg, reg, Immediate(0));
     87       break;
     88     default:
     89       LOG(FATAL) << "Unsupported SIMD type";
     90       UNREACHABLE();
     91   }
     92 }
     93 
     94 void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
     95   LOG(FATAL) << "No SIMD for " << instruction->GetId();
     96 }
     97 
     98 void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
     99   LOG(FATAL) << "No SIMD for " << instruction->GetId();
    100 }
    101 
    102 void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
    103   LOG(FATAL) << "No SIMD for " << instruction->GetId();
    104 }
    105 
    106 void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
    107   LOG(FATAL) << "No SIMD for " << instruction->GetId();
    108 }
    109 
    110 // Helper to set up locations for vector unary operations.
    111 static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
    112   LocationSummary* locations = new (arena) LocationSummary(instruction);
    113   switch (instruction->GetPackedType()) {
    114     case Primitive::kPrimBoolean:
    115     case Primitive::kPrimByte:
    116     case Primitive::kPrimChar:
    117     case Primitive::kPrimShort:
    118     case Primitive::kPrimInt:
    119     case Primitive::kPrimLong:
    120     case Primitive::kPrimFloat:
    121     case Primitive::kPrimDouble:
    122       locations->SetInAt(0, Location::RequiresFpuRegister());
    123       locations->SetOut(Location::RequiresFpuRegister());
    124       break;
    125     default:
    126       LOG(FATAL) << "Unsupported SIMD type";
    127       UNREACHABLE();
    128   }
    129 }
    130 
    131 void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
    132   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
    133 }
    134 
    135 void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) {
    136   LocationSummary* locations = instruction->GetLocations();
    137   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    138   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    139   Primitive::Type from = instruction->GetInputType();
    140   Primitive::Type to = instruction->GetResultType();
    141   if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
    142     DCHECK_EQ(4u, instruction->GetVectorLength());
    143     __ cvtdq2ps(dst, src);
    144   } else {
    145     LOG(FATAL) << "Unsupported SIMD type";
    146   }
    147 }
    148 
    149 void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) {
    150   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
    151 }
    152 
    153 void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
    154   LocationSummary* locations = instruction->GetLocations();
    155   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    156   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    157   switch (instruction->GetPackedType()) {
    158     case Primitive::kPrimByte:
    159       DCHECK_EQ(16u, instruction->GetVectorLength());
    160       __ pxor(dst, dst);
    161       __ psubb(dst, src);
    162       break;
    163     case Primitive::kPrimChar:
    164     case Primitive::kPrimShort:
    165       DCHECK_EQ(8u, instruction->GetVectorLength());
    166       __ pxor(dst, dst);
    167       __ psubw(dst, src);
    168       break;
    169     case Primitive::kPrimInt:
    170       DCHECK_EQ(4u, instruction->GetVectorLength());
    171       __ pxor(dst, dst);
    172       __ psubd(dst, src);
    173       break;
    174     case Primitive::kPrimLong:
    175       DCHECK_EQ(2u, instruction->GetVectorLength());
    176       __ pxor(dst, dst);
    177       __ psubq(dst, src);
    178       break;
    179     case Primitive::kPrimFloat:
    180       DCHECK_EQ(4u, instruction->GetVectorLength());
    181       __ xorps(dst, dst);
    182       __ subps(dst, src);
    183       break;
    184     case Primitive::kPrimDouble:
    185       DCHECK_EQ(2u, instruction->GetVectorLength());
    186       __ xorpd(dst, dst);
    187       __ subpd(dst, src);
    188       break;
    189     default:
    190       LOG(FATAL) << "Unsupported SIMD type";
    191       UNREACHABLE();
    192   }
    193 }
    194 
    195 void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
    196   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
    197   // Integral-abs requires a temporary for the comparison.
    198   if (instruction->GetPackedType() == Primitive::kPrimInt) {
    199     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
    200   }
    201 }
    202 
    203 void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) {
    204   LocationSummary* locations = instruction->GetLocations();
    205   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    206   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    207   switch (instruction->GetPackedType()) {
    208     case Primitive::kPrimInt: {
    209       DCHECK_EQ(4u, instruction->GetVectorLength());
    210       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    211       __ movaps(dst, src);
    212       __ pxor(tmp, tmp);
    213       __ pcmpgtd(tmp, dst);
    214       __ pxor(dst, tmp);
    215       __ psubd(dst, tmp);
    216       break;
    217     }
    218     case Primitive::kPrimFloat:
    219       DCHECK_EQ(4u, instruction->GetVectorLength());
    220       __ pcmpeqb(dst, dst);  // all ones
    221       __ psrld(dst, Immediate(1));
    222       __ andps(dst, src);
    223       break;
    224     case Primitive::kPrimDouble:
    225       DCHECK_EQ(2u, instruction->GetVectorLength());
    226       __ pcmpeqb(dst, dst);  // all ones
    227       __ psrlq(dst, Immediate(1));
    228       __ andpd(dst, src);
    229       break;
    230     default:
    231       LOG(FATAL) << "Unsupported SIMD type";
    232       UNREACHABLE();
    233   }
    234 }
    235 
    236 void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) {
    237   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
    238   // Boolean-not requires a temporary to construct the 16 x one.
    239   if (instruction->GetPackedType() == Primitive::kPrimBoolean) {
    240     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
    241   }
    242 }
    243 
    244 void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) {
    245   LocationSummary* locations = instruction->GetLocations();
    246   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    247   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    248   switch (instruction->GetPackedType()) {
    249     case Primitive::kPrimBoolean: {  // special case boolean-not
    250       DCHECK_EQ(16u, instruction->GetVectorLength());
    251       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    252       __ pxor(dst, dst);
    253       __ pcmpeqb(tmp, tmp);  // all ones
    254       __ psubb(dst, tmp);  // 16 x one
    255       __ pxor(dst, src);
    256       break;
    257     }
    258     case Primitive::kPrimByte:
    259     case Primitive::kPrimChar:
    260     case Primitive::kPrimShort:
    261     case Primitive::kPrimInt:
    262     case Primitive::kPrimLong:
    263       DCHECK_LE(2u, instruction->GetVectorLength());
    264       DCHECK_LE(instruction->GetVectorLength(), 16u);
    265       __ pcmpeqb(dst, dst);  // all ones
    266       __ pxor(dst, src);
    267       break;
    268     case Primitive::kPrimFloat:
    269       DCHECK_EQ(4u, instruction->GetVectorLength());
    270       __ pcmpeqb(dst, dst);  // all ones
    271       __ xorps(dst, src);
    272       break;
    273     case Primitive::kPrimDouble:
    274       DCHECK_EQ(2u, instruction->GetVectorLength());
    275       __ pcmpeqb(dst, dst);  // all ones
    276       __ xorpd(dst, src);
    277       break;
    278     default:
    279       LOG(FATAL) << "Unsupported SIMD type";
    280       UNREACHABLE();
    281   }
    282 }
    283 
    284 // Helper to set up locations for vector binary operations.
    285 static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
    286   LocationSummary* locations = new (arena) LocationSummary(instruction);
    287   switch (instruction->GetPackedType()) {
    288     case Primitive::kPrimBoolean:
    289     case Primitive::kPrimByte:
    290     case Primitive::kPrimChar:
    291     case Primitive::kPrimShort:
    292     case Primitive::kPrimInt:
    293     case Primitive::kPrimLong:
    294     case Primitive::kPrimFloat:
    295     case Primitive::kPrimDouble:
    296       locations->SetInAt(0, Location::RequiresFpuRegister());
    297       locations->SetInAt(1, Location::RequiresFpuRegister());
    298       locations->SetOut(Location::SameAsFirstInput());
    299       break;
    300     default:
    301       LOG(FATAL) << "Unsupported SIMD type";
    302       UNREACHABLE();
    303   }
    304 }
    305 
    306 void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) {
    307   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    308 }
    309 
    310 void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
    311   LocationSummary* locations = instruction->GetLocations();
    312   DCHECK(locations->InAt(0).Equals(locations->Out()));
    313   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    314   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    315   switch (instruction->GetPackedType()) {
    316     case Primitive::kPrimByte:
    317       DCHECK_EQ(16u, instruction->GetVectorLength());
    318       __ paddb(dst, src);
    319       break;
    320     case Primitive::kPrimChar:
    321     case Primitive::kPrimShort:
    322       DCHECK_EQ(8u, instruction->GetVectorLength());
    323       __ paddw(dst, src);
    324       break;
    325     case Primitive::kPrimInt:
    326       DCHECK_EQ(4u, instruction->GetVectorLength());
    327       __ paddd(dst, src);
    328       break;
    329     case Primitive::kPrimLong:
    330       DCHECK_EQ(2u, instruction->GetVectorLength());
    331       __ paddq(dst, src);
    332       break;
    333     case Primitive::kPrimFloat:
    334       DCHECK_EQ(4u, instruction->GetVectorLength());
    335       __ addps(dst, src);
    336       break;
    337     case Primitive::kPrimDouble:
    338       DCHECK_EQ(2u, instruction->GetVectorLength());
    339       __ addpd(dst, src);
    340       break;
    341     default:
    342       LOG(FATAL) << "Unsupported SIMD type";
    343       UNREACHABLE();
    344   }
    345 }
    346 
    347 void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
    348   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    349 }
    350 
    351 void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
    352   LocationSummary* locations = instruction->GetLocations();
    353   DCHECK(locations->InAt(0).Equals(locations->Out()));
    354   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    355   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    356 
    357   DCHECK(instruction->IsRounded());
    358   DCHECK(instruction->IsUnsigned());
    359 
    360   switch (instruction->GetPackedType()) {
    361     case Primitive::kPrimByte:
    362       DCHECK_EQ(16u, instruction->GetVectorLength());
    363      __ pavgb(dst, src);
    364      return;
    365     case Primitive::kPrimChar:
    366     case Primitive::kPrimShort:
    367       DCHECK_EQ(8u, instruction->GetVectorLength());
    368       __ pavgw(dst, src);
    369       return;
    370     default:
    371       LOG(FATAL) << "Unsupported SIMD type";
    372       UNREACHABLE();
    373   }
    374 }
    375 
    376 void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
    377   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    378 }
    379 
    380 void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
    381   LocationSummary* locations = instruction->GetLocations();
    382   DCHECK(locations->InAt(0).Equals(locations->Out()));
    383   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    384   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    385   switch (instruction->GetPackedType()) {
    386     case Primitive::kPrimByte:
    387       DCHECK_EQ(16u, instruction->GetVectorLength());
    388       __ psubb(dst, src);
    389       break;
    390     case Primitive::kPrimChar:
    391     case Primitive::kPrimShort:
    392       DCHECK_EQ(8u, instruction->GetVectorLength());
    393       __ psubw(dst, src);
    394       break;
    395     case Primitive::kPrimInt:
    396       DCHECK_EQ(4u, instruction->GetVectorLength());
    397       __ psubd(dst, src);
    398       break;
    399     case Primitive::kPrimLong:
    400       DCHECK_EQ(2u, instruction->GetVectorLength());
    401       __ psubq(dst, src);
    402       break;
    403     case Primitive::kPrimFloat:
    404       DCHECK_EQ(4u, instruction->GetVectorLength());
    405       __ subps(dst, src);
    406       break;
    407     case Primitive::kPrimDouble:
    408       DCHECK_EQ(2u, instruction->GetVectorLength());
    409       __ subpd(dst, src);
    410       break;
    411     default:
    412       LOG(FATAL) << "Unsupported SIMD type";
    413       UNREACHABLE();
    414   }
    415 }
    416 
    417 void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) {
    418   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    419 }
    420 
    421 void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
    422   LocationSummary* locations = instruction->GetLocations();
    423   DCHECK(locations->InAt(0).Equals(locations->Out()));
    424   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    425   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    426   switch (instruction->GetPackedType()) {
    427     case Primitive::kPrimChar:
    428     case Primitive::kPrimShort:
    429       DCHECK_EQ(8u, instruction->GetVectorLength());
    430       __ pmullw(dst, src);
    431       break;
    432     case Primitive::kPrimInt:
    433       DCHECK_EQ(4u, instruction->GetVectorLength());
    434       __ pmulld(dst, src);
    435       break;
    436     case Primitive::kPrimFloat:
    437       DCHECK_EQ(4u, instruction->GetVectorLength());
    438       __ mulps(dst, src);
    439       break;
    440     case Primitive::kPrimDouble:
    441       DCHECK_EQ(2u, instruction->GetVectorLength());
    442       __ mulpd(dst, src);
    443       break;
    444     default:
    445       LOG(FATAL) << "Unsupported SIMD type";
    446       UNREACHABLE();
    447   }
    448 }
    449 
    450 void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) {
    451   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    452 }
    453 
    454 void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
    455   LocationSummary* locations = instruction->GetLocations();
    456   DCHECK(locations->InAt(0).Equals(locations->Out()));
    457   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    458   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    459   switch (instruction->GetPackedType()) {
    460     case Primitive::kPrimFloat:
    461       DCHECK_EQ(4u, instruction->GetVectorLength());
    462       __ divps(dst, src);
    463       break;
    464     case Primitive::kPrimDouble:
    465       DCHECK_EQ(2u, instruction->GetVectorLength());
    466       __ divpd(dst, src);
    467       break;
    468     default:
    469       LOG(FATAL) << "Unsupported SIMD type";
    470       UNREACHABLE();
    471   }
    472 }
    473 
    474 void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
    475   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    476 }
    477 
    478 void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
    479   LocationSummary* locations = instruction->GetLocations();
    480   DCHECK(locations->InAt(0).Equals(locations->Out()));
    481   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    482   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    483   switch (instruction->GetPackedType()) {
    484     case Primitive::kPrimByte:
    485       DCHECK_EQ(16u, instruction->GetVectorLength());
    486       if (instruction->IsUnsigned()) {
    487         __ pminub(dst, src);
    488       } else {
    489         __ pminsb(dst, src);
    490       }
    491       break;
    492     case Primitive::kPrimChar:
    493     case Primitive::kPrimShort:
    494       DCHECK_EQ(8u, instruction->GetVectorLength());
    495       if (instruction->IsUnsigned()) {
    496         __ pminuw(dst, src);
    497       } else {
    498         __ pminsw(dst, src);
    499       }
    500       break;
    501     case Primitive::kPrimInt:
    502       DCHECK_EQ(4u, instruction->GetVectorLength());
    503       if (instruction->IsUnsigned()) {
    504         __ pminud(dst, src);
    505       } else {
    506         __ pminsd(dst, src);
    507       }
    508       break;
    509     // Next cases are sloppy wrt 0.0 vs -0.0.
    510     case Primitive::kPrimFloat:
    511       DCHECK_EQ(4u, instruction->GetVectorLength());
    512       DCHECK(!instruction->IsUnsigned());
    513       __ minps(dst, src);
    514       break;
    515     case Primitive::kPrimDouble:
    516       DCHECK_EQ(2u, instruction->GetVectorLength());
    517       DCHECK(!instruction->IsUnsigned());
    518       __ minpd(dst, src);
    519       break;
    520     default:
    521       LOG(FATAL) << "Unsupported SIMD type";
    522       UNREACHABLE();
    523   }
    524 }
    525 
    526 void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
    527   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    528 }
    529 
    530 void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
    531   LocationSummary* locations = instruction->GetLocations();
    532   DCHECK(locations->InAt(0).Equals(locations->Out()));
    533   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    534   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    535   switch (instruction->GetPackedType()) {
    536     case Primitive::kPrimByte:
    537       DCHECK_EQ(16u, instruction->GetVectorLength());
    538       if (instruction->IsUnsigned()) {
    539         __ pmaxub(dst, src);
    540       } else {
    541         __ pmaxsb(dst, src);
    542       }
    543       break;
    544     case Primitive::kPrimChar:
    545     case Primitive::kPrimShort:
    546       DCHECK_EQ(8u, instruction->GetVectorLength());
    547       if (instruction->IsUnsigned()) {
    548         __ pmaxuw(dst, src);
    549       } else {
    550         __ pmaxsw(dst, src);
    551       }
    552       break;
    553     case Primitive::kPrimInt:
    554       DCHECK_EQ(4u, instruction->GetVectorLength());
    555       if (instruction->IsUnsigned()) {
    556         __ pmaxud(dst, src);
    557       } else {
    558         __ pmaxsd(dst, src);
    559       }
    560       break;
    561     // Next cases are sloppy wrt 0.0 vs -0.0.
    562     case Primitive::kPrimFloat:
    563       DCHECK_EQ(4u, instruction->GetVectorLength());
    564       DCHECK(!instruction->IsUnsigned());
    565       __ maxps(dst, src);
    566       break;
    567     case Primitive::kPrimDouble:
    568       DCHECK_EQ(2u, instruction->GetVectorLength());
    569       DCHECK(!instruction->IsUnsigned());
    570       __ maxpd(dst, src);
    571       break;
    572     default:
    573       LOG(FATAL) << "Unsupported SIMD type";
    574       UNREACHABLE();
    575   }
    576 }
    577 
    578 void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
    579   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    580 }
    581 
    582 void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) {
    583   LocationSummary* locations = instruction->GetLocations();
    584   DCHECK(locations->InAt(0).Equals(locations->Out()));
    585   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    586   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    587   switch (instruction->GetPackedType()) {
    588     case Primitive::kPrimBoolean:
    589     case Primitive::kPrimByte:
    590     case Primitive::kPrimChar:
    591     case Primitive::kPrimShort:
    592     case Primitive::kPrimInt:
    593     case Primitive::kPrimLong:
    594       DCHECK_LE(2u, instruction->GetVectorLength());
    595       DCHECK_LE(instruction->GetVectorLength(), 16u);
    596       __ pand(dst, src);
    597       break;
    598     case Primitive::kPrimFloat:
    599       DCHECK_EQ(4u, instruction->GetVectorLength());
    600       __ andps(dst, src);
    601       break;
    602     case Primitive::kPrimDouble:
    603       DCHECK_EQ(2u, instruction->GetVectorLength());
    604       __ andpd(dst, src);
    605       break;
    606     default:
    607       LOG(FATAL) << "Unsupported SIMD type";
    608       UNREACHABLE();
    609   }
    610 }
    611 
    612 void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) {
    613   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    614 }
    615 
    616 void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) {
    617   LocationSummary* locations = instruction->GetLocations();
    618   DCHECK(locations->InAt(0).Equals(locations->Out()));
    619   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    620   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    621   switch (instruction->GetPackedType()) {
    622     case Primitive::kPrimBoolean:
    623     case Primitive::kPrimByte:
    624     case Primitive::kPrimChar:
    625     case Primitive::kPrimShort:
    626     case Primitive::kPrimInt:
    627     case Primitive::kPrimLong:
    628       DCHECK_LE(2u, instruction->GetVectorLength());
    629       DCHECK_LE(instruction->GetVectorLength(), 16u);
    630       __ pandn(dst, src);
    631       break;
    632     case Primitive::kPrimFloat:
    633       DCHECK_EQ(4u, instruction->GetVectorLength());
    634       __ andnps(dst, src);
    635       break;
    636     case Primitive::kPrimDouble:
    637       DCHECK_EQ(2u, instruction->GetVectorLength());
    638       __ andnpd(dst, src);
    639       break;
    640     default:
    641       LOG(FATAL) << "Unsupported SIMD type";
    642       UNREACHABLE();
    643   }
    644 }
    645 
    646 void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) {
    647   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    648 }
    649 
    650 void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) {
    651   LocationSummary* locations = instruction->GetLocations();
    652   DCHECK(locations->InAt(0).Equals(locations->Out()));
    653   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    654   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    655   switch (instruction->GetPackedType()) {
    656     case Primitive::kPrimBoolean:
    657     case Primitive::kPrimByte:
    658     case Primitive::kPrimChar:
    659     case Primitive::kPrimShort:
    660     case Primitive::kPrimInt:
    661     case Primitive::kPrimLong:
    662       DCHECK_LE(2u, instruction->GetVectorLength());
    663       DCHECK_LE(instruction->GetVectorLength(), 16u);
    664       __ por(dst, src);
    665       break;
    666     case Primitive::kPrimFloat:
    667       DCHECK_EQ(4u, instruction->GetVectorLength());
    668       __ orps(dst, src);
    669       break;
    670     case Primitive::kPrimDouble:
    671       DCHECK_EQ(2u, instruction->GetVectorLength());
    672       __ orpd(dst, src);
    673       break;
    674     default:
    675       LOG(FATAL) << "Unsupported SIMD type";
    676       UNREACHABLE();
    677   }
    678 }
    679 
    680 void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) {
    681   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    682 }
    683 
    684 void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) {
    685   LocationSummary* locations = instruction->GetLocations();
    686   DCHECK(locations->InAt(0).Equals(locations->Out()));
    687   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    688   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    689   switch (instruction->GetPackedType()) {
    690     case Primitive::kPrimBoolean:
    691     case Primitive::kPrimByte:
    692     case Primitive::kPrimChar:
    693     case Primitive::kPrimShort:
    694     case Primitive::kPrimInt:
    695     case Primitive::kPrimLong:
    696       DCHECK_LE(2u, instruction->GetVectorLength());
    697       DCHECK_LE(instruction->GetVectorLength(), 16u);
    698       __ pxor(dst, src);
    699       break;
    700     case Primitive::kPrimFloat:
    701       DCHECK_EQ(4u, instruction->GetVectorLength());
    702       __ xorps(dst, src);
    703       break;
    704     case Primitive::kPrimDouble:
    705       DCHECK_EQ(2u, instruction->GetVectorLength());
    706       __ xorpd(dst, src);
    707       break;
    708     default:
    709       LOG(FATAL) << "Unsupported SIMD type";
    710       UNREACHABLE();
    711   }
    712 }
    713 
    714 // Helper to set up locations for vector shift operations.
    715 static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
    716   LocationSummary* locations = new (arena) LocationSummary(instruction);
    717   switch (instruction->GetPackedType()) {
    718     case Primitive::kPrimChar:
    719     case Primitive::kPrimShort:
    720     case Primitive::kPrimInt:
    721     case Primitive::kPrimLong:
    722       locations->SetInAt(0, Location::RequiresFpuRegister());
    723       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
    724       locations->SetOut(Location::SameAsFirstInput());
    725       break;
    726     default:
    727       LOG(FATAL) << "Unsupported SIMD type";
    728       UNREACHABLE();
    729   }
    730 }
    731 
    732 void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) {
    733   CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
    734 }
    735 
    736 void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) {
    737   LocationSummary* locations = instruction->GetLocations();
    738   DCHECK(locations->InAt(0).Equals(locations->Out()));
    739   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    740   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    741   switch (instruction->GetPackedType()) {
    742     case Primitive::kPrimChar:
    743     case Primitive::kPrimShort:
    744       DCHECK_EQ(8u, instruction->GetVectorLength());
    745       __ psllw(dst, Immediate(static_cast<int8_t>(value)));
    746       break;
    747     case Primitive::kPrimInt:
    748       DCHECK_EQ(4u, instruction->GetVectorLength());
    749       __ pslld(dst, Immediate(static_cast<int8_t>(value)));
    750       break;
    751     case Primitive::kPrimLong:
    752       DCHECK_EQ(2u, instruction->GetVectorLength());
    753       __ psllq(dst, Immediate(static_cast<int8_t>(value)));
    754       break;
    755     default:
    756       LOG(FATAL) << "Unsupported SIMD type";
    757       UNREACHABLE();
    758   }
    759 }
    760 
    761 void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) {
    762   CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
    763 }
    764 
    765 void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) {
    766   LocationSummary* locations = instruction->GetLocations();
    767   DCHECK(locations->InAt(0).Equals(locations->Out()));
    768   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    769   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    770   switch (instruction->GetPackedType()) {
    771     case Primitive::kPrimChar:
    772     case Primitive::kPrimShort:
    773       DCHECK_EQ(8u, instruction->GetVectorLength());
    774       __ psraw(dst, Immediate(static_cast<int8_t>(value)));
    775       break;
    776     case Primitive::kPrimInt:
    777       DCHECK_EQ(4u, instruction->GetVectorLength());
    778       __ psrad(dst, Immediate(static_cast<int8_t>(value)));
    779       break;
    780     default:
    781       LOG(FATAL) << "Unsupported SIMD type";
    782       UNREACHABLE();
    783   }
    784 }
    785 
    786 void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) {
    787   CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
    788 }
    789 
    790 void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
    791   LocationSummary* locations = instruction->GetLocations();
    792   DCHECK(locations->InAt(0).Equals(locations->Out()));
    793   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    794   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    795   switch (instruction->GetPackedType()) {
    796     case Primitive::kPrimChar:
    797     case Primitive::kPrimShort:
    798       DCHECK_EQ(8u, instruction->GetVectorLength());
    799       __ psrlw(dst, Immediate(static_cast<int8_t>(value)));
    800       break;
    801     case Primitive::kPrimInt:
    802       DCHECK_EQ(4u, instruction->GetVectorLength());
    803       __ psrld(dst, Immediate(static_cast<int8_t>(value)));
    804       break;
    805     case Primitive::kPrimLong:
    806       DCHECK_EQ(2u, instruction->GetVectorLength());
    807       __ psrlq(dst, Immediate(static_cast<int8_t>(value)));
    808       break;
    809     default:
    810       LOG(FATAL) << "Unsupported SIMD type";
    811       UNREACHABLE();
    812   }
    813 }
    814 
    815 void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
    816   LOG(FATAL) << "No SIMD for " << instr->GetId();
    817 }
    818 
    819 void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
    820   LOG(FATAL) << "No SIMD for " << instr->GetId();
    821 }
    822 
    823 // Helper to set up locations for vector memory operations.
    824 static void CreateVecMemLocations(ArenaAllocator* arena,
    825                                   HVecMemoryOperation* instruction,
    826                                   bool is_load) {
    827   LocationSummary* locations = new (arena) LocationSummary(instruction);
    828   switch (instruction->GetPackedType()) {
    829     case Primitive::kPrimBoolean:
    830     case Primitive::kPrimByte:
    831     case Primitive::kPrimChar:
    832     case Primitive::kPrimShort:
    833     case Primitive::kPrimInt:
    834     case Primitive::kPrimLong:
    835     case Primitive::kPrimFloat:
    836     case Primitive::kPrimDouble:
    837       locations->SetInAt(0, Location::RequiresRegister());
    838       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
    839       if (is_load) {
    840         locations->SetOut(Location::RequiresFpuRegister());
    841       } else {
    842         locations->SetInAt(2, Location::RequiresFpuRegister());
    843       }
    844       break;
    845     default:
    846       LOG(FATAL) << "Unsupported SIMD type";
    847       UNREACHABLE();
    848   }
    849 }
    850 
    851 // Helper to construct address for vector memory operations.
    852 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
    853   Location base = locations->InAt(0);
    854   Location index = locations->InAt(1);
    855   ScaleFactor scale = TIMES_1;
    856   switch (size) {
    857     case 2: scale = TIMES_2; break;
    858     case 4: scale = TIMES_4; break;
    859     case 8: scale = TIMES_8; break;
    860     default: break;
    861   }
    862   uint32_t offset = is_string_char_at
    863       ? mirror::String::ValueOffset().Uint32Value()
    864       : mirror::Array::DataOffset(size).Uint32Value();
    865   return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
    866 }
    867 
    868 void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
    869   CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
    870   // String load requires a temporary for the compressed load.
    871   if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
    872     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
    873   }
    874 }
    875 
    876 void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
    877   LocationSummary* locations = instruction->GetLocations();
    878   size_t size = Primitive::ComponentSize(instruction->GetPackedType());
    879   Address address = VecAddress(locations, size, instruction->IsStringCharAt());
    880   XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
    881   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
    882   switch (instruction->GetPackedType()) {
    883     case Primitive::kPrimChar:
    884       DCHECK_EQ(8u, instruction->GetVectorLength());
    885       // Special handling of compressed/uncompressed string load.
    886       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
    887         NearLabel done, not_compressed;
    888         XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    889         // Test compression bit.
    890         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
    891                       "Expecting 0=compressed, 1=uncompressed");
    892         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
    893         __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
    894         __ j(kNotZero, &not_compressed);
    895         // Zero extend 8 compressed bytes into 8 chars.
    896         __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
    897         __ pxor(tmp, tmp);
    898         __ punpcklbw(reg, tmp);
    899         __ jmp(&done);
    900         // Load 8 direct uncompressed chars.
    901         __ Bind(&not_compressed);
    902         is_aligned16 ?  __ movdqa(reg, address) :  __ movdqu(reg, address);
    903         __ Bind(&done);
    904         return;
    905       }
    906       FALLTHROUGH_INTENDED;
    907     case Primitive::kPrimBoolean:
    908     case Primitive::kPrimByte:
    909     case Primitive::kPrimShort:
    910     case Primitive::kPrimInt:
    911     case Primitive::kPrimLong:
    912       DCHECK_LE(2u, instruction->GetVectorLength());
    913       DCHECK_LE(instruction->GetVectorLength(), 16u);
    914       is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
    915       break;
    916     case Primitive::kPrimFloat:
    917       DCHECK_EQ(4u, instruction->GetVectorLength());
    918       is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
    919       break;
    920     case Primitive::kPrimDouble:
    921       DCHECK_EQ(2u, instruction->GetVectorLength());
    922       is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
    923       break;
    924     default:
    925       LOG(FATAL) << "Unsupported SIMD type";
    926       UNREACHABLE();
    927   }
    928 }
    929 
    930 void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) {
    931   CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false);
    932 }
    933 
    934 void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
    935   LocationSummary* locations = instruction->GetLocations();
    936   size_t size = Primitive::ComponentSize(instruction->GetPackedType());
    937   Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
    938   XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
    939   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
    940   switch (instruction->GetPackedType()) {
    941     case Primitive::kPrimBoolean:
    942     case Primitive::kPrimByte:
    943     case Primitive::kPrimChar:
    944     case Primitive::kPrimShort:
    945     case Primitive::kPrimInt:
    946     case Primitive::kPrimLong:
    947       DCHECK_LE(2u, instruction->GetVectorLength());
    948       DCHECK_LE(instruction->GetVectorLength(), 16u);
    949       is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
    950       break;
    951     case Primitive::kPrimFloat:
    952       DCHECK_EQ(4u, instruction->GetVectorLength());
    953       is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
    954       break;
    955     case Primitive::kPrimDouble:
    956       DCHECK_EQ(2u, instruction->GetVectorLength());
    957       is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
    958       break;
    959     default:
    960       LOG(FATAL) << "Unsupported SIMD type";
    961       UNREACHABLE();
    962   }
    963 }
    964 
    965 #undef __
    966 
    967 }  // namespace x86_64
    968 }  // namespace art
    969