Home | History | Annotate | Download | only in optimizing
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "code_generator_x86.h"
     18 #include "mirror/array-inl.h"
     19 
     20 namespace art {
     21 namespace x86 {
     22 
     23 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
     24 #define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
     25 
     26 void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
     27   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
     28   switch (instruction->GetPackedType()) {
     29     case Primitive::kPrimLong:
     30       // Long needs extra temporary to load the register pair.
     31       locations->AddTemp(Location::RequiresFpuRegister());
     32       FALLTHROUGH_INTENDED;
     33     case Primitive::kPrimBoolean:
     34     case Primitive::kPrimByte:
     35     case Primitive::kPrimChar:
     36     case Primitive::kPrimShort:
     37     case Primitive::kPrimInt:
     38       locations->SetInAt(0, Location::RequiresRegister());
     39       locations->SetOut(Location::RequiresFpuRegister());
     40       break;
     41     case Primitive::kPrimFloat:
     42     case Primitive::kPrimDouble:
     43       locations->SetInAt(0, Location::RequiresFpuRegister());
     44       locations->SetOut(Location::SameAsFirstInput());
     45       break;
     46     default:
     47       LOG(FATAL) << "Unsupported SIMD type";
     48       UNREACHABLE();
     49   }
     50 }
     51 
     52 void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
     53   LocationSummary* locations = instruction->GetLocations();
     54   XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
     55   switch (instruction->GetPackedType()) {
     56     case Primitive::kPrimBoolean:
     57     case Primitive::kPrimByte:
     58       DCHECK_EQ(16u, instruction->GetVectorLength());
     59       __ movd(reg, locations->InAt(0).AsRegister<Register>());
     60       __ punpcklbw(reg, reg);
     61       __ punpcklwd(reg, reg);
     62       __ pshufd(reg, reg, Immediate(0));
     63       break;
     64     case Primitive::kPrimChar:
     65     case Primitive::kPrimShort:
     66       DCHECK_EQ(8u, instruction->GetVectorLength());
     67       __ movd(reg, locations->InAt(0).AsRegister<Register>());
     68       __ punpcklwd(reg, reg);
     69       __ pshufd(reg, reg, Immediate(0));
     70       break;
     71     case Primitive::kPrimInt:
     72       DCHECK_EQ(4u, instruction->GetVectorLength());
     73       __ movd(reg, locations->InAt(0).AsRegister<Register>());
     74       __ pshufd(reg, reg, Immediate(0));
     75       break;
     76     case Primitive::kPrimLong: {
     77       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
     78       DCHECK_EQ(2u, instruction->GetVectorLength());
     79       __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>());
     80       __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
     81       __ punpckldq(reg, tmp);
     82       __ punpcklqdq(reg, reg);
     83       break;
     84     }
     85     case Primitive::kPrimFloat:
     86       DCHECK(locations->InAt(0).Equals(locations->Out()));
     87       DCHECK_EQ(4u, instruction->GetVectorLength());
     88       __ shufps(reg, reg, Immediate(0));
     89       break;
     90     case Primitive::kPrimDouble:
     91       DCHECK(locations->InAt(0).Equals(locations->Out()));
     92       DCHECK_EQ(2u, instruction->GetVectorLength());
     93       __ shufpd(reg, reg, Immediate(0));
     94       break;
     95     default:
     96       LOG(FATAL) << "Unsupported SIMD type";
     97       UNREACHABLE();
     98   }
     99 }
    100 
    101 void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
    102   LOG(FATAL) << "No SIMD for " << instruction->GetId();
    103 }
    104 
    105 void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
    106   LOG(FATAL) << "No SIMD for " << instruction->GetId();
    107 }
    108 
    109 void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) {
    110   LOG(FATAL) << "No SIMD for " << instruction->GetId();
    111 }
    112 
    113 void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) {
    114   LOG(FATAL) << "No SIMD for " << instruction->GetId();
    115 }
    116 
    117 // Helper to set up locations for vector unary operations.
    118 static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
    119   LocationSummary* locations = new (arena) LocationSummary(instruction);
    120   switch (instruction->GetPackedType()) {
    121     case Primitive::kPrimBoolean:
    122     case Primitive::kPrimByte:
    123     case Primitive::kPrimChar:
    124     case Primitive::kPrimShort:
    125     case Primitive::kPrimInt:
    126     case Primitive::kPrimLong:
    127     case Primitive::kPrimFloat:
    128     case Primitive::kPrimDouble:
    129       locations->SetInAt(0, Location::RequiresFpuRegister());
    130       locations->SetOut(Location::RequiresFpuRegister());
    131       break;
    132     default:
    133       LOG(FATAL) << "Unsupported SIMD type";
    134       UNREACHABLE();
    135   }
    136 }
    137 
    138 void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
    139   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
    140 }
    141 
    142 void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) {
    143   LocationSummary* locations = instruction->GetLocations();
    144   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    145   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    146   Primitive::Type from = instruction->GetInputType();
    147   Primitive::Type to = instruction->GetResultType();
    148   if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
    149     DCHECK_EQ(4u, instruction->GetVectorLength());
    150     __ cvtdq2ps(dst, src);
    151   } else {
    152     LOG(FATAL) << "Unsupported SIMD type";
    153   }
    154 }
    155 
    156 void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) {
    157   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
    158 }
    159 
    160 void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
    161   LocationSummary* locations = instruction->GetLocations();
    162   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    163   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    164   switch (instruction->GetPackedType()) {
    165     case Primitive::kPrimByte:
    166       DCHECK_EQ(16u, instruction->GetVectorLength());
    167       __ pxor(dst, dst);
    168       __ psubb(dst, src);
    169       break;
    170     case Primitive::kPrimChar:
    171     case Primitive::kPrimShort:
    172       DCHECK_EQ(8u, instruction->GetVectorLength());
    173       __ pxor(dst, dst);
    174       __ psubw(dst, src);
    175       break;
    176     case Primitive::kPrimInt:
    177       DCHECK_EQ(4u, instruction->GetVectorLength());
    178       __ pxor(dst, dst);
    179       __ psubd(dst, src);
    180       break;
    181     case Primitive::kPrimLong:
    182       DCHECK_EQ(2u, instruction->GetVectorLength());
    183       __ pxor(dst, dst);
    184       __ psubq(dst, src);
    185       break;
    186     case Primitive::kPrimFloat:
    187       DCHECK_EQ(4u, instruction->GetVectorLength());
    188       __ xorps(dst, dst);
    189       __ subps(dst, src);
    190       break;
    191     case Primitive::kPrimDouble:
    192       DCHECK_EQ(2u, instruction->GetVectorLength());
    193       __ xorpd(dst, dst);
    194       __ subpd(dst, src);
    195       break;
    196     default:
    197       LOG(FATAL) << "Unsupported SIMD type";
    198       UNREACHABLE();
    199   }
    200 }
    201 
    202 void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
    203   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
    204   // Integral-abs requires a temporary for the comparison.
    205   if (instruction->GetPackedType() == Primitive::kPrimInt) {
    206     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
    207   }
    208 }
    209 
    210 void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {
    211   LocationSummary* locations = instruction->GetLocations();
    212   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    213   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    214   switch (instruction->GetPackedType()) {
    215     case Primitive::kPrimInt: {
    216       DCHECK_EQ(4u, instruction->GetVectorLength());
    217       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    218       __ movaps(dst, src);
    219       __ pxor(tmp, tmp);
    220       __ pcmpgtd(tmp, dst);
    221       __ pxor(dst, tmp);
    222       __ psubd(dst, tmp);
    223       break;
    224     }
    225     case Primitive::kPrimFloat:
    226       DCHECK_EQ(4u, instruction->GetVectorLength());
    227       __ pcmpeqb(dst, dst);  // all ones
    228       __ psrld(dst, Immediate(1));
    229       __ andps(dst, src);
    230       break;
    231     case Primitive::kPrimDouble:
    232       DCHECK_EQ(2u, instruction->GetVectorLength());
    233       __ pcmpeqb(dst, dst);  // all ones
    234       __ psrlq(dst, Immediate(1));
    235       __ andpd(dst, src);
    236       break;
    237     default:
    238       LOG(FATAL) << "Unsupported SIMD type";
    239       UNREACHABLE();
    240   }
    241 }
    242 
    243 void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) {
    244   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
    245   // Boolean-not requires a temporary to construct the 16 x one.
    246   if (instruction->GetPackedType() == Primitive::kPrimBoolean) {
    247     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
    248   }
    249 }
    250 
    251 void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) {
    252   LocationSummary* locations = instruction->GetLocations();
    253   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
    254   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    255   switch (instruction->GetPackedType()) {
    256     case Primitive::kPrimBoolean: {  // special case boolean-not
    257       DCHECK_EQ(16u, instruction->GetVectorLength());
    258       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    259       __ pxor(dst, dst);
    260       __ pcmpeqb(tmp, tmp);  // all ones
    261       __ psubb(dst, tmp);  // 16 x one
    262       __ pxor(dst, src);
    263       break;
    264     }
    265     case Primitive::kPrimByte:
    266     case Primitive::kPrimChar:
    267     case Primitive::kPrimShort:
    268     case Primitive::kPrimInt:
    269     case Primitive::kPrimLong:
    270       DCHECK_LE(2u, instruction->GetVectorLength());
    271       DCHECK_LE(instruction->GetVectorLength(), 16u);
    272       __ pcmpeqb(dst, dst);  // all ones
    273       __ pxor(dst, src);
    274       break;
    275     case Primitive::kPrimFloat:
    276       DCHECK_EQ(4u, instruction->GetVectorLength());
    277       __ pcmpeqb(dst, dst);  // all ones
    278       __ xorps(dst, src);
    279       break;
    280     case Primitive::kPrimDouble:
    281       DCHECK_EQ(2u, instruction->GetVectorLength());
    282       __ pcmpeqb(dst, dst);  // all ones
    283       __ xorpd(dst, src);
    284       break;
    285     default:
    286       LOG(FATAL) << "Unsupported SIMD type";
    287       UNREACHABLE();
    288   }
    289 }
    290 
    291 // Helper to set up locations for vector binary operations.
    292 static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
    293   LocationSummary* locations = new (arena) LocationSummary(instruction);
    294   switch (instruction->GetPackedType()) {
    295     case Primitive::kPrimBoolean:
    296     case Primitive::kPrimByte:
    297     case Primitive::kPrimChar:
    298     case Primitive::kPrimShort:
    299     case Primitive::kPrimInt:
    300     case Primitive::kPrimLong:
    301     case Primitive::kPrimFloat:
    302     case Primitive::kPrimDouble:
    303       locations->SetInAt(0, Location::RequiresFpuRegister());
    304       locations->SetInAt(1, Location::RequiresFpuRegister());
    305       locations->SetOut(Location::SameAsFirstInput());
    306       break;
    307     default:
    308       LOG(FATAL) << "Unsupported SIMD type";
    309       UNREACHABLE();
    310   }
    311 }
    312 
    313 void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) {
    314   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    315 }
    316 
    317 void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
    318   LocationSummary* locations = instruction->GetLocations();
    319   DCHECK(locations->InAt(0).Equals(locations->Out()));
    320   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    321   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    322   switch (instruction->GetPackedType()) {
    323     case Primitive::kPrimByte:
    324       DCHECK_EQ(16u, instruction->GetVectorLength());
    325       __ paddb(dst, src);
    326       break;
    327     case Primitive::kPrimChar:
    328     case Primitive::kPrimShort:
    329       DCHECK_EQ(8u, instruction->GetVectorLength());
    330       __ paddw(dst, src);
    331       break;
    332     case Primitive::kPrimInt:
    333       DCHECK_EQ(4u, instruction->GetVectorLength());
    334       __ paddd(dst, src);
    335       break;
    336     case Primitive::kPrimLong:
    337       DCHECK_EQ(2u, instruction->GetVectorLength());
    338       __ paddq(dst, src);
    339       break;
    340     case Primitive::kPrimFloat:
    341       DCHECK_EQ(4u, instruction->GetVectorLength());
    342       __ addps(dst, src);
    343       break;
    344     case Primitive::kPrimDouble:
    345       DCHECK_EQ(2u, instruction->GetVectorLength());
    346       __ addpd(dst, src);
    347       break;
    348     default:
    349       LOG(FATAL) << "Unsupported SIMD type";
    350       UNREACHABLE();
    351   }
    352 }
    353 
    354 void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
    355   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    356 }
    357 
    358 void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
    359   LocationSummary* locations = instruction->GetLocations();
    360   DCHECK(locations->InAt(0).Equals(locations->Out()));
    361   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    362   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    363 
    364   DCHECK(instruction->IsRounded());
    365   DCHECK(instruction->IsUnsigned());
    366 
    367   switch (instruction->GetPackedType()) {
    368     case Primitive::kPrimByte:
    369       DCHECK_EQ(16u, instruction->GetVectorLength());
    370      __ pavgb(dst, src);
    371      return;
    372     case Primitive::kPrimChar:
    373     case Primitive::kPrimShort:
    374       DCHECK_EQ(8u, instruction->GetVectorLength());
    375       __ pavgw(dst, src);
    376       return;
    377     default:
    378       LOG(FATAL) << "Unsupported SIMD type";
    379       UNREACHABLE();
    380   }
    381 }
    382 
    383 void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
    384   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    385 }
    386 
    387 void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
    388   LocationSummary* locations = instruction->GetLocations();
    389   DCHECK(locations->InAt(0).Equals(locations->Out()));
    390   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    391   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    392   switch (instruction->GetPackedType()) {
    393     case Primitive::kPrimByte:
    394       DCHECK_EQ(16u, instruction->GetVectorLength());
    395       __ psubb(dst, src);
    396       break;
    397     case Primitive::kPrimChar:
    398     case Primitive::kPrimShort:
    399       DCHECK_EQ(8u, instruction->GetVectorLength());
    400       __ psubw(dst, src);
    401       break;
    402     case Primitive::kPrimInt:
    403       DCHECK_EQ(4u, instruction->GetVectorLength());
    404       __ psubd(dst, src);
    405       break;
    406     case Primitive::kPrimLong:
    407       DCHECK_EQ(2u, instruction->GetVectorLength());
    408       __ psubq(dst, src);
    409       break;
    410     case Primitive::kPrimFloat:
    411       DCHECK_EQ(4u, instruction->GetVectorLength());
    412       __ subps(dst, src);
    413       break;
    414     case Primitive::kPrimDouble:
    415       DCHECK_EQ(2u, instruction->GetVectorLength());
    416       __ subpd(dst, src);
    417       break;
    418     default:
    419       LOG(FATAL) << "Unsupported SIMD type";
    420       UNREACHABLE();
    421   }
    422 }
    423 
    424 void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) {
    425   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    426 }
    427 
    428 void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
    429   LocationSummary* locations = instruction->GetLocations();
    430   DCHECK(locations->InAt(0).Equals(locations->Out()));
    431   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    432   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    433   switch (instruction->GetPackedType()) {
    434     case Primitive::kPrimChar:
    435     case Primitive::kPrimShort:
    436       DCHECK_EQ(8u, instruction->GetVectorLength());
    437       __ pmullw(dst, src);
    438       break;
    439     case Primitive::kPrimInt:
    440       DCHECK_EQ(4u, instruction->GetVectorLength());
    441       __ pmulld(dst, src);
    442       break;
    443     case Primitive::kPrimFloat:
    444       DCHECK_EQ(4u, instruction->GetVectorLength());
    445       __ mulps(dst, src);
    446       break;
    447     case Primitive::kPrimDouble:
    448       DCHECK_EQ(2u, instruction->GetVectorLength());
    449       __ mulpd(dst, src);
    450       break;
    451     default:
    452       LOG(FATAL) << "Unsupported SIMD type";
    453       UNREACHABLE();
    454   }
    455 }
    456 
    457 void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) {
    458   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    459 }
    460 
    461 void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
    462   LocationSummary* locations = instruction->GetLocations();
    463   DCHECK(locations->InAt(0).Equals(locations->Out()));
    464   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    465   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    466   switch (instruction->GetPackedType()) {
    467     case Primitive::kPrimFloat:
    468       DCHECK_EQ(4u, instruction->GetVectorLength());
    469       __ divps(dst, src);
    470       break;
    471     case Primitive::kPrimDouble:
    472       DCHECK_EQ(2u, instruction->GetVectorLength());
    473       __ divpd(dst, src);
    474       break;
    475     default:
    476       LOG(FATAL) << "Unsupported SIMD type";
    477       UNREACHABLE();
    478   }
    479 }
    480 
    481 void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
    482   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    483 }
    484 
    485 void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
    486   LocationSummary* locations = instruction->GetLocations();
    487   DCHECK(locations->InAt(0).Equals(locations->Out()));
    488   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    489   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    490   switch (instruction->GetPackedType()) {
    491     case Primitive::kPrimByte:
    492       DCHECK_EQ(16u, instruction->GetVectorLength());
    493       if (instruction->IsUnsigned()) {
    494         __ pminub(dst, src);
    495       } else {
    496         __ pminsb(dst, src);
    497       }
    498       break;
    499     case Primitive::kPrimChar:
    500     case Primitive::kPrimShort:
    501       DCHECK_EQ(8u, instruction->GetVectorLength());
    502       if (instruction->IsUnsigned()) {
    503         __ pminuw(dst, src);
    504       } else {
    505         __ pminsw(dst, src);
    506       }
    507       break;
    508     case Primitive::kPrimInt:
    509       DCHECK_EQ(4u, instruction->GetVectorLength());
    510       if (instruction->IsUnsigned()) {
    511         __ pminud(dst, src);
    512       } else {
    513         __ pminsd(dst, src);
    514       }
    515       break;
    516     // Next cases are sloppy wrt 0.0 vs -0.0.
    517     case Primitive::kPrimFloat:
    518       DCHECK_EQ(4u, instruction->GetVectorLength());
    519       DCHECK(!instruction->IsUnsigned());
    520       __ minps(dst, src);
    521       break;
    522     case Primitive::kPrimDouble:
    523       DCHECK_EQ(2u, instruction->GetVectorLength());
    524       DCHECK(!instruction->IsUnsigned());
    525       __ minpd(dst, src);
    526       break;
    527     default:
    528       LOG(FATAL) << "Unsupported SIMD type";
    529       UNREACHABLE();
    530   }
    531 }
    532 
    533 void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
    534   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    535 }
    536 
    537 void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
    538   LocationSummary* locations = instruction->GetLocations();
    539   DCHECK(locations->InAt(0).Equals(locations->Out()));
    540   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    541   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    542   switch (instruction->GetPackedType()) {
    543     case Primitive::kPrimByte:
    544       DCHECK_EQ(16u, instruction->GetVectorLength());
    545       if (instruction->IsUnsigned()) {
    546         __ pmaxub(dst, src);
    547       } else {
    548         __ pmaxsb(dst, src);
    549       }
    550       break;
    551     case Primitive::kPrimChar:
    552     case Primitive::kPrimShort:
    553       DCHECK_EQ(8u, instruction->GetVectorLength());
    554       if (instruction->IsUnsigned()) {
    555         __ pmaxuw(dst, src);
    556       } else {
    557         __ pmaxsw(dst, src);
    558       }
    559       break;
    560     case Primitive::kPrimInt:
    561       DCHECK_EQ(4u, instruction->GetVectorLength());
    562       if (instruction->IsUnsigned()) {
    563         __ pmaxud(dst, src);
    564       } else {
    565         __ pmaxsd(dst, src);
    566       }
    567       break;
    568     // Next cases are sloppy wrt 0.0 vs -0.0.
    569     case Primitive::kPrimFloat:
    570       DCHECK_EQ(4u, instruction->GetVectorLength());
    571       DCHECK(!instruction->IsUnsigned());
    572       __ maxps(dst, src);
    573       break;
    574     case Primitive::kPrimDouble:
    575       DCHECK_EQ(2u, instruction->GetVectorLength());
    576       DCHECK(!instruction->IsUnsigned());
    577       __ maxpd(dst, src);
    578       break;
    579     default:
    580       LOG(FATAL) << "Unsupported SIMD type";
    581       UNREACHABLE();
    582   }
    583 }
    584 
    585 void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
    586   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    587 }
    588 
    589 void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) {
    590   LocationSummary* locations = instruction->GetLocations();
    591   DCHECK(locations->InAt(0).Equals(locations->Out()));
    592   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    593   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    594   switch (instruction->GetPackedType()) {
    595     case Primitive::kPrimBoolean:
    596     case Primitive::kPrimByte:
    597     case Primitive::kPrimChar:
    598     case Primitive::kPrimShort:
    599     case Primitive::kPrimInt:
    600     case Primitive::kPrimLong:
    601       DCHECK_LE(2u, instruction->GetVectorLength());
    602       DCHECK_LE(instruction->GetVectorLength(), 16u);
    603       __ pand(dst, src);
    604       break;
    605     case Primitive::kPrimFloat:
    606       DCHECK_EQ(4u, instruction->GetVectorLength());
    607       __ andps(dst, src);
    608       break;
    609     case Primitive::kPrimDouble:
    610       DCHECK_EQ(2u, instruction->GetVectorLength());
    611       __ andpd(dst, src);
    612       break;
    613     default:
    614       LOG(FATAL) << "Unsupported SIMD type";
    615       UNREACHABLE();
    616   }
    617 }
    618 
    619 void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) {
    620   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    621 }
    622 
    623 void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) {
    624   LocationSummary* locations = instruction->GetLocations();
    625   DCHECK(locations->InAt(0).Equals(locations->Out()));
    626   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    627   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    628   switch (instruction->GetPackedType()) {
    629     case Primitive::kPrimBoolean:
    630     case Primitive::kPrimByte:
    631     case Primitive::kPrimChar:
    632     case Primitive::kPrimShort:
    633     case Primitive::kPrimInt:
    634     case Primitive::kPrimLong:
    635       DCHECK_LE(2u, instruction->GetVectorLength());
    636       DCHECK_LE(instruction->GetVectorLength(), 16u);
    637       __ pandn(dst, src);
    638       break;
    639     case Primitive::kPrimFloat:
    640       DCHECK_EQ(4u, instruction->GetVectorLength());
    641       __ andnps(dst, src);
    642       break;
    643     case Primitive::kPrimDouble:
    644       DCHECK_EQ(2u, instruction->GetVectorLength());
    645       __ andnpd(dst, src);
    646       break;
    647     default:
    648       LOG(FATAL) << "Unsupported SIMD type";
    649       UNREACHABLE();
    650   }
    651 }
    652 
    653 void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) {
    654   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    655 }
    656 
    657 void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) {
    658   LocationSummary* locations = instruction->GetLocations();
    659   DCHECK(locations->InAt(0).Equals(locations->Out()));
    660   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    661   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    662   switch (instruction->GetPackedType()) {
    663     case Primitive::kPrimBoolean:
    664     case Primitive::kPrimByte:
    665     case Primitive::kPrimChar:
    666     case Primitive::kPrimShort:
    667     case Primitive::kPrimInt:
    668     case Primitive::kPrimLong:
    669       DCHECK_LE(2u, instruction->GetVectorLength());
    670       DCHECK_LE(instruction->GetVectorLength(), 16u);
    671       __ por(dst, src);
    672       break;
    673     case Primitive::kPrimFloat:
    674       DCHECK_EQ(4u, instruction->GetVectorLength());
    675       __ orps(dst, src);
    676       break;
    677     case Primitive::kPrimDouble:
    678       DCHECK_EQ(2u, instruction->GetVectorLength());
    679       __ orpd(dst, src);
    680       break;
    681     default:
    682       LOG(FATAL) << "Unsupported SIMD type";
    683       UNREACHABLE();
    684   }
    685 }
    686 
    687 void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) {
    688   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
    689 }
    690 
    691 void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) {
    692   LocationSummary* locations = instruction->GetLocations();
    693   DCHECK(locations->InAt(0).Equals(locations->Out()));
    694   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
    695   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    696   switch (instruction->GetPackedType()) {
    697     case Primitive::kPrimBoolean:
    698     case Primitive::kPrimByte:
    699     case Primitive::kPrimChar:
    700     case Primitive::kPrimShort:
    701     case Primitive::kPrimInt:
    702     case Primitive::kPrimLong:
    703       DCHECK_LE(2u, instruction->GetVectorLength());
    704       DCHECK_LE(instruction->GetVectorLength(), 16u);
    705       __ pxor(dst, src);
    706       break;
    707     case Primitive::kPrimFloat:
    708       DCHECK_EQ(4u, instruction->GetVectorLength());
    709       __ xorps(dst, src);
    710       break;
    711     case Primitive::kPrimDouble:
    712       DCHECK_EQ(2u, instruction->GetVectorLength());
    713       __ xorpd(dst, src);
    714       break;
    715     default:
    716       LOG(FATAL) << "Unsupported SIMD type";
    717       UNREACHABLE();
    718   }
    719 }
    720 
    721 // Helper to set up locations for vector shift operations.
    722 static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
    723   LocationSummary* locations = new (arena) LocationSummary(instruction);
    724   switch (instruction->GetPackedType()) {
    725     case Primitive::kPrimChar:
    726     case Primitive::kPrimShort:
    727     case Primitive::kPrimInt:
    728     case Primitive::kPrimLong:
    729       locations->SetInAt(0, Location::RequiresFpuRegister());
    730       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
    731       locations->SetOut(Location::SameAsFirstInput());
    732       break;
    733     default:
    734       LOG(FATAL) << "Unsupported SIMD type";
    735       UNREACHABLE();
    736   }
    737 }
    738 
    739 void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) {
    740   CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
    741 }
    742 
    743 void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) {
    744   LocationSummary* locations = instruction->GetLocations();
    745   DCHECK(locations->InAt(0).Equals(locations->Out()));
    746   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    747   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    748   switch (instruction->GetPackedType()) {
    749     case Primitive::kPrimChar:
    750     case Primitive::kPrimShort:
    751       DCHECK_EQ(8u, instruction->GetVectorLength());
    752       __ psllw(dst, Immediate(static_cast<uint8_t>(value)));
    753       break;
    754     case Primitive::kPrimInt:
    755       DCHECK_EQ(4u, instruction->GetVectorLength());
    756       __ pslld(dst, Immediate(static_cast<uint8_t>(value)));
    757       break;
    758     case Primitive::kPrimLong:
    759       DCHECK_EQ(2u, instruction->GetVectorLength());
    760       __ psllq(dst, Immediate(static_cast<uint8_t>(value)));
    761       break;
    762     default:
    763       LOG(FATAL) << "Unsupported SIMD type";
    764       UNREACHABLE();
    765   }
    766 }
    767 
    768 void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) {
    769   CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
    770 }
    771 
    772 void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) {
    773   LocationSummary* locations = instruction->GetLocations();
    774   DCHECK(locations->InAt(0).Equals(locations->Out()));
    775   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    776   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    777   switch (instruction->GetPackedType()) {
    778     case Primitive::kPrimChar:
    779     case Primitive::kPrimShort:
    780       DCHECK_EQ(8u, instruction->GetVectorLength());
    781       __ psraw(dst, Immediate(static_cast<uint8_t>(value)));
    782       break;
    783     case Primitive::kPrimInt:
    784       DCHECK_EQ(4u, instruction->GetVectorLength());
    785       __ psrad(dst, Immediate(static_cast<uint8_t>(value)));
    786       break;
    787     default:
    788       LOG(FATAL) << "Unsupported SIMD type";
    789       UNREACHABLE();
    790   }
    791 }
    792 
    793 void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) {
    794   CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
    795 }
    796 
    797 void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
    798   LocationSummary* locations = instruction->GetLocations();
    799   DCHECK(locations->InAt(0).Equals(locations->Out()));
    800   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
    801   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
    802   switch (instruction->GetPackedType()) {
    803     case Primitive::kPrimChar:
    804     case Primitive::kPrimShort:
    805       DCHECK_EQ(8u, instruction->GetVectorLength());
    806       __ psrlw(dst, Immediate(static_cast<uint8_t>(value)));
    807       break;
    808     case Primitive::kPrimInt:
    809       DCHECK_EQ(4u, instruction->GetVectorLength());
    810       __ psrld(dst, Immediate(static_cast<uint8_t>(value)));
    811       break;
    812     case Primitive::kPrimLong:
    813       DCHECK_EQ(2u, instruction->GetVectorLength());
    814       __ psrlq(dst, Immediate(static_cast<uint8_t>(value)));
    815       break;
    816     default:
    817       LOG(FATAL) << "Unsupported SIMD type";
    818       UNREACHABLE();
    819   }
    820 }
    821 
    822 void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
    823   LOG(FATAL) << "No SIMD for " << instr->GetId();
    824 }
    825 
    826 void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
    827   LOG(FATAL) << "No SIMD for " << instr->GetId();
    828 }
    829 
    830 // Helper to set up locations for vector memory operations.
    831 static void CreateVecMemLocations(ArenaAllocator* arena,
    832                                   HVecMemoryOperation* instruction,
    833                                   bool is_load) {
    834   LocationSummary* locations = new (arena) LocationSummary(instruction);
    835   switch (instruction->GetPackedType()) {
    836     case Primitive::kPrimBoolean:
    837     case Primitive::kPrimByte:
    838     case Primitive::kPrimChar:
    839     case Primitive::kPrimShort:
    840     case Primitive::kPrimInt:
    841     case Primitive::kPrimLong:
    842     case Primitive::kPrimFloat:
    843     case Primitive::kPrimDouble:
    844       locations->SetInAt(0, Location::RequiresRegister());
    845       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
    846       if (is_load) {
    847         locations->SetOut(Location::RequiresFpuRegister());
    848       } else {
    849         locations->SetInAt(2, Location::RequiresFpuRegister());
    850       }
    851       break;
    852     default:
    853       LOG(FATAL) << "Unsupported SIMD type";
    854       UNREACHABLE();
    855   }
    856 }
    857 
    858 // Helper to construct address for vector memory operations.
    859 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
    860   Location base = locations->InAt(0);
    861   Location index = locations->InAt(1);
    862   ScaleFactor scale = TIMES_1;
    863   switch (size) {
    864     case 2: scale = TIMES_2; break;
    865     case 4: scale = TIMES_4; break;
    866     case 8: scale = TIMES_8; break;
    867     default: break;
    868   }
    869   uint32_t offset = is_string_char_at
    870       ? mirror::String::ValueOffset().Uint32Value()
    871       : mirror::Array::DataOffset(size).Uint32Value();
    872   return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
    873 }
    874 
    875 void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
    876   CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
    877   // String load requires a temporary for the compressed load.
    878   if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
    879     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
    880   }
    881 }
    882 
    883 void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
    884   LocationSummary* locations = instruction->GetLocations();
    885   size_t size = Primitive::ComponentSize(instruction->GetPackedType());
    886   Address address = VecAddress(locations, size, instruction->IsStringCharAt());
    887   XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
    888   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
    889   switch (instruction->GetPackedType()) {
    890     case Primitive::kPrimChar:
    891       DCHECK_EQ(8u, instruction->GetVectorLength());
    892       // Special handling of compressed/uncompressed string load.
    893       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
    894         NearLabel done, not_compressed;
    895         XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    896         // Test compression bit.
    897         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
    898                       "Expecting 0=compressed, 1=uncompressed");
    899         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
    900         __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
    901         __ j(kNotZero, &not_compressed);
    902         // Zero extend 8 compressed bytes into 8 chars.
    903         __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
    904         __ pxor(tmp, tmp);
    905         __ punpcklbw(reg, tmp);
    906         __ jmp(&done);
    907         // Load 4 direct uncompressed chars.
    908         __ Bind(&not_compressed);
    909         is_aligned16 ?  __ movdqa(reg, address) :  __ movdqu(reg, address);
    910         __ Bind(&done);
    911         return;
    912       }
    913       FALLTHROUGH_INTENDED;
    914     case Primitive::kPrimBoolean:
    915     case Primitive::kPrimByte:
    916     case Primitive::kPrimShort:
    917     case Primitive::kPrimInt:
    918     case Primitive::kPrimLong:
    919       DCHECK_LE(2u, instruction->GetVectorLength());
    920       DCHECK_LE(instruction->GetVectorLength(), 16u);
    921       is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
    922       break;
    923     case Primitive::kPrimFloat:
    924       DCHECK_EQ(4u, instruction->GetVectorLength());
    925       is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
    926       break;
    927     case Primitive::kPrimDouble:
    928       DCHECK_EQ(2u, instruction->GetVectorLength());
    929       is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
    930       break;
    931     default:
    932       LOG(FATAL) << "Unsupported SIMD type";
    933       UNREACHABLE();
    934   }
    935 }
    936 
    937 void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) {
    938   CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false);
    939 }
    940 
    941 void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
    942   LocationSummary* locations = instruction->GetLocations();
    943   size_t size = Primitive::ComponentSize(instruction->GetPackedType());
    944   Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
    945   XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
    946   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
    947   switch (instruction->GetPackedType()) {
    948     case Primitive::kPrimBoolean:
    949     case Primitive::kPrimByte:
    950     case Primitive::kPrimChar:
    951     case Primitive::kPrimShort:
    952     case Primitive::kPrimInt:
    953     case Primitive::kPrimLong:
    954       DCHECK_LE(2u, instruction->GetVectorLength());
    955       DCHECK_LE(instruction->GetVectorLength(), 16u);
    956       is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
    957       break;
    958     case Primitive::kPrimFloat:
    959       DCHECK_EQ(4u, instruction->GetVectorLength());
    960       is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
    961       break;
    962     case Primitive::kPrimDouble:
    963       DCHECK_EQ(2u, instruction->GetVectorLength());
    964       is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
    965       break;
    966     default:
    967       LOG(FATAL) << "Unsupported SIMD type";
    968       UNREACHABLE();
    969   }
    970 }
    971 
    972 #undef __
    973 
    974 }  // namespace x86
    975 }  // namespace art
    976