      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "intrinsics_x86.h"
     18 
     19 #include <limits>
     20 
     21 #include "arch/x86/instruction_set_features_x86.h"
     22 #include "art_method.h"
     23 #include "base/bit_utils.h"
     24 #include "code_generator_x86.h"
     25 #include "entrypoints/quick/quick_entrypoints.h"
     26 #include "heap_poisoning.h"
     27 #include "intrinsics.h"
     28 #include "intrinsics_utils.h"
     29 #include "lock_word.h"
     30 #include "mirror/array-inl.h"
     31 #include "mirror/object_array-inl.h"
     32 #include "mirror/reference.h"
     33 #include "mirror/string.h"
     34 #include "scoped_thread_state_change-inl.h"
     35 #include "thread-current-inl.h"
     36 #include "utils/x86/assembler_x86.h"
     37 #include "utils/x86/constants_x86.h"
     38 
     39 namespace art {
     40 
     41 namespace x86 {
     42 
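         // Bit patterns of the canonical quiet NaN values loaded by the Math.min/max
         // intrinsics when one of the inputs is NaN. The High/Low pair is the same
         // 64-bit double NaN split into two 32-bit halves for pushing on the stack.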
     43 static constexpr int kDoubleNaNHigh = 0x7FF80000;
     44 static constexpr int kDoubleNaNLow = 0x00000000;
     45 static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
     46 static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
     47 
     48 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
     49   : allocator_(codegen->GetGraph()->GetAllocator()),
     50     codegen_(codegen) {
     51 }
     52 
     53 
     54 X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
     55   return down_cast<X86Assembler*>(codegen_->GetAssembler());
     56 }
     57 
     58 ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
     59   return codegen_->GetGraph()->GetAllocator();
     60 }
     61 
     62 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
     63   Dispatch(invoke);
     64   LocationSummary* res = invoke->GetLocations();
     65   if (res == nullptr) {
     66     return false;
     67   }
     68   return res->Intrinsified();
     69 }
     70 
     71 static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
     72   InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
     73   IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
     74 }
     75 
     76 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
     77 
     78 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
     79 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
     80 
     81 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
     82 class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
     83  public:
     84   explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
     85       : SlowPathCode(instruction) {
     86     DCHECK(kEmitCompilerReadBarrier);
     87     DCHECK(kUseBakerReadBarrier);
     88   }
     89 
     90   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     91     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     92     LocationSummary* locations = instruction_->GetLocations();
     93     DCHECK(locations->CanCall());
     94     DCHECK(instruction_->IsInvokeStaticOrDirect())
     95         << "Unexpected instruction in read barrier arraycopy slow path: "
     96         << instruction_->DebugName();
     97     DCHECK(instruction_->GetLocations()->Intrinsified());
     98     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
     99 
    100     int32_t element_size = DataType::Size(DataType::Type::kReference);
    101     uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
    102 
    103     Register src = locations->InAt(0).AsRegister<Register>();
    104     Location src_pos = locations->InAt(1);
    105     Register dest = locations->InAt(2).AsRegister<Register>();
    106     Location dest_pos = locations->InAt(3);
    107     Location length = locations->InAt(4);
    108     Location temp1_loc = locations->GetTemp(0);
    109     Register temp1 = temp1_loc.AsRegister<Register>();
    110     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
    111     Register temp3 = locations->GetTemp(2).AsRegister<Register>();
    112 
    113     __ Bind(GetEntryLabel());
     114     // In this code path, registers `temp1`, `temp2`, and `temp3`
     115     // are not used for the base source address, the base destination
     116     // address, and the end source address (respectively), as in other
     117     // SystemArrayCopy intrinsic code paths.  Instead they are used,
     118     // respectively, for:
    119     // - the loop index (`i`);
    120     // - the source index (`src_index`) and the loaded (source)
    121     //   reference (`value`); and
    122     // - the destination index (`dest_index`).
    123 
    124     // i = 0
    125     __ xorl(temp1, temp1);
    126     NearLabel loop;
    127     __ Bind(&loop);
    128     // value = src_array[i + src_pos]
    129     if (src_pos.IsConstant()) {
    130       int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    131       int32_t adjusted_offset = offset + constant * element_size;
    132       __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
    133     } else {
    134       __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
    135       __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
    136     }
    137     __ MaybeUnpoisonHeapReference(temp2);
    138     // TODO: Inline the mark bit check before calling the runtime?
    139     // value = ReadBarrier::Mark(value)
    140     // No need to save live registers; it's taken care of by the
    141     // entrypoint. Also, there is no need to update the stack mask,
    142     // as this runtime call will not trigger a garbage collection.
    143     // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
    144     // explanations.)
    145     DCHECK_NE(temp2, ESP);
    146     DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
    147     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
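             // The read barrier mark entrypoints are register-specific: indexing by `temp2`
             // selects the stub that takes and returns the reference in `temp2` itself.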
    148     // This runtime call does not require a stack map.
    149     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    150     __ MaybePoisonHeapReference(temp2);
    151     // dest_array[i + dest_pos] = value
    152     if (dest_pos.IsConstant()) {
    153       int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    154       int32_t adjusted_offset = offset + constant * element_size;
    155       __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
    156     } else {
    157       __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
    158       __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
    159     }
    160     // ++i
    161     __ addl(temp1, Immediate(1));
    162     // if (i != length) goto loop
    163     x86_codegen->GenerateIntCompare(temp1_loc, length);
    164     __ j(kNotEqual, &loop);
    165     __ jmp(GetExitLabel());
    166   }
    167 
    168   const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }
    169 
    170  private:
    171   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
    172 };
    173 
    174 #undef __
    175 
    176 #define __ assembler->
    177 
    178 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
    179   LocationSummary* locations =
    180       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    181   locations->SetInAt(0, Location::RequiresFpuRegister());
    182   locations->SetOut(Location::RequiresRegister());
    183   if (is64bit) {
    184     locations->AddTemp(Location::RequiresFpuRegister());
    185   }
    186 }
    187 
    188 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
    189   LocationSummary* locations =
    190       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    191   locations->SetInAt(0, Location::RequiresRegister());
    192   locations->SetOut(Location::RequiresFpuRegister());
    193   if (is64bit) {
    194     locations->AddTemp(Location::RequiresFpuRegister());
    195     locations->AddTemp(Location::RequiresFpuRegister());
    196   }
    197 }
    198 
    199 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
    200   Location input = locations->InAt(0);
    201   Location output = locations->Out();
    202   if (is64bit) {
    203     // Need to use the temporary.
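             // movd copies the low 32 bits of the XMM register; psrlq then shifts the high
             // 32 bits down so the second movd can extract them.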
    204     XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    205     __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    206     __ movd(output.AsRegisterPairLow<Register>(), temp);
    207     __ psrlq(temp, Immediate(32));
    208     __ movd(output.AsRegisterPairHigh<Register>(), temp);
    209   } else {
    210     __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
    211   }
    212 }
    213 
    214 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
    215   Location input = locations->InAt(0);
    216   Location output = locations->Out();
    217   if (is64bit) {
    218     // Need to use the temporary.
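             // punpckldq interleaves the low words of temp1 (bits 0-31) and temp2 (bits 32-63)
             // into the low 64 bits of temp1, reassembling the long as a double bit pattern.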
    219     XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    220     XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    221     __ movd(temp1, input.AsRegisterPairLow<Register>());
    222     __ movd(temp2, input.AsRegisterPairHigh<Register>());
    223     __ punpckldq(temp1, temp2);
    224     __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
    225   } else {
    226     __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
    227   }
    228 }
    229 
    230 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
    231   CreateFPToIntLocations(allocator_, invoke, /* is64bit */ true);
    232 }
    233 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
    234   CreateIntToFPLocations(allocator_, invoke, /* is64bit */ true);
    235 }
    236 
    237 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
    238   MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
    239 }
    240 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
    241   MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
    242 }
    243 
    244 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
    245   CreateFPToIntLocations(allocator_, invoke, /* is64bit */ false);
    246 }
    247 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
    248   CreateIntToFPLocations(allocator_, invoke, /* is64bit */ false);
    249 }
    250 
    251 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
    252   MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
    253 }
    254 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
    255   MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
    256 }
    257 
    258 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    259   LocationSummary* locations =
    260       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    261   locations->SetInAt(0, Location::RequiresRegister());
    262   locations->SetOut(Location::SameAsFirstInput());
    263 }
    264 
    265 static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    266   LocationSummary* locations =
    267       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    268   locations->SetInAt(0, Location::RequiresRegister());
    269   locations->SetOut(Location::RequiresRegister());
    270 }
    271 
    272 static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    273   LocationSummary* locations =
    274       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    275   locations->SetInAt(0, Location::RequiresRegister());
    276   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
    277 }
    278 
    279 static void GenReverseBytes(LocationSummary* locations,
    280                             DataType::Type size,
    281                             X86Assembler* assembler) {
    282   Register out = locations->Out().AsRegister<Register>();
    283 
    284   switch (size) {
    285     case DataType::Type::kInt16:
    286       // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
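               // bswapl reverses all four bytes, leaving the byte-swapped short in the upper
               // half of the register; the arithmetic shift moves it back down, sign-extended.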
    287       __ bswapl(out);
    288       __ sarl(out, Immediate(16));
    289       break;
    290     case DataType::Type::kInt32:
    291       __ bswapl(out);
    292       break;
    293     default:
    294       LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
    295       UNREACHABLE();
    296   }
    297 }
    298 
    299 void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
    300   CreateIntToIntLocations(allocator_, invoke);
    301 }
    302 
    303 void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
    304   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
    305 }
    306 
    307 void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
    308   CreateLongToLongLocations(allocator_, invoke);
    309 }
    310 
    311 void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
    312   LocationSummary* locations = invoke->GetLocations();
    313   Location input = locations->InAt(0);
    314   Register input_lo = input.AsRegisterPairLow<Register>();
    315   Register input_hi = input.AsRegisterPairHigh<Register>();
    316   Location output = locations->Out();
    317   Register output_lo = output.AsRegisterPairLow<Register>();
    318   Register output_hi = output.AsRegisterPairHigh<Register>();
    319 
    320   X86Assembler* assembler = GetAssembler();
    321   // Assign the inputs to the outputs, mixing low/high.
    322   __ movl(output_lo, input_hi);
    323   __ movl(output_hi, input_lo);
    324   __ bswapl(output_lo);
    325   __ bswapl(output_hi);
    326 }
    327 
    328 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
    329   CreateIntToIntLocations(allocator_, invoke);
    330 }
    331 
    332 void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
    333   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
    334 }
    335 
    336 
    337 // TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
    338 //       need is 64b.
    339 
    340 static void CreateFloatToFloat(ArenaAllocator* allocator, HInvoke* invoke) {
    341   // TODO: Enable memory operations when the assembler supports them.
    342   LocationSummary* locations =
    343       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    344   locations->SetInAt(0, Location::RequiresFpuRegister());
    345   locations->SetOut(Location::SameAsFirstInput());
    346   HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
    347   DCHECK(static_or_direct != nullptr);
    348   if (static_or_direct->HasSpecialInput() &&
    349       invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
     350     // We need addressability for the constant area.
    351     locations->SetInAt(1, Location::RequiresRegister());
    352     // We need a temporary to hold the constant.
    353     locations->AddTemp(Location::RequiresFpuRegister());
    354   }
    355 }
    356 
    357 static void MathAbsFP(HInvoke* invoke,
    358                       bool is64bit,
    359                       X86Assembler* assembler,
    360                       CodeGeneratorX86* codegen) {
    361   LocationSummary* locations = invoke->GetLocations();
    362   Location output = locations->Out();
    363 
    364   DCHECK(output.IsFpuRegister());
    365   if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    366     HX86ComputeBaseMethodAddress* method_address =
    367         invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    368     DCHECK(locations->InAt(1).IsRegister());
    369     // We also have a constant area pointer.
    370     Register constant_area = locations->InAt(1).AsRegister<Register>();
    371     XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    372     if (is64bit) {
    373       __ movsd(temp, codegen->LiteralInt64Address(
    374           INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area));
    375       __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
    376     } else {
    377       __ movss(temp, codegen->LiteralInt32Address(
    378           INT32_C(0x7FFFFFFF), method_address, constant_area));
    379       __ andps(output.AsFpuRegister<XmmRegister>(), temp);
    380     }
    381   } else {
    382     // Create the right constant on an aligned stack.
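             // andpd/andps require a 16-byte-aligned memory operand; the sub/push sequence
             // below moves ESP down by exactly 16 bytes, so Address(ESP, 0) stays aligned.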
    383     if (is64bit) {
    384       __ subl(ESP, Immediate(8));
    385       __ pushl(Immediate(0x7FFFFFFF));
    386       __ pushl(Immediate(0xFFFFFFFF));
    387       __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    388     } else {
    389       __ subl(ESP, Immediate(12));
    390       __ pushl(Immediate(0x7FFFFFFF));
    391       __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    392     }
    393     __ addl(ESP, Immediate(16));
    394   }
    395 }
    396 
    397 void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
    398   CreateFloatToFloat(allocator_, invoke);
    399 }
    400 
    401 void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
    402   MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_);
    403 }
    404 
    405 void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
    406   CreateFloatToFloat(allocator_, invoke);
    407 }
    408 
    409 void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
    410   MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_);
    411 }
    412 
    413 static void CreateAbsIntLocation(ArenaAllocator* allocator, HInvoke* invoke) {
    414   LocationSummary* locations =
    415       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    416   locations->SetInAt(0, Location::RegisterLocation(EAX));
    417   locations->SetOut(Location::SameAsFirstInput());
    418   locations->AddTemp(Location::RegisterLocation(EDX));
    419 }
    420 
    421 static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
    422   Location output = locations->Out();
    423   Register out = output.AsRegister<Register>();
    424   DCHECK_EQ(out, EAX);
    425   Register temp = locations->GetTemp(0).AsRegister<Register>();
    426   DCHECK_EQ(temp, EDX);
    427 
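           // Branch-free abs: with sign = x >> 31 (0 or -1), abs(x) = (x ^ sign) - sign.
           // E.g. x = -5: sign = -1, x ^ sign = 4, and 4 - (-1) = 5; for x >= 0 both steps are no-ops.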
    428   // Sign extend EAX into EDX.
    429   __ cdq();
    430 
    431   // XOR EAX with sign.
    432   __ xorl(EAX, EDX);
    433 
    434   // Subtract out sign to correct.
    435   __ subl(EAX, EDX);
    436 
    437   // The result is in EAX.
    438 }
    439 
    440 static void CreateAbsLongLocation(ArenaAllocator* allocator, HInvoke* invoke) {
    441   LocationSummary* locations =
    442       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    443   locations->SetInAt(0, Location::RequiresRegister());
    444   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
    445   locations->AddTemp(Location::RequiresRegister());
    446 }
    447 
    448 static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
    449   Location input = locations->InAt(0);
    450   Register input_lo = input.AsRegisterPairLow<Register>();
    451   Register input_hi = input.AsRegisterPairHigh<Register>();
    452   Location output = locations->Out();
    453   Register output_lo = output.AsRegisterPairLow<Register>();
    454   Register output_hi = output.AsRegisterPairHigh<Register>();
    455   Register temp = locations->GetTemp(0).AsRegister<Register>();
    456 
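           // Same branch-free identity as GenAbsInteger, applied to the low/high pair;
           // sbbl propagates the borrow from the low-word subtraction into the high word.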
    457   // Compute the sign into the temporary.
    458   __ movl(temp, input_hi);
    459   __ sarl(temp, Immediate(31));
    460 
    461   // Store the sign into the output.
    462   __ movl(output_lo, temp);
    463   __ movl(output_hi, temp);
    464 
    465   // XOR the input to the output.
    466   __ xorl(output_lo, input_lo);
    467   __ xorl(output_hi, input_hi);
    468 
    469   // Subtract the sign.
    470   __ subl(output_lo, temp);
    471   __ sbbl(output_hi, temp);
    472 }
    473 
    474 void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
    475   CreateAbsIntLocation(allocator_, invoke);
    476 }
    477 
    478 void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
    479   GenAbsInteger(invoke->GetLocations(), GetAssembler());
    480 }
    481 
    482 void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
    483   CreateAbsLongLocation(allocator_, invoke);
    484 }
    485 
    486 void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
    487   GenAbsLong(invoke->GetLocations(), GetAssembler());
    488 }
    489 
    490 static void GenMinMaxFP(HInvoke* invoke,
    491                         bool is_min,
    492                         bool is_double,
    493                         X86Assembler* assembler,
    494                         CodeGeneratorX86* codegen) {
    495   LocationSummary* locations = invoke->GetLocations();
    496   Location op1_loc = locations->InAt(0);
    497   Location op2_loc = locations->InAt(1);
    498   Location out_loc = locations->Out();
    499   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
    500 
    501   // Shortcut for same input locations.
    502   if (op1_loc.Equals(op2_loc)) {
    503     DCHECK(out_loc.Equals(op1_loc));
    504     return;
    505   }
    506 
    507   //  (out := op1)
    508   //  out <=? op2
    509   //  if Nan jmp Nan_label
    510   //  if out is min jmp done
    511   //  if op2 is min jmp op2_label
    512   //  handle -0/+0
    513   //  jmp done
    514   // Nan_label:
    515   //  out := NaN
    516   // op2_label:
    517   //  out := op2
    518   // done:
    519   //
    520   // This removes one jmp, but needs to copy one input (op1) to out.
    521   //
    522   // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
    523 
    524   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
    525 
    526   NearLabel nan, done, op2_label;
    527   if (is_double) {
    528     __ ucomisd(out, op2);
    529   } else {
    530     __ ucomiss(out, op2);
    531   }
    532 
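           // ucomisd/ucomiss set the parity flag on an unordered compare (either operand
           // is NaN), so parity-even jumps straight to the NaN handler.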
    533   __ j(Condition::kParityEven, &nan);
    534 
    535   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
    536   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
    537 
    538   // Handle 0.0/-0.0.
    539   if (is_min) {
    540     if (is_double) {
    541       __ orpd(out, op2);
    542     } else {
    543       __ orps(out, op2);
    544     }
    545   } else {
    546     if (is_double) {
    547       __ andpd(out, op2);
    548     } else {
    549       __ andps(out, op2);
    550     }
    551   }
    552   __ jmp(&done);
    553 
    554   // NaN handling.
    555   __ Bind(&nan);
    556   // Do we have a constant area pointer?
    557   if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
    558     HX86ComputeBaseMethodAddress* method_address =
    559         invoke->InputAt(2)->AsX86ComputeBaseMethodAddress();
    560     DCHECK(locations->InAt(2).IsRegister());
    561     Register constant_area = locations->InAt(2).AsRegister<Register>();
    562     if (is_double) {
    563       __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area));
    564     } else {
    565       __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area));
    566     }
    567   } else {
    568     if (is_double) {
    569       __ pushl(Immediate(kDoubleNaNHigh));
    570       __ pushl(Immediate(kDoubleNaNLow));
    571       __ movsd(out, Address(ESP, 0));
    572       __ addl(ESP, Immediate(8));
    573     } else {
    574       __ pushl(Immediate(kFloatNaN));
    575       __ movss(out, Address(ESP, 0));
    576       __ addl(ESP, Immediate(4));
    577     }
    578   }
    579   __ jmp(&done);
    580 
    581   // out := op2;
    582   __ Bind(&op2_label);
    583   if (is_double) {
    584     __ movsd(out, op2);
    585   } else {
    586     __ movss(out, op2);
    587   }
    588 
    589   // Done.
    590   __ Bind(&done);
    591 }
    592 
    593 static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    594   LocationSummary* locations =
    595       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    596   locations->SetInAt(0, Location::RequiresFpuRegister());
    597   locations->SetInAt(1, Location::RequiresFpuRegister());
    598   // The following is sub-optimal, but all we can do for now. It would be fine to also accept
    599   // the second input to be the output (we can simply swap inputs).
    600   locations->SetOut(Location::SameAsFirstInput());
    601   HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
    602   DCHECK(static_or_direct != nullptr);
    603   if (static_or_direct->HasSpecialInput() &&
    604       invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    605     locations->SetInAt(2, Location::RequiresRegister());
    606   }
    607 }
    608 
    609 void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
    610   CreateFPFPToFPLocations(allocator_, invoke);
    611 }
    612 
    613 void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
    614   GenMinMaxFP(invoke,
    615               /* is_min */ true,
    616               /* is_double */ true,
    617               GetAssembler(),
    618               codegen_);
    619 }
    620 
    621 void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
    622   CreateFPFPToFPLocations(allocator_, invoke);
    623 }
    624 
    625 void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
    626   GenMinMaxFP(invoke,
    627               /* is_min */ true,
    628               /* is_double */ false,
    629               GetAssembler(),
    630               codegen_);
    631 }
    632 
    633 void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
    634   CreateFPFPToFPLocations(allocator_, invoke);
    635 }
    636 
    637 void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
    638   GenMinMaxFP(invoke,
    639               /* is_min */ false,
    640               /* is_double */ true,
    641               GetAssembler(),
    642               codegen_);
    643 }
    644 
    645 void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
    646   CreateFPFPToFPLocations(allocator_, invoke);
    647 }
    648 
    649 void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
    650   GenMinMaxFP(invoke,
    651               /* is_min */ false,
    652               /* is_double */ false,
    653               GetAssembler(),
    654               codegen_);
    655 }
    656 
    657 static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
    658                       X86Assembler* assembler) {
    659   Location op1_loc = locations->InAt(0);
    660   Location op2_loc = locations->InAt(1);
    661 
    662   // Shortcut for same input locations.
    663   if (op1_loc.Equals(op2_loc)) {
    664     // Can return immediately, as op1_loc == out_loc.
    665     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    666     //       a copy here.
    667     DCHECK(locations->Out().Equals(op1_loc));
    668     return;
    669   }
    670 
    671   if (is_long) {
    672     // Need to perform a subtract to get the sign right.
    673     // op1 is already in the same location as the output.
    674     Location output = locations->Out();
    675     Register output_lo = output.AsRegisterPairLow<Register>();
    676     Register output_hi = output.AsRegisterPairHigh<Register>();
    677 
    678     Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
    679     Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
    680 
    681     // Spare register to compute the subtraction to set condition code.
    682     Register temp = locations->GetTemp(0).AsRegister<Register>();
    683 
    684     // Subtract off op2_low.
    685     __ movl(temp, output_lo);
    686     __ subl(temp, op2_lo);
    687 
     688     // Now reuse the same temp and the borrow flag to finish subtracting op2_hi.
    689     __ movl(temp, output_hi);
    690     __ sbbl(temp, op2_hi);
    691 
    692     // Now the condition code is correct.
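             // For min, take op2 when op1 - op2 >= 0 (op1 >= op2); for max, when op1 - op2 < 0.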
    693     Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
    694     __ cmovl(cond, output_lo, op2_lo);
    695     __ cmovl(cond, output_hi, op2_hi);
    696   } else {
    697     Register out = locations->Out().AsRegister<Register>();
    698     Register op2 = op2_loc.AsRegister<Register>();
    699 
    700     //  (out := op1)
    701     //  out <=? op2
    702     //  if out is min jmp done
    703     //  out := op2
    704     // done:
    705 
    706     __ cmpl(out, op2);
    707     Condition cond = is_min ? Condition::kGreater : Condition::kLess;
    708     __ cmovl(cond, out, op2);
    709   }
    710 }
    711 
    712 static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    713   LocationSummary* locations =
    714       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    715   locations->SetInAt(0, Location::RequiresRegister());
    716   locations->SetInAt(1, Location::RequiresRegister());
    717   locations->SetOut(Location::SameAsFirstInput());
    718 }
    719 
    720 static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    721   LocationSummary* locations =
    722       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    723   locations->SetInAt(0, Location::RequiresRegister());
    724   locations->SetInAt(1, Location::RequiresRegister());
    725   locations->SetOut(Location::SameAsFirstInput());
    726   // Register to use to perform a long subtract to set cc.
    727   locations->AddTemp(Location::RequiresRegister());
    728 }
    729 
    730 void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
    731   CreateIntIntToIntLocations(allocator_, invoke);
    732 }
    733 
    734 void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
    735   GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
    736 }
    737 
    738 void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
    739   CreateLongLongToLongLocations(allocator_, invoke);
    740 }
    741 
    742 void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
    743   GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
    744 }
    745 
    746 void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
    747   CreateIntIntToIntLocations(allocator_, invoke);
    748 }
    749 
    750 void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
    751   GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
    752 }
    753 
    754 void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
    755   CreateLongLongToLongLocations(allocator_, invoke);
    756 }
    757 
    758 void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
    759   GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
    760 }
    761 
    762 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    763   LocationSummary* locations =
    764       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    765   locations->SetInAt(0, Location::RequiresFpuRegister());
    766   locations->SetOut(Location::RequiresFpuRegister());
    767 }
    768 
    769 void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
    770   CreateFPToFPLocations(allocator_, invoke);
    771 }
    772 
    773 void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
    774   LocationSummary* locations = invoke->GetLocations();
    775   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    776   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    777 
    778   GetAssembler()->sqrtsd(out, in);
    779 }
    780 
    781 static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
    782   MoveArguments(invoke, codegen);
    783 
    784   DCHECK(invoke->IsInvokeStaticOrDirect());
    785   codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
    786                                       Location::RegisterLocation(EAX));
    787 
    788   // Copy the result back to the expected output.
    789   Location out = invoke->GetLocations()->Out();
    790   if (out.IsValid()) {
    791     DCHECK(out.IsRegister());
    792     codegen->MoveFromReturnRegister(out, invoke->GetType());
    793   }
    794 }
    795 
    796 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
    797                                        HInvoke* invoke,
    798                                        CodeGeneratorX86* codegen) {
    799   // Do we have instruction support?
    800   if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    801     CreateFPToFPLocations(allocator, invoke);
    802     return;
    803   }
    804 
    805   // We have to fall back to a call to the intrinsic.
    806   LocationSummary* locations =
    807       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly);
    808   InvokeRuntimeCallingConvention calling_convention;
    809   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
    810   locations->SetOut(Location::FpuRegisterLocation(XMM0));
    811   // Needs to be EAX for the invoke.
    812   locations->AddTemp(Location::RegisterLocation(EAX));
    813 }
    814 
    815 static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
    816                                    HInvoke* invoke,
    817                                    X86Assembler* assembler,
    818                                    int round_mode) {
    819   LocationSummary* locations = invoke->GetLocations();
    820   if (locations->WillCall()) {
    821     InvokeOutOfLineIntrinsic(codegen, invoke);
    822   } else {
    823     XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    824     XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    825     __ roundsd(out, in, Immediate(round_mode));
    826   }
    827 }
    828 
    829 void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
    830   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
    831 }
    832 
    833 void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
    834   GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
    835 }
    836 
    837 void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
    838   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
    839 }
    840 
    841 void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
    842   GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
    843 }
    844 
    845 void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
    846   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
    847 }
    848 
    849 void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
    850   GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
    851 }
    852 
    853 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
    854   // Do we have instruction support?
    855   if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    856     HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
    857     DCHECK(static_or_direct != nullptr);
    858     LocationSummary* locations =
    859         new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    860     locations->SetInAt(0, Location::RequiresFpuRegister());
    861     if (static_or_direct->HasSpecialInput() &&
    862         invoke->InputAt(
    863             static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    864       locations->SetInAt(1, Location::RequiresRegister());
    865     }
    866     locations->SetOut(Location::RequiresRegister());
    867     locations->AddTemp(Location::RequiresFpuRegister());
    868     locations->AddTemp(Location::RequiresFpuRegister());
    869     return;
    870   }
    871 
    872   // We have to fall back to a call to the intrinsic.
    873   LocationSummary* locations =
    874       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly);
    875   InvokeRuntimeCallingConvention calling_convention;
    876   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
    877   locations->SetOut(Location::RegisterLocation(EAX));
    878   // Needs to be EAX for the invoke.
    879   locations->AddTemp(Location::RegisterLocation(EAX));
    880 }
    881 
    882 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
    883   LocationSummary* locations = invoke->GetLocations();
    884   if (locations->WillCall()) {  // TODO: can we reach this?
    885     InvokeOutOfLineIntrinsic(codegen_, invoke);
    886     return;
    887   }
    888 
    889   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    890   XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    891   XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    892   Register out = locations->Out().AsRegister<Register>();
    893   NearLabel skip_incr, done;
    894   X86Assembler* assembler = GetAssembler();
    895 
    896   // Since no direct x86 rounding instruction matches the required semantics,
    897   // this intrinsic is implemented as follows:
    898   //  result = floor(in);
    899   //  if (in - result >= 0.5f)
    900   //    result = result + 1.0f;
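           // E.g. in = -0.2f: floor(in) = -1.0f and in - (-1.0f) = 0.8f >= 0.5f, so the result
           // is bumped to 0.0f, matching Math.round(-0.2f) == 0.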
    901   __ movss(t2, in);
    902   __ roundss(t1, in, Immediate(1));
    903   __ subss(t2, t1);
    904   if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    905     // Direct constant area available.
    906     HX86ComputeBaseMethodAddress* method_address =
    907         invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    908     Register constant_area = locations->InAt(1).AsRegister<Register>();
    909     __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
    910                                                 method_address,
    911                                                 constant_area));
    912     __ j(kBelow, &skip_incr);
    913     __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
    914                                                method_address,
    915                                                constant_area));
    916     __ Bind(&skip_incr);
    917   } else {
    918     // No constant area: go through stack.
    919     __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
    920     __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
    921     __ comiss(t2, Address(ESP, 4));
    922     __ j(kBelow, &skip_incr);
    923     __ addss(t1, Address(ESP, 0));
    924     __ Bind(&skip_incr);
    925     __ addl(ESP, Immediate(8));
    926   }
    927 
    928   // Final conversion to an integer. Unfortunately this also does not have a
    929   // direct x86 instruction, since NaN should map to 0 and large positive
    930   // values need to be clipped to the extreme value.
    931   __ movl(out, Immediate(kPrimIntMax));
    932   __ cvtsi2ss(t2, out);
    933   __ comiss(t1, t2);
    934   __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
    935   __ movl(out, Immediate(0));  // does not change flags
    936   __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
    937   __ cvttss2si(out, t1);
    938   __ Bind(&done);
    939 }
    940 
    941 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
    942   LocationSummary* locations =
    943       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
    944   InvokeRuntimeCallingConvention calling_convention;
    945   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
    946   locations->SetOut(Location::FpuRegisterLocation(XMM0));
    947 }
    948 
    949 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
    950   LocationSummary* locations = invoke->GetLocations();
    951   DCHECK(locations->WillCall());
    952   DCHECK(invoke->IsInvokeStaticOrDirect());
    953   X86Assembler* assembler = codegen->GetAssembler();
    954 
    955   // We need some place to pass the parameters.
    956   __ subl(ESP, Immediate(16));
    957   __ cfi().AdjustCFAOffset(16);
    958 
    959   // Pass the parameters at the bottom of the stack.
    960   __ movsd(Address(ESP, 0), XMM0);
    961 
    962   // If we have a second parameter, pass it next.
    963   if (invoke->GetNumberOfArguments() == 2) {
    964     __ movsd(Address(ESP, 8), XMM1);
    965   }
    966 
    967   // Now do the actual call.
    968   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
    969 
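           // The entrypoints are native cdecl functions, so a floating-point result comes back
           // in x87 ST(0); fstpl spills it to the stack so it can be reloaded into XMM0.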
    970   // Extract the return value from the FP stack.
    971   __ fstpl(Address(ESP, 0));
    972   __ movsd(XMM0, Address(ESP, 0));
    973 
    974   // And clean up the stack.
    975   __ addl(ESP, Immediate(16));
    976   __ cfi().AdjustCFAOffset(-16);
    977 }
    978 
    979 void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
    980   CreateFPToFPCallLocations(allocator_, invoke);
    981 }
    982 
    983 void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
    984   GenFPToFPCall(invoke, codegen_, kQuickCos);
    985 }
    986 
    987 void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
    988   CreateFPToFPCallLocations(allocator_, invoke);
    989 }
    990 
    991 void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
    992   GenFPToFPCall(invoke, codegen_, kQuickSin);
    993 }
    994 
    995 void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
    996   CreateFPToFPCallLocations(allocator_, invoke);
    997 }
    998 
    999 void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
   1000   GenFPToFPCall(invoke, codegen_, kQuickAcos);
   1001 }
   1002 
   1003 void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
   1004   CreateFPToFPCallLocations(allocator_, invoke);
   1005 }
   1006 
   1007 void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
   1008   GenFPToFPCall(invoke, codegen_, kQuickAsin);
   1009 }
   1010 
   1011 void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
   1012   CreateFPToFPCallLocations(allocator_, invoke);
   1013 }
   1014 
   1015 void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
   1016   GenFPToFPCall(invoke, codegen_, kQuickAtan);
   1017 }
   1018 
   1019 void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
   1020   CreateFPToFPCallLocations(allocator_, invoke);
   1021 }
   1022 
   1023 void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
   1024   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
   1025 }
   1026 
   1027 void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
   1028   CreateFPToFPCallLocations(allocator_, invoke);
   1029 }
   1030 
   1031 void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
   1032   GenFPToFPCall(invoke, codegen_, kQuickCosh);
   1033 }
   1034 
   1035 void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
   1036   CreateFPToFPCallLocations(allocator_, invoke);
   1037 }
   1038 
   1039 void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
   1040   GenFPToFPCall(invoke, codegen_, kQuickExp);
   1041 }
   1042 
   1043 void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
   1044   CreateFPToFPCallLocations(allocator_, invoke);
   1045 }
   1046 
   1047 void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
   1048   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
   1049 }
   1050 
   1051 void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
   1052   CreateFPToFPCallLocations(allocator_, invoke);
   1053 }
   1054 
   1055 void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
   1056   GenFPToFPCall(invoke, codegen_, kQuickLog);
   1057 }
   1058 
   1059 void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
   1060   CreateFPToFPCallLocations(allocator_, invoke);
   1061 }
   1062 
   1063 void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
   1064   GenFPToFPCall(invoke, codegen_, kQuickLog10);
   1065 }
   1066 
   1067 void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
   1068   CreateFPToFPCallLocations(allocator_, invoke);
   1069 }
   1070 
   1071 void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
   1072   GenFPToFPCall(invoke, codegen_, kQuickSinh);
   1073 }
   1074 
   1075 void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
   1076   CreateFPToFPCallLocations(allocator_, invoke);
   1077 }
   1078 
   1079 void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
   1080   GenFPToFPCall(invoke, codegen_, kQuickTan);
   1081 }
   1082 
   1083 void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
   1084   CreateFPToFPCallLocations(allocator_, invoke);
   1085 }
   1086 
   1087 void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
   1088   GenFPToFPCall(invoke, codegen_, kQuickTanh);
   1089 }
   1090 
   1091 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
   1092   LocationSummary* locations =
   1093       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
   1094   InvokeRuntimeCallingConvention calling_convention;
   1095   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   1096   locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
   1097   locations->SetOut(Location::FpuRegisterLocation(XMM0));
   1098 }
   1099 
   1100 void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
   1101   CreateFPFPToFPCallLocations(allocator_, invoke);
   1102 }
   1103 
   1104 void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
   1105   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
   1106 }
   1107 
   1108 void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
   1109   CreateFPFPToFPCallLocations(allocator_, invoke);
   1110 }
   1111 
   1112 void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
   1113   GenFPToFPCall(invoke, codegen_, kQuickPow);
   1114 }
   1115 
   1116 void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
   1117   CreateFPFPToFPCallLocations(allocator_, invoke);
   1118 }
   1119 
   1120 void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
   1121   GenFPToFPCall(invoke, codegen_, kQuickHypot);
   1122 }
   1123 
   1124 void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
   1125   CreateFPFPToFPCallLocations(allocator_, invoke);
   1126 }
   1127 
   1128 void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
   1129   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
   1130 }
   1131 
   1132 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
   1133   // We need at least two of the positions or length to be an integer constant,
   1134   // or else we won't have enough free registers.
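           // x86 has few allocatable core registers, and ESI, EDI and ECX are claimed as
           // fixed temps below, so non-constant positions/length quickly exhaust the pool.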
   1135   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
   1136   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
   1137   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
   1138 
   1139   int num_constants =
   1140       ((src_pos != nullptr) ? 1 : 0)
   1141       + ((dest_pos != nullptr) ? 1 : 0)
   1142       + ((length != nullptr) ? 1 : 0);
   1143 
   1144   if (num_constants < 2) {
   1145     // Not enough free registers.
   1146     return;
   1147   }
   1148 
   1149   // As long as we are checking, we might as well check to see if the src and dest
   1150   // positions are >= 0.
   1151   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
   1152       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
   1153     // We will have to fail anyways.
   1154     return;
   1155   }
   1156 
   1157   // And since we are already checking, check the length too.
   1158   if (length != nullptr) {
   1159     int32_t len = length->GetValue();
   1160     if (len < 0) {
   1161       // Just call as normal.
   1162       return;
   1163     }
   1164   }
   1165 
   1166   // Okay, it is safe to generate inline code.
   1167   LocationSummary* locations =
   1168       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
   1169   // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
   1170   locations->SetInAt(0, Location::RequiresRegister());
   1171   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
   1172   locations->SetInAt(2, Location::RequiresRegister());
   1173   locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
   1174   locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
   1175 
   1176   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
   1177   locations->AddTemp(Location::RegisterLocation(ESI));
   1178   locations->AddTemp(Location::RegisterLocation(EDI));
   1179   locations->AddTemp(Location::RegisterLocation(ECX));
   1180 }
   1181 
   1182 static void CheckPosition(X86Assembler* assembler,
   1183                           Location pos,
   1184                           Register input,
   1185                           Location length,
   1186                           SlowPathCode* slow_path,
   1187                           Register temp,
   1188                           bool length_is_input_length = false) {
   1189   // Where is the length in the Array?
   1190   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
   1191 
   1192   if (pos.IsConstant()) {
   1193     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
   1194     if (pos_const == 0) {
   1195       if (!length_is_input_length) {
   1196         // Check that length(input) >= length.
   1197         if (length.IsConstant()) {
   1198           __ cmpl(Address(input, length_offset),
   1199                   Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
   1200         } else {
   1201           __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
   1202         }
   1203         __ j(kLess, slow_path->GetEntryLabel());
   1204       }
   1205     } else {
   1206       // Check that length(input) >= pos.
   1207       __ movl(temp, Address(input, length_offset));
   1208       __ subl(temp, Immediate(pos_const));
   1209       __ j(kLess, slow_path->GetEntryLabel());
   1210 
   1211       // Check that (length(input) - pos) >= length.
   1212       if (length.IsConstant()) {
   1213         __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
   1214       } else {
   1215         __ cmpl(temp, length.AsRegister<Register>());
   1216       }
   1217       __ j(kLess, slow_path->GetEntryLabel());
   1218     }
   1219   } else if (length_is_input_length) {
   1220     // The only way the copy can succeed is if pos is zero.
   1221     Register pos_reg = pos.AsRegister<Register>();
   1222     __ testl(pos_reg, pos_reg);
   1223     __ j(kNotEqual, slow_path->GetEntryLabel());
   1224   } else {
   1225     // Check that pos >= 0.
   1226     Register pos_reg = pos.AsRegister<Register>();
   1227     __ testl(pos_reg, pos_reg);
   1228     __ j(kLess, slow_path->GetEntryLabel());
   1229 
   1230     // Check that pos <= length(input).
   1231     __ cmpl(Address(input, length_offset), pos_reg);
   1232     __ j(kLess, slow_path->GetEntryLabel());
   1233 
   1234     // Check that (length(input) - pos) >= length.
   1235     __ movl(temp, Address(input, length_offset));
   1236     __ subl(temp, pos_reg);
   1237     if (length.IsConstant()) {
   1238       __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
   1239     } else {
   1240       __ cmpl(temp, length.AsRegister<Register>());
   1241     }
   1242     __ j(kLess, slow_path->GetEntryLabel());
   1243   }
   1244 }
   1245 
   1246 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
   1247   X86Assembler* assembler = GetAssembler();
   1248   LocationSummary* locations = invoke->GetLocations();
   1249 
   1250   Register src = locations->InAt(0).AsRegister<Register>();
   1251   Location srcPos = locations->InAt(1);
   1252   Register dest = locations->InAt(2).AsRegister<Register>();
   1253   Location destPos = locations->InAt(3);
   1254   Location length = locations->InAt(4);
   1255 
   1256   // Temporaries that we need for MOVSW.
   1257   Register src_base = locations->GetTemp(0).AsRegister<Register>();
   1258   DCHECK_EQ(src_base, ESI);
   1259   Register dest_base = locations->GetTemp(1).AsRegister<Register>();
   1260   DCHECK_EQ(dest_base, EDI);
   1261   Register count = locations->GetTemp(2).AsRegister<Register>();
   1262   DCHECK_EQ(count, ECX);
   1263 
   1264   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
   1265   codegen_->AddSlowPath(slow_path);
   1266 
   1267   // Bail out if the source and destination are the same (to handle overlap).
   1268   __ cmpl(src, dest);
   1269   __ j(kEqual, slow_path->GetEntryLabel());
   1270 
   1271   // Bail out if the source is null.
   1272   __ testl(src, src);
   1273   __ j(kEqual, slow_path->GetEntryLabel());
   1274 
   1275   // Bail out if the destination is null.
   1276   __ testl(dest, dest);
   1277   __ j(kEqual, slow_path->GetEntryLabel());
   1278 
   1279   // If the length is negative, bail out.
   1280   // We have already checked in the LocationsBuilder for the constant case.
   1281   if (!length.IsConstant()) {
    1282     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
   1283     __ j(kLess, slow_path->GetEntryLabel());
   1284   }
   1285 
   1286   // We need the count in ECX.
   1287   if (length.IsConstant()) {
   1288     __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
   1289   } else {
   1290     __ movl(count, length.AsRegister<Register>());
   1291   }
   1292 
   1293   // Validity checks: source. Use src_base as a temporary register.
   1294   CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);
   1295 
   1296   // Validity checks: dest. Use src_base as a temporary register.
   1297   CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);
   1298 
   1299   // Okay, everything checks out.  Finally time to do the copy.
   1300   // Check assumption that sizeof(Char) is 2 (used in scaling below).
   1301   const size_t char_size = DataType::Size(DataType::Type::kUint16);
   1302   DCHECK_EQ(char_size, 2u);
   1303 
   1304   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
   1305 
   1306   if (srcPos.IsConstant()) {
   1307     int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
   1308     __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
   1309   } else {
   1310     __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
   1311                               ScaleFactor::TIMES_2, data_offset));
   1312   }
   1313   if (destPos.IsConstant()) {
   1314     int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
   1315 
   1316     __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
   1317   } else {
   1318     __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
   1319                                ScaleFactor::TIMES_2, data_offset));
   1320   }
   1321 
   1322   // Do the move.
   1323   __ rep_movsw();
   1324 
   1325   __ Bind(slow_path->GetExitLabel());
   1326 }
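
         // The fast path above boils down to a single string move (assuming DF is clear, as
         // the calling convention requires): ESI and EDI point at the first source and
         // destination char and ECX holds the element count, so
         //
         //     rep movsw
         //
         // copies ECX 16-bit code units forward. Null arrays, identical source and
         // destination arrays, negative lengths and out-of-range positions were already
         // diverted to the slow path.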
   1327 
   1328 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
   1329   // The inputs plus one temp.
   1330   LocationSummary* locations = new (allocator_) LocationSummary(
   1331       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
   1332   InvokeRuntimeCallingConvention calling_convention;
   1333   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   1334   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   1335   locations->SetOut(Location::RegisterLocation(EAX));
   1336 }
   1337 
   1338 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
   1339   X86Assembler* assembler = GetAssembler();
   1340   LocationSummary* locations = invoke->GetLocations();
   1341 
   1342   // Note that the null check must have been done earlier.
   1343   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1344 
   1345   Register argument = locations->InAt(1).AsRegister<Register>();
   1346   __ testl(argument, argument);
   1347   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
   1348   codegen_->AddSlowPath(slow_path);
   1349   __ j(kEqual, slow_path->GetEntryLabel());
   1350 
   1351   codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
   1352   __ Bind(slow_path->GetExitLabel());
   1353 }
   1354 
   1355 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
   1356   if (kEmitCompilerReadBarrier &&
   1357       !StringEqualsOptimizations(invoke).GetArgumentIsString() &&
   1358       !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) {
   1359     // No support for this odd case (String class is moveable, not in the boot image).
   1360     return;
   1361   }
   1362 
   1363   LocationSummary* locations =
   1364       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   1365   locations->SetInAt(0, Location::RequiresRegister());
   1366   locations->SetInAt(1, Location::RequiresRegister());
   1367 
   1368   // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
   1369   locations->AddTemp(Location::RegisterLocation(ECX));
   1370   locations->AddTemp(Location::RegisterLocation(EDI));
   1371 
    1372   // Set the output; ESI is needed for the repe_cmpsl instruction anyway.
   1373   locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
   1374 }
   1375 
   1376 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
   1377   X86Assembler* assembler = GetAssembler();
   1378   LocationSummary* locations = invoke->GetLocations();
   1379 
   1380   Register str = locations->InAt(0).AsRegister<Register>();
   1381   Register arg = locations->InAt(1).AsRegister<Register>();
   1382   Register ecx = locations->GetTemp(0).AsRegister<Register>();
   1383   Register edi = locations->GetTemp(1).AsRegister<Register>();
   1384   Register esi = locations->Out().AsRegister<Register>();
   1385 
   1386   NearLabel end, return_true, return_false;
   1387 
   1388   // Get offsets of count, value, and class fields within a string object.
   1389   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
   1390   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
   1391   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
   1392 
   1393   // Note that the null check must have been done earlier.
   1394   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1395 
   1396   StringEqualsOptimizations optimizations(invoke);
   1397   if (!optimizations.GetArgumentNotNull()) {
   1398     // Check if input is null, return false if it is.
   1399     __ testl(arg, arg);
   1400     __ j(kEqual, &return_false);
   1401   }
   1402 
   1403   if (!optimizations.GetArgumentIsString()) {
   1404     // Instanceof check for the argument by comparing class fields.
   1405     // All string objects must have the same type since String cannot be subclassed.
   1406     // Receiver must be a string object, so its class field is equal to all strings' class fields.
   1407     // If the argument is a string object, its class field must be equal to receiver's class field.
   1408     __ movl(ecx, Address(str, class_offset));
   1409     __ cmpl(ecx, Address(arg, class_offset));
   1410     __ j(kNotEqual, &return_false);
   1411   }
   1412 
   1413   // Reference equality check, return true if same reference.
   1414   __ cmpl(str, arg);
   1415   __ j(kEqual, &return_true);
   1416 
   1417   // Load length and compression flag of receiver string.
   1418   __ movl(ecx, Address(str, count_offset));
   1419   // Check if lengths and compression flags are equal, return false if they're not.
    1420   // Two identical strings will always have the same compression style since
    1421   // the compression style is decided at allocation time.
   1422   __ cmpl(ecx, Address(arg, count_offset));
   1423   __ j(kNotEqual, &return_false);
   1424   // Return true if strings are empty. Even with string compression `count == 0` means empty.
   1425   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1426                 "Expecting 0=compressed, 1=uncompressed");
   1427   __ jecxz(&return_true);
   1428 
   1429   if (mirror::kUseStringCompression) {
   1430     NearLabel string_uncompressed;
    1431     // Extract the length and branch on whether both strings are compressed or both uncompressed.
    1432     // Strings with differing compression styles were already rejected above.
   1433     __ shrl(ecx, Immediate(1));
   1434     __ j(kCarrySet, &string_uncompressed);
   1435     // Divide string length by 2, rounding up, and continue as if uncompressed.
   1436     __ addl(ecx, Immediate(1));
   1437     __ shrl(ecx, Immediate(1));
   1438     __ Bind(&string_uncompressed);
   1439   }
   1440   // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
   1441   __ leal(esi, Address(str, value_offset));
   1442   __ leal(edi, Address(arg, value_offset));
   1443 
   1444   // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
   1445   // divisible by 2.
   1446   __ addl(ecx, Immediate(1));
   1447   __ shrl(ecx, Immediate(1));
   1448 
   1449   // Assertions that must hold in order to compare strings 2 characters (uncompressed)
   1450   // or 4 characters (compressed) at a time.
   1451   DCHECK_ALIGNED(value_offset, 4);
   1452   static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
   1453 
   1454   // Loop to compare strings two characters at a time starting at the beginning of the string.
   1455   __ repe_cmpsl();
   1456   // If strings are not equal, zero flag will be cleared.
   1457   __ j(kNotEqual, &return_false);
   1458 
   1459   // Return true and exit the function.
   1460   // If loop does not result in returning false, we return true.
   1461   __ Bind(&return_true);
   1462   __ movl(esi, Immediate(1));
   1463   __ jmp(&end);
   1464 
   1465   // Return false and exit the function.
   1466   __ Bind(&return_false);
   1467   __ xorl(esi, esi);
   1468   __ Bind(&end);
   1469 }
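
         // Sketch of the count encoding assumed above (per the static_assert): with string
         // compression enabled, the count field packs the length and the compression flag as
         //
         //     count = (length << 1) | (compressed ? 0 : 1)
         //
         // so equal counts imply equal lengths and equal compression styles, and the
         // `shrl ecx, 1` recovers the character count while moving the flag into CF. The
         // repe_cmpsl loop then compares 4 bytes per iteration, i.e. two uncompressed chars
         // or four compressed chars at a time.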
   1470 
   1471 static void CreateStringIndexOfLocations(HInvoke* invoke,
   1472                                          ArenaAllocator* allocator,
   1473                                          bool start_at_zero) {
   1474   LocationSummary* locations = new (allocator) LocationSummary(invoke,
   1475                                                                LocationSummary::kCallOnSlowPath,
   1476                                                                kIntrinsified);
    1477   // The data needs to be in EDI for scasw, so request that the string is there anyway.
   1478   locations->SetInAt(0, Location::RegisterLocation(EDI));
    1479   // If we look for a constant char, we'll still have to copy it into EAX anyway, so just request
    1480   // the allocator to do that. We can still do the constant check by inspecting the parameter
   1481   // of the instruction explicitly.
   1482   // Note: This works as we don't clobber EAX anywhere.
   1483   locations->SetInAt(1, Location::RegisterLocation(EAX));
   1484   if (!start_at_zero) {
   1485     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
   1486   }
    1487   // As we clobber EDI during execution anyway, also use it as the output.
   1488   locations->SetOut(Location::SameAsFirstInput());
   1489 
   1490   // repne scasw uses ECX as the counter.
   1491   locations->AddTemp(Location::RegisterLocation(ECX));
   1492   // Need another temporary to be able to compute the result.
   1493   locations->AddTemp(Location::RequiresRegister());
   1494   if (mirror::kUseStringCompression) {
   1495     // Need another temporary to be able to save unflagged string length.
   1496     locations->AddTemp(Location::RequiresRegister());
   1497   }
   1498 }
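
         // Note: the fixed registers requested above mirror the implicit operands of
         // REPNE SCASW - AX holds the value being searched for, EDI the current scan
         // address and ECX the remaining count - which is why the inputs are pinned to
         // EAX/EDI and ECX is reserved as a temporary.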
   1499 
   1500 static void GenerateStringIndexOf(HInvoke* invoke,
   1501                                   X86Assembler* assembler,
   1502                                   CodeGeneratorX86* codegen,
   1503                                   bool start_at_zero) {
   1504   LocationSummary* locations = invoke->GetLocations();
   1505 
   1506   // Note that the null check must have been done earlier.
   1507   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1508 
   1509   Register string_obj = locations->InAt(0).AsRegister<Register>();
   1510   Register search_value = locations->InAt(1).AsRegister<Register>();
   1511   Register counter = locations->GetTemp(0).AsRegister<Register>();
   1512   Register string_length = locations->GetTemp(1).AsRegister<Register>();
   1513   Register out = locations->Out().AsRegister<Register>();
   1514   // Only used when string compression feature is on.
   1515   Register string_length_flagged;
   1516 
   1517   // Check our assumptions for registers.
   1518   DCHECK_EQ(string_obj, EDI);
   1519   DCHECK_EQ(search_value, EAX);
   1520   DCHECK_EQ(counter, ECX);
   1521   DCHECK_EQ(out, EDI);
   1522 
   1523   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
   1524   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   1525   SlowPathCode* slow_path = nullptr;
   1526   HInstruction* code_point = invoke->InputAt(1);
   1527   if (code_point->IsIntConstant()) {
   1528     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
    1529         std::numeric_limits<uint16_t>::max()) {
   1530       // Always needs the slow-path. We could directly dispatch to it, but this case should be
   1531       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
   1532       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
   1533       codegen->AddSlowPath(slow_path);
   1534       __ jmp(slow_path->GetEntryLabel());
   1535       __ Bind(slow_path->GetExitLabel());
   1536       return;
   1537     }
   1538   } else if (code_point->GetType() != DataType::Type::kUint16) {
   1539     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
   1540     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
   1541     codegen->AddSlowPath(slow_path);
   1542     __ j(kAbove, slow_path->GetEntryLabel());
   1543   }
   1544 
   1545   // From here down, we know that we are looking for a char that fits in 16 bits.
   1546   // Location of reference to data array within the String object.
   1547   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
   1548   // Location of count within the String object.
   1549   int32_t count_offset = mirror::String::CountOffset().Int32Value();
   1550 
   1551   // Load the count field of the string containing the length and compression flag.
   1552   __ movl(string_length, Address(string_obj, count_offset));
   1553 
   1554   // Do a zero-length check. Even with string compression `count == 0` means empty.
   1555   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1556                 "Expecting 0=compressed, 1=uncompressed");
   1557   // TODO: Support jecxz.
   1558   NearLabel not_found_label;
   1559   __ testl(string_length, string_length);
   1560   __ j(kEqual, &not_found_label);
   1561 
   1562   if (mirror::kUseStringCompression) {
   1563     string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
   1564     __ movl(string_length_flagged, string_length);
   1565     // Extract the length and shift out the least significant bit used as compression flag.
   1566     __ shrl(string_length, Immediate(1));
   1567   }
   1568 
   1569   if (start_at_zero) {
   1570     // Number of chars to scan is the same as the string length.
   1571     __ movl(counter, string_length);
   1572 
   1573     // Move to the start of the string.
   1574     __ addl(string_obj, Immediate(value_offset));
   1575   } else {
   1576     Register start_index = locations->InAt(2).AsRegister<Register>();
   1577 
   1578     // Do a start_index check.
   1579     __ cmpl(start_index, string_length);
   1580     __ j(kGreaterEqual, &not_found_label);
   1581 
    1582     // Ensure we have a start index >= 0.
   1583     __ xorl(counter, counter);
   1584     __ cmpl(start_index, Immediate(0));
   1585     __ cmovl(kGreater, counter, start_index);
   1586 
   1587     if (mirror::kUseStringCompression) {
   1588       NearLabel modify_counter, offset_uncompressed_label;
   1589       __ testl(string_length_flagged, Immediate(1));
   1590       __ j(kNotZero, &offset_uncompressed_label);
   1591       // Move to the start of the string: string_obj + value_offset + start_index.
   1592       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
   1593       __ jmp(&modify_counter);
   1594 
   1595       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
   1596       __ Bind(&offset_uncompressed_label);
   1597       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
   1598 
   1599       // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
   1600       // compare.
   1601       __ Bind(&modify_counter);
   1602     } else {
   1603       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
   1604     }
   1605     __ negl(counter);
   1606     __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
   1607   }
   1608 
   1609   if (mirror::kUseStringCompression) {
   1610     NearLabel uncompressed_string_comparison;
   1611     NearLabel comparison_done;
   1612     __ testl(string_length_flagged, Immediate(1));
   1613     __ j(kNotZero, &uncompressed_string_comparison);
   1614 
   1615     // Check if EAX (search_value) is ASCII.
   1616     __ cmpl(search_value, Immediate(127));
   1617     __ j(kGreater, &not_found_label);
    1618     // Compare byte by byte.
   1619     __ repne_scasb();
   1620     __ jmp(&comparison_done);
   1621 
   1622     // Everything is set up for repne scasw:
   1623     //   * Comparison address in EDI.
   1624     //   * Counter in ECX.
   1625     __ Bind(&uncompressed_string_comparison);
   1626     __ repne_scasw();
   1627     __ Bind(&comparison_done);
   1628   } else {
   1629     __ repne_scasw();
   1630   }
   1631   // Did we find a match?
   1632   __ j(kNotEqual, &not_found_label);
   1633 
   1634   // Yes, we matched.  Compute the index of the result.
   1635   __ subl(string_length, counter);
   1636   __ leal(out, Address(string_length, -1));
   1637 
   1638   NearLabel done;
   1639   __ jmp(&done);
   1640 
   1641   // Failed to match; return -1.
   1642   __ Bind(&not_found_label);
   1643   __ movl(out, Immediate(-1));
   1644 
   1645   // And join up at the end.
   1646   __ Bind(&done);
   1647   if (slow_path != nullptr) {
   1648     __ Bind(slow_path->GetExitLabel());
   1649   }
   1650 }
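
         // Sketch of the result computation above: REPNE SCASW (or SCASB for compressed
         // strings) decrements ECX once per character examined and stops just past the
         // match, so after the scan
         //
         //     index = string_length - ECX - 1
         //
         // which is exactly what the subl/leal pair computes. Since string_length keeps the
         // full length even when a start index is given, the result is always an absolute
         // index into the string.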
   1651 
   1652 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
   1653   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ true);
   1654 }
   1655 
   1656 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
   1657   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true);
   1658 }
   1659 
   1660 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
   1661   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ false);
   1662 }
   1663 
   1664 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
   1665   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false);
   1666 }
   1667 
   1668 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
   1669   LocationSummary* locations = new (allocator_) LocationSummary(
   1670       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
   1671   InvokeRuntimeCallingConvention calling_convention;
   1672   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   1673   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   1674   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
   1675   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
   1676   locations->SetOut(Location::RegisterLocation(EAX));
   1677 }
   1678 
   1679 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
   1680   X86Assembler* assembler = GetAssembler();
   1681   LocationSummary* locations = invoke->GetLocations();
   1682 
   1683   Register byte_array = locations->InAt(0).AsRegister<Register>();
   1684   __ testl(byte_array, byte_array);
   1685   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
   1686   codegen_->AddSlowPath(slow_path);
   1687   __ j(kEqual, slow_path->GetEntryLabel());
   1688 
   1689   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
   1690   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   1691   __ Bind(slow_path->GetExitLabel());
   1692 }
   1693 
   1694 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
   1695   LocationSummary* locations =
   1696       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
   1697   InvokeRuntimeCallingConvention calling_convention;
   1698   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   1699   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   1700   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
   1701   locations->SetOut(Location::RegisterLocation(EAX));
   1702 }
   1703 
   1704 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
   1705   // No need to emit code checking whether `locations->InAt(2)` is a null
   1706   // pointer, as callers of the native method
   1707   //
   1708   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
   1709   //
   1710   // all include a null check on `data` before calling that method.
   1711   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
   1712   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   1713 }
   1714 
   1715 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
   1716   LocationSummary* locations = new (allocator_) LocationSummary(
   1717       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
   1718   InvokeRuntimeCallingConvention calling_convention;
   1719   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   1720   locations->SetOut(Location::RegisterLocation(EAX));
   1721 }
   1722 
   1723 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
   1724   X86Assembler* assembler = GetAssembler();
   1725   LocationSummary* locations = invoke->GetLocations();
   1726 
   1727   Register string_to_copy = locations->InAt(0).AsRegister<Register>();
   1728   __ testl(string_to_copy, string_to_copy);
   1729   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
   1730   codegen_->AddSlowPath(slow_path);
   1731   __ j(kEqual, slow_path->GetEntryLabel());
   1732 
   1733   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
   1734   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   1735   __ Bind(slow_path->GetExitLabel());
   1736 }
   1737 
   1738 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   1739   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
   1740   LocationSummary* locations =
   1741       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   1742   locations->SetInAt(0, Location::RequiresRegister());
   1743   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
   1744   // Place srcEnd in ECX to save a move below.
   1745   locations->SetInAt(2, Location::RegisterLocation(ECX));
   1746   locations->SetInAt(3, Location::RequiresRegister());
   1747   locations->SetInAt(4, Location::RequiresRegister());
   1748 
   1749   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
    1750   // We don't have enough registers to also reserve ECX, so it is handled below.
   1751   locations->AddTemp(Location::RegisterLocation(ESI));
   1752   locations->AddTemp(Location::RegisterLocation(EDI));
   1753 }
   1754 
   1755 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   1756   X86Assembler* assembler = GetAssembler();
   1757   LocationSummary* locations = invoke->GetLocations();
   1758 
   1759   size_t char_component_size = DataType::Size(DataType::Type::kUint16);
   1760   // Location of data in char array buffer.
   1761   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
   1762   // Location of char array data in string.
   1763   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
   1764 
   1765   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
   1766   Register obj = locations->InAt(0).AsRegister<Register>();
   1767   Location srcBegin = locations->InAt(1);
   1768   int srcBegin_value =
   1769     srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
   1770   Register srcEnd = locations->InAt(2).AsRegister<Register>();
   1771   Register dst = locations->InAt(3).AsRegister<Register>();
   1772   Register dstBegin = locations->InAt(4).AsRegister<Register>();
   1773 
   1774   // Check assumption that sizeof(Char) is 2 (used in scaling below).
   1775   const size_t char_size = DataType::Size(DataType::Type::kUint16);
   1776   DCHECK_EQ(char_size, 2u);
   1777 
   1778   // Compute the number of chars (words) to move.
   1779   // Save ECX, since we don't know if it will be used later.
   1780   __ pushl(ECX);
   1781   int stack_adjust = kX86WordSize;
   1782   __ cfi().AdjustCFAOffset(stack_adjust);
   1783   DCHECK_EQ(srcEnd, ECX);
   1784   if (srcBegin.IsConstant()) {
   1785     __ subl(ECX, Immediate(srcBegin_value));
   1786   } else {
   1787     DCHECK(srcBegin.IsRegister());
   1788     __ subl(ECX, srcBegin.AsRegister<Register>());
   1789   }
   1790 
   1791   NearLabel done;
   1792   if (mirror::kUseStringCompression) {
    1793     // Location of the count field in the string.
   1794     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
   1795     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
   1796     DCHECK_EQ(c_char_size, 1u);
   1797     __ pushl(EAX);
   1798     __ cfi().AdjustCFAOffset(stack_adjust);
   1799 
   1800     NearLabel copy_loop, copy_uncompressed;
   1801     __ testl(Address(obj, count_offset), Immediate(1));
   1802     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1803                   "Expecting 0=compressed, 1=uncompressed");
   1804     __ j(kNotZero, &copy_uncompressed);
   1805     // Compute the address of the source string by adding the number of chars from
   1806     // the source beginning to the value offset of a string.
   1807     __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
   1808 
   1809     // Start the loop to copy String's value to Array of Char.
   1810     __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
   1811     __ Bind(&copy_loop);
   1812     __ jecxz(&done);
   1813     // Use EAX temporary (convert byte from ESI to word).
   1814     // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
   1815     __ movzxb(EAX, Address(ESI, 0));
   1816     __ movw(Address(EDI, 0), EAX);
   1817     __ leal(EDI, Address(EDI, char_size));
   1818     __ leal(ESI, Address(ESI, c_char_size));
   1819     // TODO: Add support for LOOP to X86Assembler.
   1820     __ subl(ECX, Immediate(1));
   1821     __ jmp(&copy_loop);
   1822     __ Bind(&copy_uncompressed);
   1823   }
   1824 
   1825   // Do the copy for uncompressed string.
   1826   // Compute the address of the destination buffer.
   1827   __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
   1828   __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
   1829   __ rep_movsw();
   1830 
   1831   __ Bind(&done);
   1832   if (mirror::kUseStringCompression) {
   1833     // Restore EAX.
   1834     __ popl(EAX);
   1835     __ cfi().AdjustCFAOffset(-stack_adjust);
   1836   }
   1837   // Restore ECX.
   1838   __ popl(ECX);
   1839   __ cfi().AdjustCFAOffset(-stack_adjust);
   1840 }
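
         // The two copy paths above, roughly: an uncompressed source is moved directly with
         // REP MOVSW, while a compressed (8-bit) source widens each char on the fly:
         //
         //     loop: jecxz done
         //           movzx eax, byte ptr [esi]   // load one compressed char
         //           mov   word ptr [edi], ax    // store it as a 16-bit char
         //           lea   edi, [edi + 2]
         //           lea   esi, [esi + 1]
         //           sub   ecx, 1
         //           jmp   loop
         //
         // which mirrors the movzxb/movw loop emitted when kUseStringCompression is set.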
   1841 
   1842 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
   1843   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
   1844   Location out_loc = locations->Out();
   1845   // x86 allows unaligned access. We do not have to check the input or use specific instructions
   1846   // to avoid a SIGBUS.
   1847   switch (size) {
   1848     case DataType::Type::kInt8:
   1849       __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
   1850       break;
   1851     case DataType::Type::kInt16:
   1852       __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
   1853       break;
   1854     case DataType::Type::kInt32:
   1855       __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
   1856       break;
   1857     case DataType::Type::kInt64:
   1858       __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
   1859       __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
   1860       break;
   1861     default:
   1862       LOG(FATAL) << "Type not recognized for peek: " << size;
   1863       UNREACHABLE();
   1864   }
   1865 }
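
         // Note: these peek intrinsics read straight from a raw native address (only the low
         // half of the long address is used on 32-bit x86). Byte and short results are
         // sign-extended via movsx, and the 64-bit case is split into two 32-bit loads, so a
         // concurrently updated long is not read atomically here.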
   1866 
   1867 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
   1868   CreateLongToIntLocations(allocator_, invoke);
   1869 }
   1870 
   1871 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
   1872   GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
   1873 }
   1874 
   1875 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
   1876   CreateLongToIntLocations(allocator_, invoke);
   1877 }
   1878 
   1879 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
   1880   GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
   1881 }
   1882 
   1883 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
   1884   CreateLongToLongLocations(allocator_, invoke);
   1885 }
   1886 
   1887 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
   1888   GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
   1889 }
   1890 
   1891 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
   1892   CreateLongToIntLocations(allocator_, invoke);
   1893 }
   1894 
   1895 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
   1896   GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
   1897 }
   1898 
   1899 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator,
   1900                                          DataType::Type size,
   1901                                          HInvoke* invoke) {
   1902   LocationSummary* locations =
   1903       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   1904   locations->SetInAt(0, Location::RequiresRegister());
   1905   HInstruction* value = invoke->InputAt(1);
   1906   if (size == DataType::Type::kInt8) {
   1907     locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
   1908   } else {
   1909     locations->SetInAt(1, Location::RegisterOrConstant(value));
   1910   }
   1911 }
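
         // Note: an 8-bit store on x86-32 can only address AL/CL/DL/BL, so the byte poke
         // pins its value to a byte-addressable register (EDX above) unless it is a constant
         // that can be emitted as an immediate.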
   1912 
   1913 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
   1914   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
   1915   Location value_loc = locations->InAt(1);
   1916   // x86 allows unaligned access. We do not have to check the input or use specific instructions
   1917   // to avoid a SIGBUS.
   1918   switch (size) {
   1919     case DataType::Type::kInt8:
   1920       if (value_loc.IsConstant()) {
   1921         __ movb(Address(address, 0),
   1922                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
   1923       } else {
   1924         __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
   1925       }
   1926       break;
   1927     case DataType::Type::kInt16:
   1928       if (value_loc.IsConstant()) {
   1929         __ movw(Address(address, 0),
   1930                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
   1931       } else {
   1932         __ movw(Address(address, 0), value_loc.AsRegister<Register>());
   1933       }
   1934       break;
   1935     case DataType::Type::kInt32:
   1936       if (value_loc.IsConstant()) {
   1937         __ movl(Address(address, 0),
   1938                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
   1939       } else {
   1940         __ movl(Address(address, 0), value_loc.AsRegister<Register>());
   1941       }
   1942       break;
   1943     case DataType::Type::kInt64:
   1944       if (value_loc.IsConstant()) {
   1945         int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
   1946         __ movl(Address(address, 0), Immediate(Low32Bits(value)));
   1947         __ movl(Address(address, 4), Immediate(High32Bits(value)));
   1948       } else {
   1949         __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
   1950         __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
   1951       }
   1952       break;
   1953     default:
   1954       LOG(FATAL) << "Type not recognized for poke: " << size;
   1955       UNREACHABLE();
   1956   }
   1957 }
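
         // Note: as with the peeks, constants are stored as immediates and the 64-bit poke
         // is emitted as two 32-bit stores, so it is not atomic with respect to concurrent
         // readers of the same address.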
   1958 
   1959 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
   1960   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke);
   1961 }
   1962 
   1963 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
   1964   GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
   1965 }
   1966 
   1967 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
   1968   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke);
   1969 }
   1970 
   1971 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
   1972   GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
   1973 }
   1974 
   1975 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
   1976   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke);
   1977 }
   1978 
   1979 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
   1980   GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
   1981 }
   1982 
   1983 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
   1984   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke);
   1985 }
   1986 
   1987 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
   1988   GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
   1989 }
   1990 
   1991 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
   1992   LocationSummary* locations =
   1993       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   1994   locations->SetOut(Location::RequiresRegister());
   1995 }
   1996 
   1997 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
   1998   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
   1999   GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
   2000 }
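
         // Thread.currentThread() therefore compiles down to a single segment-relative load:
         // ART keeps per-thread data addressable through the FS segment on x86, and
         // Thread::PeerOffset is the offset of the cached java.lang.Thread reference within
         // that per-thread block.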
   2001 
   2002 static void GenUnsafeGet(HInvoke* invoke,
   2003                          DataType::Type type,
   2004                          bool is_volatile,
   2005                          CodeGeneratorX86* codegen) {
   2006   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
   2007   LocationSummary* locations = invoke->GetLocations();
   2008   Location base_loc = locations->InAt(1);
   2009   Register base = base_loc.AsRegister<Register>();
   2010   Location offset_loc = locations->InAt(2);
   2011   Register offset = offset_loc.AsRegisterPairLow<Register>();
   2012   Location output_loc = locations->Out();
   2013 
   2014   switch (type) {
   2015     case DataType::Type::kInt32: {
   2016       Register output = output_loc.AsRegister<Register>();
   2017       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
   2018       break;
   2019     }
   2020 
   2021     case DataType::Type::kReference: {
   2022       Register output = output_loc.AsRegister<Register>();
   2023       if (kEmitCompilerReadBarrier) {
   2024         if (kUseBakerReadBarrier) {
   2025           Address src(base, offset, ScaleFactor::TIMES_1, 0);
   2026           codegen->GenerateReferenceLoadWithBakerReadBarrier(
   2027               invoke, output_loc, base, src, /* needs_null_check */ false);
   2028         } else {
   2029           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
   2030           codegen->GenerateReadBarrierSlow(
   2031               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
   2032         }
   2033       } else {
   2034         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
   2035         __ MaybeUnpoisonHeapReference(output);
   2036       }
   2037       break;
   2038     }
   2039 
   2040     case DataType::Type::kInt64: {
   2041         Register output_lo = output_loc.AsRegisterPairLow<Register>();
   2042         Register output_hi = output_loc.AsRegisterPairHigh<Register>();
   2043         if (is_volatile) {
    2044           // Need to use an XMM register to read atomically.
   2045           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   2046           __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
   2047           __ movd(output_lo, temp);
   2048           __ psrlq(temp, Immediate(32));
   2049           __ movd(output_hi, temp);
   2050         } else {
   2051           __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
   2052           __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
   2053         }
   2054       }
   2055       break;
   2056 
   2057     default:
   2058       LOG(FATAL) << "Unsupported op size " << type;
   2059       UNREACHABLE();
   2060   }
   2061 }
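
         // Sketch of the volatile long read above: a plain pair of 32-bit loads could observe
         // a torn value, so the value is pulled in with a single 8-byte SSE load and then
         // split into the output register pair:
         //
         //     movsd xmm_temp, [base + offset]   // one 8-byte load
         //     movd  output_lo, xmm_temp
         //     psrlq xmm_temp, 32
         //     movd  output_hi, xmm_temp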
   2062 
   2063 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
   2064                                           HInvoke* invoke,
   2065                                           DataType::Type type,
   2066                                           bool is_volatile) {
   2067   bool can_call = kEmitCompilerReadBarrier &&
   2068       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
   2069        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   2070   LocationSummary* locations =
   2071       new (allocator) LocationSummary(invoke,
   2072                                       can_call
   2073                                           ? LocationSummary::kCallOnSlowPath
   2074                                           : LocationSummary::kNoCall,
   2075                                       kIntrinsified);
   2076   if (can_call && kUseBakerReadBarrier) {
   2077     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   2078   }
   2079   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   2080   locations->SetInAt(1, Location::RequiresRegister());
   2081   locations->SetInAt(2, Location::RequiresRegister());
   2082   if (type == DataType::Type::kInt64) {
   2083     if (is_volatile) {
    2084       // Need to use an XMM register to read the volatile value.
   2085       locations->AddTemp(Location::RequiresFpuRegister());
   2086       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2087     } else {
   2088       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   2089     }
   2090   } else {
   2091     locations->SetOut(Location::RequiresRegister(),
   2092                       (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
   2093   }
   2094 }
   2095 
   2096 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
   2097   CreateIntIntIntToIntLocations(
   2098       allocator_, invoke, DataType::Type::kInt32, /* is_volatile */ false);
   2099 }
   2100 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
   2101   CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile */ true);
   2102 }
   2103 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
   2104   CreateIntIntIntToIntLocations(
   2105       allocator_, invoke, DataType::Type::kInt64, /* is_volatile */ false);
   2106 }
   2107 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
   2108   CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile */ true);
   2109 }
   2110 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
   2111   CreateIntIntIntToIntLocations(
   2112       allocator_, invoke, DataType::Type::kReference, /* is_volatile */ false);
   2113 }
   2114 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
   2115   CreateIntIntIntToIntLocations(
   2116       allocator_, invoke, DataType::Type::kReference, /* is_volatile */ true);
   2117 }
   2118 
   2119 
   2120 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
   2121   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_);
   2122 }
   2123 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
   2124   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_);
   2125 }
   2126 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
   2127   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_);
   2128 }
   2129 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
   2130   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_);
   2131 }
   2132 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
   2133   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_);
   2134 }
   2135 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
   2136   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_);
   2137 }
   2138 
   2139 
   2140 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
   2141                                                        DataType::Type type,
   2142                                                        HInvoke* invoke,
   2143                                                        bool is_volatile) {
   2144   LocationSummary* locations =
   2145       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   2146   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   2147   locations->SetInAt(1, Location::RequiresRegister());
   2148   locations->SetInAt(2, Location::RequiresRegister());
   2149   locations->SetInAt(3, Location::RequiresRegister());
   2150   if (type == DataType::Type::kReference) {
   2151     // Need temp registers for card-marking.
   2152     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
   2153     // Ensure the value is in a byte register.
   2154     locations->AddTemp(Location::RegisterLocation(ECX));
   2155   } else if (type == DataType::Type::kInt64 && is_volatile) {
   2156     locations->AddTemp(Location::RequiresFpuRegister());
   2157     locations->AddTemp(Location::RequiresFpuRegister());
   2158   }
   2159 }
   2160 
   2161 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
   2162   CreateIntIntIntIntToVoidPlusTempsLocations(
   2163       allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ false);
   2164 }
   2165 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
   2166   CreateIntIntIntIntToVoidPlusTempsLocations(
   2167       allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ false);
   2168 }
   2169 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
   2170   CreateIntIntIntIntToVoidPlusTempsLocations(
   2171       allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ true);
   2172 }
   2173 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
   2174   CreateIntIntIntIntToVoidPlusTempsLocations(
   2175       allocator_, DataType::Type::kReference, invoke, /* is_volatile */ false);
   2176 }
   2177 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
   2178   CreateIntIntIntIntToVoidPlusTempsLocations(
   2179       allocator_, DataType::Type::kReference, invoke, /* is_volatile */ false);
   2180 }
   2181 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
   2182   CreateIntIntIntIntToVoidPlusTempsLocations(
   2183       allocator_, DataType::Type::kReference, invoke, /* is_volatile */ true);
   2184 }
   2185 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
   2186   CreateIntIntIntIntToVoidPlusTempsLocations(
   2187       allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ false);
   2188 }
   2189 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
   2190   CreateIntIntIntIntToVoidPlusTempsLocations(
   2191       allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ false);
   2192 }
   2193 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
   2194   CreateIntIntIntIntToVoidPlusTempsLocations(
   2195       allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ true);
   2196 }
   2197 
    2198 // We don't need anything special for the ordered variants: they require an AnyStore barrier,
    2199 // which the x86 memory model already provides.
   2200 static void GenUnsafePut(LocationSummary* locations,
   2201                          DataType::Type type,
   2202                          bool is_volatile,
   2203                          CodeGeneratorX86* codegen) {
   2204   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
   2205   Register base = locations->InAt(1).AsRegister<Register>();
   2206   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
   2207   Location value_loc = locations->InAt(3);
   2208 
   2209   if (type == DataType::Type::kInt64) {
   2210     Register value_lo = value_loc.AsRegisterPairLow<Register>();
   2211     Register value_hi = value_loc.AsRegisterPairHigh<Register>();
   2212     if (is_volatile) {
   2213       XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   2214       XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
   2215       __ movd(temp1, value_lo);
   2216       __ movd(temp2, value_hi);
   2217       __ punpckldq(temp1, temp2);
   2218       __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
   2219     } else {
   2220       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
   2221       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
   2222     }
   2223   } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
   2224     Register temp = locations->GetTemp(0).AsRegister<Register>();
   2225     __ movl(temp, value_loc.AsRegister<Register>());
   2226     __ PoisonHeapReference(temp);
   2227     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
   2228   } else {
   2229     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
   2230   }
   2231 
   2232   if (is_volatile) {
   2233     codegen->MemoryFence();
   2234   }
   2235 
   2236   if (type == DataType::Type::kReference) {
   2237     bool value_can_be_null = true;  // TODO: Worth finding out this information?
   2238     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
   2239                         locations->GetTemp(1).AsRegister<Register>(),
   2240                         base,
   2241                         value_loc.AsRegister<Register>(),
   2242                         value_can_be_null);
   2243   }
   2244 }
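
         // Sketch of the volatile long store above: the two halves are packed into one XMM
         // register so that the store is a single 8-byte access:
         //
         //     movd      temp1, value_lo
         //     movd      temp2, value_hi
         //     punpckldq temp1, temp2            // temp1[63:0] = value_hi:value_lo
         //     movsd     [base + offset], temp1
         //
         // and the volatile case is then followed by codegen->MemoryFence() to order the
         // store against subsequent loads.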
   2245 
   2246 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
   2247   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_);
   2248 }
   2249 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
   2250   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_);
   2251 }
   2252 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
   2253   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ true, codegen_);
   2254 }
   2255 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
   2256   GenUnsafePut(
   2257       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_);
   2258 }
   2259 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
   2260   GenUnsafePut(
   2261       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_);
   2262 }
   2263 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
   2264   GenUnsafePut(
   2265       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ true, codegen_);
   2266 }
   2267 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
   2268   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_);
   2269 }
   2270 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
   2271   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_);
   2272 }
   2273 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
   2274   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ true, codegen_);
   2275 }
   2276 
   2277 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
   2278                                        DataType::Type type,
   2279                                        HInvoke* invoke) {
   2280   bool can_call = kEmitCompilerReadBarrier &&
   2281       kUseBakerReadBarrier &&
   2282       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   2283   LocationSummary* locations =
   2284       new (allocator) LocationSummary(invoke,
   2285                                       can_call
   2286                                           ? LocationSummary::kCallOnSlowPath
   2287                                           : LocationSummary::kNoCall,
   2288                                       kIntrinsified);
   2289   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   2290   locations->SetInAt(1, Location::RequiresRegister());
    2291   // Offset is a long, but in 32-bit mode we only need the low word.
   2292   // Can we update the invoke here to remove a TypeConvert to Long?
   2293   locations->SetInAt(2, Location::RequiresRegister());
   2294   // Expected value must be in EAX or EDX:EAX.
   2295   // For long, new value must be in ECX:EBX.
   2296   if (type == DataType::Type::kInt64) {
   2297     locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
   2298     locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
   2299   } else {
   2300     locations->SetInAt(3, Location::RegisterLocation(EAX));
   2301     locations->SetInAt(4, Location::RequiresRegister());
   2302   }
   2303 
   2304   // Force a byte register for the output.
   2305   locations->SetOut(Location::RegisterLocation(EAX));
   2306   if (type == DataType::Type::kReference) {
   2307     // Need temporary registers for card-marking, and possibly for
   2308     // (Baker) read barrier.
   2309     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
   2310     // Need a byte register for marking.
   2311     locations->AddTemp(Location::RegisterLocation(ECX));
   2312   }
   2313 }
   2314 
   2315 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
   2316   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke);
   2317 }
   2318 
   2319 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
   2320   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke);
   2321 }
   2322 
   2323 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
   2324   // The only read barrier implementation supporting the
   2325   // UnsafeCASObject intrinsic is the Baker-style read barriers.
   2326   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
   2327     return;
   2328   }
   2329 
   2330   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke);
   2331 }
   2332 
   2333 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
   2334   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
   2335   LocationSummary* locations = invoke->GetLocations();
   2336 
   2337   Register base = locations->InAt(1).AsRegister<Register>();
   2338   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
   2339   Location out = locations->Out();
   2340   DCHECK_EQ(out.AsRegister<Register>(), EAX);
   2341 
   2342   // The address of the field within the holding object.
   2343   Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
   2344 
   2345   if (type == DataType::Type::kReference) {
   2346     // The only read barrier implementation supporting the
   2347     // UnsafeCASObject intrinsic is the Baker-style read barriers.
   2348     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   2349 
   2350     Location temp1_loc = locations->GetTemp(0);
   2351     Register temp1 = temp1_loc.AsRegister<Register>();
   2352     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
   2353 
   2354     Register expected = locations->InAt(3).AsRegister<Register>();
   2355     // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
   2356     DCHECK_EQ(expected, EAX);
   2357     Register value = locations->InAt(4).AsRegister<Register>();
   2358 
   2359     // Mark card for object assuming new value is stored.
   2360     bool value_can_be_null = true;  // TODO: Worth finding out this information?
   2361     codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
   2362 
   2363     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2364       // Need to make sure the reference stored in the field is a to-space
   2365       // one before attempting the CAS or the CAS could fail incorrectly.
   2366       codegen->GenerateReferenceLoadWithBakerReadBarrier(
   2367           invoke,
   2368           temp1_loc,  // Unused, used only as a "temporary" within the read barrier.
   2369           base,
   2370           field_addr,
   2371           /* needs_null_check */ false,
   2372           /* always_update_field */ true,
   2373           &temp2);
   2374     }
   2375 
   2376     bool base_equals_value = (base == value);
   2377     if (kPoisonHeapReferences) {
   2378       if (base_equals_value) {
   2379         // If `base` and `value` are the same register location, move
   2380         // `value` to a temporary register.  This way, poisoning
   2381         // `value` won't invalidate `base`.
   2382         value = temp1;
   2383         __ movl(value, base);
   2384       }
   2385 
   2386       // Check that the register allocator did not assign the location
   2387       // of `expected` (EAX) to `value` nor to `base`, so that heap
   2388       // poisoning (when enabled) works as intended below.
   2389       // - If `value` were equal to `expected`, both references would
   2390       //   be poisoned twice, meaning they would not be poisoned at
   2391       //   all, as heap poisoning uses address negation.
   2392       // - If `base` were equal to `expected`, poisoning `expected`
   2393       //   would invalidate `base`.
   2394       DCHECK_NE(value, expected);
   2395       DCHECK_NE(base, expected);
   2396 
   2397       __ PoisonHeapReference(expected);
   2398       __ PoisonHeapReference(value);
   2399     }
   2400 
   2401     __ LockCmpxchgl(field_addr, value);
   2402 
   2403     // LOCK CMPXCHG has full barrier semantics, and we don't need
   2404     // scheduling barriers at this time.
   2405 
   2406     // Convert ZF into the Boolean result.
   2407     __ setb(kZero, out.AsRegister<Register>());
   2408     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
   2409 
   2410     // If heap poisoning is enabled, we need to unpoison the values
   2411     // that were poisoned earlier.
   2412     if (kPoisonHeapReferences) {
   2413       if (base_equals_value) {
   2414         // `value` has been moved to a temporary register, no need to
   2415         // unpoison it.
   2416       } else {
   2417         // Ensure `value` is different from `out`, so that unpoisoning
   2418         // the former does not invalidate the latter.
   2419         DCHECK_NE(value, out.AsRegister<Register>());
   2420         __ UnpoisonHeapReference(value);
   2421       }
   2422       // Do not unpoison the reference contained in register
   2423       // `expected`, as it is the same as register `out` (EAX).
   2424     }
   2425   } else {
   2426     if (type == DataType::Type::kInt32) {
   2427       // Ensure the expected value is in EAX (required by the CMPXCHG
   2428       // instruction).
   2429       DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
   2430       __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>());
   2431     } else if (type == DataType::Type::kInt64) {
   2432       // Ensure the expected value is in EAX:EDX and that the new
   2433       // value is in EBX:ECX (required by the CMPXCHG8B instruction).
   2434       DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
   2435       DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
   2436       DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
   2437       DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
   2438       __ LockCmpxchg8b(field_addr);
   2439     } else {
   2440       LOG(FATAL) << "Unexpected CAS type " << type;
   2441     }
   2442 
   2443     // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
   2444     // don't need scheduling barriers at this time.
   2445 
   2446     // Convert ZF into the Boolean result.
   2447     __ setb(kZero, out.AsRegister<Register>());
   2448     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
   2449   }
   2450 }
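         // Illustrative sketch only (not generated code): ignoring read barriers and heap
         // poisoning, the non-reference paths above implement an atomic compare-and-swap that
         // returns a boolean, roughly:
         //   bool Cas32(int32_t* addr, int32_t expected /* in EAX */, int32_t new_value) {
         //     lock cmpxchg [addr], new_value   // compare *addr with EAX; store on match, set ZF
         //     setz out; movzx out              // materialize ZF as 0/1 in the result register
         //     return out;
         //   }
         // The 64-bit path uses LOCK CMPXCHG8B with the expected value in EDX:EAX and the new
         // value in ECX:EBX, as the DCHECKs above require.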
   2451 
   2452 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
   2453   GenCAS(DataType::Type::kInt32, invoke, codegen_);
   2454 }
   2455 
   2456 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
   2457   GenCAS(DataType::Type::kInt64, invoke, codegen_);
   2458 }
   2459 
   2460 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
   2461   // The only read barrier implementation supporting the
    2462   // UnsafeCASObject intrinsic is the Baker-style read barrier.
   2463   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   2464 
   2465   GenCAS(DataType::Type::kReference, invoke, codegen_);
   2466 }
   2467 
   2468 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
   2469   LocationSummary* locations =
   2470       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   2471   locations->SetInAt(0, Location::RequiresRegister());
   2472   locations->SetOut(Location::SameAsFirstInput());
   2473   locations->AddTemp(Location::RequiresRegister());
   2474 }
   2475 
   2476 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
   2477                      X86Assembler* assembler) {
   2478   Immediate imm_shift(shift);
   2479   Immediate imm_mask(mask);
   2480   __ movl(temp, reg);
   2481   __ shrl(reg, imm_shift);
   2482   __ andl(temp, imm_mask);
   2483   __ andl(reg, imm_mask);
   2484   __ shll(temp, imm_shift);
   2485   __ orl(reg, temp);
   2486 }
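         // For reference, SwapBits(reg, temp, shift, mask) computes, in C terms:
         //   reg = ((reg >> shift) & mask) | ((reg & mask) << shift);
         // i.e. it exchanges each mask-selected group of bits with the neighboring group
         // `shift` positions away.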
   2487 
   2488 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
   2489   X86Assembler* assembler = GetAssembler();
   2490   LocationSummary* locations = invoke->GetLocations();
   2491 
   2492   Register reg = locations->InAt(0).AsRegister<Register>();
   2493   Register temp = locations->GetTemp(0).AsRegister<Register>();
   2494 
   2495   /*
    2496    * Use one bswap instruction to reverse the byte order first, then use 3 rounds of
    2497    * bit swapping to reverse the bits of a number x. Using bswap saves instructions
    2498    * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
   2499    * x = bswap x
   2500    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   2501    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   2502    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   2503    */
   2504   __ bswapl(reg);
   2505   SwapBits(reg, temp, 1, 0x55555555, assembler);
   2506   SwapBits(reg, temp, 2, 0x33333333, assembler);
   2507   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
   2508 }
   2509 
   2510 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
   2511   LocationSummary* locations =
   2512       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   2513   locations->SetInAt(0, Location::RequiresRegister());
   2514   locations->SetOut(Location::SameAsFirstInput());
   2515   locations->AddTemp(Location::RequiresRegister());
   2516 }
   2517 
   2518 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
   2519   X86Assembler* assembler = GetAssembler();
   2520   LocationSummary* locations = invoke->GetLocations();
   2521 
   2522   Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
   2523   Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
   2524   Register temp = locations->GetTemp(0).AsRegister<Register>();
   2525 
   2526   // We want to swap high/low, then bswap each one, and then do the same
   2527   // as a 32 bit reverse.
   2528   // Exchange high and low.
   2529   __ movl(temp, reg_low);
   2530   __ movl(reg_low, reg_high);
   2531   __ movl(reg_high, temp);
   2532 
   2533   // bit-reverse low
   2534   __ bswapl(reg_low);
   2535   SwapBits(reg_low, temp, 1, 0x55555555, assembler);
   2536   SwapBits(reg_low, temp, 2, 0x33333333, assembler);
   2537   SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
   2538 
   2539   // bit-reverse high
   2540   __ bswapl(reg_high);
   2541   SwapBits(reg_high, temp, 1, 0x55555555, assembler);
   2542   SwapBits(reg_high, temp, 2, 0x33333333, assembler);
   2543   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
   2544 }
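         // In other words, the 64-bit reversal above computes
         //   result_high = Integer.reverse(input_low);  result_low = Integer.reverse(input_high);
         // which is why the halves of the register pair are exchanged before each 32-bit reversal.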
   2545 
   2546 static void CreateBitCountLocations(
   2547     ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
   2548   if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
   2549     // Do nothing if there is no popcnt support. This results in generating
   2550     // a call for the intrinsic rather than direct code.
   2551     return;
   2552   }
   2553   LocationSummary* locations =
   2554       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   2555   if (is_long) {
   2556     locations->AddTemp(Location::RequiresRegister());
   2557   }
   2558   locations->SetInAt(0, Location::Any());
   2559   locations->SetOut(Location::RequiresRegister());
   2560 }
   2561 
   2562 static void GenBitCount(X86Assembler* assembler,
   2563                         CodeGeneratorX86* codegen,
   2564                         HInvoke* invoke, bool is_long) {
   2565   LocationSummary* locations = invoke->GetLocations();
   2566   Location src = locations->InAt(0);
   2567   Register out = locations->Out().AsRegister<Register>();
   2568 
   2569   if (invoke->InputAt(0)->IsConstant()) {
   2570     // Evaluate this at compile time.
   2571     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
   2572     int32_t result = is_long
   2573         ? POPCOUNT(static_cast<uint64_t>(value))
   2574         : POPCOUNT(static_cast<uint32_t>(value));
   2575     codegen->Load32BitValue(out, result);
   2576     return;
   2577   }
   2578 
   2579   // Handle the non-constant cases.
   2580   if (!is_long) {
   2581     if (src.IsRegister()) {
   2582       __ popcntl(out, src.AsRegister<Register>());
   2583     } else {
   2584       DCHECK(src.IsStackSlot());
   2585       __ popcntl(out, Address(ESP, src.GetStackIndex()));
   2586     }
   2587   } else {
   2588     // The 64-bit case needs to worry about two parts.
   2589     Register temp = locations->GetTemp(0).AsRegister<Register>();
   2590     if (src.IsRegisterPair()) {
   2591       __ popcntl(temp, src.AsRegisterPairLow<Register>());
   2592       __ popcntl(out, src.AsRegisterPairHigh<Register>());
   2593     } else {
   2594       DCHECK(src.IsDoubleStackSlot());
   2595       __ popcntl(temp, Address(ESP, src.GetStackIndex()));
   2596       __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
   2597     }
   2598     __ addl(out, temp);
   2599   }
   2600 }
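         // Note that the 64-bit count above is simply popcount(low_word) + popcount(high_word);
         // for example, Long.bitCount(0x0000000F00000003L) == 4 + 2 == 6.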
   2601 
   2602 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
   2603   CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long */ false);
   2604 }
   2605 
   2606 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
   2607   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
   2608 }
   2609 
   2610 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
   2611   CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long */ true);
   2612 }
   2613 
   2614 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
   2615   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
   2616 }
   2617 
   2618 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
   2619   LocationSummary* locations =
   2620       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   2621   if (is_long) {
   2622     locations->SetInAt(0, Location::RequiresRegister());
   2623   } else {
   2624     locations->SetInAt(0, Location::Any());
   2625   }
   2626   locations->SetOut(Location::RequiresRegister());
   2627 }
   2628 
   2629 static void GenLeadingZeros(X86Assembler* assembler,
   2630                             CodeGeneratorX86* codegen,
   2631                             HInvoke* invoke, bool is_long) {
   2632   LocationSummary* locations = invoke->GetLocations();
   2633   Location src = locations->InAt(0);
   2634   Register out = locations->Out().AsRegister<Register>();
   2635 
   2636   if (invoke->InputAt(0)->IsConstant()) {
   2637     // Evaluate this at compile time.
   2638     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
   2639     if (value == 0) {
   2640       value = is_long ? 64 : 32;
   2641     } else {
   2642       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
   2643     }
   2644     codegen->Load32BitValue(out, value);
   2645     return;
   2646   }
   2647 
   2648   // Handle the non-constant cases.
   2649   if (!is_long) {
   2650     if (src.IsRegister()) {
   2651       __ bsrl(out, src.AsRegister<Register>());
   2652     } else {
   2653       DCHECK(src.IsStackSlot());
   2654       __ bsrl(out, Address(ESP, src.GetStackIndex()));
   2655     }
   2656 
    2657     // BSR sets ZF if the input was zero, in which case the output is undefined.
   2658     NearLabel all_zeroes, done;
   2659     __ j(kEqual, &all_zeroes);
   2660 
   2661     // Correct the result from BSR to get the final CLZ result.
   2662     __ xorl(out, Immediate(31));
   2663     __ jmp(&done);
   2664 
   2665     // Fix the zero case with the expected result.
   2666     __ Bind(&all_zeroes);
   2667     __ movl(out, Immediate(32));
   2668 
   2669     __ Bind(&done);
   2670     return;
   2671   }
   2672 
    2673   // The 64-bit case needs to consider both halves of the register pair.
   2674   DCHECK(src.IsRegisterPair());
   2675   Register src_lo = src.AsRegisterPairLow<Register>();
   2676   Register src_hi = src.AsRegisterPairHigh<Register>();
   2677   NearLabel handle_low, done, all_zeroes;
   2678 
   2679   // Is the high word zero?
   2680   __ testl(src_hi, src_hi);
   2681   __ j(kEqual, &handle_low);
   2682 
   2683   // High word is not zero. We know that the BSR result is defined in this case.
   2684   __ bsrl(out, src_hi);
   2685 
   2686   // Correct the result from BSR to get the final CLZ result.
   2687   __ xorl(out, Immediate(31));
   2688   __ jmp(&done);
   2689 
   2690   // High word was zero.  We have to compute the low word count and add 32.
   2691   __ Bind(&handle_low);
   2692   __ bsrl(out, src_lo);
   2693   __ j(kEqual, &all_zeroes);
   2694 
   2695   // We had a valid result.  Use an XOR to both correct the result and add 32.
   2696   __ xorl(out, Immediate(63));
   2697   __ jmp(&done);
   2698 
   2699   // All zero case.
   2700   __ Bind(&all_zeroes);
   2701   __ movl(out, Immediate(64));
   2702 
   2703   __ Bind(&done);
   2704 }
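         // Summary of the BSR-based computation above (LZCNT is not assumed to be available):
         //   32-bit: clz(x) = (x == 0) ? 32 : bsr(x) ^ 31;          // bsr = index of highest set bit
         //   64-bit: clz(x) = (hi != 0) ? bsr(hi) ^ 31
         //                  : (lo != 0) ? bsr(lo) ^ 63              // == 32 + (bsr(lo) ^ 31)
         //                  : 64;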
   2705 
   2706 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
   2707   CreateLeadingZeroLocations(allocator_, invoke, /* is_long */ false);
   2708 }
   2709 
   2710 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
   2711   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
   2712 }
   2713 
   2714 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
   2715   CreateLeadingZeroLocations(allocator_, invoke, /* is_long */ true);
   2716 }
   2717 
   2718 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
   2719   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
   2720 }
   2721 
   2722 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
   2723   LocationSummary* locations =
   2724       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   2725   if (is_long) {
   2726     locations->SetInAt(0, Location::RequiresRegister());
   2727   } else {
   2728     locations->SetInAt(0, Location::Any());
   2729   }
   2730   locations->SetOut(Location::RequiresRegister());
   2731 }
   2732 
   2733 static void GenTrailingZeros(X86Assembler* assembler,
   2734                              CodeGeneratorX86* codegen,
   2735                              HInvoke* invoke, bool is_long) {
   2736   LocationSummary* locations = invoke->GetLocations();
   2737   Location src = locations->InAt(0);
   2738   Register out = locations->Out().AsRegister<Register>();
   2739 
   2740   if (invoke->InputAt(0)->IsConstant()) {
   2741     // Evaluate this at compile time.
   2742     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
   2743     if (value == 0) {
   2744       value = is_long ? 64 : 32;
   2745     } else {
   2746       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
   2747     }
   2748     codegen->Load32BitValue(out, value);
   2749     return;
   2750   }
   2751 
   2752   // Handle the non-constant cases.
   2753   if (!is_long) {
   2754     if (src.IsRegister()) {
   2755       __ bsfl(out, src.AsRegister<Register>());
   2756     } else {
   2757       DCHECK(src.IsStackSlot());
   2758       __ bsfl(out, Address(ESP, src.GetStackIndex()));
   2759     }
   2760 
    2761     // BSF sets ZF if the input was zero, in which case the output is undefined.
   2762     NearLabel done;
   2763     __ j(kNotEqual, &done);
   2764 
   2765     // Fix the zero case with the expected result.
   2766     __ movl(out, Immediate(32));
   2767 
   2768     __ Bind(&done);
   2769     return;
   2770   }
   2771 
    2772   // The 64-bit case needs to consider both halves of the register pair.
   2773   DCHECK(src.IsRegisterPair());
   2774   Register src_lo = src.AsRegisterPairLow<Register>();
   2775   Register src_hi = src.AsRegisterPairHigh<Register>();
   2776   NearLabel done, all_zeroes;
   2777 
   2778   // If the low word is zero, then ZF will be set.  If not, we have the answer.
   2779   __ bsfl(out, src_lo);
   2780   __ j(kNotEqual, &done);
   2781 
   2782   // Low word was zero.  We have to compute the high word count and add 32.
   2783   __ bsfl(out, src_hi);
   2784   __ j(kEqual, &all_zeroes);
   2785 
   2786   // We had a valid result.  Add 32 to account for the low word being zero.
   2787   __ addl(out, Immediate(32));
   2788   __ jmp(&done);
   2789 
   2790   // All zero case.
   2791   __ Bind(&all_zeroes);
   2792   __ movl(out, Immediate(64));
   2793 
   2794   __ Bind(&done);
   2795 }
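         // Summary of the BSF-based computation above (TZCNT is not assumed to be available):
         //   32-bit: ctz(x) = (x == 0) ? 32 : bsf(x);               // bsf = index of lowest set bit
         //   64-bit: ctz(x) = (lo != 0) ? bsf(lo)
         //                  : (hi != 0) ? bsf(hi) + 32
         //                  : 64;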
   2796 
   2797 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
   2798   CreateTrailingZeroLocations(allocator_, invoke, /* is_long */ false);
   2799 }
   2800 
   2801 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
   2802   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
   2803 }
   2804 
   2805 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
   2806   CreateTrailingZeroLocations(allocator_, invoke, /* is_long */ true);
   2807 }
   2808 
   2809 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
   2810   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
   2811 }
   2812 
   2813 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
   2814   return instruction->InputAt(input0) == instruction->InputAt(input1);
   2815 }
   2816 
   2817 // Compute base address for the System.arraycopy intrinsic in `base`.
   2818 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
   2819                                           DataType::Type type,
   2820                                           const Register& array,
   2821                                           const Location& pos,
   2822                                           const Register& base) {
   2823   // This routine is only used by the SystemArrayCopy intrinsic at the
    2824   // moment. It could be extended to allow types other than DataType::Type::kReference
    2825   // in order to also implement the SystemArrayCopyChar intrinsic.
   2826   DCHECK_EQ(type, DataType::Type::kReference);
   2827   const int32_t element_size = DataType::Size(type);
   2828   const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
   2829   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
   2830 
   2831   if (pos.IsConstant()) {
   2832     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
   2833     __ leal(base, Address(array, element_size * constant + data_offset));
   2834   } else {
   2835     __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
   2836   }
   2837 }
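         // The LEA above computes base = array + data_offset + pos * element_size (element_size
         // is 4 here, since heap references are 32-bit), folding the whole address computation
         // into a single instruction.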
   2838 
   2839 // Compute end source address for the System.arraycopy intrinsic in `end`.
   2840 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
   2841                                          DataType::Type type,
   2842                                          const Location& copy_length,
   2843                                          const Register& base,
   2844                                          const Register& end) {
   2845   // This routine is only used by the SystemArrayCopy intrinsic at the
    2846   // moment. It could be extended to allow types other than DataType::Type::kReference
    2847   // in order to also implement the SystemArrayCopyChar intrinsic.
   2848   DCHECK_EQ(type, DataType::Type::kReference);
   2849   const int32_t element_size = DataType::Size(type);
   2850   const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
   2851 
   2852   if (copy_length.IsConstant()) {
   2853     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
   2854     __ leal(end, Address(base, element_size * constant));
   2855   } else {
   2856     __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
   2857   }
   2858 }
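         // Likewise, end = base + copy_length * element_size, i.e. one past the last source
         // element to copy; the copy loops below compare the running pointer against it.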
   2859 
   2860 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
   2861   // The only read barrier implementation supporting the
    2862   // SystemArrayCopy intrinsic is the Baker-style read barrier.
   2863   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
   2864     return;
   2865   }
   2866 
   2867   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
   2868   if (invoke->GetLocations() != nullptr) {
   2869     // Need a byte register for marking.
   2870     invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
   2871 
   2872     static constexpr size_t kSrc = 0;
   2873     static constexpr size_t kSrcPos = 1;
   2874     static constexpr size_t kDest = 2;
   2875     static constexpr size_t kDestPos = 3;
   2876     static constexpr size_t kLength = 4;
   2877 
   2878     if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
   2879         !invoke->InputAt(kDestPos)->IsIntConstant() &&
   2880         !invoke->InputAt(kLength)->IsIntConstant()) {
   2881       if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
   2882           !IsSameInput(invoke, kSrcPos, kLength) &&
   2883           !IsSameInput(invoke, kDestPos, kLength) &&
   2884           !IsSameInput(invoke, kSrc, kDest)) {
   2885         // Not enough registers, make the length also take a stack slot.
   2886         invoke->GetLocations()->SetInAt(kLength, Location::Any());
   2887       }
   2888     }
   2889   }
   2890 }
   2891 
   2892 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
   2893   // The only read barrier implementation supporting the
    2894   // SystemArrayCopy intrinsic is the Baker-style read barrier.
   2895   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   2896 
   2897   X86Assembler* assembler = GetAssembler();
   2898   LocationSummary* locations = invoke->GetLocations();
   2899 
   2900   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   2901   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   2902   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   2903   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   2904   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
   2905 
   2906   Register src = locations->InAt(0).AsRegister<Register>();
   2907   Location src_pos = locations->InAt(1);
   2908   Register dest = locations->InAt(2).AsRegister<Register>();
   2909   Location dest_pos = locations->InAt(3);
   2910   Location length_arg = locations->InAt(4);
   2911   Location length = length_arg;
   2912   Location temp1_loc = locations->GetTemp(0);
   2913   Register temp1 = temp1_loc.AsRegister<Register>();
   2914   Location temp2_loc = locations->GetTemp(1);
   2915   Register temp2 = temp2_loc.AsRegister<Register>();
   2916 
   2917   SlowPathCode* intrinsic_slow_path =
   2918       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
   2919   codegen_->AddSlowPath(intrinsic_slow_path);
   2920 
   2921   NearLabel conditions_on_positions_validated;
   2922   SystemArrayCopyOptimizations optimizations(invoke);
   2923 
   2924   // If source and destination are the same, we go to slow path if we need to do
   2925   // forward copying.
   2926   if (src_pos.IsConstant()) {
   2927     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
   2928     if (dest_pos.IsConstant()) {
   2929       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
   2930       if (optimizations.GetDestinationIsSource()) {
   2931         // Checked when building locations.
   2932         DCHECK_GE(src_pos_constant, dest_pos_constant);
   2933       } else if (src_pos_constant < dest_pos_constant) {
   2934         __ cmpl(src, dest);
   2935         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   2936       }
   2937     } else {
   2938       if (!optimizations.GetDestinationIsSource()) {
   2939         __ cmpl(src, dest);
   2940         __ j(kNotEqual, &conditions_on_positions_validated);
   2941       }
   2942       __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
   2943       __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
   2944     }
   2945   } else {
   2946     if (!optimizations.GetDestinationIsSource()) {
   2947       __ cmpl(src, dest);
   2948       __ j(kNotEqual, &conditions_on_positions_validated);
   2949     }
   2950     if (dest_pos.IsConstant()) {
   2951       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
   2952       __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
   2953       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
   2954     } else {
   2955       __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
   2956       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
   2957     }
   2958   }
   2959 
   2960   __ Bind(&conditions_on_positions_validated);
   2961 
   2962   if (!optimizations.GetSourceIsNotNull()) {
   2963     // Bail out if the source is null.
   2964     __ testl(src, src);
   2965     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   2966   }
   2967 
   2968   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
   2969     // Bail out if the destination is null.
   2970     __ testl(dest, dest);
   2971     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   2972   }
   2973 
   2974   Location temp3_loc = locations->GetTemp(2);
   2975   Register temp3 = temp3_loc.AsRegister<Register>();
   2976   if (length.IsStackSlot()) {
   2977     __ movl(temp3, Address(ESP, length.GetStackIndex()));
   2978     length = Location::RegisterLocation(temp3);
   2979   }
   2980 
   2981   // If the length is negative, bail out.
   2982   // We have already checked in the LocationsBuilder for the constant case.
   2983   if (!length.IsConstant() &&
   2984       !optimizations.GetCountIsSourceLength() &&
   2985       !optimizations.GetCountIsDestinationLength()) {
   2986     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
   2987     __ j(kLess, intrinsic_slow_path->GetEntryLabel());
   2988   }
   2989 
   2990   // Validity checks: source.
   2991   CheckPosition(assembler,
   2992                 src_pos,
   2993                 src,
   2994                 length,
   2995                 intrinsic_slow_path,
   2996                 temp1,
   2997                 optimizations.GetCountIsSourceLength());
   2998 
   2999   // Validity checks: dest.
   3000   CheckPosition(assembler,
   3001                 dest_pos,
   3002                 dest,
   3003                 length,
   3004                 intrinsic_slow_path,
   3005                 temp1,
   3006                 optimizations.GetCountIsDestinationLength());
   3007 
   3008   if (!optimizations.GetDoesNotNeedTypeCheck()) {
   3009     // Check whether all elements of the source array are assignable to the component
    3010     // type of the destination array. We do two checks: either the classes are the same,
    3011     // or the destination is Object[]. If neither check succeeds, we go to the
   3012     // slow path.
   3013 
   3014     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
   3015       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   3016         // /* HeapReference<Class> */ temp1 = src->klass_
   3017         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3018             invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
    3019         // Bail out if the source is not a non-primitive array.
   3020         // /* HeapReference<Class> */ temp1 = temp1->component_type_
   3021         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3022             invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
   3023         __ testl(temp1, temp1);
   3024         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   3025         // If heap poisoning is enabled, `temp1` has been unpoisoned
    3026         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
   3027       } else {
   3028         // /* HeapReference<Class> */ temp1 = src->klass_
   3029         __ movl(temp1, Address(src, class_offset));
   3030         __ MaybeUnpoisonHeapReference(temp1);
    3031         // Bail out if the source is not a non-primitive array.
   3032         // /* HeapReference<Class> */ temp1 = temp1->component_type_
   3033         __ movl(temp1, Address(temp1, component_offset));
   3034         __ testl(temp1, temp1);
   3035         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   3036         __ MaybeUnpoisonHeapReference(temp1);
   3037       }
   3038       __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
   3039       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3040     }
   3041 
   3042     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   3043       if (length.Equals(Location::RegisterLocation(temp3))) {
   3044         // When Baker read barriers are enabled, register `temp3`,
   3045         // which in the present case contains the `length` parameter,
   3046         // will be overwritten below.  Make the `length` location
   3047         // reference the original stack location; it will be moved
   3048         // back to `temp3` later if necessary.
   3049         DCHECK(length_arg.IsStackSlot());
   3050         length = length_arg;
   3051       }
   3052 
   3053       // /* HeapReference<Class> */ temp1 = dest->klass_
   3054       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3055           invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
   3056 
   3057       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
    3058         // Bail out if the destination is not a non-primitive array.
   3059         //
   3060         // Register `temp1` is not trashed by the read barrier emitted
   3061         // by GenerateFieldLoadWithBakerReadBarrier below, as that
   3062         // method produces a call to a ReadBarrierMarkRegX entry point,
   3063         // which saves all potentially live registers, including
    3064         // temporaries such as `temp1`.
   3065         // /* HeapReference<Class> */ temp2 = temp1->component_type_
   3066         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3067             invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
   3068         __ testl(temp2, temp2);
   3069         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   3070         // If heap poisoning is enabled, `temp2` has been unpoisoned
    3071         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
   3072         __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
   3073         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3074       }
   3075 
   3076       // For the same reason given earlier, `temp1` is not trashed by the
   3077       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
   3078       // /* HeapReference<Class> */ temp2 = src->klass_
   3079       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3080           invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
   3081       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
   3082       __ cmpl(temp1, temp2);
   3083 
   3084       if (optimizations.GetDestinationIsTypedObjectArray()) {
   3085         NearLabel do_copy;
   3086         __ j(kEqual, &do_copy);
   3087         // /* HeapReference<Class> */ temp1 = temp1->component_type_
   3088         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3089             invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
   3090         // We do not need to emit a read barrier for the following
   3091         // heap reference load, as `temp1` is only used in a
   3092         // comparison with null below, and this reference is not
   3093         // kept afterwards.
   3094         __ cmpl(Address(temp1, super_offset), Immediate(0));
   3095         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3096         __ Bind(&do_copy);
   3097       } else {
   3098         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3099       }
   3100     } else {
   3101       // Non read barrier code.
   3102 
   3103       // /* HeapReference<Class> */ temp1 = dest->klass_
   3104       __ movl(temp1, Address(dest, class_offset));
   3105       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
   3106         __ MaybeUnpoisonHeapReference(temp1);
    3107         // Bail out if the destination is not a non-primitive array.
   3108         // /* HeapReference<Class> */ temp2 = temp1->component_type_
   3109         __ movl(temp2, Address(temp1, component_offset));
   3110         __ testl(temp2, temp2);
   3111         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   3112         __ MaybeUnpoisonHeapReference(temp2);
   3113         __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
   3114         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3115         // Re-poison the heap reference to make the compare instruction below
   3116         // compare two poisoned references.
   3117         __ PoisonHeapReference(temp1);
   3118       }
   3119 
   3120       // Note: if heap poisoning is on, we are comparing two poisoned references here.
   3121       __ cmpl(temp1, Address(src, class_offset));
   3122 
   3123       if (optimizations.GetDestinationIsTypedObjectArray()) {
   3124         NearLabel do_copy;
   3125         __ j(kEqual, &do_copy);
   3126         __ MaybeUnpoisonHeapReference(temp1);
   3127         // /* HeapReference<Class> */ temp1 = temp1->component_type_
   3128         __ movl(temp1, Address(temp1, component_offset));
   3129         __ MaybeUnpoisonHeapReference(temp1);
   3130         __ cmpl(Address(temp1, super_offset), Immediate(0));
   3131         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3132         __ Bind(&do_copy);
   3133       } else {
   3134         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3135       }
   3136     }
   3137   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
   3138     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    3139     // Bail out if the source is not a non-primitive array.
   3140     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   3141       // /* HeapReference<Class> */ temp1 = src->klass_
   3142       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3143           invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
   3144       // /* HeapReference<Class> */ temp1 = temp1->component_type_
   3145       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3146           invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
   3147       __ testl(temp1, temp1);
   3148       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   3149       // If heap poisoning is enabled, `temp1` has been unpoisoned
    3150       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
   3151     } else {
   3152       // /* HeapReference<Class> */ temp1 = src->klass_
   3153       __ movl(temp1, Address(src, class_offset));
   3154       __ MaybeUnpoisonHeapReference(temp1);
   3155       // /* HeapReference<Class> */ temp1 = temp1->component_type_
   3156       __ movl(temp1, Address(temp1, component_offset));
   3157       __ testl(temp1, temp1);
   3158       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   3159       __ MaybeUnpoisonHeapReference(temp1);
   3160     }
   3161     __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
   3162     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3163   }
   3164 
   3165   const DataType::Type type = DataType::Type::kReference;
   3166   const int32_t element_size = DataType::Size(type);
   3167 
   3168   // Compute the base source address in `temp1`.
   3169   GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
   3170 
   3171   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   3172     // If it is needed (in the case of the fast-path loop), the base
   3173     // destination address is computed later, as `temp2` is used for
   3174     // intermediate computations.
   3175 
   3176     // Compute the end source address in `temp3`.
   3177     if (length.IsStackSlot()) {
   3178       // Location `length` is again pointing at a stack slot, as
    3179       // register `temp3` (which previously held the length parameter)
    3180       // has been overwritten; restore it now.
   3181       DCHECK(length.Equals(length_arg));
   3182       __ movl(temp3, Address(ESP, length.GetStackIndex()));
   3183       length = Location::RegisterLocation(temp3);
   3184     }
   3185     GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
   3186 
   3187     // SystemArrayCopy implementation for Baker read barriers (see
   3188     // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
   3189     //
   3190     //   if (src_ptr != end_ptr) {
    3191     //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
   3192     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   3193     //     bool is_gray = (rb_state == ReadBarrier::GrayState());
   3194     //     if (is_gray) {
   3195     //       // Slow-path copy.
   3196     //       for (size_t i = 0; i != length; ++i) {
   3197     //         dest_array[dest_pos + i] =
   3198     //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
   3199     //       }
   3200     //     } else {
   3201     //       // Fast-path copy.
   3202     //       do {
   3203     //         *dest_ptr++ = *src_ptr++;
   3204     //       } while (src_ptr != end_ptr)
   3205     //     }
   3206     //   }
   3207 
   3208     NearLabel loop, done;
   3209 
   3210     // Don't enter copy loop if `length == 0`.
   3211     __ cmpl(temp1, temp3);
   3212     __ j(kEqual, &done);
   3213 
   3214     // Given the numeric representation, it's enough to check the low bit of the rb_state.
   3215     static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
   3216     static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   3217     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
   3218     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
   3219     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
   3220 
   3221     // if (rb_state == ReadBarrier::GrayState())
   3222     //   goto slow_path;
   3223     // At this point, just do the "if" and make sure that flags are preserved until the branch.
   3224     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
   3225 
   3226     // Load fence to prevent load-load reordering.
   3227     // Note that this is a no-op, thanks to the x86 memory model.
   3228     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   3229 
   3230     // Slow path used to copy array when `src` is gray.
   3231     SlowPathCode* read_barrier_slow_path =
   3232         new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
   3233     codegen_->AddSlowPath(read_barrier_slow_path);
   3234 
   3235     // We have done the "if" of the gray bit check above, now branch based on the flags.
   3236     __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
   3237 
   3238     // Fast-path copy.
   3239     // Compute the base destination address in `temp2`.
   3240     GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
   3241     // Iterate over the arrays and do a raw copy of the objects. We don't need to
   3242     // poison/unpoison.
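             // Each iteration moves one 32-bit reference memory-to-memory by pushing the source
             // word and popping it into the destination, which avoids needing an extra
             // general-purpose register; the CFA offset is adjusted around the push/pop. The
             // non-read-barrier path further below uses the same loop shape.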
   3243     __ Bind(&loop);
   3244     __ pushl(Address(temp1, 0));
   3245     __ cfi().AdjustCFAOffset(4);
   3246     __ popl(Address(temp2, 0));
   3247     __ cfi().AdjustCFAOffset(-4);
   3248     __ addl(temp1, Immediate(element_size));
   3249     __ addl(temp2, Immediate(element_size));
   3250     __ cmpl(temp1, temp3);
   3251     __ j(kNotEqual, &loop);
   3252 
   3253     __ Bind(read_barrier_slow_path->GetExitLabel());
   3254     __ Bind(&done);
   3255   } else {
   3256     // Non read barrier code.
   3257     // Compute the base destination address in `temp2`.
   3258     GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
   3259     // Compute the end source address in `temp3`.
   3260     GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
   3261     // Iterate over the arrays and do a raw copy of the objects. We don't need to
   3262     // poison/unpoison.
   3263     NearLabel loop, done;
   3264     __ cmpl(temp1, temp3);
   3265     __ j(kEqual, &done);
   3266     __ Bind(&loop);
   3267     __ pushl(Address(temp1, 0));
   3268     __ cfi().AdjustCFAOffset(4);
   3269     __ popl(Address(temp2, 0));
   3270     __ cfi().AdjustCFAOffset(-4);
   3271     __ addl(temp1, Immediate(element_size));
   3272     __ addl(temp2, Immediate(element_size));
   3273     __ cmpl(temp1, temp3);
   3274     __ j(kNotEqual, &loop);
   3275     __ Bind(&done);
   3276   }
   3277 
   3278   // We only need one card marking on the destination array.
   3279   codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false);
   3280 
   3281   __ Bind(intrinsic_slow_path->GetExitLabel());
   3282 }
   3283 
   3284 void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
   3285   InvokeRuntimeCallingConvention calling_convention;
   3286   IntrinsicVisitor::ComputeIntegerValueOfLocations(
   3287       invoke,
   3288       codegen_,
   3289       Location::RegisterLocation(EAX),
   3290       Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   3291 }
   3292 
   3293 void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
   3294   IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
   3295   LocationSummary* locations = invoke->GetLocations();
   3296   X86Assembler* assembler = GetAssembler();
   3297 
   3298   Register out = locations->Out().AsRegister<Register>();
   3299   InvokeRuntimeCallingConvention calling_convention;
   3300   if (invoke->InputAt(0)->IsConstant()) {
   3301     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
   3302     if (value >= info.low && value <= info.high) {
   3303       // Just embed the j.l.Integer in the code.
   3304       ScopedObjectAccess soa(Thread::Current());
   3305       mirror::Object* boxed = info.cache->Get(value + (-info.low));
   3306       DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
   3307       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
   3308       __ movl(out, Immediate(address));
   3309     } else {
   3310       // Allocate and initialize a new j.l.Integer.
   3311       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
   3312       // JIT object table.
   3313       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
   3314       __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
   3315       codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
   3316       CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   3317       __ movl(Address(out, info.value_offset), Immediate(value));
   3318     }
   3319   } else {
   3320     Register in = locations->InAt(0).AsRegister<Register>();
   3321     // Check bounds of our cache.
   3322     __ leal(out, Address(in, -info.low));
   3323     __ cmpl(out, Immediate(info.high - info.low + 1));
   3324     NearLabel allocate, done;
   3325     __ j(kAboveEqual, &allocate);
   3326     // If the value is within the bounds, load the j.l.Integer directly from the array.
   3327     uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
   3328     uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
   3329     __ movl(out, Address(out, TIMES_4, data_offset + address));
   3330     __ MaybeUnpoisonHeapReference(out);
   3331     __ jmp(&done);
   3332     __ Bind(&allocate);
   3333     // Otherwise allocate and initialize a new j.l.Integer.
   3334     address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
   3335     __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
   3336     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
   3337     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   3338     __ movl(Address(out, info.value_offset), in);
   3339     __ Bind(&done);
   3340   }
   3341 }
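         // For illustration, the sequence above implements the Integer.valueOf cache contract
         // roughly as:
         //   if (info.low <= v && v <= info.high)
         //     return boot_image_integer_cache[v - info.low];   // preallocated boxed value
         //   // otherwise allocate a new java.lang.Integer through the runtime and store `v`
         //   // into its value field (info.value_offset).
         // Constant inputs are resolved at compile time; non-constant inputs use the bounds
         // check and cache array load emitted above.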
   3342 
   3343 void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
   3344   LocationSummary* locations =
   3345       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   3346   locations->SetOut(Location::RequiresRegister());
   3347 }
   3348 
   3349 void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
   3350   X86Assembler* assembler = GetAssembler();
   3351   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
   3352   Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
   3353   NearLabel done;
   3354   __ fs()->movl(out, address);
   3355   __ testl(out, out);
   3356   __ j(kEqual, &done);
   3357   __ fs()->movl(address, Immediate(0));
   3358   codegen_->MemoryFence();
   3359   __ Bind(&done);
   3360 }
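         // The sequence above implements Thread.interrupted()-style read-and-clear semantics
         // on the per-thread flag addressed through the fs: segment, roughly:
         //   bool was_interrupted = self->interrupted;
         //   if (was_interrupted) { self->interrupted = 0; MemoryFence(); }
         //   return was_interrupted;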
   3361 
   3362 void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
   3363   LocationSummary* locations =
   3364       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
   3365   locations->SetInAt(0, Location::Any());
   3366 }
   3367 
   3368 void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
   3369 
   3370 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
   3371 UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
   3372 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
   3373 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
   3374 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
   3375 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
   3376 UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
   3377 UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)
   3378 
   3379 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
   3380 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
   3381 UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
   3382 UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
   3383 UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
   3384 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppend);
   3385 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
   3386 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);
   3387 
   3388 // 1.8.
   3389 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
   3390 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
   3391 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
   3392 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
   3393 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)
   3394 
   3395 UNREACHABLE_INTRINSICS(X86)
   3396 
   3397 #undef __
   3398 
   3399 }  // namespace x86
   3400 }  // namespace art
   3401