      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "intrinsics_x86.h"
     18 
     19 #include <limits>
     20 
     21 #include "arch/x86/instruction_set_features_x86.h"
     22 #include "art_method.h"
     23 #include "base/bit_utils.h"
     24 #include "code_generator_x86.h"
     25 #include "entrypoints/quick/quick_entrypoints.h"
     26 #include "intrinsics.h"
     27 #include "intrinsics_utils.h"
     28 #include "lock_word.h"
     29 #include "mirror/array-inl.h"
     30 #include "mirror/object_array-inl.h"
     31 #include "mirror/reference.h"
     32 #include "mirror/string.h"
     33 #include "scoped_thread_state_change-inl.h"
     34 #include "thread-current-inl.h"
     35 #include "utils/x86/assembler_x86.h"
     36 #include "utils/x86/constants_x86.h"
     37 
     38 namespace art {
     39 
     40 namespace x86 {
     41 
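// Canonical quiet-NaN bit patterns for double and float. GenMinMaxFP below materializes
// these, either from the constant area or via pushes onto the stack, when Math.min/max
// must return NaN.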
     42 static constexpr int kDoubleNaNHigh = 0x7FF80000;
     43 static constexpr int kDoubleNaNLow = 0x00000000;
     44 static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
     45 static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
     46 
     47 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
     48   : arena_(codegen->GetGraph()->GetArena()),
     49     codegen_(codegen) {
     50 }
     51 
     52 
     53 X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
     54   return down_cast<X86Assembler*>(codegen_->GetAssembler());
     55 }
     56 
     57 ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
     58   return codegen_->GetGraph()->GetArena();
     59 }
     60 
     61 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
     62   Dispatch(invoke);
     63   LocationSummary* res = invoke->GetLocations();
     64   if (res == nullptr) {
     65     return false;
     66   }
     67   return res->Intrinsified();
     68 }
     69 
     70 static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
     71   InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
     72   IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
     73 }
     74 
     75 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
     76 
     77 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
     78 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
     79 
     80 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
     81 class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
     82  public:
     83   explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
     84       : SlowPathCode(instruction) {
     85     DCHECK(kEmitCompilerReadBarrier);
     86     DCHECK(kUseBakerReadBarrier);
     87   }
     88 
     89   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     90     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     91     LocationSummary* locations = instruction_->GetLocations();
     92     DCHECK(locations->CanCall());
     93     DCHECK(instruction_->IsInvokeStaticOrDirect())
     94         << "Unexpected instruction in read barrier arraycopy slow path: "
     95         << instruction_->DebugName();
     96     DCHECK(instruction_->GetLocations()->Intrinsified());
     97     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
     98 
     99     int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
    100     uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
    101 
    102     Register src = locations->InAt(0).AsRegister<Register>();
    103     Location src_pos = locations->InAt(1);
    104     Register dest = locations->InAt(2).AsRegister<Register>();
    105     Location dest_pos = locations->InAt(3);
    106     Location length = locations->InAt(4);
    107     Location temp1_loc = locations->GetTemp(0);
    108     Register temp1 = temp1_loc.AsRegister<Register>();
    109     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
    110     Register temp3 = locations->GetTemp(2).AsRegister<Register>();
    111 
    112     __ Bind(GetEntryLabel());
     113     // In this code path, registers `temp1`, `temp2`, and `temp3` are not
     114     // used (respectively) for the base source address, the base destination
     115     // address, and the end source address, as they are in other
     116     // SystemArrayCopy intrinsic code paths. Instead, they are used
     117     // (respectively) for:
    118     // - the loop index (`i`);
    119     // - the source index (`src_index`) and the loaded (source)
    120     //   reference (`value`); and
    121     // - the destination index (`dest_index`).
    122 
    123     // i = 0
    124     __ xorl(temp1, temp1);
    125     NearLabel loop;
    126     __ Bind(&loop);
    127     // value = src_array[i + src_pos]
    128     if (src_pos.IsConstant()) {
    129       int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    130       int32_t adjusted_offset = offset + constant * element_size;
    131       __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
    132     } else {
    133       __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
    134       __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
    135     }
    136     __ MaybeUnpoisonHeapReference(temp2);
    137     // TODO: Inline the mark bit check before calling the runtime?
    138     // value = ReadBarrier::Mark(value)
    139     // No need to save live registers; it's taken care of by the
    140     // entrypoint. Also, there is no need to update the stack mask,
    141     // as this runtime call will not trigger a garbage collection.
    142     // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
    143     // explanations.)
    144     DCHECK_NE(temp2, ESP);
    145     DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
    146     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
    147     // This runtime call does not require a stack map.
    148     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    149     __ MaybePoisonHeapReference(temp2);
    150     // dest_array[i + dest_pos] = value
    151     if (dest_pos.IsConstant()) {
    152       int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    153       int32_t adjusted_offset = offset + constant * element_size;
    154       __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
    155     } else {
    156       __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
    157       __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
    158     }
    159     // ++i
    160     __ addl(temp1, Immediate(1));
    161     // if (i != length) goto loop
    162     x86_codegen->GenerateIntCompare(temp1_loc, length);
    163     __ j(kNotEqual, &loop);
    164     __ jmp(GetExitLabel());
    165   }
    166 
    167   const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }
    168 
    169  private:
    170   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
    171 };
    172 
    173 #undef __
    174 
    175 #define __ assembler->
    176 
    177 static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
    178   LocationSummary* locations = new (arena) LocationSummary(invoke,
    179                                                            LocationSummary::kNoCall,
    180                                                            kIntrinsified);
    181   locations->SetInAt(0, Location::RequiresFpuRegister());
    182   locations->SetOut(Location::RequiresRegister());
    183   if (is64bit) {
    184     locations->AddTemp(Location::RequiresFpuRegister());
    185   }
    186 }
    187 
    188 static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
    189   LocationSummary* locations = new (arena) LocationSummary(invoke,
    190                                                            LocationSummary::kNoCall,
    191                                                            kIntrinsified);
    192   locations->SetInAt(0, Location::RequiresRegister());
    193   locations->SetOut(Location::RequiresFpuRegister());
    194   if (is64bit) {
    195     locations->AddTemp(Location::RequiresFpuRegister());
    196     locations->AddTemp(Location::RequiresFpuRegister());
    197   }
    198 }
    199 
    200 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
    201   Location input = locations->InAt(0);
    202   Location output = locations->Out();
    203   if (is64bit) {
    204     // Need to use the temporary.
    205     XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    206     __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    207     __ movd(output.AsRegisterPairLow<Register>(), temp);
    208     __ psrlq(temp, Immediate(32));
    209     __ movd(output.AsRegisterPairHigh<Register>(), temp);
    210   } else {
    211     __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
    212   }
    213 }
    214 
    215 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
    216   Location input = locations->InAt(0);
    217   Location output = locations->Out();
    218   if (is64bit) {
    219     // Need to use the temporary.
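    // The two movd's below place the low and high halves into the low doublewords of temp1
    // and temp2; punpckldq then interleaves those low doublewords, so temp1 = {lo, hi} and
    // can be copied to the output as a single 64-bit value.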
    220     XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    221     XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    222     __ movd(temp1, input.AsRegisterPairLow<Register>());
    223     __ movd(temp2, input.AsRegisterPairHigh<Register>());
    224     __ punpckldq(temp1, temp2);
    225     __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
    226   } else {
    227     __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
    228   }
    229 }
    230 
    231 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
    232   CreateFPToIntLocations(arena_, invoke, /* is64bit */ true);
    233 }
    234 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
    235   CreateIntToFPLocations(arena_, invoke, /* is64bit */ true);
    236 }
    237 
    238 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
    239   MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
    240 }
    241 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
    242   MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
    243 }
    244 
    245 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
    246   CreateFPToIntLocations(arena_, invoke, /* is64bit */ false);
    247 }
    248 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
    249   CreateIntToFPLocations(arena_, invoke, /* is64bit */ false);
    250 }
    251 
    252 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
    253   MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
    254 }
    255 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
    256   MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
    257 }
    258 
    259 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
    260   LocationSummary* locations = new (arena) LocationSummary(invoke,
    261                                                            LocationSummary::kNoCall,
    262                                                            kIntrinsified);
    263   locations->SetInAt(0, Location::RequiresRegister());
    264   locations->SetOut(Location::SameAsFirstInput());
    265 }
    266 
    267 static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
    268   LocationSummary* locations = new (arena) LocationSummary(invoke,
    269                                                            LocationSummary::kNoCall,
    270                                                            kIntrinsified);
    271   locations->SetInAt(0, Location::RequiresRegister());
    272   locations->SetOut(Location::RequiresRegister());
    273 }
    274 
    275 static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
    276   LocationSummary* locations = new (arena) LocationSummary(invoke,
    277                                                            LocationSummary::kNoCall,
    278                                                            kIntrinsified);
    279   locations->SetInAt(0, Location::RequiresRegister());
    280   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
    281 }
    282 
    283 static void GenReverseBytes(LocationSummary* locations,
    284                             Primitive::Type size,
    285                             X86Assembler* assembler) {
    286   Register out = locations->Out().AsRegister<Register>();
    287 
    288   switch (size) {
    289     case Primitive::kPrimShort:
    290       // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
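      // For example, 0x00001234 becomes 0x34120000 after the bswap, and the arithmetic
      // shift brings it back down to 0x00003412, re-sign-extending the reversed short.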
    291       __ bswapl(out);
    292       __ sarl(out, Immediate(16));
    293       break;
    294     case Primitive::kPrimInt:
    295       __ bswapl(out);
    296       break;
    297     default:
    298       LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
    299       UNREACHABLE();
    300   }
    301 }
    302 
    303 void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
    304   CreateIntToIntLocations(arena_, invoke);
    305 }
    306 
    307 void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
    308   GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
    309 }
    310 
    311 void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
    312   CreateLongToLongLocations(arena_, invoke);
    313 }
    314 
    315 void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
    316   LocationSummary* locations = invoke->GetLocations();
    317   Location input = locations->InAt(0);
    318   Register input_lo = input.AsRegisterPairLow<Register>();
    319   Register input_hi = input.AsRegisterPairHigh<Register>();
    320   Location output = locations->Out();
    321   Register output_lo = output.AsRegisterPairLow<Register>();
    322   Register output_hi = output.AsRegisterPairHigh<Register>();
    323 
    324   X86Assembler* assembler = GetAssembler();
    325   // Assign the inputs to the outputs, mixing low/high.
    326   __ movl(output_lo, input_hi);
    327   __ movl(output_hi, input_lo);
    328   __ bswapl(output_lo);
    329   __ bswapl(output_hi);
    330 }
    331 
    332 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
    333   CreateIntToIntLocations(arena_, invoke);
    334 }
    335 
    336 void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
    337   GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
    338 }
    339 
    340 
    341 // TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
    342 //       need is 64b.
    343 
    344 static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
    345   // TODO: Enable memory operations when the assembler supports them.
    346   LocationSummary* locations = new (arena) LocationSummary(invoke,
    347                                                            LocationSummary::kNoCall,
    348                                                            kIntrinsified);
    349   locations->SetInAt(0, Location::RequiresFpuRegister());
    350   locations->SetOut(Location::SameAsFirstInput());
    351   HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
    352   DCHECK(static_or_direct != nullptr);
    353   if (static_or_direct->HasSpecialInput() &&
    354       invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
     355     // We need addressability for the constant area.
    356     locations->SetInAt(1, Location::RequiresRegister());
    357     // We need a temporary to hold the constant.
    358     locations->AddTemp(Location::RequiresFpuRegister());
    359   }
    360 }
    361 
    362 static void MathAbsFP(HInvoke* invoke,
    363                       bool is64bit,
    364                       X86Assembler* assembler,
    365                       CodeGeneratorX86* codegen) {
    366   LocationSummary* locations = invoke->GetLocations();
    367   Location output = locations->Out();
    368 
    369   DCHECK(output.IsFpuRegister());
    370   if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    371     HX86ComputeBaseMethodAddress* method_address =
    372         invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    373     DCHECK(locations->InAt(1).IsRegister());
    374     // We also have a constant area pointer.
    375     Register constant_area = locations->InAt(1).AsRegister<Register>();
    376     XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    377     if (is64bit) {
    378       __ movsd(temp, codegen->LiteralInt64Address(
    379           INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area));
    380       __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
    381     } else {
    382       __ movss(temp, codegen->LiteralInt32Address(
    383           INT32_C(0x7FFFFFFF), method_address, constant_area));
    384       __ andps(output.AsFpuRegister<XmmRegister>(), temp);
    385     }
    386   } else {
    387     // Create the right constant on an aligned stack.
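    // The pushes below build the sign-clearing mask in memory at (ESP): 0x7FFFFFFFFFFFFFFF
    // for double (high word pushed first) and 0x7FFFFFFF for float. The leading subl keeps
    // the total adjustment at 16 bytes, matching the addl that restores ESP afterwards.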
    388     if (is64bit) {
    389       __ subl(ESP, Immediate(8));
    390       __ pushl(Immediate(0x7FFFFFFF));
    391       __ pushl(Immediate(0xFFFFFFFF));
    392       __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    393     } else {
    394       __ subl(ESP, Immediate(12));
    395       __ pushl(Immediate(0x7FFFFFFF));
    396       __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    397     }
    398     __ addl(ESP, Immediate(16));
    399   }
    400 }
    401 
    402 void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
    403   CreateFloatToFloat(arena_, invoke);
    404 }
    405 
    406 void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
    407   MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_);
    408 }
    409 
    410 void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
    411   CreateFloatToFloat(arena_, invoke);
    412 }
    413 
    414 void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
    415   MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_);
    416 }
    417 
    418 static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
    419   LocationSummary* locations = new (arena) LocationSummary(invoke,
    420                                                            LocationSummary::kNoCall,
    421                                                            kIntrinsified);
    422   locations->SetInAt(0, Location::RegisterLocation(EAX));
    423   locations->SetOut(Location::SameAsFirstInput());
    424   locations->AddTemp(Location::RegisterLocation(EDX));
    425 }
    426 
    427 static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
    428   Location output = locations->Out();
    429   Register out = output.AsRegister<Register>();
    430   DCHECK_EQ(out, EAX);
    431   Register temp = locations->GetTemp(0).AsRegister<Register>();
    432   DCHECK_EQ(temp, EDX);
    433 
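  // abs(x) is computed branch-free as (x ^ (x >> 31)) - (x >> 31). For example, with
  // EAX = -5: cdq sets EDX = 0xFFFFFFFF, the xor gives EAX = 4, and subtracting EDX (-1)
  // yields 5.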
    434   // Sign extend EAX into EDX.
    435   __ cdq();
    436 
    437   // XOR EAX with sign.
    438   __ xorl(EAX, EDX);
    439 
    440   // Subtract out sign to correct.
    441   __ subl(EAX, EDX);
    442 
    443   // The result is in EAX.
    444 }
    445 
    446 static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
    447   LocationSummary* locations = new (arena) LocationSummary(invoke,
    448                                                            LocationSummary::kNoCall,
    449                                                            kIntrinsified);
    450   locations->SetInAt(0, Location::RequiresRegister());
    451   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
    452   locations->AddTemp(Location::RequiresRegister());
    453 }
    454 
    455 static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
    456   Location input = locations->InAt(0);
    457   Register input_lo = input.AsRegisterPairLow<Register>();
    458   Register input_hi = input.AsRegisterPairHigh<Register>();
    459   Location output = locations->Out();
    460   Register output_lo = output.AsRegisterPairLow<Register>();
    461   Register output_hi = output.AsRegisterPairHigh<Register>();
    462   Register temp = locations->GetTemp(0).AsRegister<Register>();
    463 
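  // Same branch-free idiom as GenAbsInteger, extended to 64 bits: sign = input >> 63
  // (replicated into both words), abs = (input ^ sign) - sign, with sbbl propagating the
  // borrow from the low word into the high word.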
    464   // Compute the sign into the temporary.
    465   __ movl(temp, input_hi);
    466   __ sarl(temp, Immediate(31));
    467 
    468   // Store the sign into the output.
    469   __ movl(output_lo, temp);
    470   __ movl(output_hi, temp);
    471 
    472   // XOR the input to the output.
    473   __ xorl(output_lo, input_lo);
    474   __ xorl(output_hi, input_hi);
    475 
    476   // Subtract the sign.
    477   __ subl(output_lo, temp);
    478   __ sbbl(output_hi, temp);
    479 }
    480 
    481 void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
    482   CreateAbsIntLocation(arena_, invoke);
    483 }
    484 
    485 void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
    486   GenAbsInteger(invoke->GetLocations(), GetAssembler());
    487 }
    488 
    489 void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
    490   CreateAbsLongLocation(arena_, invoke);
    491 }
    492 
    493 void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
    494   GenAbsLong(invoke->GetLocations(), GetAssembler());
    495 }
    496 
    497 static void GenMinMaxFP(HInvoke* invoke,
    498                         bool is_min,
    499                         bool is_double,
    500                         X86Assembler* assembler,
    501                         CodeGeneratorX86* codegen) {
    502   LocationSummary* locations = invoke->GetLocations();
    503   Location op1_loc = locations->InAt(0);
    504   Location op2_loc = locations->InAt(1);
    505   Location out_loc = locations->Out();
    506   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
    507 
    508   // Shortcut for same input locations.
    509   if (op1_loc.Equals(op2_loc)) {
    510     DCHECK(out_loc.Equals(op1_loc));
    511     return;
    512   }
    513 
    514   //  (out := op1)
    515   //  out <=? op2
     516   //  if NaN jmp NaN_label
    517   //  if out is min jmp done
    518   //  if op2 is min jmp op2_label
    519   //  handle -0/+0
    520   //  jmp done
     521   // NaN_label:
    522   //  out := NaN
    523   // op2_label:
    524   //  out := op2
    525   // done:
    526   //
    527   // This removes one jmp, but needs to copy one input (op1) to out.
    528   //
    529   // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
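  // Note: ucomisd/ucomiss set the parity flag when either operand is NaN (unordered), which
  // is what the kParityEven branch below keys on. The or/and of the raw bits handles the
  // -0.0/+0.0 case: min(-0.0, +0.0) must be -0.0 and max(-0.0, +0.0) must be +0.0.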
    530 
    531   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
    532 
    533   NearLabel nan, done, op2_label;
    534   if (is_double) {
    535     __ ucomisd(out, op2);
    536   } else {
    537     __ ucomiss(out, op2);
    538   }
    539 
    540   __ j(Condition::kParityEven, &nan);
    541 
    542   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
    543   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
    544 
    545   // Handle 0.0/-0.0.
    546   if (is_min) {
    547     if (is_double) {
    548       __ orpd(out, op2);
    549     } else {
    550       __ orps(out, op2);
    551     }
    552   } else {
    553     if (is_double) {
    554       __ andpd(out, op2);
    555     } else {
    556       __ andps(out, op2);
    557     }
    558   }
    559   __ jmp(&done);
    560 
    561   // NaN handling.
    562   __ Bind(&nan);
    563   // Do we have a constant area pointer?
    564   if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
    565     HX86ComputeBaseMethodAddress* method_address =
    566         invoke->InputAt(2)->AsX86ComputeBaseMethodAddress();
    567     DCHECK(locations->InAt(2).IsRegister());
    568     Register constant_area = locations->InAt(2).AsRegister<Register>();
    569     if (is_double) {
    570       __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area));
    571     } else {
    572       __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area));
    573     }
    574   } else {
    575     if (is_double) {
    576       __ pushl(Immediate(kDoubleNaNHigh));
    577       __ pushl(Immediate(kDoubleNaNLow));
    578       __ movsd(out, Address(ESP, 0));
    579       __ addl(ESP, Immediate(8));
    580     } else {
    581       __ pushl(Immediate(kFloatNaN));
    582       __ movss(out, Address(ESP, 0));
    583       __ addl(ESP, Immediate(4));
    584     }
    585   }
    586   __ jmp(&done);
    587 
    588   // out := op2;
    589   __ Bind(&op2_label);
    590   if (is_double) {
    591     __ movsd(out, op2);
    592   } else {
    593     __ movss(out, op2);
    594   }
    595 
    596   // Done.
    597   __ Bind(&done);
    598 }
    599 
    600 static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
    601   LocationSummary* locations = new (arena) LocationSummary(invoke,
    602                                                            LocationSummary::kNoCall,
    603                                                            kIntrinsified);
    604   locations->SetInAt(0, Location::RequiresFpuRegister());
    605   locations->SetInAt(1, Location::RequiresFpuRegister());
     606   // The following is sub-optimal, but it is all we can do for now. It would also be fine
     607   // to accept the second input as the output (we could simply swap the inputs).
    608   locations->SetOut(Location::SameAsFirstInput());
    609   HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
    610   DCHECK(static_or_direct != nullptr);
    611   if (static_or_direct->HasSpecialInput() &&
    612       invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    613     locations->SetInAt(2, Location::RequiresRegister());
    614   }
    615 }
    616 
    617 void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
    618   CreateFPFPToFPLocations(arena_, invoke);
    619 }
    620 
    621 void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
    622   GenMinMaxFP(invoke,
    623               /* is_min */ true,
    624               /* is_double */ true,
    625               GetAssembler(),
    626               codegen_);
    627 }
    628 
    629 void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
    630   CreateFPFPToFPLocations(arena_, invoke);
    631 }
    632 
    633 void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
    634   GenMinMaxFP(invoke,
    635               /* is_min */ true,
    636               /* is_double */ false,
    637               GetAssembler(),
    638               codegen_);
    639 }
    640 
    641 void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
    642   CreateFPFPToFPLocations(arena_, invoke);
    643 }
    644 
    645 void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
    646   GenMinMaxFP(invoke,
    647               /* is_min */ false,
    648               /* is_double */ true,
    649               GetAssembler(),
    650               codegen_);
    651 }
    652 
    653 void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
    654   CreateFPFPToFPLocations(arena_, invoke);
    655 }
    656 
    657 void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
    658   GenMinMaxFP(invoke,
    659               /* is_min */ false,
    660               /* is_double */ false,
    661               GetAssembler(),
    662               codegen_);
    663 }
    664 
    665 static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
    666                       X86Assembler* assembler) {
    667   Location op1_loc = locations->InAt(0);
    668   Location op2_loc = locations->InAt(1);
    669 
    670   // Shortcut for same input locations.
    671   if (op1_loc.Equals(op2_loc)) {
    672     // Can return immediately, as op1_loc == out_loc.
    673     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    674     //       a copy here.
    675     DCHECK(locations->Out().Equals(op1_loc));
    676     return;
    677   }
    678 
    679   if (is_long) {
    680     // Need to perform a subtract to get the sign right.
    681     // op1 is already in the same location as the output.
    682     Location output = locations->Out();
    683     Register output_lo = output.AsRegisterPairLow<Register>();
    684     Register output_hi = output.AsRegisterPairHigh<Register>();
    685 
    686     Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
    687     Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
    688 
    689     // Spare register to compute the subtraction to set condition code.
    690     Register temp = locations->GetTemp(0).AsRegister<Register>();
    691 
    692     // Subtract off op2_low.
    693     __ movl(temp, output_lo);
    694     __ subl(temp, op2_lo);
    695 
     696     // Now use the same temporary and the borrow to finish the subtraction of op2_hi.
    697     __ movl(temp, output_hi);
    698     __ sbbl(temp, op2_hi);
    699 
    700     // Now the condition code is correct.
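    // For min, keep the current value (op1) only when op1 < op2, i.e. conditionally move
    // op2 in on greater-or-equal; for max, the condition is inverted.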
    701     Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
    702     __ cmovl(cond, output_lo, op2_lo);
    703     __ cmovl(cond, output_hi, op2_hi);
    704   } else {
    705     Register out = locations->Out().AsRegister<Register>();
    706     Register op2 = op2_loc.AsRegister<Register>();
    707 
    708     //  (out := op1)
    709     //  out <=? op2
    710     //  if out is min jmp done
    711     //  out := op2
    712     // done:
    713 
    714     __ cmpl(out, op2);
    715     Condition cond = is_min ? Condition::kGreater : Condition::kLess;
    716     __ cmovl(cond, out, op2);
    717   }
    718 }
    719 
    720 static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
    721   LocationSummary* locations = new (arena) LocationSummary(invoke,
    722                                                            LocationSummary::kNoCall,
    723                                                            kIntrinsified);
    724   locations->SetInAt(0, Location::RequiresRegister());
    725   locations->SetInAt(1, Location::RequiresRegister());
    726   locations->SetOut(Location::SameAsFirstInput());
    727 }
    728 
    729 static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
    730   LocationSummary* locations = new (arena) LocationSummary(invoke,
    731                                                            LocationSummary::kNoCall,
    732                                                            kIntrinsified);
    733   locations->SetInAt(0, Location::RequiresRegister());
    734   locations->SetInAt(1, Location::RequiresRegister());
    735   locations->SetOut(Location::SameAsFirstInput());
    736   // Register to use to perform a long subtract to set cc.
    737   locations->AddTemp(Location::RequiresRegister());
    738 }
    739 
    740 void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
    741   CreateIntIntToIntLocations(arena_, invoke);
    742 }
    743 
    744 void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
    745   GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
    746 }
    747 
    748 void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
    749   CreateLongLongToLongLocations(arena_, invoke);
    750 }
    751 
    752 void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
    753   GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
    754 }
    755 
    756 void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
    757   CreateIntIntToIntLocations(arena_, invoke);
    758 }
    759 
    760 void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
    761   GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
    762 }
    763 
    764 void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
    765   CreateLongLongToLongLocations(arena_, invoke);
    766 }
    767 
    768 void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
    769   GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
    770 }
    771 
    772 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
    773   LocationSummary* locations = new (arena) LocationSummary(invoke,
    774                                                            LocationSummary::kNoCall,
    775                                                            kIntrinsified);
    776   locations->SetInAt(0, Location::RequiresFpuRegister());
    777   locations->SetOut(Location::RequiresFpuRegister());
    778 }
    779 
    780 void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
    781   CreateFPToFPLocations(arena_, invoke);
    782 }
    783 
    784 void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
    785   LocationSummary* locations = invoke->GetLocations();
    786   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    787   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    788 
    789   GetAssembler()->sqrtsd(out, in);
    790 }
    791 
    792 static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
    793   MoveArguments(invoke, codegen);
    794 
    795   DCHECK(invoke->IsInvokeStaticOrDirect());
    796   codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
    797                                       Location::RegisterLocation(EAX));
    798 
    799   // Copy the result back to the expected output.
    800   Location out = invoke->GetLocations()->Out();
    801   if (out.IsValid()) {
    802     DCHECK(out.IsRegister());
    803     codegen->MoveFromReturnRegister(out, invoke->GetType());
    804   }
    805 }
    806 
    807 static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
    808                                       HInvoke* invoke,
    809                                       CodeGeneratorX86* codegen) {
    810   // Do we have instruction support?
    811   if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    812     CreateFPToFPLocations(arena, invoke);
    813     return;
    814   }
    815 
    816   // We have to fall back to a call to the intrinsic.
    817   LocationSummary* locations = new (arena) LocationSummary(invoke,
    818                                                            LocationSummary::kCallOnMainOnly);
    819   InvokeRuntimeCallingConvention calling_convention;
    820   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
    821   locations->SetOut(Location::FpuRegisterLocation(XMM0));
    822   // Needs to be EAX for the invoke.
    823   locations->AddTemp(Location::RegisterLocation(EAX));
    824 }
    825 
    826 static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
    827                                    HInvoke* invoke,
    828                                    X86Assembler* assembler,
    829                                    int round_mode) {
    830   LocationSummary* locations = invoke->GetLocations();
    831   if (locations->WillCall()) {
    832     InvokeOutOfLineIntrinsic(codegen, invoke);
    833   } else {
    834     XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    835     XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    836     __ roundsd(out, in, Immediate(round_mode));
    837   }
    838 }
    839 
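// The immediate passed to roundsd selects the SSE4.1 rounding mode: 0 rounds to nearest
// (ties to even), 1 rounds toward negative infinity (floor) and 2 rounds toward positive
// infinity (ceil).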
    840 void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
    841   CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
    842 }
    843 
    844 void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
    845   GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
    846 }
    847 
    848 void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
    849   CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
    850 }
    851 
    852 void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
    853   GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
    854 }
    855 
    856 void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
    857   CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
    858 }
    859 
    860 void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
    861   GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
    862 }
    863 
    864 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
    865   // Do we have instruction support?
    866   if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    867     HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
    868     DCHECK(static_or_direct != nullptr);
    869     LocationSummary* locations = new (arena_) LocationSummary(invoke,
    870                                                               LocationSummary::kNoCall,
    871                                                               kIntrinsified);
    872     locations->SetInAt(0, Location::RequiresFpuRegister());
    873     if (static_or_direct->HasSpecialInput() &&
    874         invoke->InputAt(
    875             static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    876       locations->SetInAt(1, Location::RequiresRegister());
    877     }
    878     locations->SetOut(Location::RequiresRegister());
    879     locations->AddTemp(Location::RequiresFpuRegister());
    880     locations->AddTemp(Location::RequiresFpuRegister());
    881     return;
    882   }
    883 
    884   // We have to fall back to a call to the intrinsic.
    885   LocationSummary* locations = new (arena_) LocationSummary(invoke,
    886                                                             LocationSummary::kCallOnMainOnly);
    887   InvokeRuntimeCallingConvention calling_convention;
    888   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
    889   locations->SetOut(Location::RegisterLocation(EAX));
    890   // Needs to be EAX for the invoke.
    891   locations->AddTemp(Location::RegisterLocation(EAX));
    892 }
    893 
    894 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
    895   LocationSummary* locations = invoke->GetLocations();
    896   if (locations->WillCall()) {  // TODO: can we reach this?
    897     InvokeOutOfLineIntrinsic(codegen_, invoke);
    898     return;
    899   }
    900 
    901   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    902   XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    903   XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    904   Register out = locations->Out().AsRegister<Register>();
    905   NearLabel skip_incr, done;
    906   X86Assembler* assembler = GetAssembler();
    907 
    908   // Since no direct x86 rounding instruction matches the required semantics,
    909   // this intrinsic is implemented as follows:
    910   //  result = floor(in);
    911   //  if (in - result >= 0.5f)
    912   //    result = result + 1.0f;
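  // For example, for in = 2.5f the floor is 2.0f and 2.5f - 2.0f >= 0.5f, so the result
  // becomes 3.0f before the final conversion to an integer below.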
    913   __ movss(t2, in);
    914   __ roundss(t1, in, Immediate(1));
    915   __ subss(t2, t1);
    916   if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    917     // Direct constant area available.
    918     HX86ComputeBaseMethodAddress* method_address =
    919         invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    920     Register constant_area = locations->InAt(1).AsRegister<Register>();
    921     __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
    922                                                 method_address,
    923                                                 constant_area));
    924     __ j(kBelow, &skip_incr);
    925     __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
    926                                                method_address,
    927                                                constant_area));
    928     __ Bind(&skip_incr);
    929   } else {
    930     // No constant area: go through stack.
    931     __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
    932     __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
    933     __ comiss(t2, Address(ESP, 4));
    934     __ j(kBelow, &skip_incr);
    935     __ addss(t1, Address(ESP, 0));
    936     __ Bind(&skip_incr);
    937     __ addl(ESP, Immediate(8));
    938   }
    939 
    940   // Final conversion to an integer. Unfortunately this also does not have a
    941   // direct x86 instruction, since NaN should map to 0 and large positive
    942   // values need to be clipped to the extreme value.
    943   __ movl(out, Immediate(kPrimIntMax));
    944   __ cvtsi2ss(t2, out);
    945   __ comiss(t1, t2);
    946   __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
    947   __ movl(out, Immediate(0));  // does not change flags
    948   __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
    949   __ cvttss2si(out, t1);
    950   __ Bind(&done);
    951 }
    952 
    953 static void CreateFPToFPCallLocations(ArenaAllocator* arena,
    954                                       HInvoke* invoke) {
    955   LocationSummary* locations = new (arena) LocationSummary(invoke,
    956                                                            LocationSummary::kCallOnMainOnly,
    957                                                            kIntrinsified);
    958   InvokeRuntimeCallingConvention calling_convention;
    959   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
    960   locations->SetOut(Location::FpuRegisterLocation(XMM0));
    961 }
    962 
    963 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
    964   LocationSummary* locations = invoke->GetLocations();
    965   DCHECK(locations->WillCall());
    966   DCHECK(invoke->IsInvokeStaticOrDirect());
    967   X86Assembler* assembler = codegen->GetAssembler();
    968 
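  // These entrypoints take their FP arguments on the stack and return the result on the
  // x87 FP stack, which is why the value is spilled with fstpl and reloaded into XMM0 below.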
    969   // We need some place to pass the parameters.
    970   __ subl(ESP, Immediate(16));
    971   __ cfi().AdjustCFAOffset(16);
    972 
    973   // Pass the parameters at the bottom of the stack.
    974   __ movsd(Address(ESP, 0), XMM0);
    975 
    976   // If we have a second parameter, pass it next.
    977   if (invoke->GetNumberOfArguments() == 2) {
    978     __ movsd(Address(ESP, 8), XMM1);
    979   }
    980 
    981   // Now do the actual call.
    982   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
    983 
    984   // Extract the return value from the FP stack.
    985   __ fstpl(Address(ESP, 0));
    986   __ movsd(XMM0, Address(ESP, 0));
    987 
    988   // And clean up the stack.
    989   __ addl(ESP, Immediate(16));
    990   __ cfi().AdjustCFAOffset(-16);
    991 }
    992 
    993 void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
    994   CreateFPToFPCallLocations(arena_, invoke);
    995 }
    996 
    997 void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
    998   GenFPToFPCall(invoke, codegen_, kQuickCos);
    999 }
   1000 
   1001 void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
   1002   CreateFPToFPCallLocations(arena_, invoke);
   1003 }
   1004 
   1005 void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
   1006   GenFPToFPCall(invoke, codegen_, kQuickSin);
   1007 }
   1008 
   1009 void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
   1010   CreateFPToFPCallLocations(arena_, invoke);
   1011 }
   1012 
   1013 void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
   1014   GenFPToFPCall(invoke, codegen_, kQuickAcos);
   1015 }
   1016 
   1017 void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
   1018   CreateFPToFPCallLocations(arena_, invoke);
   1019 }
   1020 
   1021 void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
   1022   GenFPToFPCall(invoke, codegen_, kQuickAsin);
   1023 }
   1024 
   1025 void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
   1026   CreateFPToFPCallLocations(arena_, invoke);
   1027 }
   1028 
   1029 void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
   1030   GenFPToFPCall(invoke, codegen_, kQuickAtan);
   1031 }
   1032 
   1033 void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
   1034   CreateFPToFPCallLocations(arena_, invoke);
   1035 }
   1036 
   1037 void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
   1038   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
   1039 }
   1040 
   1041 void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
   1042   CreateFPToFPCallLocations(arena_, invoke);
   1043 }
   1044 
   1045 void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
   1046   GenFPToFPCall(invoke, codegen_, kQuickCosh);
   1047 }
   1048 
   1049 void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
   1050   CreateFPToFPCallLocations(arena_, invoke);
   1051 }
   1052 
   1053 void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
   1054   GenFPToFPCall(invoke, codegen_, kQuickExp);
   1055 }
   1056 
   1057 void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
   1058   CreateFPToFPCallLocations(arena_, invoke);
   1059 }
   1060 
   1061 void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
   1062   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
   1063 }
   1064 
   1065 void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
   1066   CreateFPToFPCallLocations(arena_, invoke);
   1067 }
   1068 
   1069 void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
   1070   GenFPToFPCall(invoke, codegen_, kQuickLog);
   1071 }
   1072 
   1073 void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
   1074   CreateFPToFPCallLocations(arena_, invoke);
   1075 }
   1076 
   1077 void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
   1078   GenFPToFPCall(invoke, codegen_, kQuickLog10);
   1079 }
   1080 
   1081 void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
   1082   CreateFPToFPCallLocations(arena_, invoke);
   1083 }
   1084 
   1085 void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
   1086   GenFPToFPCall(invoke, codegen_, kQuickSinh);
   1087 }
   1088 
   1089 void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
   1090   CreateFPToFPCallLocations(arena_, invoke);
   1091 }
   1092 
   1093 void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
   1094   GenFPToFPCall(invoke, codegen_, kQuickTan);
   1095 }
   1096 
   1097 void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
   1098   CreateFPToFPCallLocations(arena_, invoke);
   1099 }
   1100 
   1101 void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
   1102   GenFPToFPCall(invoke, codegen_, kQuickTanh);
   1103 }
   1104 
   1105 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
   1106                                         HInvoke* invoke) {
   1107   LocationSummary* locations = new (arena) LocationSummary(invoke,
   1108                                                            LocationSummary::kCallOnMainOnly,
   1109                                                            kIntrinsified);
   1110   InvokeRuntimeCallingConvention calling_convention;
   1111   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   1112   locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
   1113   locations->SetOut(Location::FpuRegisterLocation(XMM0));
   1114 }
   1115 
   1116 void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
   1117   CreateFPFPToFPCallLocations(arena_, invoke);
   1118 }
   1119 
   1120 void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
   1121   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
   1122 }
   1123 
   1124 void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
   1125   CreateFPFPToFPCallLocations(arena_, invoke);
   1126 }
   1127 
   1128 void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
   1129   GenFPToFPCall(invoke, codegen_, kQuickHypot);
   1130 }
   1131 
   1132 void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
   1133   CreateFPFPToFPCallLocations(arena_, invoke);
   1134 }
   1135 
   1136 void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
   1137   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
   1138 }
   1139 
   1140 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
   1141   // We need at least two of the positions or length to be an integer constant,
   1142   // or else we won't have enough free registers.
   1143   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
   1144   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
   1145   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
   1146 
   1147   int num_constants =
   1148       ((src_pos != nullptr) ? 1 : 0)
   1149       + ((dest_pos != nullptr) ? 1 : 0)
   1150       + ((length != nullptr) ? 1 : 0);
   1151 
   1152   if (num_constants < 2) {
   1153     // Not enough free registers.
   1154     return;
   1155   }
   1156 
   1157   // As long as we are checking, we might as well check to see if the src and dest
   1158   // positions are >= 0.
   1159   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
   1160       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
   1161     // We will have to fail anyways.
   1162     return;
   1163   }
   1164 
   1165   // And since we are already checking, check the length too.
   1166   if (length != nullptr) {
   1167     int32_t len = length->GetValue();
   1168     if (len < 0) {
   1169       // Just call as normal.
   1170       return;
   1171     }
   1172   }
   1173 
   1174   // Okay, it is safe to generate inline code.
   1175   LocationSummary* locations =
   1176     new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
   1177   // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
   1178   locations->SetInAt(0, Location::RequiresRegister());
   1179   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
   1180   locations->SetInAt(2, Location::RequiresRegister());
   1181   locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
   1182   locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
   1183 
   1184   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
   1185   locations->AddTemp(Location::RegisterLocation(ESI));
   1186   locations->AddTemp(Location::RegisterLocation(EDI));
   1187   locations->AddTemp(Location::RegisterLocation(ECX));
   1188 }
   1189 
   1190 static void CheckPosition(X86Assembler* assembler,
   1191                           Location pos,
   1192                           Register input,
   1193                           Location length,
   1194                           SlowPathCode* slow_path,
   1195                           Register temp,
   1196                           bool length_is_input_length = false) {
   1197   // Where is the length in the Array?
   1198   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
   1199 
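  // The checks below verify that pos >= 0, pos <= length(input) and
  // (length(input) - pos) >= length, jumping to the slow path on any failure; constant
  // positions allow some of these checks to be elided.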
   1200   if (pos.IsConstant()) {
   1201     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
   1202     if (pos_const == 0) {
   1203       if (!length_is_input_length) {
   1204         // Check that length(input) >= length.
   1205         if (length.IsConstant()) {
   1206           __ cmpl(Address(input, length_offset),
   1207                   Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
   1208         } else {
   1209           __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
   1210         }
   1211         __ j(kLess, slow_path->GetEntryLabel());
   1212       }
   1213     } else {
   1214       // Check that length(input) >= pos.
   1215       __ movl(temp, Address(input, length_offset));
   1216       __ subl(temp, Immediate(pos_const));
   1217       __ j(kLess, slow_path->GetEntryLabel());
   1218 
   1219       // Check that (length(input) - pos) >= length.
   1220       if (length.IsConstant()) {
   1221         __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
   1222       } else {
   1223         __ cmpl(temp, length.AsRegister<Register>());
   1224       }
   1225       __ j(kLess, slow_path->GetEntryLabel());
   1226     }
   1227   } else if (length_is_input_length) {
   1228     // The only way the copy can succeed is if pos is zero.
   1229     Register pos_reg = pos.AsRegister<Register>();
   1230     __ testl(pos_reg, pos_reg);
   1231     __ j(kNotEqual, slow_path->GetEntryLabel());
   1232   } else {
   1233     // Check that pos >= 0.
   1234     Register pos_reg = pos.AsRegister<Register>();
   1235     __ testl(pos_reg, pos_reg);
   1236     __ j(kLess, slow_path->GetEntryLabel());
   1237 
   1238     // Check that pos <= length(input).
   1239     __ cmpl(Address(input, length_offset), pos_reg);
   1240     __ j(kLess, slow_path->GetEntryLabel());
   1241 
   1242     // Check that (length(input) - pos) >= length.
   1243     __ movl(temp, Address(input, length_offset));
   1244     __ subl(temp, pos_reg);
   1245     if (length.IsConstant()) {
   1246       __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
   1247     } else {
   1248       __ cmpl(temp, length.AsRegister<Register>());
   1249     }
   1250     __ j(kLess, slow_path->GetEntryLabel());
   1251   }
   1252 }
   1253 
   1254 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
   1255   X86Assembler* assembler = GetAssembler();
   1256   LocationSummary* locations = invoke->GetLocations();
   1257 
   1258   Register src = locations->InAt(0).AsRegister<Register>();
   1259   Location srcPos = locations->InAt(1);
   1260   Register dest = locations->InAt(2).AsRegister<Register>();
   1261   Location destPos = locations->InAt(3);
   1262   Location length = locations->InAt(4);
   1263 
   1264   // Temporaries that we need for MOVSW.
   1265   Register src_base = locations->GetTemp(0).AsRegister<Register>();
   1266   DCHECK_EQ(src_base, ESI);
   1267   Register dest_base = locations->GetTemp(1).AsRegister<Register>();
   1268   DCHECK_EQ(dest_base, EDI);
   1269   Register count = locations->GetTemp(2).AsRegister<Register>();
   1270   DCHECK_EQ(count, ECX);
   1271 
   1272   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
   1273   codegen_->AddSlowPath(slow_path);
   1274 
   1275   // Bail out if the source and destination are the same (to handle overlap).
   1276   __ cmpl(src, dest);
   1277   __ j(kEqual, slow_path->GetEntryLabel());
   1278 
   1279   // Bail out if the source is null.
   1280   __ testl(src, src);
   1281   __ j(kEqual, slow_path->GetEntryLabel());
   1282 
   1283   // Bail out if the destination is null.
   1284   __ testl(dest, dest);
   1285   __ j(kEqual, slow_path->GetEntryLabel());
   1286 
   1287   // If the length is negative, bail out.
   1288   // We have already checked in the LocationsBuilder for the constant case.
   1289   if (!length.IsConstant()) {
    1290     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
   1291     __ j(kLess, slow_path->GetEntryLabel());
   1292   }
   1293 
   1294   // We need the count in ECX.
   1295   if (length.IsConstant()) {
   1296     __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
   1297   } else {
   1298     __ movl(count, length.AsRegister<Register>());
   1299   }
   1300 
   1301   // Validity checks: source. Use src_base as a temporary register.
   1302   CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);
   1303 
   1304   // Validity checks: dest. Use src_base as a temporary register.
   1305   CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);
   1306 
   1307   // Okay, everything checks out.  Finally time to do the copy.
   1308   // Check assumption that sizeof(Char) is 2 (used in scaling below).
   1309   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
   1310   DCHECK_EQ(char_size, 2u);
   1311 
   1312   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
   1313 
   1314   if (srcPos.IsConstant()) {
   1315     int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
   1316     __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
   1317   } else {
   1318     __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
   1319                               ScaleFactor::TIMES_2, data_offset));
   1320   }
   1321   if (destPos.IsConstant()) {
   1322     int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
   1323 
   1324     __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
   1325   } else {
   1326     __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
   1327                                ScaleFactor::TIMES_2, data_offset));
   1328   }
   1329 
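           // At this point ESI = src + data_offset + 2 * srcPos, EDI = dest + data_offset + 2 * destPos,
           // and ECX holds the number of 16-bit chars to copy, as required by REP MOVSW.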
   1330   // Do the move.
   1331   __ rep_movsw();
   1332 
   1333   __ Bind(slow_path->GetExitLabel());
   1334 }
   1335 
   1336 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
    1337   // The inputs are passed in the runtime calling convention registers; the result is in EAX.
   1338   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1339                                                             LocationSummary::kCallOnMainAndSlowPath,
   1340                                                             kIntrinsified);
   1341   InvokeRuntimeCallingConvention calling_convention;
   1342   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   1343   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   1344   locations->SetOut(Location::RegisterLocation(EAX));
   1345 }
   1346 
   1347 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
   1348   X86Assembler* assembler = GetAssembler();
   1349   LocationSummary* locations = invoke->GetLocations();
   1350 
   1351   // Note that the null check must have been done earlier.
   1352   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1353 
   1354   Register argument = locations->InAt(1).AsRegister<Register>();
   1355   __ testl(argument, argument);
   1356   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
   1357   codegen_->AddSlowPath(slow_path);
   1358   __ j(kEqual, slow_path->GetEntryLabel());
   1359 
   1360   codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
   1361   __ Bind(slow_path->GetExitLabel());
   1362 }
   1363 
   1364 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
   1365   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1366                                                             LocationSummary::kNoCall,
   1367                                                             kIntrinsified);
   1368   locations->SetInAt(0, Location::RequiresRegister());
   1369   locations->SetInAt(1, Location::RequiresRegister());
   1370 
    1371   // Request temporary registers; ECX and EDI are needed for the repe_cmpsl instruction.
   1372   locations->AddTemp(Location::RegisterLocation(ECX));
   1373   locations->AddTemp(Location::RegisterLocation(EDI));
   1374 
    1375   // Set the output; ESI is needed for the repe_cmpsl instruction anyway.
   1376   locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
   1377 }
   1378 
   1379 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
   1380   X86Assembler* assembler = GetAssembler();
   1381   LocationSummary* locations = invoke->GetLocations();
   1382 
   1383   Register str = locations->InAt(0).AsRegister<Register>();
   1384   Register arg = locations->InAt(1).AsRegister<Register>();
   1385   Register ecx = locations->GetTemp(0).AsRegister<Register>();
   1386   Register edi = locations->GetTemp(1).AsRegister<Register>();
   1387   Register esi = locations->Out().AsRegister<Register>();
   1388 
   1389   NearLabel end, return_true, return_false;
   1390 
   1391   // Get offsets of count, value, and class fields within a string object.
   1392   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
   1393   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
   1394   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
   1395 
   1396   // Note that the null check must have been done earlier.
   1397   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1398 
   1399   StringEqualsOptimizations optimizations(invoke);
   1400   if (!optimizations.GetArgumentNotNull()) {
   1401     // Check if input is null, return false if it is.
   1402     __ testl(arg, arg);
   1403     __ j(kEqual, &return_false);
   1404   }
   1405 
   1406   if (!optimizations.GetArgumentIsString()) {
   1407     // Instanceof check for the argument by comparing class fields.
   1408     // All string objects must have the same type since String cannot be subclassed.
   1409     // Receiver must be a string object, so its class field is equal to all strings' class fields.
   1410     // If the argument is a string object, its class field must be equal to receiver's class field.
   1411     __ movl(ecx, Address(str, class_offset));
   1412     __ cmpl(ecx, Address(arg, class_offset));
   1413     __ j(kNotEqual, &return_false);
   1414   }
   1415 
   1416   // Reference equality check, return true if same reference.
   1417   __ cmpl(str, arg);
   1418   __ j(kEqual, &return_true);
   1419 
   1420   // Load length and compression flag of receiver string.
   1421   __ movl(ecx, Address(str, count_offset));
   1422   // Check if lengths and compression flags are equal, return false if they're not.
    1423   // Two identical strings will always have the same compression style since
    1424   // the compression style is decided at allocation time.
   1425   __ cmpl(ecx, Address(arg, count_offset));
   1426   __ j(kNotEqual, &return_false);
   1427   // Return true if strings are empty. Even with string compression `count == 0` means empty.
   1428   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1429                 "Expecting 0=compressed, 1=uncompressed");
   1430   __ jecxz(&return_true);
   1431 
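           // With string compression enabled, the count field holds (length << 1) | flag, where the
           // low bit is 0 for compressed (8-bit) data and 1 for uncompressed (16-bit) data, as
           // asserted above.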
   1432   if (mirror::kUseStringCompression) {
   1433     NearLabel string_uncompressed;
    1434     // Extract the length and branch depending on whether both strings are compressed or
    1435     // both are uncompressed; strings with different compression styles were rejected above.
   1436     __ shrl(ecx, Immediate(1));
   1437     __ j(kCarrySet, &string_uncompressed);
   1438     // Divide string length by 2, rounding up, and continue as if uncompressed.
   1439     __ addl(ecx, Immediate(1));
   1440     __ shrl(ecx, Immediate(1));
   1441     __ Bind(&string_uncompressed);
   1442   }
   1443   // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
   1444   __ leal(esi, Address(str, value_offset));
   1445   __ leal(edi, Address(arg, value_offset));
   1446 
   1447   // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
   1448   // divisible by 2.
   1449   __ addl(ecx, Immediate(1));
   1450   __ shrl(ecx, Immediate(1));
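           // For example, an uncompressed string of length 5 is compared as ceil(5/2) = 3 dwords; the
           // character read past the end is zero padding on both sides (per the alignment assertion
           // below), so it cannot introduce a spurious mismatch.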
   1451 
   1452   // Assertions that must hold in order to compare strings 2 characters (uncompressed)
   1453   // or 4 characters (compressed) at a time.
   1454   DCHECK_ALIGNED(value_offset, 4);
   1455   static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
   1456 
   1457   // Loop to compare strings two characters at a time starting at the beginning of the string.
   1458   __ repe_cmpsl();
   1459   // If strings are not equal, zero flag will be cleared.
   1460   __ j(kNotEqual, &return_false);
   1461 
    1462   // Return true and exit the function.
    1463   // Reaching this point means the loop found no difference, so the strings are equal.
   1464   __ Bind(&return_true);
   1465   __ movl(esi, Immediate(1));
   1466   __ jmp(&end);
   1467 
   1468   // Return false and exit the function.
   1469   __ Bind(&return_false);
   1470   __ xorl(esi, esi);
   1471   __ Bind(&end);
   1472 }
   1473 
   1474 static void CreateStringIndexOfLocations(HInvoke* invoke,
   1475                                          ArenaAllocator* allocator,
   1476                                          bool start_at_zero) {
   1477   LocationSummary* locations = new (allocator) LocationSummary(invoke,
   1478                                                                LocationSummary::kCallOnSlowPath,
   1479                                                                kIntrinsified);
    1480   // The data needs to be in EDI for scasw, so request that the string be placed there.
   1481   locations->SetInAt(0, Location::RegisterLocation(EDI));
    1482   // Even if we are searching for a constant char, it still has to be copied into EAX, so just
    1483   // request the register allocator to do that. We can still detect the constant case by
    1484   // checking the instruction's input explicitly.
   1485   // Note: This works as we don't clobber EAX anywhere.
   1486   locations->SetInAt(1, Location::RegisterLocation(EAX));
   1487   if (!start_at_zero) {
   1488     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
   1489   }
    1490   // As we clobber EDI during execution anyway, also use it as the output.
   1491   locations->SetOut(Location::SameAsFirstInput());
   1492 
   1493   // repne scasw uses ECX as the counter.
   1494   locations->AddTemp(Location::RegisterLocation(ECX));
   1495   // Need another temporary to be able to compute the result.
   1496   locations->AddTemp(Location::RequiresRegister());
   1497   if (mirror::kUseStringCompression) {
    1498     // Need another temporary to preserve the count field (length plus compression flag).
   1499     locations->AddTemp(Location::RequiresRegister());
   1500   }
   1501 }
   1502 
   1503 static void GenerateStringIndexOf(HInvoke* invoke,
   1504                                   X86Assembler* assembler,
   1505                                   CodeGeneratorX86* codegen,
   1506                                   ArenaAllocator* allocator,
   1507                                   bool start_at_zero) {
   1508   LocationSummary* locations = invoke->GetLocations();
   1509 
   1510   // Note that the null check must have been done earlier.
   1511   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
   1512 
   1513   Register string_obj = locations->InAt(0).AsRegister<Register>();
   1514   Register search_value = locations->InAt(1).AsRegister<Register>();
   1515   Register counter = locations->GetTemp(0).AsRegister<Register>();
   1516   Register string_length = locations->GetTemp(1).AsRegister<Register>();
   1517   Register out = locations->Out().AsRegister<Register>();
    1518   // Only used when the string compression feature is on.
   1519   Register string_length_flagged;
   1520 
   1521   // Check our assumptions for registers.
   1522   DCHECK_EQ(string_obj, EDI);
   1523   DCHECK_EQ(search_value, EAX);
   1524   DCHECK_EQ(counter, ECX);
   1525   DCHECK_EQ(out, EDI);
   1526 
   1527   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
   1528   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   1529   SlowPathCode* slow_path = nullptr;
   1530   HInstruction* code_point = invoke->InputAt(1);
   1531   if (code_point->IsIntConstant()) {
    1532     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
    1533             std::numeric_limits<uint16_t>::max()) {
   1534       // Always needs the slow-path. We could directly dispatch to it, but this case should be
   1535       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
   1536       slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
   1537       codegen->AddSlowPath(slow_path);
   1538       __ jmp(slow_path->GetEntryLabel());
   1539       __ Bind(slow_path->GetExitLabel());
   1540       return;
   1541     }
   1542   } else if (code_point->GetType() != Primitive::kPrimChar) {
   1543     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
   1544     slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
   1545     codegen->AddSlowPath(slow_path);
   1546     __ j(kAbove, slow_path->GetEntryLabel());
   1547   }
   1548 
   1549   // From here down, we know that we are looking for a char that fits in 16 bits.
   1550   // Location of reference to data array within the String object.
   1551   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
   1552   // Location of count within the String object.
   1553   int32_t count_offset = mirror::String::CountOffset().Int32Value();
   1554 
   1555   // Load the count field of the string containing the length and compression flag.
   1556   __ movl(string_length, Address(string_obj, count_offset));
   1557 
   1558   // Do a zero-length check. Even with string compression `count == 0` means empty.
   1559   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1560                 "Expecting 0=compressed, 1=uncompressed");
   1561   // TODO: Support jecxz.
   1562   NearLabel not_found_label;
   1563   __ testl(string_length, string_length);
   1564   __ j(kEqual, &not_found_label);
   1565 
   1566   if (mirror::kUseStringCompression) {
   1567     string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
   1568     __ movl(string_length_flagged, string_length);
   1569     // Extract the length and shift out the least significant bit used as compression flag.
   1570     __ shrl(string_length, Immediate(1));
   1571   }
   1572 
   1573   if (start_at_zero) {
   1574     // Number of chars to scan is the same as the string length.
   1575     __ movl(counter, string_length);
   1576 
   1577     // Move to the start of the string.
   1578     __ addl(string_obj, Immediate(value_offset));
   1579   } else {
   1580     Register start_index = locations->InAt(2).AsRegister<Register>();
   1581 
   1582     // Do a start_index check.
   1583     __ cmpl(start_index, string_length);
   1584     __ j(kGreaterEqual, &not_found_label);
   1585 
    1586     // Ensure we have a start index >= 0.
   1587     __ xorl(counter, counter);
   1588     __ cmpl(start_index, Immediate(0));
   1589     __ cmovl(kGreater, counter, start_index);
   1590 
   1591     if (mirror::kUseStringCompression) {
   1592       NearLabel modify_counter, offset_uncompressed_label;
   1593       __ testl(string_length_flagged, Immediate(1));
   1594       __ j(kNotZero, &offset_uncompressed_label);
   1595       // Move to the start of the string: string_obj + value_offset + start_index.
   1596       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
   1597       __ jmp(&modify_counter);
   1598 
   1599       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
   1600       __ Bind(&offset_uncompressed_label);
   1601       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
   1602 
   1603       // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
   1604       // compare.
   1605       __ Bind(&modify_counter);
   1606     } else {
   1607       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
   1608     }
   1609     __ negl(counter);
   1610     __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
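             // counter = string_length + (-max(start_index, 0)), i.e. the number of chars left to scan.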
   1611   }
   1612 
   1613   if (mirror::kUseStringCompression) {
   1614     NearLabel uncompressed_string_comparison;
   1615     NearLabel comparison_done;
   1616     __ testl(string_length_flagged, Immediate(1));
   1617     __ j(kNotZero, &uncompressed_string_comparison);
   1618 
   1619     // Check if EAX (search_value) is ASCII.
   1620     __ cmpl(search_value, Immediate(127));
   1621     __ j(kGreater, &not_found_label);
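             // A compressed string stores only ASCII characters, so a search value above 127 can never
             // be found in it.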
   1622     // Comparing byte-per-byte.
   1623     __ repne_scasb();
   1624     __ jmp(&comparison_done);
   1625 
   1626     // Everything is set up for repne scasw:
   1627     //   * Comparison address in EDI.
   1628     //   * Counter in ECX.
   1629     __ Bind(&uncompressed_string_comparison);
   1630     __ repne_scasw();
   1631     __ Bind(&comparison_done);
   1632   } else {
   1633     __ repne_scasw();
   1634   }
   1635   // Did we find a match?
   1636   __ j(kNotEqual, &not_found_label);
   1637 
   1638   // Yes, we matched.  Compute the index of the result.
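           // repne scasw decrements ECX on every character scanned, including the match, so `counter`
           // now holds (chars to scan) - (match offset + 1). With `counter` primed to string_length
           // (or string_length - start_index), string_length - counter - 1 yields the absolute index.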
   1639   __ subl(string_length, counter);
   1640   __ leal(out, Address(string_length, -1));
   1641 
   1642   NearLabel done;
   1643   __ jmp(&done);
   1644 
   1645   // Failed to match; return -1.
   1646   __ Bind(&not_found_label);
   1647   __ movl(out, Immediate(-1));
   1648 
   1649   // And join up at the end.
   1650   __ Bind(&done);
   1651   if (slow_path != nullptr) {
   1652     __ Bind(slow_path->GetExitLabel());
   1653   }
   1654 }
   1655 
   1656 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
   1657   CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
   1658 }
   1659 
   1660 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
   1661   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
   1662 }
   1663 
   1664 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
   1665   CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
   1666 }
   1667 
   1668 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
   1669   GenerateStringIndexOf(
   1670       invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
   1671 }
   1672 
   1673 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
   1674   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1675                                                             LocationSummary::kCallOnMainAndSlowPath,
   1676                                                             kIntrinsified);
   1677   InvokeRuntimeCallingConvention calling_convention;
   1678   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   1679   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   1680   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
   1681   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
   1682   locations->SetOut(Location::RegisterLocation(EAX));
   1683 }
   1684 
   1685 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
   1686   X86Assembler* assembler = GetAssembler();
   1687   LocationSummary* locations = invoke->GetLocations();
   1688 
   1689   Register byte_array = locations->InAt(0).AsRegister<Register>();
   1690   __ testl(byte_array, byte_array);
   1691   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
   1692   codegen_->AddSlowPath(slow_path);
   1693   __ j(kEqual, slow_path->GetEntryLabel());
   1694 
   1695   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
   1696   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   1697   __ Bind(slow_path->GetExitLabel());
   1698 }
   1699 
   1700 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
   1701   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1702                                                             LocationSummary::kCallOnMainOnly,
   1703                                                             kIntrinsified);
   1704   InvokeRuntimeCallingConvention calling_convention;
   1705   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   1706   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   1707   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
   1708   locations->SetOut(Location::RegisterLocation(EAX));
   1709 }
   1710 
   1711 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
   1712   // No need to emit code checking whether `locations->InAt(2)` is a null
   1713   // pointer, as callers of the native method
   1714   //
   1715   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
   1716   //
   1717   // all include a null check on `data` before calling that method.
   1718   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
   1719   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   1720 }
   1721 
   1722 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
   1723   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1724                                                             LocationSummary::kCallOnMainAndSlowPath,
   1725                                                             kIntrinsified);
   1726   InvokeRuntimeCallingConvention calling_convention;
   1727   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   1728   locations->SetOut(Location::RegisterLocation(EAX));
   1729 }
   1730 
   1731 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
   1732   X86Assembler* assembler = GetAssembler();
   1733   LocationSummary* locations = invoke->GetLocations();
   1734 
   1735   Register string_to_copy = locations->InAt(0).AsRegister<Register>();
   1736   __ testl(string_to_copy, string_to_copy);
   1737   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
   1738   codegen_->AddSlowPath(slow_path);
   1739   __ j(kEqual, slow_path->GetEntryLabel());
   1740 
   1741   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
   1742   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   1743   __ Bind(slow_path->GetExitLabel());
   1744 }
   1745 
   1746 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   1747   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
   1748   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   1749                                                             LocationSummary::kNoCall,
   1750                                                             kIntrinsified);
   1751   locations->SetInAt(0, Location::RequiresRegister());
   1752   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
   1753   // Place srcEnd in ECX to save a move below.
   1754   locations->SetInAt(2, Location::RegisterLocation(ECX));
   1755   locations->SetInAt(3, Location::RequiresRegister());
   1756   locations->SetInAt(4, Location::RequiresRegister());
   1757 
   1758   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
    1759   // We don't have enough registers to also reserve ECX, so it is saved and restored below.
   1760   locations->AddTemp(Location::RegisterLocation(ESI));
   1761   locations->AddTemp(Location::RegisterLocation(EDI));
   1762 }
   1763 
   1764 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   1765   X86Assembler* assembler = GetAssembler();
   1766   LocationSummary* locations = invoke->GetLocations();
   1767 
   1768   size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
   1769   // Location of data in char array buffer.
   1770   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
   1771   // Location of char array data in string.
   1772   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
   1773 
   1774   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
   1775   Register obj = locations->InAt(0).AsRegister<Register>();
   1776   Location srcBegin = locations->InAt(1);
   1777   int srcBegin_value =
   1778     srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
   1779   Register srcEnd = locations->InAt(2).AsRegister<Register>();
   1780   Register dst = locations->InAt(3).AsRegister<Register>();
   1781   Register dstBegin = locations->InAt(4).AsRegister<Register>();
   1782 
   1783   // Check assumption that sizeof(Char) is 2 (used in scaling below).
   1784   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
   1785   DCHECK_EQ(char_size, 2u);
   1786 
   1787   // Compute the number of chars (words) to move.
   1788   // Save ECX, since we don't know if it will be used later.
   1789   __ pushl(ECX);
   1790   int stack_adjust = kX86WordSize;
   1791   __ cfi().AdjustCFAOffset(stack_adjust);
   1792   DCHECK_EQ(srcEnd, ECX);
   1793   if (srcBegin.IsConstant()) {
   1794     __ subl(ECX, Immediate(srcBegin_value));
   1795   } else {
   1796     DCHECK(srcBegin.IsRegister());
   1797     __ subl(ECX, srcBegin.AsRegister<Register>());
   1798   }
   1799 
   1800   NearLabel done;
   1801   if (mirror::kUseStringCompression) {
    1802     // Location of the count field in the string.
   1803     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
   1804     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
   1805     DCHECK_EQ(c_char_size, 1u);
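             // EAX is used below as a scratch register to widen each byte to a char; it is not a
             // declared temp, so save it here and restore it after the copy.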
   1806     __ pushl(EAX);
   1807     __ cfi().AdjustCFAOffset(stack_adjust);
   1808 
   1809     NearLabel copy_loop, copy_uncompressed;
   1810     __ testl(Address(obj, count_offset), Immediate(1));
   1811     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   1812                   "Expecting 0=compressed, 1=uncompressed");
   1813     __ j(kNotZero, &copy_uncompressed);
   1814     // Compute the address of the source string by adding the number of chars from
   1815     // the source beginning to the value offset of a string.
   1816     __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
   1817 
   1818     // Start the loop to copy String's value to Array of Char.
   1819     __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
   1820     __ Bind(&copy_loop);
   1821     __ jecxz(&done);
   1822     // Use EAX temporary (convert byte from ESI to word).
   1823     // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
   1824     __ movzxb(EAX, Address(ESI, 0));
   1825     __ movw(Address(EDI, 0), EAX);
   1826     __ leal(EDI, Address(EDI, char_size));
   1827     __ leal(ESI, Address(ESI, c_char_size));
   1828     // TODO: Add support for LOOP to X86Assembler.
   1829     __ subl(ECX, Immediate(1));
   1830     __ jmp(&copy_loop);
   1831     __ Bind(&copy_uncompressed);
   1832   }
   1833 
   1834   // Do the copy for uncompressed string.
   1835   // Compute the address of the destination buffer.
   1836   __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
   1837   __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
   1838   __ rep_movsw();
   1839 
   1840   __ Bind(&done);
   1841   if (mirror::kUseStringCompression) {
   1842     // Restore EAX.
   1843     __ popl(EAX);
   1844     __ cfi().AdjustCFAOffset(-stack_adjust);
   1845   }
   1846   // Restore ECX.
   1847   __ popl(ECX);
   1848   __ cfi().AdjustCFAOffset(-stack_adjust);
   1849 }
   1850 
   1851 static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
   1852   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
   1853   Location out_loc = locations->Out();
   1854   // x86 allows unaligned access. We do not have to check the input or use specific instructions
   1855   // to avoid a SIGBUS.
   1856   switch (size) {
   1857     case Primitive::kPrimByte:
   1858       __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
   1859       break;
   1860     case Primitive::kPrimShort:
   1861       __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
   1862       break;
   1863     case Primitive::kPrimInt:
   1864       __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
   1865       break;
   1866     case Primitive::kPrimLong:
   1867       __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
   1868       __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
   1869       break;
   1870     default:
   1871       LOG(FATAL) << "Type not recognized for peek: " << size;
   1872       UNREACHABLE();
   1873   }
   1874 }
   1875 
   1876 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
   1877   CreateLongToIntLocations(arena_, invoke);
   1878 }
   1879 
   1880 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
   1881   GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
   1882 }
   1883 
   1884 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
   1885   CreateLongToIntLocations(arena_, invoke);
   1886 }
   1887 
   1888 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
   1889   GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
   1890 }
   1891 
   1892 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
   1893   CreateLongToLongLocations(arena_, invoke);
   1894 }
   1895 
   1896 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
   1897   GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
   1898 }
   1899 
   1900 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
   1901   CreateLongToIntLocations(arena_, invoke);
   1902 }
   1903 
   1904 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
   1905   GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
   1906 }
   1907 
   1908 static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size,
   1909                                          HInvoke* invoke) {
   1910   LocationSummary* locations = new (arena) LocationSummary(invoke,
   1911                                                            LocationSummary::kNoCall,
   1912                                                            kIntrinsified);
   1913   locations->SetInAt(0, Location::RequiresRegister());
   1914   HInstruction* value = invoke->InputAt(1);
   1915   if (size == Primitive::kPrimByte) {
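             // movb needs a byte-addressable register (EAX, EBX, ECX, or EDX), so constrain the value
             // location accordingly.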
   1916     locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
   1917   } else {
   1918     locations->SetInAt(1, Location::RegisterOrConstant(value));
   1919   }
   1920 }
   1921 
   1922 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
   1923   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
   1924   Location value_loc = locations->InAt(1);
   1925   // x86 allows unaligned access. We do not have to check the input or use specific instructions
   1926   // to avoid a SIGBUS.
   1927   switch (size) {
   1928     case Primitive::kPrimByte:
   1929       if (value_loc.IsConstant()) {
   1930         __ movb(Address(address, 0),
   1931                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
   1932       } else {
   1933         __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
   1934       }
   1935       break;
   1936     case Primitive::kPrimShort:
   1937       if (value_loc.IsConstant()) {
   1938         __ movw(Address(address, 0),
   1939                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
   1940       } else {
   1941         __ movw(Address(address, 0), value_loc.AsRegister<Register>());
   1942       }
   1943       break;
   1944     case Primitive::kPrimInt:
   1945       if (value_loc.IsConstant()) {
   1946         __ movl(Address(address, 0),
   1947                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
   1948       } else {
   1949         __ movl(Address(address, 0), value_loc.AsRegister<Register>());
   1950       }
   1951       break;
   1952     case Primitive::kPrimLong:
   1953       if (value_loc.IsConstant()) {
   1954         int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
   1955         __ movl(Address(address, 0), Immediate(Low32Bits(value)));
   1956         __ movl(Address(address, 4), Immediate(High32Bits(value)));
   1957       } else {
   1958         __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
   1959         __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
   1960       }
   1961       break;
   1962     default:
   1963       LOG(FATAL) << "Type not recognized for poke: " << size;
   1964       UNREACHABLE();
   1965   }
   1966 }
   1967 
   1968 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
   1969   CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke);
   1970 }
   1971 
   1972 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
   1973   GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
   1974 }
   1975 
   1976 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
   1977   CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke);
   1978 }
   1979 
   1980 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
   1981   GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
   1982 }
   1983 
   1984 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
   1985   CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke);
   1986 }
   1987 
   1988 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
   1989   GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
   1990 }
   1991 
   1992 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
   1993   CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke);
   1994 }
   1995 
   1996 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
   1997   GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
   1998 }
   1999 
   2000 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
   2001   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   2002                                                             LocationSummary::kNoCall,
   2003                                                             kIntrinsified);
   2004   locations->SetOut(Location::RequiresRegister());
   2005 }
   2006 
   2007 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
   2008   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
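           // The Thread* of the current thread is reachable through the FS segment register; the
           // java.lang.Thread peer object is stored at PeerOffset within that Thread.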
   2009   GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
   2010 }
   2011 
   2012 static void GenUnsafeGet(HInvoke* invoke,
   2013                          Primitive::Type type,
   2014                          bool is_volatile,
   2015                          CodeGeneratorX86* codegen) {
   2016   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
   2017   LocationSummary* locations = invoke->GetLocations();
   2018   Location base_loc = locations->InAt(1);
   2019   Register base = base_loc.AsRegister<Register>();
   2020   Location offset_loc = locations->InAt(2);
   2021   Register offset = offset_loc.AsRegisterPairLow<Register>();
   2022   Location output_loc = locations->Out();
   2023 
   2024   switch (type) {
   2025     case Primitive::kPrimInt: {
   2026       Register output = output_loc.AsRegister<Register>();
   2027       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
   2028       break;
   2029     }
   2030 
   2031     case Primitive::kPrimNot: {
   2032       Register output = output_loc.AsRegister<Register>();
   2033       if (kEmitCompilerReadBarrier) {
   2034         if (kUseBakerReadBarrier) {
   2035           Address src(base, offset, ScaleFactor::TIMES_1, 0);
   2036           codegen->GenerateReferenceLoadWithBakerReadBarrier(
   2037               invoke, output_loc, base, src, /* needs_null_check */ false);
   2038         } else {
   2039           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
   2040           codegen->GenerateReadBarrierSlow(
   2041               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
   2042         }
   2043       } else {
   2044         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
   2045         __ MaybeUnpoisonHeapReference(output);
   2046       }
   2047       break;
   2048     }
   2049 
    2050     case Primitive::kPrimLong: {
    2051       Register output_lo = output_loc.AsRegisterPairLow<Register>();
    2052       Register output_hi = output_loc.AsRegisterPairHigh<Register>();
    2053       if (is_volatile) {
    2054         // Need an XMM register so the 64-bit value is read atomically with a single movsd.
    2055         XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    2056         __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
    2057         __ movd(output_lo, temp);
    2058         __ psrlq(temp, Immediate(32));
    2059         __ movd(output_hi, temp);
    2060       } else {
    2061         __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
    2062         __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
    2063       }
    2064       break;
    2065     }
   2066 
   2067     default:
   2068       LOG(FATAL) << "Unsupported op size " << type;
   2069       UNREACHABLE();
   2070   }
   2071 }
   2072 
   2073 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
   2074                                           HInvoke* invoke,
   2075                                           Primitive::Type type,
   2076                                           bool is_volatile) {
   2077   bool can_call = kEmitCompilerReadBarrier &&
   2078       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
   2079        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
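           // Reference reads may need to call into the read barrier slow path, hence kCallOnSlowPath below.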
   2080   LocationSummary* locations = new (arena) LocationSummary(invoke,
   2081                                                            (can_call
   2082                                                                 ? LocationSummary::kCallOnSlowPath
   2083                                                                 : LocationSummary::kNoCall),
   2084                                                            kIntrinsified);
   2085   if (can_call && kUseBakerReadBarrier) {
   2086     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   2087   }
   2088   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   2089   locations->SetInAt(1, Location::RequiresRegister());
   2090   locations->SetInAt(2, Location::RequiresRegister());
   2091   if (type == Primitive::kPrimLong) {
   2092     if (is_volatile) {
    2093       // Need an XMM temporary to read the volatile 64-bit value atomically.
   2094       locations->AddTemp(Location::RequiresFpuRegister());
   2095       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2096     } else {
   2097       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   2098     }
   2099   } else {
   2100     locations->SetOut(Location::RequiresRegister(),
   2101                       (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
   2102   }
   2103 }
   2104 
   2105 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
   2106   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false);
   2107 }
   2108 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
   2109   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true);
   2110 }
   2111 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
   2112   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false);
   2113 }
   2114 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
   2115   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true);
   2116 }
   2117 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
   2118   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false);
   2119 }
   2120 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
   2121   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true);
   2122 }
   2123 
   2124 
   2125 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
   2126   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
   2127 }
   2128 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
   2129   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
   2130 }
   2131 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
   2132   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
   2133 }
   2134 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
   2135   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
   2136 }
   2137 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
   2138   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
   2139 }
   2140 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
   2141   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
   2142 }
   2143 
   2144 
   2145 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
   2146                                                        Primitive::Type type,
   2147                                                        HInvoke* invoke,
   2148                                                        bool is_volatile) {
   2149   LocationSummary* locations = new (arena) LocationSummary(invoke,
   2150                                                            LocationSummary::kNoCall,
   2151                                                            kIntrinsified);
   2152   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   2153   locations->SetInAt(1, Location::RequiresRegister());
   2154   locations->SetInAt(2, Location::RequiresRegister());
   2155   locations->SetInAt(3, Location::RequiresRegister());
   2156   if (type == Primitive::kPrimNot) {
   2157     // Need temp registers for card-marking.
   2158     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
   2159     // Ensure the value is in a byte register.
   2160     locations->AddTemp(Location::RegisterLocation(ECX));
   2161   } else if (type == Primitive::kPrimLong && is_volatile) {
   2162     locations->AddTemp(Location::RequiresFpuRegister());
   2163     locations->AddTemp(Location::RequiresFpuRegister());
   2164   }
   2165 }
   2166 
   2167 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
   2168   CreateIntIntIntIntToVoidPlusTempsLocations(
   2169       arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
   2170 }
   2171 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
   2172   CreateIntIntIntIntToVoidPlusTempsLocations(
   2173       arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
   2174 }
   2175 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
   2176   CreateIntIntIntIntToVoidPlusTempsLocations(
   2177       arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true);
   2178 }
   2179 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
   2180   CreateIntIntIntIntToVoidPlusTempsLocations(
   2181       arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
   2182 }
   2183 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
   2184   CreateIntIntIntIntToVoidPlusTempsLocations(
   2185       arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
   2186 }
   2187 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
   2188   CreateIntIntIntIntToVoidPlusTempsLocations(
   2189       arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true);
   2190 }
   2191 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
   2192   CreateIntIntIntIntToVoidPlusTempsLocations(
   2193       arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
   2194 }
   2195 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
   2196   CreateIntIntIntIntToVoidPlusTempsLocations(
   2197       arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
   2198 }
   2199 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
   2200   CreateIntIntIntIntToVoidPlusTempsLocations(
   2201       arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true);
   2202 }
   2203 
    2204 // Ordered puts need no extra code: they only require an AnyStore barrier, which the x86
    2205 // memory model already provides.
   2206 static void GenUnsafePut(LocationSummary* locations,
   2207                          Primitive::Type type,
   2208                          bool is_volatile,
   2209                          CodeGeneratorX86* codegen) {
   2210   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
   2211   Register base = locations->InAt(1).AsRegister<Register>();
   2212   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
   2213   Location value_loc = locations->InAt(3);
   2214 
   2215   if (type == Primitive::kPrimLong) {
   2216     Register value_lo = value_loc.AsRegisterPairLow<Register>();
   2217     Register value_hi = value_loc.AsRegisterPairHigh<Register>();
   2218     if (is_volatile) {
   2219       XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   2220       XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
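               // Pack the two 32-bit halves into one XMM register (punpckldq interleaves the low
               // dwords) and store them with a single 64-bit movsd so the write is atomic.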
   2221       __ movd(temp1, value_lo);
   2222       __ movd(temp2, value_hi);
   2223       __ punpckldq(temp1, temp2);
   2224       __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
   2225     } else {
   2226       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
   2227       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
   2228     }
   2229   } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
   2230     Register temp = locations->GetTemp(0).AsRegister<Register>();
   2231     __ movl(temp, value_loc.AsRegister<Register>());
   2232     __ PoisonHeapReference(temp);
   2233     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
   2234   } else {
   2235     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
   2236   }
   2237 
   2238   if (is_volatile) {
   2239     codegen->MemoryFence();
   2240   }
   2241 
   2242   if (type == Primitive::kPrimNot) {
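             // A reference store must also mark the card for `base` so the GC notices the updated reference.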
   2243     bool value_can_be_null = true;  // TODO: Worth finding out this information?
   2244     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
   2245                         locations->GetTemp(1).AsRegister<Register>(),
   2246                         base,
   2247                         value_loc.AsRegister<Register>(),
   2248                         value_can_be_null);
   2249   }
   2250 }
   2251 
   2252 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
   2253   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
   2254 }
   2255 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
   2256   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
   2257 }
   2258 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
   2259   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
   2260 }
   2261 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
   2262   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
   2263 }
   2264 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
   2265   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
   2266 }
   2267 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
   2268   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
   2269 }
   2270 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
   2271   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
   2272 }
   2273 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
   2274   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
   2275 }
   2276 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
   2277   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
   2278 }
   2279 
   2280 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
   2281                                        Primitive::Type type,
   2282                                        HInvoke* invoke) {
   2283   bool can_call = kEmitCompilerReadBarrier &&
   2284       kUseBakerReadBarrier &&
   2285       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   2286   LocationSummary* locations = new (arena) LocationSummary(invoke,
   2287                                                            (can_call
   2288                                                                 ? LocationSummary::kCallOnSlowPath
   2289                                                                 : LocationSummary::kNoCall),
   2290                                                            kIntrinsified);
   2291   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   2292   locations->SetInAt(1, Location::RequiresRegister());
    2293   // Offset is a long, but in 32-bit mode we only need the low word.
    2294   // TODO: Can we update the invoke here to remove a TypeConvert to Long?
   2295   locations->SetInAt(2, Location::RequiresRegister());
   2296   // Expected value must be in EAX or EDX:EAX.
   2297   // For long, new value must be in ECX:EBX.
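           // These are the fixed operand registers of the CMPXCHG and CMPXCHG8B instructions used for
           // the compare-and-swap.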
   2298   if (type == Primitive::kPrimLong) {
   2299     locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
   2300     locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
   2301   } else {
   2302     locations->SetInAt(3, Location::RegisterLocation(EAX));
   2303     locations->SetInAt(4, Location::RequiresRegister());
   2304   }
   2305 
   2306   // Force a byte register for the output.
   2307   locations->SetOut(Location::RegisterLocation(EAX));
   2308   if (type == Primitive::kPrimNot) {
   2309     // Need temporary registers for card-marking, and possibly for
   2310     // (Baker) read barrier.
   2311     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
   2312     // Need a byte register for marking.
   2313     locations->AddTemp(Location::RegisterLocation(ECX));
   2314   }
   2315 }
   2316 
   2317 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
   2318   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
   2319 }
   2320 
   2321 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
   2322   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
   2323 }
   2324 
   2325 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
   2326   // The only read barrier implementation supporting the
    2327   // UnsafeCASObject intrinsic is the Baker-style read barrier.
   2328   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
   2329     return;
   2330   }
   2331 
   2332   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
   2333 }
   2334 
   2335 static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
   2336   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
   2337   LocationSummary* locations = invoke->GetLocations();
   2338 
   2339   Register base = locations->InAt(1).AsRegister<Register>();
   2340   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
   2341   Location out = locations->Out();
   2342   DCHECK_EQ(out.AsRegister<Register>(), EAX);
   2343 
   2344   // The address of the field within the holding object.
   2345   Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
   2346 
   2347   if (type == Primitive::kPrimNot) {
   2348     // The only read barrier implementation supporting the
    2349     // UnsafeCASObject intrinsic is the Baker-style read barrier.
   2350     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   2351 
   2352     Location temp1_loc = locations->GetTemp(0);
   2353     Register temp1 = temp1_loc.AsRegister<Register>();
   2354     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
   2355 
   2356     Register expected = locations->InAt(3).AsRegister<Register>();
   2357     // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
   2358     DCHECK_EQ(expected, EAX);
   2359     Register value = locations->InAt(4).AsRegister<Register>();
   2360 
   2361     // Mark card for object assuming new value is stored.
   2362     bool value_can_be_null = true;  // TODO: Worth finding out this information?
   2363     codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
   2364 
   2365     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2366       // Need to make sure the reference stored in the field is a to-space
   2367       // one before attempting the CAS or the CAS could fail incorrectly.
   2368       codegen->GenerateReferenceLoadWithBakerReadBarrier(
   2369           invoke,
   2370           temp1_loc,  // Unused, used only as a "temporary" within the read barrier.
   2371           base,
   2372           field_addr,
   2373           /* needs_null_check */ false,
   2374           /* always_update_field */ true,
   2375           &temp2);
   2376     }
   2377 
   2378     bool base_equals_value = (base == value);
   2379     if (kPoisonHeapReferences) {
   2380       if (base_equals_value) {
   2381         // If `base` and `value` are the same register location, move
   2382         // `value` to a temporary register.  This way, poisoning
   2383         // `value` won't invalidate `base`.
   2384         value = temp1;
   2385         __ movl(value, base);
   2386       }
   2387 
   2388       // Check that the register allocator did not assign the location
   2389       // of `expected` (EAX) to `value` nor to `base`, so that heap
   2390       // poisoning (when enabled) works as intended below.
   2391       // - If `value` were equal to `expected`, both references would
   2392       //   be poisoned twice, meaning they would not be poisoned at
   2393       //   all, as heap poisoning uses address negation.
   2394       // - If `base` were equal to `expected`, poisoning `expected`
   2395       //   would invalidate `base`.
   2396       DCHECK_NE(value, expected);
   2397       DCHECK_NE(base, expected);
   2398 
   2399       __ PoisonHeapReference(expected);
   2400       __ PoisonHeapReference(value);
   2401     }
   2402 
   2403     __ LockCmpxchgl(field_addr, value);
   2404 
   2405     // LOCK CMPXCHG has full barrier semantics, and we don't need
   2406     // scheduling barriers at this time.
   2407 
   2408     // Convert ZF into the Boolean result.
   2409     __ setb(kZero, out.AsRegister<Register>());
   2410     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
   2411 
   2412     // If heap poisoning is enabled, we need to unpoison the values
   2413     // that were poisoned earlier.
   2414     if (kPoisonHeapReferences) {
   2415       if (base_equals_value) {
   2416         // `value` has been moved to a temporary register, no need to
   2417         // unpoison it.
   2418       } else {
   2419         // Ensure `value` is different from `out`, so that unpoisoning
   2420         // the former does not invalidate the latter.
   2421         DCHECK_NE(value, out.AsRegister<Register>());
   2422         __ UnpoisonHeapReference(value);
   2423       }
   2424       // Do not unpoison the reference contained in register
   2425       // `expected`, as it is the same as register `out` (EAX).
   2426     }
   2427   } else {
   2428     if (type == Primitive::kPrimInt) {
   2429       // Ensure the expected value is in EAX (required by the CMPXCHG
   2430       // instruction).
   2431       DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
   2432       __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>());
   2433     } else if (type == Primitive::kPrimLong) {
   2434       // Ensure the expected value is in EAX:EDX and that the new
   2435       // value is in EBX:ECX (required by the CMPXCHG8B instruction).
   2436       DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
   2437       DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
   2438       DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
   2439       DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
   2440       __ LockCmpxchg8b(field_addr);
   2441     } else {
   2442       LOG(FATAL) << "Unexpected CAS type " << type;
   2443     }
   2444 
   2445     // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
   2446     // don't need scheduling barriers at this time.
   2447 
   2448     // Convert ZF into the Boolean result.
   2449     __ setb(kZero, out.AsRegister<Register>());
   2450     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
   2451   }
   2452 }
   2453 
   2454 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
   2455   GenCAS(Primitive::kPrimInt, invoke, codegen_);
   2456 }
   2457 
   2458 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
   2459   GenCAS(Primitive::kPrimLong, invoke, codegen_);
   2460 }
   2461 
   2462 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
   2463   // The only read barrier implementation supporting the
   2464   // UnsafeCASObject intrinsic is the Baker-style read barriers.
   2465   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   2466 
   2467   GenCAS(Primitive::kPrimNot, invoke, codegen_);
   2468 }
   2469 
   2470 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
   2471   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   2472                                                            LocationSummary::kNoCall,
   2473                                                            kIntrinsified);
   2474   locations->SetInAt(0, Location::RequiresRegister());
   2475   locations->SetOut(Location::SameAsFirstInput());
   2476   locations->AddTemp(Location::RequiresRegister());
   2477 }
   2478 
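         // Helper for the bit-reversal code below. A sketch in C of what one call
         // computes on `reg` (using `temp` as scratch):
         //
         //   reg = ((reg >> shift) & mask) | ((reg & mask) << shift);
         //
         // i.e. it swaps each group of bits selected by `mask` with the group `shift`
         // positions above it.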
   2479 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
   2480                      X86Assembler* assembler) {
   2481   Immediate imm_shift(shift);
   2482   Immediate imm_mask(mask);
   2483   __ movl(temp, reg);
   2484   __ shrl(reg, imm_shift);
   2485   __ andl(temp, imm_mask);
   2486   __ andl(reg, imm_mask);
   2487   __ shll(temp, imm_shift);
   2488   __ orl(reg, temp);
   2489 }
   2490 
   2491 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
   2492   X86Assembler* assembler = GetAssembler();
   2493   LocationSummary* locations = invoke->GetLocations();
   2494 
   2495   Register reg = locations->InAt(0).AsRegister<Register>();
   2496   Register temp = locations->GetTemp(0).AsRegister<Register>();
   2497 
   2498   /*
   2499    * Use one bswap instruction to reverse byte order first and then use 3 rounds of
    2500    * swapping bits to reverse the bits of a number x. Using bswap saves instructions
    2501    * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
   2502    * x = bswap x
   2503    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   2504    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   2505    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   2506    */
   2507   __ bswapl(reg);
   2508   SwapBits(reg, temp, 1, 0x55555555, assembler);
   2509   SwapBits(reg, temp, 2, 0x33333333, assembler);
   2510   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
   2511 }
   2512 
   2513 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
   2514   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   2515                                                            LocationSummary::kNoCall,
   2516                                                            kIntrinsified);
   2517   locations->SetInAt(0, Location::RequiresRegister());
   2518   locations->SetOut(Location::SameAsFirstInput());
   2519   locations->AddTemp(Location::RequiresRegister());
   2520 }
   2521 
   2522 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
   2523   X86Assembler* assembler = GetAssembler();
   2524   LocationSummary* locations = invoke->GetLocations();
   2525 
   2526   Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
   2527   Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
   2528   Register temp = locations->GetTemp(0).AsRegister<Register>();
   2529 
   2530   // We want to swap high/low, then bswap each one, and then do the same
   2531   // as a 32 bit reverse.
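           // In other words (a sketch): reverse64(hi:lo) == reverse32(lo):reverse32(hi),
           // so after the exchange below reg_high holds the old low word and vice versa.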
   2532   // Exchange high and low.
   2533   __ movl(temp, reg_low);
   2534   __ movl(reg_low, reg_high);
   2535   __ movl(reg_high, temp);
   2536 
   2537   // bit-reverse low
   2538   __ bswapl(reg_low);
   2539   SwapBits(reg_low, temp, 1, 0x55555555, assembler);
   2540   SwapBits(reg_low, temp, 2, 0x33333333, assembler);
   2541   SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
   2542 
   2543   // bit-reverse high
   2544   __ bswapl(reg_high);
   2545   SwapBits(reg_high, temp, 1, 0x55555555, assembler);
   2546   SwapBits(reg_high, temp, 2, 0x33333333, assembler);
   2547   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
   2548 }
   2549 
   2550 static void CreateBitCountLocations(
   2551     ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
   2552   if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
   2553     // Do nothing if there is no popcnt support. This results in generating
   2554     // a call for the intrinsic rather than direct code.
   2555     return;
   2556   }
   2557   LocationSummary* locations = new (arena) LocationSummary(invoke,
   2558                                                            LocationSummary::kNoCall,
   2559                                                            kIntrinsified);
   2560   if (is_long) {
   2561     locations->AddTemp(Location::RequiresRegister());
   2562   }
   2563   locations->SetInAt(0, Location::Any());
   2564   locations->SetOut(Location::RequiresRegister());
   2565 }
   2566 
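         // A sketch of the non-constant sequences emitted below (illustrative only):
         //
         //   int:   popcntl out, src
         //   long:  popcntl temp, src_lo
         //          popcntl out,  src_hi
         //          addl    out,  temp    // a long's bit count is the sum of both halves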
   2567 static void GenBitCount(X86Assembler* assembler,
   2568                         CodeGeneratorX86* codegen,
   2569                         HInvoke* invoke, bool is_long) {
   2570   LocationSummary* locations = invoke->GetLocations();
   2571   Location src = locations->InAt(0);
   2572   Register out = locations->Out().AsRegister<Register>();
   2573 
   2574   if (invoke->InputAt(0)->IsConstant()) {
   2575     // Evaluate this at compile time.
   2576     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
   2577     int32_t result = is_long
   2578         ? POPCOUNT(static_cast<uint64_t>(value))
   2579         : POPCOUNT(static_cast<uint32_t>(value));
   2580     codegen->Load32BitValue(out, result);
   2581     return;
   2582   }
   2583 
   2584   // Handle the non-constant cases.
   2585   if (!is_long) {
   2586     if (src.IsRegister()) {
   2587       __ popcntl(out, src.AsRegister<Register>());
   2588     } else {
   2589       DCHECK(src.IsStackSlot());
   2590       __ popcntl(out, Address(ESP, src.GetStackIndex()));
   2591     }
   2592   } else {
   2593     // The 64-bit case needs to worry about two parts.
   2594     Register temp = locations->GetTemp(0).AsRegister<Register>();
   2595     if (src.IsRegisterPair()) {
   2596       __ popcntl(temp, src.AsRegisterPairLow<Register>());
   2597       __ popcntl(out, src.AsRegisterPairHigh<Register>());
   2598     } else {
   2599       DCHECK(src.IsDoubleStackSlot());
   2600       __ popcntl(temp, Address(ESP, src.GetStackIndex()));
   2601       __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
   2602     }
   2603     __ addl(out, temp);
   2604   }
   2605 }
   2606 
   2607 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
   2608   CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
   2609 }
   2610 
   2611 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
   2612   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
   2613 }
   2614 
   2615 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
   2616   CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
   2617 }
   2618 
   2619 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
   2620   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
   2621 }
   2622 
   2623 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
   2624   LocationSummary* locations = new (arena) LocationSummary(invoke,
   2625                                                            LocationSummary::kNoCall,
   2626                                                            kIntrinsified);
   2627   if (is_long) {
   2628     locations->SetInAt(0, Location::RequiresRegister());
   2629   } else {
   2630     locations->SetInAt(0, Location::Any());
   2631   }
   2632   locations->SetOut(Location::RequiresRegister());
   2633 }
   2634 
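         // The code below relies on BSR returning the index of the highest set bit, so
         // for a non-zero 32-bit input CLZ(x) == 31 - BSR(x) == BSR(x) ^ 31 (no borrow
         // since 0 <= BSR(x) <= 31). A zero input sets ZF and leaves the BSR output
         // undefined, so that case is handled separately with an explicit 32 (or 64).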
   2635 static void GenLeadingZeros(X86Assembler* assembler,
   2636                             CodeGeneratorX86* codegen,
   2637                             HInvoke* invoke, bool is_long) {
   2638   LocationSummary* locations = invoke->GetLocations();
   2639   Location src = locations->InAt(0);
   2640   Register out = locations->Out().AsRegister<Register>();
   2641 
   2642   if (invoke->InputAt(0)->IsConstant()) {
   2643     // Evaluate this at compile time.
   2644     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
   2645     if (value == 0) {
   2646       value = is_long ? 64 : 32;
   2647     } else {
   2648       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
   2649     }
   2650     codegen->Load32BitValue(out, value);
   2651     return;
   2652   }
   2653 
   2654   // Handle the non-constant cases.
   2655   if (!is_long) {
   2656     if (src.IsRegister()) {
   2657       __ bsrl(out, src.AsRegister<Register>());
   2658     } else {
   2659       DCHECK(src.IsStackSlot());
   2660       __ bsrl(out, Address(ESP, src.GetStackIndex()));
   2661     }
   2662 
    2663     // BSR sets ZF if the input was zero; in that case the output is undefined.
   2664     NearLabel all_zeroes, done;
   2665     __ j(kEqual, &all_zeroes);
   2666 
   2667     // Correct the result from BSR to get the final CLZ result.
   2668     __ xorl(out, Immediate(31));
   2669     __ jmp(&done);
   2670 
   2671     // Fix the zero case with the expected result.
   2672     __ Bind(&all_zeroes);
   2673     __ movl(out, Immediate(32));
   2674 
   2675     __ Bind(&done);
   2676     return;
   2677   }
   2678 
    2679   // The 64-bit case needs to worry about both parts of the register pair.
   2680   DCHECK(src.IsRegisterPair());
   2681   Register src_lo = src.AsRegisterPairLow<Register>();
   2682   Register src_hi = src.AsRegisterPairHigh<Register>();
   2683   NearLabel handle_low, done, all_zeroes;
   2684 
   2685   // Is the high word zero?
   2686   __ testl(src_hi, src_hi);
   2687   __ j(kEqual, &handle_low);
   2688 
   2689   // High word is not zero. We know that the BSR result is defined in this case.
   2690   __ bsrl(out, src_hi);
   2691 
   2692   // Correct the result from BSR to get the final CLZ result.
   2693   __ xorl(out, Immediate(31));
   2694   __ jmp(&done);
   2695 
   2696   // High word was zero.  We have to compute the low word count and add 32.
   2697   __ Bind(&handle_low);
   2698   __ bsrl(out, src_lo);
   2699   __ j(kEqual, &all_zeroes);
   2700 
   2701   // We had a valid result.  Use an XOR to both correct the result and add 32.
   2702   __ xorl(out, Immediate(63));
   2703   __ jmp(&done);
   2704 
   2705   // All zero case.
   2706   __ Bind(&all_zeroes);
   2707   __ movl(out, Immediate(64));
   2708 
   2709   __ Bind(&done);
   2710 }
   2711 
   2712 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
   2713   CreateLeadingZeroLocations(arena_, invoke, /* is_long */ false);
   2714 }
   2715 
   2716 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
   2717   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
   2718 }
   2719 
   2720 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
   2721   CreateLeadingZeroLocations(arena_, invoke, /* is_long */ true);
   2722 }
   2723 
   2724 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
   2725   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
   2726 }
   2727 
   2728 static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
   2729   LocationSummary* locations = new (arena) LocationSummary(invoke,
   2730                                                            LocationSummary::kNoCall,
   2731                                                            kIntrinsified);
   2732   if (is_long) {
   2733     locations->SetInAt(0, Location::RequiresRegister());
   2734   } else {
   2735     locations->SetInAt(0, Location::Any());
   2736   }
   2737   locations->SetOut(Location::RequiresRegister());
   2738 }
   2739 
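         // The code below relies on BSF returning the index of the lowest set bit, which
         // for a non-zero input is exactly CTZ(x). A zero input sets ZF and leaves the
         // BSF output undefined, so that case is handled separately with an explicit
         // 32 (or 64).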
   2740 static void GenTrailingZeros(X86Assembler* assembler,
   2741                              CodeGeneratorX86* codegen,
   2742                              HInvoke* invoke, bool is_long) {
   2743   LocationSummary* locations = invoke->GetLocations();
   2744   Location src = locations->InAt(0);
   2745   Register out = locations->Out().AsRegister<Register>();
   2746 
   2747   if (invoke->InputAt(0)->IsConstant()) {
   2748     // Evaluate this at compile time.
   2749     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
   2750     if (value == 0) {
   2751       value = is_long ? 64 : 32;
   2752     } else {
   2753       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
   2754     }
   2755     codegen->Load32BitValue(out, value);
   2756     return;
   2757   }
   2758 
   2759   // Handle the non-constant cases.
   2760   if (!is_long) {
   2761     if (src.IsRegister()) {
   2762       __ bsfl(out, src.AsRegister<Register>());
   2763     } else {
   2764       DCHECK(src.IsStackSlot());
   2765       __ bsfl(out, Address(ESP, src.GetStackIndex()));
   2766     }
   2767 
    2768     // BSF sets ZF if the input was zero; in that case the output is undefined.
   2769     NearLabel done;
   2770     __ j(kNotEqual, &done);
   2771 
   2772     // Fix the zero case with the expected result.
   2773     __ movl(out, Immediate(32));
   2774 
   2775     __ Bind(&done);
   2776     return;
   2777   }
   2778 
    2779   // The 64-bit case needs to worry about both parts of the register pair.
   2780   DCHECK(src.IsRegisterPair());
   2781   Register src_lo = src.AsRegisterPairLow<Register>();
   2782   Register src_hi = src.AsRegisterPairHigh<Register>();
   2783   NearLabel done, all_zeroes;
   2784 
   2785   // If the low word is zero, then ZF will be set.  If not, we have the answer.
   2786   __ bsfl(out, src_lo);
   2787   __ j(kNotEqual, &done);
   2788 
   2789   // Low word was zero.  We have to compute the high word count and add 32.
   2790   __ bsfl(out, src_hi);
   2791   __ j(kEqual, &all_zeroes);
   2792 
   2793   // We had a valid result.  Add 32 to account for the low word being zero.
   2794   __ addl(out, Immediate(32));
   2795   __ jmp(&done);
   2796 
   2797   // All zero case.
   2798   __ Bind(&all_zeroes);
   2799   __ movl(out, Immediate(64));
   2800 
   2801   __ Bind(&done);
   2802 }
   2803 
   2804 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
   2805   CreateTrailingZeroLocations(arena_, invoke, /* is_long */ false);
   2806 }
   2807 
   2808 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
   2809   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
   2810 }
   2811 
   2812 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
   2813   CreateTrailingZeroLocations(arena_, invoke, /* is_long */ true);
   2814 }
   2815 
   2816 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
   2817   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
   2818 }
   2819 
   2820 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
   2821   return instruction->InputAt(input0) == instruction->InputAt(input1);
   2822 }
   2823 
   2824 // Compute base address for the System.arraycopy intrinsic in `base`.
   2825 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
   2826                                           Primitive::Type type,
   2827                                           const Register& array,
   2828                                           const Location& pos,
   2829                                           const Register& base) {
   2830   // This routine is only used by the SystemArrayCopy intrinsic at the
    2831   // moment. We could also allow Primitive::kPrimChar as `type` to implement
   2832   // the SystemArrayCopyChar intrinsic.
   2833   DCHECK_EQ(type, Primitive::kPrimNot);
   2834   const int32_t element_size = Primitive::ComponentSize(type);
   2835   const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
   2836   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
   2837 
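           // In both branches the LEA computes, in effect:
           //   base = array + data_offset + pos * element_size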
   2838   if (pos.IsConstant()) {
   2839     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
   2840     __ leal(base, Address(array, element_size * constant + data_offset));
   2841   } else {
   2842     __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
   2843   }
   2844 }
   2845 
   2846 // Compute end source address for the System.arraycopy intrinsic in `end`.
   2847 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
   2848                                          Primitive::Type type,
   2849                                          const Location& copy_length,
   2850                                          const Register& base,
   2851                                          const Register& end) {
   2852   // This routine is only used by the SystemArrayCopy intrinsic at the
    2853   // moment. We could also allow Primitive::kPrimChar as `type` to implement
   2854   // the SystemArrayCopyChar intrinsic.
   2855   DCHECK_EQ(type, Primitive::kPrimNot);
   2856   const int32_t element_size = Primitive::ComponentSize(type);
   2857   const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
   2858 
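           // In both branches the LEA computes, in effect, one past the last element:
           //   end = base + copy_length * element_size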
   2859   if (copy_length.IsConstant()) {
   2860     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
   2861     __ leal(end, Address(base, element_size * constant));
   2862   } else {
   2863     __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
   2864   }
   2865 }
   2866 
   2867 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
   2868   // The only read barrier implementation supporting the
   2869   // SystemArrayCopy intrinsic is the Baker-style read barriers.
   2870   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
   2871     return;
   2872   }
   2873 
   2874   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
   2875   if (invoke->GetLocations() != nullptr) {
   2876     // Need a byte register for marking.
   2877     invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
   2878 
   2879     static constexpr size_t kSrc = 0;
   2880     static constexpr size_t kSrcPos = 1;
   2881     static constexpr size_t kDest = 2;
   2882     static constexpr size_t kDestPos = 3;
   2883     static constexpr size_t kLength = 4;
   2884 
   2885     if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
   2886         !invoke->InputAt(kDestPos)->IsIntConstant() &&
   2887         !invoke->InputAt(kLength)->IsIntConstant()) {
   2888       if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
   2889           !IsSameInput(invoke, kSrcPos, kLength) &&
   2890           !IsSameInput(invoke, kDestPos, kLength) &&
   2891           !IsSameInput(invoke, kSrc, kDest)) {
   2892         // Not enough registers, make the length also take a stack slot.
   2893         invoke->GetLocations()->SetInAt(kLength, Location::Any());
   2894       }
   2895     }
   2896   }
   2897 }
   2898 
   2899 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
   2900   // The only read barrier implementation supporting the
   2901   // SystemArrayCopy intrinsic is the Baker-style read barriers.
   2902   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
   2903 
   2904   X86Assembler* assembler = GetAssembler();
   2905   LocationSummary* locations = invoke->GetLocations();
   2906 
   2907   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   2908   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   2909   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   2910   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   2911   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
   2912 
   2913   Register src = locations->InAt(0).AsRegister<Register>();
   2914   Location src_pos = locations->InAt(1);
   2915   Register dest = locations->InAt(2).AsRegister<Register>();
   2916   Location dest_pos = locations->InAt(3);
   2917   Location length_arg = locations->InAt(4);
   2918   Location length = length_arg;
   2919   Location temp1_loc = locations->GetTemp(0);
   2920   Register temp1 = temp1_loc.AsRegister<Register>();
   2921   Location temp2_loc = locations->GetTemp(1);
   2922   Register temp2 = temp2_loc.AsRegister<Register>();
   2923 
   2924   SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
   2925   codegen_->AddSlowPath(intrinsic_slow_path);
   2926 
   2927   NearLabel conditions_on_positions_validated;
   2928   SystemArrayCopyOptimizations optimizations(invoke);
   2929 
    2930   // If source and destination are the same, we go to the slow path whenever a plain
    2931   // forward copy would corrupt the overlapping region, i.e. when dest_pos > src_pos.
   2932   if (src_pos.IsConstant()) {
   2933     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
   2934     if (dest_pos.IsConstant()) {
   2935       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
   2936       if (optimizations.GetDestinationIsSource()) {
   2937         // Checked when building locations.
   2938         DCHECK_GE(src_pos_constant, dest_pos_constant);
   2939       } else if (src_pos_constant < dest_pos_constant) {
   2940         __ cmpl(src, dest);
   2941         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   2942       }
   2943     } else {
   2944       if (!optimizations.GetDestinationIsSource()) {
   2945         __ cmpl(src, dest);
   2946         __ j(kNotEqual, &conditions_on_positions_validated);
   2947       }
   2948       __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
   2949       __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
   2950     }
   2951   } else {
   2952     if (!optimizations.GetDestinationIsSource()) {
   2953       __ cmpl(src, dest);
   2954       __ j(kNotEqual, &conditions_on_positions_validated);
   2955     }
   2956     if (dest_pos.IsConstant()) {
   2957       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
   2958       __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
   2959       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
   2960     } else {
   2961       __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
   2962       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
   2963     }
   2964   }
   2965 
   2966   __ Bind(&conditions_on_positions_validated);
   2967 
   2968   if (!optimizations.GetSourceIsNotNull()) {
   2969     // Bail out if the source is null.
   2970     __ testl(src, src);
   2971     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   2972   }
   2973 
   2974   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
   2975     // Bail out if the destination is null.
   2976     __ testl(dest, dest);
   2977     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   2978   }
   2979 
   2980   Location temp3_loc = locations->GetTemp(2);
   2981   Register temp3 = temp3_loc.AsRegister<Register>();
   2982   if (length.IsStackSlot()) {
   2983     __ movl(temp3, Address(ESP, length.GetStackIndex()));
   2984     length = Location::RegisterLocation(temp3);
   2985   }
   2986 
   2987   // If the length is negative, bail out.
   2988   // We have already checked in the LocationsBuilder for the constant case.
   2989   if (!length.IsConstant() &&
   2990       !optimizations.GetCountIsSourceLength() &&
   2991       !optimizations.GetCountIsDestinationLength()) {
   2992     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
   2993     __ j(kLess, intrinsic_slow_path->GetEntryLabel());
   2994   }
   2995 
   2996   // Validity checks: source.
   2997   CheckPosition(assembler,
   2998                 src_pos,
   2999                 src,
   3000                 length,
   3001                 intrinsic_slow_path,
   3002                 temp1,
   3003                 optimizations.GetCountIsSourceLength());
   3004 
   3005   // Validity checks: dest.
   3006   CheckPosition(assembler,
   3007                 dest_pos,
   3008                 dest,
   3009                 length,
   3010                 intrinsic_slow_path,
   3011                 temp1,
   3012                 optimizations.GetCountIsDestinationLength());
   3013 
   3014   if (!optimizations.GetDoesNotNeedTypeCheck()) {
   3015     // Check whether all elements of the source array are assignable to the component
   3016     // type of the destination array. We do two checks: the classes are the same,
   3017     // or the destination is Object[]. If none of these checks succeed, we go to the
   3018     // slow path.
   3019 
   3020     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
   3021       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   3022         // /* HeapReference<Class> */ temp1 = src->klass_
   3023         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3024             invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
    3025         // Bail out if the source is not a non-primitive array.
   3026         // /* HeapReference<Class> */ temp1 = temp1->component_type_
   3027         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3028             invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
   3029         __ testl(temp1, temp1);
   3030         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   3031         // If heap poisoning is enabled, `temp1` has been unpoisoned
    3032         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
   3033       } else {
   3034         // /* HeapReference<Class> */ temp1 = src->klass_
   3035         __ movl(temp1, Address(src, class_offset));
   3036         __ MaybeUnpoisonHeapReference(temp1);
    3037         // Bail out if the source is not a non-primitive array.
   3038         // /* HeapReference<Class> */ temp1 = temp1->component_type_
   3039         __ movl(temp1, Address(temp1, component_offset));
   3040         __ testl(temp1, temp1);
   3041         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   3042         __ MaybeUnpoisonHeapReference(temp1);
   3043       }
   3044       __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
   3045       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3046     }
   3047 
   3048     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   3049       if (length.Equals(Location::RegisterLocation(temp3))) {
   3050         // When Baker read barriers are enabled, register `temp3`,
   3051         // which in the present case contains the `length` parameter,
   3052         // will be overwritten below.  Make the `length` location
   3053         // reference the original stack location; it will be moved
   3054         // back to `temp3` later if necessary.
   3055         DCHECK(length_arg.IsStackSlot());
   3056         length = length_arg;
   3057       }
   3058 
   3059       // /* HeapReference<Class> */ temp1 = dest->klass_
   3060       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3061           invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
   3062 
   3063       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
    3064         // Bail out if the destination is not a non-primitive array.
   3065         //
   3066         // Register `temp1` is not trashed by the read barrier emitted
   3067         // by GenerateFieldLoadWithBakerReadBarrier below, as that
   3068         // method produces a call to a ReadBarrierMarkRegX entry point,
   3069         // which saves all potentially live registers, including
    3070         // temporaries such as `temp1`.
   3071         // /* HeapReference<Class> */ temp2 = temp1->component_type_
   3072         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3073             invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
   3074         __ testl(temp2, temp2);
   3075         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   3076         // If heap poisoning is enabled, `temp2` has been unpoisoned
    3077         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
   3078         __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
   3079         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3080       }
   3081 
   3082       // For the same reason given earlier, `temp1` is not trashed by the
   3083       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
   3084       // /* HeapReference<Class> */ temp2 = src->klass_
   3085       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3086           invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
   3087       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
   3088       __ cmpl(temp1, temp2);
   3089 
   3090       if (optimizations.GetDestinationIsTypedObjectArray()) {
   3091         NearLabel do_copy;
   3092         __ j(kEqual, &do_copy);
   3093         // /* HeapReference<Class> */ temp1 = temp1->component_type_
   3094         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3095             invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
   3096         // We do not need to emit a read barrier for the following
   3097         // heap reference load, as `temp1` is only used in a
   3098         // comparison with null below, and this reference is not
   3099         // kept afterwards.
   3100         __ cmpl(Address(temp1, super_offset), Immediate(0));
   3101         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3102         __ Bind(&do_copy);
   3103       } else {
   3104         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3105       }
   3106     } else {
   3107       // Non read barrier code.
   3108 
   3109       // /* HeapReference<Class> */ temp1 = dest->klass_
   3110       __ movl(temp1, Address(dest, class_offset));
   3111       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
   3112         __ MaybeUnpoisonHeapReference(temp1);
    3113         // Bail out if the destination is not a non-primitive array.
   3114         // /* HeapReference<Class> */ temp2 = temp1->component_type_
   3115         __ movl(temp2, Address(temp1, component_offset));
   3116         __ testl(temp2, temp2);
   3117         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   3118         __ MaybeUnpoisonHeapReference(temp2);
   3119         __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
   3120         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3121         // Re-poison the heap reference to make the compare instruction below
   3122         // compare two poisoned references.
   3123         __ PoisonHeapReference(temp1);
   3124       }
   3125 
   3126       // Note: if heap poisoning is on, we are comparing two poisoned references here.
   3127       __ cmpl(temp1, Address(src, class_offset));
   3128 
   3129       if (optimizations.GetDestinationIsTypedObjectArray()) {
   3130         NearLabel do_copy;
   3131         __ j(kEqual, &do_copy);
   3132         __ MaybeUnpoisonHeapReference(temp1);
   3133         // /* HeapReference<Class> */ temp1 = temp1->component_type_
   3134         __ movl(temp1, Address(temp1, component_offset));
   3135         __ MaybeUnpoisonHeapReference(temp1);
   3136         __ cmpl(Address(temp1, super_offset), Immediate(0));
   3137         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3138         __ Bind(&do_copy);
   3139       } else {
   3140         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3141       }
   3142     }
   3143   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
   3144     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    3145     // Bail out if the source is not a non-primitive array.
   3146     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   3147       // /* HeapReference<Class> */ temp1 = src->klass_
   3148       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3149           invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
   3150       // /* HeapReference<Class> */ temp1 = temp1->component_type_
   3151       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   3152           invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
   3153       __ testl(temp1, temp1);
   3154       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   3155       // If heap poisoning is enabled, `temp1` has been unpoisoned
    3156       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
   3157     } else {
   3158       // /* HeapReference<Class> */ temp1 = src->klass_
   3159       __ movl(temp1, Address(src, class_offset));
   3160       __ MaybeUnpoisonHeapReference(temp1);
   3161       // /* HeapReference<Class> */ temp1 = temp1->component_type_
   3162       __ movl(temp1, Address(temp1, component_offset));
   3163       __ testl(temp1, temp1);
   3164       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   3165       __ MaybeUnpoisonHeapReference(temp1);
   3166     }
   3167     __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
   3168     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   3169   }
   3170 
   3171   const Primitive::Type type = Primitive::kPrimNot;
   3172   const int32_t element_size = Primitive::ComponentSize(type);
   3173 
   3174   // Compute the base source address in `temp1`.
   3175   GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
   3176 
   3177   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   3178     // If it is needed (in the case of the fast-path loop), the base
   3179     // destination address is computed later, as `temp2` is used for
   3180     // intermediate computations.
   3181 
   3182     // Compute the end source address in `temp3`.
   3183     if (length.IsStackSlot()) {
   3184       // Location `length` is again pointing at a stack slot, as
    3185       // register `temp3` (which contained the length parameter
    3186       // earlier) has been overwritten; restore it now.
   3187       DCHECK(length.Equals(length_arg));
   3188       __ movl(temp3, Address(ESP, length.GetStackIndex()));
   3189       length = Location::RegisterLocation(temp3);
   3190     }
   3191     GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
   3192 
   3193     // SystemArrayCopy implementation for Baker read barriers (see
   3194     // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
   3195     //
   3196     //   if (src_ptr != end_ptr) {
    3197     //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
   3198     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   3199     //     bool is_gray = (rb_state == ReadBarrier::GrayState());
   3200     //     if (is_gray) {
   3201     //       // Slow-path copy.
   3202     //       for (size_t i = 0; i != length; ++i) {
   3203     //         dest_array[dest_pos + i] =
   3204     //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
   3205     //       }
   3206     //     } else {
   3207     //       // Fast-path copy.
   3208     //       do {
   3209     //         *dest_ptr++ = *src_ptr++;
   3210     //       } while (src_ptr != end_ptr)
   3211     //     }
   3212     //   }
   3213 
   3214     NearLabel loop, done;
   3215 
   3216     // Don't enter copy loop if `length == 0`.
   3217     __ cmpl(temp1, temp3);
   3218     __ j(kEqual, &done);
   3219 
   3220     // Given the numeric representation, it's enough to check the low bit of the rb_state.
   3221     static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
   3222     static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   3223     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
   3224     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
   3225     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
   3226 
   3227     // if (rb_state == ReadBarrier::GrayState())
   3228     //   goto slow_path;
   3229     // At this point, just do the "if" and make sure that flags are preserved until the branch.
   3230     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
   3231 
   3232     // Load fence to prevent load-load reordering.
   3233     // Note that this is a no-op, thanks to the x86 memory model.
   3234     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   3235 
   3236     // Slow path used to copy array when `src` is gray.
   3237     SlowPathCode* read_barrier_slow_path =
   3238         new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
   3239     codegen_->AddSlowPath(read_barrier_slow_path);
   3240 
   3241     // We have done the "if" of the gray bit check above, now branch based on the flags.
   3242     __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
   3243 
   3244     // Fast-path copy.
   3245     // Compute the base destination address in `temp2`.
   3246     GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
   3247     // Iterate over the arrays and do a raw copy of the objects. We don't need to
   3248     // poison/unpoison.
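             // Each iteration moves one 32-bit reference memory-to-memory with a push/pop
             // pair, which avoids the need for a spare general-purpose register.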
   3249     __ Bind(&loop);
   3250     __ pushl(Address(temp1, 0));
   3251     __ cfi().AdjustCFAOffset(4);
   3252     __ popl(Address(temp2, 0));
   3253     __ cfi().AdjustCFAOffset(-4);
   3254     __ addl(temp1, Immediate(element_size));
   3255     __ addl(temp2, Immediate(element_size));
   3256     __ cmpl(temp1, temp3);
   3257     __ j(kNotEqual, &loop);
   3258 
   3259     __ Bind(read_barrier_slow_path->GetExitLabel());
   3260     __ Bind(&done);
   3261   } else {
   3262     // Non read barrier code.
   3263     // Compute the base destination address in `temp2`.
   3264     GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
   3265     // Compute the end source address in `temp3`.
   3266     GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
   3267     // Iterate over the arrays and do a raw copy of the objects. We don't need to
   3268     // poison/unpoison.
   3269     NearLabel loop, done;
   3270     __ cmpl(temp1, temp3);
   3271     __ j(kEqual, &done);
   3272     __ Bind(&loop);
   3273     __ pushl(Address(temp1, 0));
   3274     __ cfi().AdjustCFAOffset(4);
   3275     __ popl(Address(temp2, 0));
   3276     __ cfi().AdjustCFAOffset(-4);
   3277     __ addl(temp1, Immediate(element_size));
   3278     __ addl(temp2, Immediate(element_size));
   3279     __ cmpl(temp1, temp3);
   3280     __ j(kNotEqual, &loop);
   3281     __ Bind(&done);
   3282   }
   3283 
   3284   // We only need one card marking on the destination array.
   3285   codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false);
   3286 
   3287   __ Bind(intrinsic_slow_path->GetExitLabel());
   3288 }
   3289 
   3290 void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
   3291   InvokeRuntimeCallingConvention calling_convention;
   3292   IntrinsicVisitor::ComputeIntegerValueOfLocations(
   3293       invoke,
   3294       codegen_,
   3295       Location::RegisterLocation(EAX),
   3296       Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   3297 }
   3298 
   3299 void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
   3300   IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
   3301   LocationSummary* locations = invoke->GetLocations();
   3302   X86Assembler* assembler = GetAssembler();
   3303 
   3304   Register out = locations->Out().AsRegister<Register>();
   3305   InvokeRuntimeCallingConvention calling_convention;
   3306   if (invoke->InputAt(0)->IsConstant()) {
   3307     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
   3308     if (value >= info.low && value <= info.high) {
   3309       // Just embed the j.l.Integer in the code.
   3310       ScopedObjectAccess soa(Thread::Current());
   3311       mirror::Object* boxed = info.cache->Get(value + (-info.low));
   3312       DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
   3313       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
   3314       __ movl(out, Immediate(address));
   3315     } else {
   3316       // Allocate and initialize a new j.l.Integer.
   3317       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
   3318       // JIT object table.
   3319       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
   3320       __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
   3321       codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
   3322       CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   3323       __ movl(Address(out, info.value_offset), Immediate(value));
   3324     }
   3325   } else {
   3326     Register in = locations->InAt(0).AsRegister<Register>();
   3327     // Check bounds of our cache.
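             // A sketch of the trick: `out = in - info.low` followed by a single unsigned
             // comparison against the cache size covers both `in < info.low` (which wraps
             // to a large unsigned value) and `in > info.high`.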
   3328     __ leal(out, Address(in, -info.low));
   3329     __ cmpl(out, Immediate(info.high - info.low + 1));
   3330     NearLabel allocate, done;
   3331     __ j(kAboveEqual, &allocate);
   3332     // If the value is within the bounds, load the j.l.Integer directly from the array.
   3333     uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
   3334     uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
   3335     __ movl(out, Address(out, TIMES_4, data_offset + address));
   3336     __ MaybeUnpoisonHeapReference(out);
   3337     __ jmp(&done);
   3338     __ Bind(&allocate);
   3339     // Otherwise allocate and initialize a new j.l.Integer.
   3340     address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
   3341     __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
   3342     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
   3343     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   3344     __ movl(Address(out, info.value_offset), in);
   3345     __ Bind(&done);
   3346   }
   3347 }
   3348 
   3349 void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
   3350   LocationSummary* locations = new (arena_) LocationSummary(invoke,
   3351                                                             LocationSummary::kNoCall,
   3352                                                             kIntrinsified);
   3353   locations->SetOut(Location::RequiresRegister());
   3354 }
   3355 
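         // A sketch of the logic emitted below for Thread.interrupted() (field name is
         // illustrative):
         //
         //   out = self.interrupted;   // loaded through an fs:-relative (Thread) address
         //   if (out != 0) {
         //     self.interrupted = 0;   // consume the flag
         //     MemoryFence();          // keep the clear ordered with later accesses
         //   }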
   3356 void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
   3357   X86Assembler* assembler = GetAssembler();
   3358   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
   3359   Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
   3360   NearLabel done;
   3361   __ fs()->movl(out, address);
   3362   __ testl(out, out);
   3363   __ j(kEqual, &done);
   3364   __ fs()->movl(address, Immediate(0));
   3365   codegen_->MemoryFence();
   3366   __ Bind(&done);
   3367 }
   3368 
   3369 
   3370 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
   3371 UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
   3372 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
   3373 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
   3374 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
   3375 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
   3376 UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
   3377 UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)
   3378 
   3379 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
   3380 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
   3381 UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
   3382 UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
   3383 UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
   3384 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppend);
   3385 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
   3386 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);
   3387 
   3388 // 1.8.
   3389 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
   3390 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
   3391 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
   3392 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
   3393 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)
   3394 
   3395 UNREACHABLE_INTRINSICS(X86)
   3396 
   3397 #undef __
   3398 
   3399 }  // namespace x86
   3400 }  // namespace art
   3401