1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "intrinsics_x86.h" 18 19 #include <limits> 20 21 #include "arch/x86/instruction_set_features_x86.h" 22 #include "art_method.h" 23 #include "base/bit_utils.h" 24 #include "code_generator_x86.h" 25 #include "entrypoints/quick/quick_entrypoints.h" 26 #include "heap_poisoning.h" 27 #include "intrinsics.h" 28 #include "intrinsics_utils.h" 29 #include "lock_word.h" 30 #include "mirror/array-inl.h" 31 #include "mirror/object_array-inl.h" 32 #include "mirror/reference.h" 33 #include "mirror/string.h" 34 #include "scoped_thread_state_change-inl.h" 35 #include "thread-current-inl.h" 36 #include "utils/x86/assembler_x86.h" 37 #include "utils/x86/constants_x86.h" 38 39 namespace art { 40 41 namespace x86 { 42 43 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen) 44 : allocator_(codegen->GetGraph()->GetAllocator()), 45 codegen_(codegen) { 46 } 47 48 49 X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() { 50 return down_cast<X86Assembler*>(codegen_->GetAssembler()); 51 } 52 53 ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() { 54 return codegen_->GetGraph()->GetAllocator(); 55 } 56 57 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) { 58 Dispatch(invoke); 59 LocationSummary* res = invoke->GetLocations(); 60 if (res == nullptr) { 61 return false; 62 } 63 return res->Intrinsified(); 64 } 65 66 static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) { 67 InvokeDexCallingConventionVisitorX86 calling_convention_visitor; 68 IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); 69 } 70 71 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>; 72 73 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 74 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT 75 76 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. 
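// In essence, the slow path below emits the equivalent of the following
// sketch (illustrative pseudocode only, not what is literally assembled):
//
//   for (int32_t i = 0; i != length; ++i) {
//     dest[dest_pos + i] = ReadBarrier::Mark(src[src_pos + i]);
//   }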
77 class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode { 78 public: 79 explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction) 80 : SlowPathCode(instruction) { 81 DCHECK(kEmitCompilerReadBarrier); 82 DCHECK(kUseBakerReadBarrier); 83 } 84 85 void EmitNativeCode(CodeGenerator* codegen) override { 86 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); 87 LocationSummary* locations = instruction_->GetLocations(); 88 DCHECK(locations->CanCall()); 89 DCHECK(instruction_->IsInvokeStaticOrDirect()) 90 << "Unexpected instruction in read barrier arraycopy slow path: " 91 << instruction_->DebugName(); 92 DCHECK(instruction_->GetLocations()->Intrinsified()); 93 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); 94 95 int32_t element_size = DataType::Size(DataType::Type::kReference); 96 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); 97 98 Register src = locations->InAt(0).AsRegister<Register>(); 99 Location src_pos = locations->InAt(1); 100 Register dest = locations->InAt(2).AsRegister<Register>(); 101 Location dest_pos = locations->InAt(3); 102 Location length = locations->InAt(4); 103 Location temp1_loc = locations->GetTemp(0); 104 Register temp1 = temp1_loc.AsRegister<Register>(); 105 Register temp2 = locations->GetTemp(1).AsRegister<Register>(); 106 Register temp3 = locations->GetTemp(2).AsRegister<Register>(); 107 108 __ Bind(GetEntryLabel()); 109 // In this code path, registers `temp1`, `temp2`, and `temp3` 110 // (resp.) are not used for the base source address, the base 111 // destination address, and the end source address (resp.), as in 112 // other SystemArrayCopy intrinsic code paths. Instead they are 113 // (resp.) used for: 114 // - the loop index (`i`); 115 // - the source index (`src_index`) and the loaded (source) 116 // reference (`value`); and 117 // - the destination index (`dest_index`). 118 119 // i = 0 120 __ xorl(temp1, temp1); 121 NearLabel loop; 122 __ Bind(&loop); 123 // value = src_array[i + src_pos] 124 if (src_pos.IsConstant()) { 125 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); 126 int32_t adjusted_offset = offset + constant * element_size; 127 __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset)); 128 } else { 129 __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0)); 130 __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset)); 131 } 132 __ MaybeUnpoisonHeapReference(temp2); 133 // TODO: Inline the mark bit check before calling the runtime? 134 // value = ReadBarrier::Mark(value) 135 // No need to save live registers; it's taken care of by the 136 // entrypoint. Also, there is no need to update the stack mask, 137 // as this runtime call will not trigger a garbage collection. 138 // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more 139 // explanations.) 140 DCHECK_NE(temp2, ESP); 141 DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2; 142 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2); 143 // This runtime call does not require a stack map. 
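// Note: the mark entrypoint is selected per register; it expects the unmarked
// reference in `temp2` and returns the marked reference in that same register,
// so no argument or result moves are needed around the call.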
144 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); 145 __ MaybePoisonHeapReference(temp2); 146 // dest_array[i + dest_pos] = value 147 if (dest_pos.IsConstant()) { 148 int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); 149 int32_t adjusted_offset = offset + constant * element_size; 150 __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2); 151 } else { 152 __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0)); 153 __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2); 154 } 155 // ++i 156 __ addl(temp1, Immediate(1)); 157 // if (i != length) goto loop 158 x86_codegen->GenerateIntCompare(temp1_loc, length); 159 __ j(kNotEqual, &loop); 160 __ jmp(GetExitLabel()); 161 } 162 163 const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; } 164 165 private: 166 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86); 167 }; 168 169 #undef __ 170 171 #define __ assembler-> 172 173 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) { 174 LocationSummary* locations = 175 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 176 locations->SetInAt(0, Location::RequiresFpuRegister()); 177 locations->SetOut(Location::RequiresRegister()); 178 if (is64bit) { 179 locations->AddTemp(Location::RequiresFpuRegister()); 180 } 181 } 182 183 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) { 184 LocationSummary* locations = 185 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 186 locations->SetInAt(0, Location::RequiresRegister()); 187 locations->SetOut(Location::RequiresFpuRegister()); 188 if (is64bit) { 189 locations->AddTemp(Location::RequiresFpuRegister()); 190 locations->AddTemp(Location::RequiresFpuRegister()); 191 } 192 } 193 194 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) { 195 Location input = locations->InAt(0); 196 Location output = locations->Out(); 197 if (is64bit) { 198 // Need to use the temporary. 199 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 200 __ movsd(temp, input.AsFpuRegister<XmmRegister>()); 201 __ movd(output.AsRegisterPairLow<Register>(), temp); 202 __ psrlq(temp, Immediate(32)); 203 __ movd(output.AsRegisterPairHigh<Register>(), temp); 204 } else { 205 __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>()); 206 } 207 } 208 209 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) { 210 Location input = locations->InAt(0); 211 Location output = locations->Out(); 212 if (is64bit) { 213 // Need to use the temporary. 
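// The two halves of the register pair are moved into separate XMM temporaries
// and interleaved with punpckldq, so the low quadword of `temp1` holds the
// full 64-bit pattern before it is copied to the output register.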
214 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 215 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); 216 __ movd(temp1, input.AsRegisterPairLow<Register>()); 217 __ movd(temp2, input.AsRegisterPairHigh<Register>()); 218 __ punpckldq(temp1, temp2); 219 __ movsd(output.AsFpuRegister<XmmRegister>(), temp1); 220 } else { 221 __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>()); 222 } 223 } 224 225 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { 226 CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true); 227 } 228 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) { 229 CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true); 230 } 231 232 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { 233 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); 234 } 235 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) { 236 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); 237 } 238 239 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) { 240 CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false); 241 } 242 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) { 243 CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false); 244 } 245 246 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) { 247 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); 248 } 249 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) { 250 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); 251 } 252 253 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { 254 LocationSummary* locations = 255 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 256 locations->SetInAt(0, Location::RequiresRegister()); 257 locations->SetOut(Location::SameAsFirstInput()); 258 } 259 260 static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { 261 LocationSummary* locations = 262 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 263 locations->SetInAt(0, Location::RequiresRegister()); 264 locations->SetOut(Location::RequiresRegister()); 265 } 266 267 static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { 268 LocationSummary* locations = 269 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 270 locations->SetInAt(0, Location::RequiresRegister()); 271 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 272 } 273 274 static void GenReverseBytes(LocationSummary* locations, 275 DataType::Type size, 276 X86Assembler* assembler) { 277 Register out = locations->Out().AsRegister<Register>(); 278 279 switch (size) { 280 case DataType::Type::kInt16: 281 // TODO: Can be done with an xchg of 8b registers. This is straight from Quick. 
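// bswapl reverses all four bytes of the 32-bit register, leaving the two bytes
// of interest in the upper half; the arithmetic shift right by 16 moves them
// back down and sign-extends the result as required for a Java short.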
282 __ bswapl(out); 283 __ sarl(out, Immediate(16)); 284 break; 285 case DataType::Type::kInt32: 286 __ bswapl(out); 287 break; 288 default: 289 LOG(FATAL) << "Unexpected size for reverse-bytes: " << size; 290 UNREACHABLE(); 291 } 292 } 293 294 void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) { 295 CreateIntToIntLocations(allocator_, invoke); 296 } 297 298 void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) { 299 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); 300 } 301 302 void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) { 303 CreateLongToLongLocations(allocator_, invoke); 304 } 305 306 void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) { 307 LocationSummary* locations = invoke->GetLocations(); 308 Location input = locations->InAt(0); 309 Register input_lo = input.AsRegisterPairLow<Register>(); 310 Register input_hi = input.AsRegisterPairHigh<Register>(); 311 Location output = locations->Out(); 312 Register output_lo = output.AsRegisterPairLow<Register>(); 313 Register output_hi = output.AsRegisterPairHigh<Register>(); 314 315 X86Assembler* assembler = GetAssembler(); 316 // Assign the inputs to the outputs, mixing low/high. 317 __ movl(output_lo, input_hi); 318 __ movl(output_hi, input_lo); 319 __ bswapl(output_lo); 320 __ bswapl(output_hi); 321 } 322 323 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) { 324 CreateIntToIntLocations(allocator_, invoke); 325 } 326 327 void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) { 328 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); 329 } 330 331 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { 332 LocationSummary* locations = 333 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 334 locations->SetInAt(0, Location::RequiresFpuRegister()); 335 locations->SetOut(Location::RequiresFpuRegister()); 336 } 337 338 void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) { 339 CreateFPToFPLocations(allocator_, invoke); 340 } 341 342 void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) { 343 LocationSummary* locations = invoke->GetLocations(); 344 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); 345 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); 346 347 GetAssembler()->sqrtsd(out, in); 348 } 349 350 static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) { 351 MoveArguments(invoke, codegen); 352 353 DCHECK(invoke->IsInvokeStaticOrDirect()); 354 codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), 355 Location::RegisterLocation(EAX)); 356 357 // Copy the result back to the expected output. 358 Location out = invoke->GetLocations()->Out(); 359 if (out.IsValid()) { 360 DCHECK(out.IsRegister()); 361 codegen->MoveFromReturnRegister(out, invoke->GetType()); 362 } 363 } 364 365 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator, 366 HInvoke* invoke, 367 CodeGeneratorX86* codegen) { 368 // Do we have instruction support? 369 if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { 370 CreateFPToFPLocations(allocator, invoke); 371 return; 372 } 373 374 // We have to fall back to a call to the intrinsic. 
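// Without SSE4.1 there is no roundsd instruction, so the intrinsic is emitted
// as a regular out-of-line call (see InvokeOutOfLineIntrinsic above) and the
// locations below follow the runtime calling convention.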
375 LocationSummary* locations = 376 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly); 377 InvokeRuntimeCallingConvention calling_convention; 378 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); 379 locations->SetOut(Location::FpuRegisterLocation(XMM0)); 380 // Needs to be EAX for the invoke. 381 locations->AddTemp(Location::RegisterLocation(EAX)); 382 } 383 384 static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen, 385 HInvoke* invoke, 386 X86Assembler* assembler, 387 int round_mode) { 388 LocationSummary* locations = invoke->GetLocations(); 389 if (locations->WillCall()) { 390 InvokeOutOfLineIntrinsic(codegen, invoke); 391 } else { 392 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); 393 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); 394 __ roundsd(out, in, Immediate(round_mode)); 395 } 396 } 397 398 void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) { 399 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_); 400 } 401 402 void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) { 403 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2); 404 } 405 406 void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) { 407 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_); 408 } 409 410 void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) { 411 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1); 412 } 413 414 void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) { 415 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_); 416 } 417 418 void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) { 419 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0); 420 } 421 422 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { 423 // Do we have instruction support? 424 if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) { 425 HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); 426 DCHECK(static_or_direct != nullptr); 427 LocationSummary* locations = 428 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 429 locations->SetInAt(0, Location::RequiresFpuRegister()); 430 if (static_or_direct->HasSpecialInput() && 431 invoke->InputAt( 432 static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { 433 locations->SetInAt(1, Location::RequiresRegister()); 434 } 435 locations->SetOut(Location::RequiresRegister()); 436 locations->AddTemp(Location::RequiresFpuRegister()); 437 locations->AddTemp(Location::RequiresFpuRegister()); 438 return; 439 } 440 441 // We have to fall back to a call to the intrinsic. 442 LocationSummary* locations = 443 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly); 444 InvokeRuntimeCallingConvention calling_convention; 445 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); 446 locations->SetOut(Location::RegisterLocation(EAX)); 447 // Needs to be EAX for the invoke. 448 locations->AddTemp(Location::RegisterLocation(EAX)); 449 } 450 451 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { 452 LocationSummary* locations = invoke->GetLocations(); 453 if (locations->WillCall()) { // TODO: can we reach this? 
454 InvokeOutOfLineIntrinsic(codegen_, invoke); 455 return; 456 } 457 458 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); 459 XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 460 XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); 461 Register out = locations->Out().AsRegister<Register>(); 462 NearLabel skip_incr, done; 463 X86Assembler* assembler = GetAssembler(); 464 465 // Since no direct x86 rounding instruction matches the required semantics, 466 // this intrinsic is implemented as follows: 467 // result = floor(in); 468 // if (in - result >= 0.5f) 469 // result = result + 1.0f; 470 __ movss(t2, in); 471 __ roundss(t1, in, Immediate(1)); 472 __ subss(t2, t1); 473 if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) { 474 // Direct constant area available. 475 HX86ComputeBaseMethodAddress* method_address = 476 invoke->InputAt(1)->AsX86ComputeBaseMethodAddress(); 477 Register constant_area = locations->InAt(1).AsRegister<Register>(); 478 __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), 479 method_address, 480 constant_area)); 481 __ j(kBelow, &skip_incr); 482 __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), 483 method_address, 484 constant_area)); 485 __ Bind(&skip_incr); 486 } else { 487 // No constant area: go through stack. 488 __ pushl(Immediate(bit_cast<int32_t, float>(0.5f))); 489 __ pushl(Immediate(bit_cast<int32_t, float>(1.0f))); 490 __ comiss(t2, Address(ESP, 4)); 491 __ j(kBelow, &skip_incr); 492 __ addss(t1, Address(ESP, 0)); 493 __ Bind(&skip_incr); 494 __ addl(ESP, Immediate(8)); 495 } 496 497 // Final conversion to an integer. Unfortunately this also does not have a 498 // direct x86 instruction, since NaN should map to 0 and large positive 499 // values need to be clipped to the extreme value. 500 __ movl(out, Immediate(kPrimIntMax)); 501 __ cvtsi2ss(t2, out); 502 __ comiss(t1, t2); 503 __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered 504 __ movl(out, Immediate(0)); // does not change flags 505 __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out) 506 __ cvttss2si(out, t1); 507 __ Bind(&done); 508 } 509 510 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { 511 LocationSummary* locations = 512 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 513 InvokeRuntimeCallingConvention calling_convention; 514 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); 515 locations->SetOut(Location::FpuRegisterLocation(XMM0)); 516 } 517 518 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) { 519 LocationSummary* locations = invoke->GetLocations(); 520 DCHECK(locations->WillCall()); 521 DCHECK(invoke->IsInvokeStaticOrDirect()); 522 X86Assembler* assembler = codegen->GetAssembler(); 523 524 // We need some place to pass the parameters. 525 __ subl(ESP, Immediate(16)); 526 __ cfi().AdjustCFAOffset(16); 527 528 // Pass the parameters at the bottom of the stack. 529 __ movsd(Address(ESP, 0), XMM0); 530 531 // If we have a second parameter, pass it next. 532 if (invoke->GetNumberOfArguments() == 2) { 533 __ movsd(Address(ESP, 8), XMM1); 534 } 535 536 // Now do the actual call. 537 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); 538 539 // Extract the return value from the FP stack. 
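// On x86 the runtime math entrypoints return their result in ST(0) on the x87
// stack, so it is spilled to the reserved stack slot with fstpl and then
// reloaded into XMM0, where the out location expects it.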
540 __ fstpl(Address(ESP, 0)); 541 __ movsd(XMM0, Address(ESP, 0)); 542 543 // And clean up the stack. 544 __ addl(ESP, Immediate(16)); 545 __ cfi().AdjustCFAOffset(-16); 546 } 547 548 static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) { 549 LocationSummary* locations = 550 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 551 if (is_long) { 552 locations->SetInAt(0, Location::RequiresRegister()); 553 } else { 554 locations->SetInAt(0, Location::Any()); 555 } 556 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 557 } 558 559 static void GenLowestOneBit(X86Assembler* assembler, 560 CodeGeneratorX86* codegen, 561 bool is_long, 562 HInvoke* invoke) { 563 LocationSummary* locations = invoke->GetLocations(); 564 Location src = locations->InAt(0); 565 Location out_loc = locations->Out(); 566 567 if (invoke->InputAt(0)->IsConstant()) { 568 // Evaluate this at compile time. 569 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 570 if (value == 0) { 571 if (is_long) { 572 __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>()); 573 __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>()); 574 } else { 575 __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>()); 576 } 577 return; 578 } 579 // Nonzero value. 580 value = is_long ? CTZ(static_cast<uint64_t>(value)) 581 : CTZ(static_cast<uint32_t>(value)); 582 if (is_long) { 583 if (value >= 32) { 584 int shift = value-32; 585 codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0); 586 codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift); 587 } else { 588 codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value); 589 codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0); 590 } 591 } else { 592 codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value); 593 } 594 return; 595 } 596 // Handle non constant case 597 if (is_long) { 598 DCHECK(src.IsRegisterPair()); 599 Register src_lo = src.AsRegisterPairLow<Register>(); 600 Register src_hi = src.AsRegisterPairHigh<Register>(); 601 602 Register out_lo = out_loc.AsRegisterPairLow<Register>(); 603 Register out_hi = out_loc.AsRegisterPairHigh<Register>(); 604 605 __ movl(out_lo, src_lo); 606 __ movl(out_hi, src_hi); 607 608 __ negl(out_lo); 609 __ adcl(out_hi, Immediate(0)); 610 __ negl(out_hi); 611 612 __ andl(out_lo, src_lo); 613 __ andl(out_hi, src_hi); 614 } else { 615 if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) { 616 Register out = out_loc.AsRegister<Register>(); 617 __ blsi(out, src.AsRegister<Register>()); 618 } else { 619 Register out = out_loc.AsRegister<Register>(); 620 // Do tmp & -tmp 621 if (src.IsRegister()) { 622 __ movl(out, src.AsRegister<Register>()); 623 } else { 624 DCHECK(src.IsStackSlot()); 625 __ movl(out, Address(ESP, src.GetStackIndex())); 626 } 627 __ negl(out); 628 629 if (src.IsRegister()) { 630 __ andl(out, src.AsRegister<Register>()); 631 } else { 632 __ andl(out, Address(ESP, src.GetStackIndex())); 633 } 634 } 635 } 636 } 637 638 void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) { 639 CreateFPToFPCallLocations(allocator_, invoke); 640 } 641 642 void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) { 643 GenFPToFPCall(invoke, codegen_, kQuickCos); 644 } 645 646 void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) { 647 
CreateFPToFPCallLocations(allocator_, invoke); 648 } 649 650 void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) { 651 GenFPToFPCall(invoke, codegen_, kQuickSin); 652 } 653 654 void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) { 655 CreateFPToFPCallLocations(allocator_, invoke); 656 } 657 658 void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) { 659 GenFPToFPCall(invoke, codegen_, kQuickAcos); 660 } 661 662 void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) { 663 CreateFPToFPCallLocations(allocator_, invoke); 664 } 665 666 void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) { 667 GenFPToFPCall(invoke, codegen_, kQuickAsin); 668 } 669 670 void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) { 671 CreateFPToFPCallLocations(allocator_, invoke); 672 } 673 674 void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) { 675 GenFPToFPCall(invoke, codegen_, kQuickAtan); 676 } 677 678 void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) { 679 CreateFPToFPCallLocations(allocator_, invoke); 680 } 681 682 void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) { 683 GenFPToFPCall(invoke, codegen_, kQuickCbrt); 684 } 685 686 void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) { 687 CreateFPToFPCallLocations(allocator_, invoke); 688 } 689 690 void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) { 691 GenFPToFPCall(invoke, codegen_, kQuickCosh); 692 } 693 694 void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) { 695 CreateFPToFPCallLocations(allocator_, invoke); 696 } 697 698 void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) { 699 GenFPToFPCall(invoke, codegen_, kQuickExp); 700 } 701 702 void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) { 703 CreateFPToFPCallLocations(allocator_, invoke); 704 } 705 706 void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) { 707 GenFPToFPCall(invoke, codegen_, kQuickExpm1); 708 } 709 710 void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) { 711 CreateFPToFPCallLocations(allocator_, invoke); 712 } 713 714 void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) { 715 GenFPToFPCall(invoke, codegen_, kQuickLog); 716 } 717 718 void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) { 719 CreateFPToFPCallLocations(allocator_, invoke); 720 } 721 722 void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) { 723 GenFPToFPCall(invoke, codegen_, kQuickLog10); 724 } 725 726 void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) { 727 CreateFPToFPCallLocations(allocator_, invoke); 728 } 729 730 void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) { 731 GenFPToFPCall(invoke, codegen_, kQuickSinh); 732 } 733 734 void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) { 735 CreateFPToFPCallLocations(allocator_, invoke); 736 } 737 738 void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) { 739 GenFPToFPCall(invoke, codegen_, kQuickTan); 740 } 741 742 void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) { 743 CreateFPToFPCallLocations(allocator_, invoke); 744 } 745 746 void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) { 747 GenFPToFPCall(invoke, codegen_, kQuickTanh); 748 } 749 750 void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) { 751 CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke); 752 } 753 void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) { 
754 GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke); 755 } 756 757 void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) { 758 CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke); 759 } 760 761 void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) { 762 GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke); 763 } 764 765 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { 766 LocationSummary* locations = 767 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 768 InvokeRuntimeCallingConvention calling_convention; 769 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); 770 locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); 771 locations->SetOut(Location::FpuRegisterLocation(XMM0)); 772 } 773 774 void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) { 775 CreateFPFPToFPCallLocations(allocator_, invoke); 776 } 777 778 void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) { 779 GenFPToFPCall(invoke, codegen_, kQuickAtan2); 780 } 781 782 void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) { 783 CreateFPFPToFPCallLocations(allocator_, invoke); 784 } 785 786 void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) { 787 GenFPToFPCall(invoke, codegen_, kQuickPow); 788 } 789 790 void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) { 791 CreateFPFPToFPCallLocations(allocator_, invoke); 792 } 793 794 void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) { 795 GenFPToFPCall(invoke, codegen_, kQuickHypot); 796 } 797 798 void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) { 799 CreateFPFPToFPCallLocations(allocator_, invoke); 800 } 801 802 void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) { 803 GenFPToFPCall(invoke, codegen_, kQuickNextAfter); 804 } 805 806 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) { 807 // We need at least two of the positions or length to be an integer constant, 808 // or else we won't have enough free registers. 809 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); 810 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); 811 HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); 812 813 int num_constants = 814 ((src_pos != nullptr) ? 1 : 0) 815 + ((dest_pos != nullptr) ? 1 : 0) 816 + ((length != nullptr) ? 1 : 0); 817 818 if (num_constants < 2) { 819 // Not enough free registers. 820 return; 821 } 822 823 // As long as we are checking, we might as well check to see if the src and dest 824 // positions are >= 0. 825 if ((src_pos != nullptr && src_pos->GetValue() < 0) || 826 (dest_pos != nullptr && dest_pos->GetValue() < 0)) { 827 // We will have to fail anyways. 828 return; 829 } 830 831 // And since we are already checking, check the length too. 832 if (length != nullptr) { 833 int32_t len = length->GetValue(); 834 if (len < 0) { 835 // Just call as normal. 836 return; 837 } 838 } 839 840 // Okay, it is safe to generate inline code. 841 LocationSummary* locations = 842 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); 843 // arraycopy(Object src, int srcPos, Object dest, int destPos, int length). 
844 locations->SetInAt(0, Location::RequiresRegister()); 845 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); 846 locations->SetInAt(2, Location::RequiresRegister()); 847 locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3))); 848 locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4))); 849 850 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers. 851 locations->AddTemp(Location::RegisterLocation(ESI)); 852 locations->AddTemp(Location::RegisterLocation(EDI)); 853 locations->AddTemp(Location::RegisterLocation(ECX)); 854 } 855 856 static void CheckPosition(X86Assembler* assembler, 857 Location pos, 858 Register input, 859 Location length, 860 SlowPathCode* slow_path, 861 Register temp, 862 bool length_is_input_length = false) { 863 // Where is the length in the Array? 864 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value(); 865 866 if (pos.IsConstant()) { 867 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue(); 868 if (pos_const == 0) { 869 if (!length_is_input_length) { 870 // Check that length(input) >= length. 871 if (length.IsConstant()) { 872 __ cmpl(Address(input, length_offset), 873 Immediate(length.GetConstant()->AsIntConstant()->GetValue())); 874 } else { 875 __ cmpl(Address(input, length_offset), length.AsRegister<Register>()); 876 } 877 __ j(kLess, slow_path->GetEntryLabel()); 878 } 879 } else { 880 // Check that length(input) >= pos. 881 __ movl(temp, Address(input, length_offset)); 882 __ subl(temp, Immediate(pos_const)); 883 __ j(kLess, slow_path->GetEntryLabel()); 884 885 // Check that (length(input) - pos) >= length. 886 if (length.IsConstant()) { 887 __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue())); 888 } else { 889 __ cmpl(temp, length.AsRegister<Register>()); 890 } 891 __ j(kLess, slow_path->GetEntryLabel()); 892 } 893 } else if (length_is_input_length) { 894 // The only way the copy can succeed is if pos is zero. 895 Register pos_reg = pos.AsRegister<Register>(); 896 __ testl(pos_reg, pos_reg); 897 __ j(kNotEqual, slow_path->GetEntryLabel()); 898 } else { 899 // Check that pos >= 0. 900 Register pos_reg = pos.AsRegister<Register>(); 901 __ testl(pos_reg, pos_reg); 902 __ j(kLess, slow_path->GetEntryLabel()); 903 904 // Check that pos <= length(input). 905 __ cmpl(Address(input, length_offset), pos_reg); 906 __ j(kLess, slow_path->GetEntryLabel()); 907 908 // Check that (length(input) - pos) >= length. 909 __ movl(temp, Address(input, length_offset)); 910 __ subl(temp, pos_reg); 911 if (length.IsConstant()) { 912 __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue())); 913 } else { 914 __ cmpl(temp, length.AsRegister<Register>()); 915 } 916 __ j(kLess, slow_path->GetEntryLabel()); 917 } 918 } 919 920 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) { 921 X86Assembler* assembler = GetAssembler(); 922 LocationSummary* locations = invoke->GetLocations(); 923 924 Register src = locations->InAt(0).AsRegister<Register>(); 925 Location srcPos = locations->InAt(1); 926 Register dest = locations->InAt(2).AsRegister<Register>(); 927 Location destPos = locations->InAt(3); 928 Location length = locations->InAt(4); 929 930 // Temporaries that we need for MOVSW. 
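// Note: REP MOVSW copies ECX 16-bit words from [ESI] to [EDI], which is why
// these temporaries are pinned to ESI, EDI and ECX.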
931 Register src_base = locations->GetTemp(0).AsRegister<Register>();
932 DCHECK_EQ(src_base, ESI);
933 Register dest_base = locations->GetTemp(1).AsRegister<Register>();
934 DCHECK_EQ(dest_base, EDI);
935 Register count = locations->GetTemp(2).AsRegister<Register>();
936 DCHECK_EQ(count, ECX);
937
938 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
939 codegen_->AddSlowPath(slow_path);
940
941 // Bail out if the source and destination are the same (to handle overlap).
942 __ cmpl(src, dest);
943 __ j(kEqual, slow_path->GetEntryLabel());
944
945 // Bail out if the source is null.
946 __ testl(src, src);
947 __ j(kEqual, slow_path->GetEntryLabel());
948
949 // Bail out if the destination is null.
950 __ testl(dest, dest);
951 __ j(kEqual, slow_path->GetEntryLabel());
952
953 // If the length is negative, bail out.
954 // We have already checked in the LocationsBuilder for the constant case.
955 if (!length.IsConstant()) {
956 __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
957 __ j(kLess, slow_path->GetEntryLabel());
958 }
959
960 // We need the count in ECX.
961 if (length.IsConstant()) {
962 __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
963 } else {
964 __ movl(count, length.AsRegister<Register>());
965 }
966
967 // Validity checks: source. Use src_base as a temporary register.
968 CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);
969
970 // Validity checks: dest. Use src_base as a temporary register.
971 CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);
972
973 // Okay, everything checks out. Finally time to do the copy.
974 // Check assumption that sizeof(Char) is 2 (used in scaling below).
975 const size_t char_size = DataType::Size(DataType::Type::kUint16);
976 DCHECK_EQ(char_size, 2u);
977
978 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
979
980 if (srcPos.IsConstant()) {
981 int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
982 __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
983 } else {
984 __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
985 ScaleFactor::TIMES_2, data_offset));
986 }
987 if (destPos.IsConstant()) {
988 int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
989
990 __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
991 } else {
992 __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
993 ScaleFactor::TIMES_2, data_offset));
994 }
995
996 // Do the move.
997 __ rep_movsw();
998
999 __ Bind(slow_path->GetExitLabel());
1000 }
1001
1002 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
1003 // The inputs plus one temp.
1004 LocationSummary* locations = new (allocator_) LocationSummary(
1005 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1006 InvokeRuntimeCallingConvention calling_convention;
1007 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1008 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1009 locations->SetOut(Location::RegisterLocation(EAX));
1010 }
1011
1012 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
1013 X86Assembler* assembler = GetAssembler();
1014 LocationSummary* locations = invoke->GetLocations();
1015
1016 // Note that the null check must have been done earlier.
1017 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1018 1019 Register argument = locations->InAt(1).AsRegister<Register>(); 1020 __ testl(argument, argument); 1021 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); 1022 codegen_->AddSlowPath(slow_path); 1023 __ j(kEqual, slow_path->GetEntryLabel()); 1024 1025 codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path); 1026 __ Bind(slow_path->GetExitLabel()); 1027 } 1028 1029 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) { 1030 LocationSummary* locations = 1031 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1032 locations->SetInAt(0, Location::RequiresRegister()); 1033 locations->SetInAt(1, Location::RequiresRegister()); 1034 1035 // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction. 1036 locations->AddTemp(Location::RegisterLocation(ECX)); 1037 locations->AddTemp(Location::RegisterLocation(EDI)); 1038 1039 // Set output, ESI needed for repe_cmpsl instruction anyways. 1040 locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap); 1041 } 1042 1043 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) { 1044 X86Assembler* assembler = GetAssembler(); 1045 LocationSummary* locations = invoke->GetLocations(); 1046 1047 Register str = locations->InAt(0).AsRegister<Register>(); 1048 Register arg = locations->InAt(1).AsRegister<Register>(); 1049 Register ecx = locations->GetTemp(0).AsRegister<Register>(); 1050 Register edi = locations->GetTemp(1).AsRegister<Register>(); 1051 Register esi = locations->Out().AsRegister<Register>(); 1052 1053 NearLabel end, return_true, return_false; 1054 1055 // Get offsets of count, value, and class fields within a string object. 1056 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 1057 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); 1058 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value(); 1059 1060 // Note that the null check must have been done earlier. 1061 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1062 1063 StringEqualsOptimizations optimizations(invoke); 1064 if (!optimizations.GetArgumentNotNull()) { 1065 // Check if input is null, return false if it is. 1066 __ testl(arg, arg); 1067 __ j(kEqual, &return_false); 1068 } 1069 1070 if (!optimizations.GetArgumentIsString()) { 1071 // Instanceof check for the argument by comparing class fields. 1072 // All string objects must have the same type since String cannot be subclassed. 1073 // Receiver must be a string object, so its class field is equal to all strings' class fields. 1074 // If the argument is a string object, its class field must be equal to receiver's class field. 1075 // 1076 // As the String class is expected to be non-movable, we can read the class 1077 // field from String.equals' arguments without read barriers. 1078 AssertNonMovableStringClass(); 1079 // Also, because we use the loaded class references only to compare them, we 1080 // don't need to unpoison them. 1081 // /* HeapReference<Class> */ ecx = str->klass_ 1082 __ movl(ecx, Address(str, class_offset)); 1083 // if (ecx != /* HeapReference<Class> */ arg->klass_) return false 1084 __ cmpl(ecx, Address(arg, class_offset)); 1085 __ j(kNotEqual, &return_false); 1086 } 1087 1088 // Reference equality check, return true if same reference. 
1089 __ cmpl(str, arg); 1090 __ j(kEqual, &return_true); 1091 1092 // Load length and compression flag of receiver string. 1093 __ movl(ecx, Address(str, count_offset)); 1094 // Check if lengths and compression flags are equal, return false if they're not. 1095 // Two identical strings will always have same compression style since 1096 // compression style is decided on alloc. 1097 __ cmpl(ecx, Address(arg, count_offset)); 1098 __ j(kNotEqual, &return_false); 1099 // Return true if strings are empty. Even with string compression `count == 0` means empty. 1100 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1101 "Expecting 0=compressed, 1=uncompressed"); 1102 __ jecxz(&return_true); 1103 1104 if (mirror::kUseStringCompression) { 1105 NearLabel string_uncompressed; 1106 // Extract length and differentiate between both compressed or both uncompressed. 1107 // Different compression style is cut above. 1108 __ shrl(ecx, Immediate(1)); 1109 __ j(kCarrySet, &string_uncompressed); 1110 // Divide string length by 2, rounding up, and continue as if uncompressed. 1111 __ addl(ecx, Immediate(1)); 1112 __ shrl(ecx, Immediate(1)); 1113 __ Bind(&string_uncompressed); 1114 } 1115 // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction. 1116 __ leal(esi, Address(str, value_offset)); 1117 __ leal(edi, Address(arg, value_offset)); 1118 1119 // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not 1120 // divisible by 2. 1121 __ addl(ecx, Immediate(1)); 1122 __ shrl(ecx, Immediate(1)); 1123 1124 // Assertions that must hold in order to compare strings 2 characters (uncompressed) 1125 // or 4 characters (compressed) at a time. 1126 DCHECK_ALIGNED(value_offset, 4); 1127 static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded"); 1128 1129 // Loop to compare strings two characters at a time starting at the beginning of the string. 1130 __ repe_cmpsl(); 1131 // If strings are not equal, zero flag will be cleared. 1132 __ j(kNotEqual, &return_false); 1133 1134 // Return true and exit the function. 1135 // If loop does not result in returning false, we return true. 1136 __ Bind(&return_true); 1137 __ movl(esi, Immediate(1)); 1138 __ jmp(&end); 1139 1140 // Return false and exit the function. 1141 __ Bind(&return_false); 1142 __ xorl(esi, esi); 1143 __ Bind(&end); 1144 } 1145 1146 static void CreateStringIndexOfLocations(HInvoke* invoke, 1147 ArenaAllocator* allocator, 1148 bool start_at_zero) { 1149 LocationSummary* locations = new (allocator) LocationSummary(invoke, 1150 LocationSummary::kCallOnSlowPath, 1151 kIntrinsified); 1152 // The data needs to be in EDI for scasw. So request that the string is there, anyways. 1153 locations->SetInAt(0, Location::RegisterLocation(EDI)); 1154 // If we look for a constant char, we'll still have to copy it into EAX. So just request the 1155 // allocator to do that, anyways. We can still do the constant check by checking the parameter 1156 // of the instruction explicitly. 1157 // Note: This works as we don't clobber EAX anywhere. 1158 locations->SetInAt(1, Location::RegisterLocation(EAX)); 1159 if (!start_at_zero) { 1160 locations->SetInAt(2, Location::RequiresRegister()); // The starting index. 1161 } 1162 // As we clobber EDI during execution anyways, also use it as the output. 1163 locations->SetOut(Location::SameAsFirstInput()); 1164 1165 // repne scasw uses ECX as the counter. 
1166 locations->AddTemp(Location::RegisterLocation(ECX)); 1167 // Need another temporary to be able to compute the result. 1168 locations->AddTemp(Location::RequiresRegister()); 1169 if (mirror::kUseStringCompression) { 1170 // Need another temporary to be able to save unflagged string length. 1171 locations->AddTemp(Location::RequiresRegister()); 1172 } 1173 } 1174 1175 static void GenerateStringIndexOf(HInvoke* invoke, 1176 X86Assembler* assembler, 1177 CodeGeneratorX86* codegen, 1178 bool start_at_zero) { 1179 LocationSummary* locations = invoke->GetLocations(); 1180 1181 // Note that the null check must have been done earlier. 1182 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1183 1184 Register string_obj = locations->InAt(0).AsRegister<Register>(); 1185 Register search_value = locations->InAt(1).AsRegister<Register>(); 1186 Register counter = locations->GetTemp(0).AsRegister<Register>(); 1187 Register string_length = locations->GetTemp(1).AsRegister<Register>(); 1188 Register out = locations->Out().AsRegister<Register>(); 1189 // Only used when string compression feature is on. 1190 Register string_length_flagged; 1191 1192 // Check our assumptions for registers. 1193 DCHECK_EQ(string_obj, EDI); 1194 DCHECK_EQ(search_value, EAX); 1195 DCHECK_EQ(counter, ECX); 1196 DCHECK_EQ(out, EDI); 1197 1198 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, 1199 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char. 1200 SlowPathCode* slow_path = nullptr; 1201 HInstruction* code_point = invoke->InputAt(1); 1202 if (code_point->IsIntConstant()) { 1203 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 1204 std::numeric_limits<uint16_t>::max()) { 1205 // Always needs the slow-path. We could directly dispatch to it, but this case should be 1206 // rare, so for simplicity just put the full slow-path down and branch unconditionally. 1207 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); 1208 codegen->AddSlowPath(slow_path); 1209 __ jmp(slow_path->GetEntryLabel()); 1210 __ Bind(slow_path->GetExitLabel()); 1211 return; 1212 } 1213 } else if (code_point->GetType() != DataType::Type::kUint16) { 1214 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max())); 1215 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); 1216 codegen->AddSlowPath(slow_path); 1217 __ j(kAbove, slow_path->GetEntryLabel()); 1218 } 1219 1220 // From here down, we know that we are looking for a char that fits in 16 bits. 1221 // Location of reference to data array within the String object. 1222 int32_t value_offset = mirror::String::ValueOffset().Int32Value(); 1223 // Location of count within the String object. 1224 int32_t count_offset = mirror::String::CountOffset().Int32Value(); 1225 1226 // Load the count field of the string containing the length and compression flag. 1227 __ movl(string_length, Address(string_obj, count_offset)); 1228 1229 // Do a zero-length check. Even with string compression `count == 0` means empty. 1230 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1231 "Expecting 0=compressed, 1=uncompressed"); 1232 // TODO: Support jecxz. 
1233 NearLabel not_found_label;
1234 __ testl(string_length, string_length);
1235 __ j(kEqual, &not_found_label);
1236
1237 if (mirror::kUseStringCompression) {
1238 string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1239 __ movl(string_length_flagged, string_length);
1240 // Extract the length and shift out the least significant bit used as compression flag.
1241 __ shrl(string_length, Immediate(1));
1242 }
1243
1244 if (start_at_zero) {
1245 // Number of chars to scan is the same as the string length.
1246 __ movl(counter, string_length);
1247
1248 // Move to the start of the string.
1249 __ addl(string_obj, Immediate(value_offset));
1250 } else {
1251 Register start_index = locations->InAt(2).AsRegister<Register>();
1252
1253 // Do a start_index check.
1254 __ cmpl(start_index, string_length);
1255 __ j(kGreaterEqual, &not_found_label);
1256
1257 // Ensure we have a start index >= 0;
1258 __ xorl(counter, counter);
1259 __ cmpl(start_index, Immediate(0));
1260 __ cmovl(kGreater, counter, start_index);
1261
1262 if (mirror::kUseStringCompression) {
1263 NearLabel modify_counter, offset_uncompressed_label;
1264 __ testl(string_length_flagged, Immediate(1));
1265 __ j(kNotZero, &offset_uncompressed_label);
1266 // Move to the start of the string: string_obj + value_offset + start_index.
1267 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1268 __ jmp(&modify_counter);
1269
1270 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1271 __ Bind(&offset_uncompressed_label);
1272 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1273
1274 // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1275 // compare.
1276 __ Bind(&modify_counter);
1277 } else {
1278 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1279 }
1280 __ negl(counter);
1281 __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1282 }
1283
1284 if (mirror::kUseStringCompression) {
1285 NearLabel uncompressed_string_comparison;
1286 NearLabel comparison_done;
1287 __ testl(string_length_flagged, Immediate(1));
1288 __ j(kNotZero, &uncompressed_string_comparison);
1289
1290 // Check if EAX (search_value) is ASCII.
1291 __ cmpl(search_value, Immediate(127));
1292 __ j(kGreater, &not_found_label);
1293 // Comparing byte-per-byte.
1294 __ repne_scasb();
1295 __ jmp(&comparison_done);
1296
1297 // Everything is set up for repne scasw:
1298 // * Comparison address in EDI.
1299 // * Counter in ECX.
1300 __ Bind(&uncompressed_string_comparison);
1301 __ repne_scasw();
1302 __ Bind(&comparison_done);
1303 } else {
1304 __ repne_scasw();
1305 }
1306 // Did we find a match?
1307 __ j(kNotEqual, &not_found_label);
1308
1309 // Yes, we matched. Compute the index of the result.
1310 __ subl(string_length, counter);
1311 __ leal(out, Address(string_length, -1));
1312
1313 NearLabel done;
1314 __ jmp(&done);
1315
1316 // Failed to match; return -1.
1317 __ Bind(&not_found_label);
1318 __ movl(out, Immediate(-1));
1319
1320 // And join up at the end.
1321 __ Bind(&done); 1322 if (slow_path != nullptr) { 1323 __ Bind(slow_path->GetExitLabel()); 1324 } 1325 } 1326 1327 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) { 1328 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true); 1329 } 1330 1331 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) { 1332 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true); 1333 } 1334 1335 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) { 1336 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false); 1337 } 1338 1339 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) { 1340 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false); 1341 } 1342 1343 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) { 1344 LocationSummary* locations = new (allocator_) LocationSummary( 1345 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1346 InvokeRuntimeCallingConvention calling_convention; 1347 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1348 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 1349 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); 1350 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); 1351 locations->SetOut(Location::RegisterLocation(EAX)); 1352 } 1353 1354 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) { 1355 X86Assembler* assembler = GetAssembler(); 1356 LocationSummary* locations = invoke->GetLocations(); 1357 1358 Register byte_array = locations->InAt(0).AsRegister<Register>(); 1359 __ testl(byte_array, byte_array); 1360 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); 1361 codegen_->AddSlowPath(slow_path); 1362 __ j(kEqual, slow_path->GetEntryLabel()); 1363 1364 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc()); 1365 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); 1366 __ Bind(slow_path->GetExitLabel()); 1367 } 1368 1369 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) { 1370 LocationSummary* locations = 1371 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 1372 InvokeRuntimeCallingConvention calling_convention; 1373 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1374 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 1375 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); 1376 locations->SetOut(Location::RegisterLocation(EAX)); 1377 } 1378 1379 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) { 1380 // No need to emit code checking whether `locations->InAt(2)` is a null 1381 // pointer, as callers of the native method 1382 // 1383 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) 1384 // 1385 // all include a null check on `data` before calling that method. 
1386 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); 1387 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); 1388 } 1389 1390 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) { 1391 LocationSummary* locations = new (allocator_) LocationSummary( 1392 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1393 InvokeRuntimeCallingConvention calling_convention; 1394 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1395 locations->SetOut(Location::RegisterLocation(EAX)); 1396 } 1397 1398 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) { 1399 X86Assembler* assembler = GetAssembler(); 1400 LocationSummary* locations = invoke->GetLocations(); 1401 1402 Register string_to_copy = locations->InAt(0).AsRegister<Register>(); 1403 __ testl(string_to_copy, string_to_copy); 1404 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); 1405 codegen_->AddSlowPath(slow_path); 1406 __ j(kEqual, slow_path->GetEntryLabel()); 1407 1408 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc()); 1409 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); 1410 __ Bind(slow_path->GetExitLabel()); 1411 } 1412 1413 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1414 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); 1415 LocationSummary* locations = 1416 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1417 locations->SetInAt(0, Location::RequiresRegister()); 1418 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); 1419 // Place srcEnd in ECX to save a move below. 1420 locations->SetInAt(2, Location::RegisterLocation(ECX)); 1421 locations->SetInAt(3, Location::RequiresRegister()); 1422 locations->SetInAt(4, Location::RequiresRegister()); 1423 1424 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers. 1425 // We don't have enough registers to also grab ECX, so handle below. 1426 locations->AddTemp(Location::RegisterLocation(ESI)); 1427 locations->AddTemp(Location::RegisterLocation(EDI)); 1428 } 1429 1430 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1431 X86Assembler* assembler = GetAssembler(); 1432 LocationSummary* locations = invoke->GetLocations(); 1433 1434 size_t char_component_size = DataType::Size(DataType::Type::kUint16); 1435 // Location of data in char array buffer. 1436 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value(); 1437 // Location of char array data in string. 1438 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); 1439 1440 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); 1441 Register obj = locations->InAt(0).AsRegister<Register>(); 1442 Location srcBegin = locations->InAt(1); 1443 int srcBegin_value = 1444 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; 1445 Register srcEnd = locations->InAt(2).AsRegister<Register>(); 1446 Register dst = locations->InAt(3).AsRegister<Register>(); 1447 Register dstBegin = locations->InAt(4).AsRegister<Register>(); 1448 1449 // Check assumption that sizeof(Char) is 2 (used in scaling below). 
1450 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1451 DCHECK_EQ(char_size, 2u);
1452
1453 // Compute the number of chars (words) to move.
1454 // Save ECX, since we don't know if it will be used later.
1455 __ pushl(ECX);
1456 int stack_adjust = kX86WordSize;
1457 __ cfi().AdjustCFAOffset(stack_adjust);
1458 DCHECK_EQ(srcEnd, ECX);
1459 if (srcBegin.IsConstant()) {
1460 __ subl(ECX, Immediate(srcBegin_value));
1461 } else {
1462 DCHECK(srcBegin.IsRegister());
1463 __ subl(ECX, srcBegin.AsRegister<Register>());
1464 }
1465
1466 NearLabel done;
1467 if (mirror::kUseStringCompression) {
1468 // Location of count in string
1469 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1470 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1471 DCHECK_EQ(c_char_size, 1u);
1472 __ pushl(EAX);
1473 __ cfi().AdjustCFAOffset(stack_adjust);
1474
1475 NearLabel copy_loop, copy_uncompressed;
1476 __ testl(Address(obj, count_offset), Immediate(1));
1477 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1478 "Expecting 0=compressed, 1=uncompressed");
1479 __ j(kNotZero, &copy_uncompressed);
1480 // Compute the address of the source string by adding the number of chars from
1481 // the source beginning to the value offset of a string.
1482 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1483
1484 // Start the loop to copy String's value to Array of Char.
1485 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1486 __ Bind(&copy_loop);
1487 __ jecxz(&done);
1488 // Use EAX temporary (convert byte from ESI to word).
1489 // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1490 __ movzxb(EAX, Address(ESI, 0));
1491 __ movw(Address(EDI, 0), EAX);
1492 __ leal(EDI, Address(EDI, char_size));
1493 __ leal(ESI, Address(ESI, c_char_size));
1494 // TODO: Add support for LOOP to X86Assembler.
1495 __ subl(ECX, Immediate(1));
1496 __ jmp(&copy_loop);
1497 __ Bind(&copy_uncompressed);
1498 }
1499
1500 // Do the copy for uncompressed string.
1501 // Compute the address of the destination buffer.
1502 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1503 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1504 __ rep_movsw();
1505
1506 __ Bind(&done);
1507 if (mirror::kUseStringCompression) {
1508 // Restore EAX.
1509 __ popl(EAX);
1510 __ cfi().AdjustCFAOffset(-stack_adjust);
1511 }
1512 // Restore ECX.
1513 __ popl(ECX);
1514 __ cfi().AdjustCFAOffset(-stack_adjust);
1515 }
1516
1517 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1518 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1519 Location out_loc = locations->Out();
1520 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1521 // to avoid a SIGBUS.
1522 switch (size) { 1523 case DataType::Type::kInt8: 1524 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0)); 1525 break; 1526 case DataType::Type::kInt16: 1527 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0)); 1528 break; 1529 case DataType::Type::kInt32: 1530 __ movl(out_loc.AsRegister<Register>(), Address(address, 0)); 1531 break; 1532 case DataType::Type::kInt64: 1533 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0)); 1534 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4)); 1535 break; 1536 default: 1537 LOG(FATAL) << "Type not recognized for peek: " << size; 1538 UNREACHABLE(); 1539 } 1540 } 1541 1542 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) { 1543 CreateLongToIntLocations(allocator_, invoke); 1544 } 1545 1546 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) { 1547 GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler()); 1548 } 1549 1550 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) { 1551 CreateLongToIntLocations(allocator_, invoke); 1552 } 1553 1554 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) { 1555 GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); 1556 } 1557 1558 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) { 1559 CreateLongToLongLocations(allocator_, invoke); 1560 } 1561 1562 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) { 1563 GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); 1564 } 1565 1566 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) { 1567 CreateLongToIntLocations(allocator_, invoke); 1568 } 1569 1570 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) { 1571 GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); 1572 } 1573 1574 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator, 1575 DataType::Type size, 1576 HInvoke* invoke) { 1577 LocationSummary* locations = 1578 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1579 locations->SetInAt(0, Location::RequiresRegister()); 1580 HInstruction* value = invoke->InputAt(1); 1581 if (size == DataType::Type::kInt8) { 1582 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value)); 1583 } else { 1584 locations->SetInAt(1, Location::RegisterOrConstant(value)); 1585 } 1586 } 1587 1588 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) { 1589 Register address = locations->InAt(0).AsRegisterPairLow<Register>(); 1590 Location value_loc = locations->InAt(1); 1591 // x86 allows unaligned access. We do not have to check the input or use specific instructions 1592 // to avoid a SIGBUS. 
1593 switch (size) { 1594 case DataType::Type::kInt8: 1595 if (value_loc.IsConstant()) { 1596 __ movb(Address(address, 0), 1597 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); 1598 } else { 1599 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>()); 1600 } 1601 break; 1602 case DataType::Type::kInt16: 1603 if (value_loc.IsConstant()) { 1604 __ movw(Address(address, 0), 1605 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); 1606 } else { 1607 __ movw(Address(address, 0), value_loc.AsRegister<Register>()); 1608 } 1609 break; 1610 case DataType::Type::kInt32: 1611 if (value_loc.IsConstant()) { 1612 __ movl(Address(address, 0), 1613 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); 1614 } else { 1615 __ movl(Address(address, 0), value_loc.AsRegister<Register>()); 1616 } 1617 break; 1618 case DataType::Type::kInt64: 1619 if (value_loc.IsConstant()) { 1620 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue(); 1621 __ movl(Address(address, 0), Immediate(Low32Bits(value))); 1622 __ movl(Address(address, 4), Immediate(High32Bits(value))); 1623 } else { 1624 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>()); 1625 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>()); 1626 } 1627 break; 1628 default: 1629 LOG(FATAL) << "Type not recognized for poke: " << size; 1630 UNREACHABLE(); 1631 } 1632 } 1633 1634 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) { 1635 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke); 1636 } 1637 1638 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) { 1639 GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler()); 1640 } 1641 1642 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) { 1643 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke); 1644 } 1645 1646 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) { 1647 GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); 1648 } 1649 1650 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) { 1651 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke); 1652 } 1653 1654 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) { 1655 GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); 1656 } 1657 1658 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) { 1659 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke); 1660 } 1661 1662 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) { 1663 GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); 1664 } 1665 1666 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) { 1667 LocationSummary* locations = 1668 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1669 locations->SetOut(Location::RequiresRegister()); 1670 } 1671 1672 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) { 1673 Register out = invoke->GetLocations()->Out().AsRegister<Register>(); 1674 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>())); 1675 } 1676 1677 static void GenUnsafeGet(HInvoke* invoke, 1678 DataType::Type type, 1679 bool is_volatile, 1680 CodeGeneratorX86* codegen) { 1681 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); 
1682 LocationSummary* locations = invoke->GetLocations(); 1683 Location base_loc = locations->InAt(1); 1684 Register base = base_loc.AsRegister<Register>(); 1685 Location offset_loc = locations->InAt(2); 1686 Register offset = offset_loc.AsRegisterPairLow<Register>(); 1687 Location output_loc = locations->Out(); 1688 1689 switch (type) { 1690 case DataType::Type::kInt32: { 1691 Register output = output_loc.AsRegister<Register>(); 1692 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 1693 break; 1694 } 1695 1696 case DataType::Type::kReference: { 1697 Register output = output_loc.AsRegister<Register>(); 1698 if (kEmitCompilerReadBarrier) { 1699 if (kUseBakerReadBarrier) { 1700 Address src(base, offset, ScaleFactor::TIMES_1, 0); 1701 codegen->GenerateReferenceLoadWithBakerReadBarrier( 1702 invoke, output_loc, base, src, /* needs_null_check= */ false); 1703 } else { 1704 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 1705 codegen->GenerateReadBarrierSlow( 1706 invoke, output_loc, output_loc, base_loc, 0U, offset_loc); 1707 } 1708 } else { 1709 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 1710 __ MaybeUnpoisonHeapReference(output); 1711 } 1712 break; 1713 } 1714 1715 case DataType::Type::kInt64: { 1716 Register output_lo = output_loc.AsRegisterPairLow<Register>(); 1717 Register output_hi = output_loc.AsRegisterPairHigh<Register>(); 1718 if (is_volatile) { 1719 // Need to use a XMM to read atomically. 1720 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 1721 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0)); 1722 __ movd(output_lo, temp); 1723 __ psrlq(temp, Immediate(32)); 1724 __ movd(output_hi, temp); 1725 } else { 1726 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0)); 1727 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4)); 1728 } 1729 } 1730 break; 1731 1732 default: 1733 LOG(FATAL) << "Unsupported op size " << type; 1734 UNREACHABLE(); 1735 } 1736 } 1737 1738 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, 1739 HInvoke* invoke, 1740 DataType::Type type, 1741 bool is_volatile) { 1742 bool can_call = kEmitCompilerReadBarrier && 1743 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || 1744 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); 1745 LocationSummary* locations = 1746 new (allocator) LocationSummary(invoke, 1747 can_call 1748 ? LocationSummary::kCallOnSlowPath 1749 : LocationSummary::kNoCall, 1750 kIntrinsified); 1751 if (can_call && kUseBakerReadBarrier) { 1752 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 1753 } 1754 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 1755 locations->SetInAt(1, Location::RequiresRegister()); 1756 locations->SetInAt(2, Location::RequiresRegister()); 1757 if (type == DataType::Type::kInt64) { 1758 if (is_volatile) { 1759 // Need to use XMM to read volatile. 1760 locations->AddTemp(Location::RequiresFpuRegister()); 1761 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 1762 } else { 1763 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 1764 } 1765 } else { 1766 locations->SetOut(Location::RequiresRegister(), 1767 (can_call ? 
Location::kOutputOverlap : Location::kNoOutputOverlap)); 1768 } 1769 } 1770 1771 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) { 1772 CreateIntIntIntToIntLocations( 1773 allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ false); 1774 } 1775 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) { 1776 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ true); 1777 } 1778 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) { 1779 CreateIntIntIntToIntLocations( 1780 allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ false); 1781 } 1782 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 1783 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ true); 1784 } 1785 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) { 1786 CreateIntIntIntToIntLocations( 1787 allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ false); 1788 } 1789 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 1790 CreateIntIntIntToIntLocations( 1791 allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ true); 1792 } 1793 1794 1795 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) { 1796 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_); 1797 } 1798 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) { 1799 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_); 1800 } 1801 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) { 1802 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_); 1803 } 1804 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 1805 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_); 1806 } 1807 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) { 1808 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_); 1809 } 1810 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 1811 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_); 1812 } 1813 1814 1815 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator, 1816 DataType::Type type, 1817 HInvoke* invoke, 1818 bool is_volatile) { 1819 LocationSummary* locations = 1820 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1821 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 1822 locations->SetInAt(1, Location::RequiresRegister()); 1823 locations->SetInAt(2, Location::RequiresRegister()); 1824 locations->SetInAt(3, Location::RequiresRegister()); 1825 if (type == DataType::Type::kReference) { 1826 // Need temp registers for card-marking. 1827 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 1828 // Ensure the value is in a byte register. 
1829 locations->AddTemp(Location::RegisterLocation(ECX)); 1830 } else if (type == DataType::Type::kInt64 && is_volatile) { 1831 locations->AddTemp(Location::RequiresFpuRegister()); 1832 locations->AddTemp(Location::RequiresFpuRegister()); 1833 } 1834 } 1835 1836 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) { 1837 CreateIntIntIntIntToVoidPlusTempsLocations( 1838 allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false); 1839 } 1840 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) { 1841 CreateIntIntIntIntToVoidPlusTempsLocations( 1842 allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false); 1843 } 1844 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) { 1845 CreateIntIntIntIntToVoidPlusTempsLocations( 1846 allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ true); 1847 } 1848 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) { 1849 CreateIntIntIntIntToVoidPlusTempsLocations( 1850 allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false); 1851 } 1852 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 1853 CreateIntIntIntIntToVoidPlusTempsLocations( 1854 allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false); 1855 } 1856 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 1857 CreateIntIntIntIntToVoidPlusTempsLocations( 1858 allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ true); 1859 } 1860 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) { 1861 CreateIntIntIntIntToVoidPlusTempsLocations( 1862 allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false); 1863 } 1864 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { 1865 CreateIntIntIntIntToVoidPlusTempsLocations( 1866 allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false); 1867 } 1868 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { 1869 CreateIntIntIntIntToVoidPlusTempsLocations( 1870 allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ true); 1871 } 1872 1873 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 1874 // memory model. 
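// (x86-TSO never reorders a store with an earlier store, so the AnyStore barrier required by
// the ordered variants costs no instruction; only the volatile case needs an explicit fence,
// which GenUnsafePut emits below through codegen->MemoryFence().)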
1875 static void GenUnsafePut(LocationSummary* locations, 1876 DataType::Type type, 1877 bool is_volatile, 1878 CodeGeneratorX86* codegen) { 1879 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); 1880 Register base = locations->InAt(1).AsRegister<Register>(); 1881 Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); 1882 Location value_loc = locations->InAt(3); 1883 1884 if (type == DataType::Type::kInt64) { 1885 Register value_lo = value_loc.AsRegisterPairLow<Register>(); 1886 Register value_hi = value_loc.AsRegisterPairHigh<Register>(); 1887 if (is_volatile) { 1888 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 1889 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); 1890 __ movd(temp1, value_lo); 1891 __ movd(temp2, value_hi); 1892 __ punpckldq(temp1, temp2); 1893 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1); 1894 } else { 1895 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo); 1896 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi); 1897 } 1898 } else if (kPoisonHeapReferences && type == DataType::Type::kReference) { 1899 Register temp = locations->GetTemp(0).AsRegister<Register>(); 1900 __ movl(temp, value_loc.AsRegister<Register>()); 1901 __ PoisonHeapReference(temp); 1902 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp); 1903 } else { 1904 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>()); 1905 } 1906 1907 if (is_volatile) { 1908 codegen->MemoryFence(); 1909 } 1910 1911 if (type == DataType::Type::kReference) { 1912 bool value_can_be_null = true; // TODO: Worth finding out this information? 1913 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), 1914 locations->GetTemp(1).AsRegister<Register>(), 1915 base, 1916 value_loc.AsRegister<Register>(), 1917 value_can_be_null); 1918 } 1919 } 1920 1921 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) { 1922 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_); 1923 } 1924 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) { 1925 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_); 1926 } 1927 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) { 1928 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_); 1929 } 1930 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) { 1931 GenUnsafePut( 1932 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_); 1933 } 1934 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 1935 GenUnsafePut( 1936 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_); 1937 } 1938 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 1939 GenUnsafePut( 1940 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_); 1941 } 1942 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) { 1943 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_); 1944 } 1945 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { 1946 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_); 1947 } 1948 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { 
1949 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_); 1950 } 1951 1952 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, 1953 DataType::Type type, 1954 HInvoke* invoke) { 1955 bool can_call = kEmitCompilerReadBarrier && 1956 kUseBakerReadBarrier && 1957 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); 1958 LocationSummary* locations = 1959 new (allocator) LocationSummary(invoke, 1960 can_call 1961 ? LocationSummary::kCallOnSlowPath 1962 : LocationSummary::kNoCall, 1963 kIntrinsified); 1964 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 1965 locations->SetInAt(1, Location::RequiresRegister()); 1966 // Offset is a long, but in 32 bit mode, we only need the low word. 1967 // Can we update the invoke here to remove a TypeConvert to Long? 1968 locations->SetInAt(2, Location::RequiresRegister()); 1969 // Expected value must be in EAX or EDX:EAX. 1970 // For long, new value must be in ECX:EBX. 1971 if (type == DataType::Type::kInt64) { 1972 locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX)); 1973 locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX)); 1974 } else { 1975 locations->SetInAt(3, Location::RegisterLocation(EAX)); 1976 locations->SetInAt(4, Location::RequiresRegister()); 1977 } 1978 1979 // Force a byte register for the output. 1980 locations->SetOut(Location::RegisterLocation(EAX)); 1981 if (type == DataType::Type::kReference) { 1982 // Need temporary registers for card-marking, and possibly for 1983 // (Baker) read barrier. 1984 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 1985 // Need a byte register for marking. 1986 locations->AddTemp(Location::RegisterLocation(ECX)); 1987 } 1988 } 1989 1990 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) { 1991 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke); 1992 } 1993 1994 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { 1995 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke); 1996 } 1997 1998 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { 1999 // The only read barrier implementation supporting the 2000 // UnsafeCASObject intrinsic is the Baker-style read barriers. 2001 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { 2002 return; 2003 } 2004 2005 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke); 2006 } 2007 2008 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) { 2009 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); 2010 LocationSummary* locations = invoke->GetLocations(); 2011 2012 Register base = locations->InAt(1).AsRegister<Register>(); 2013 Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); 2014 Location out = locations->Out(); 2015 DCHECK_EQ(out.AsRegister<Register>(), EAX); 2016 2017 // The address of the field within the holding object. 2018 Address field_addr(base, offset, ScaleFactor::TIMES_1, 0); 2019 2020 if (type == DataType::Type::kReference) { 2021 // The only read barrier implementation supporting the 2022 // UnsafeCASObject intrinsic is the Baker-style read barriers. 
2023 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 2024 2025 Location temp1_loc = locations->GetTemp(0); 2026 Register temp1 = temp1_loc.AsRegister<Register>(); 2027 Register temp2 = locations->GetTemp(1).AsRegister<Register>(); 2028 2029 Register expected = locations->InAt(3).AsRegister<Register>(); 2030 // Ensure `expected` is in EAX (required by the CMPXCHG instruction). 2031 DCHECK_EQ(expected, EAX); 2032 Register value = locations->InAt(4).AsRegister<Register>(); 2033 2034 // Mark card for object assuming new value is stored. 2035 bool value_can_be_null = true; // TODO: Worth finding out this information? 2036 codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null); 2037 2038 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2039 // Need to make sure the reference stored in the field is a to-space 2040 // one before attempting the CAS or the CAS could fail incorrectly. 2041 codegen->GenerateReferenceLoadWithBakerReadBarrier( 2042 invoke, 2043 temp1_loc, // Unused, used only as a "temporary" within the read barrier. 2044 base, 2045 field_addr, 2046 /* needs_null_check= */ false, 2047 /* always_update_field= */ true, 2048 &temp2); 2049 } 2050 2051 bool base_equals_value = (base == value); 2052 if (kPoisonHeapReferences) { 2053 if (base_equals_value) { 2054 // If `base` and `value` are the same register location, move 2055 // `value` to a temporary register. This way, poisoning 2056 // `value` won't invalidate `base`. 2057 value = temp1; 2058 __ movl(value, base); 2059 } 2060 2061 // Check that the register allocator did not assign the location 2062 // of `expected` (EAX) to `value` nor to `base`, so that heap 2063 // poisoning (when enabled) works as intended below. 2064 // - If `value` were equal to `expected`, both references would 2065 // be poisoned twice, meaning they would not be poisoned at 2066 // all, as heap poisoning uses address negation. 2067 // - If `base` were equal to `expected`, poisoning `expected` 2068 // would invalidate `base`. 2069 DCHECK_NE(value, expected); 2070 DCHECK_NE(base, expected); 2071 2072 __ PoisonHeapReference(expected); 2073 __ PoisonHeapReference(value); 2074 } 2075 2076 __ LockCmpxchgl(field_addr, value); 2077 2078 // LOCK CMPXCHG has full barrier semantics, and we don't need 2079 // scheduling barriers at this time. 2080 2081 // Convert ZF into the Boolean result. 2082 __ setb(kZero, out.AsRegister<Register>()); 2083 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); 2084 2085 // If heap poisoning is enabled, we need to unpoison the values 2086 // that were poisoned earlier. 2087 if (kPoisonHeapReferences) { 2088 if (base_equals_value) { 2089 // `value` has been moved to a temporary register, no need to 2090 // unpoison it. 2091 } else { 2092 // Ensure `value` is different from `out`, so that unpoisoning 2093 // the former does not invalidate the latter. 2094 DCHECK_NE(value, out.AsRegister<Register>()); 2095 __ UnpoisonHeapReference(value); 2096 } 2097 // Do not unpoison the reference contained in register 2098 // `expected`, as it is the same as register `out` (EAX). 2099 } 2100 } else { 2101 if (type == DataType::Type::kInt32) { 2102 // Ensure the expected value is in EAX (required by the CMPXCHG 2103 // instruction). 
2104 DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX); 2105 __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>()); 2106 } else if (type == DataType::Type::kInt64) { 2107 // Ensure the expected value is in EAX:EDX and that the new 2108 // value is in EBX:ECX (required by the CMPXCHG8B instruction). 2109 DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX); 2110 DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX); 2111 DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX); 2112 DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX); 2113 __ LockCmpxchg8b(field_addr); 2114 } else { 2115 LOG(FATAL) << "Unexpected CAS type " << type; 2116 } 2117 2118 // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we 2119 // don't need scheduling barriers at this time. 2120 2121 // Convert ZF into the Boolean result. 2122 __ setb(kZero, out.AsRegister<Register>()); 2123 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); 2124 } 2125 } 2126 2127 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) { 2128 GenCAS(DataType::Type::kInt32, invoke, codegen_); 2129 } 2130 2131 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { 2132 GenCAS(DataType::Type::kInt64, invoke, codegen_); 2133 } 2134 2135 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { 2136 // The only read barrier implementation supporting the 2137 // UnsafeCASObject intrinsic is the Baker-style read barriers. 2138 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 2139 2140 GenCAS(DataType::Type::kReference, invoke, codegen_); 2141 } 2142 2143 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) { 2144 LocationSummary* locations = 2145 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2146 locations->SetInAt(0, Location::RequiresRegister()); 2147 locations->SetOut(Location::SameAsFirstInput()); 2148 locations->AddTemp(Location::RequiresRegister()); 2149 } 2150 2151 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask, 2152 X86Assembler* assembler) { 2153 Immediate imm_shift(shift); 2154 Immediate imm_mask(mask); 2155 __ movl(temp, reg); 2156 __ shrl(reg, imm_shift); 2157 __ andl(temp, imm_mask); 2158 __ andl(reg, imm_mask); 2159 __ shll(temp, imm_shift); 2160 __ orl(reg, temp); 2161 } 2162 2163 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) { 2164 X86Assembler* assembler = GetAssembler(); 2165 LocationSummary* locations = invoke->GetLocations(); 2166 2167 Register reg = locations->InAt(0).AsRegister<Register>(); 2168 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2169 2170 /* 2171 * Use one bswap instruction to reverse byte order first and then use 3 rounds of 2172 * swapping bits to reverse bits in a number x. Using bswap to save instructions 2173 * compared to generic luni implementation which has 5 rounds of swapping bits. 
2174 * x = bswap x 2175 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; 2176 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; 2177 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; 2178 */ 2179 __ bswapl(reg); 2180 SwapBits(reg, temp, 1, 0x55555555, assembler); 2181 SwapBits(reg, temp, 2, 0x33333333, assembler); 2182 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); 2183 } 2184 2185 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) { 2186 LocationSummary* locations = 2187 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2188 locations->SetInAt(0, Location::RequiresRegister()); 2189 locations->SetOut(Location::SameAsFirstInput()); 2190 locations->AddTemp(Location::RequiresRegister()); 2191 } 2192 2193 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) { 2194 X86Assembler* assembler = GetAssembler(); 2195 LocationSummary* locations = invoke->GetLocations(); 2196 2197 Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>(); 2198 Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>(); 2199 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2200 2201 // We want to swap high/low, then bswap each one, and then do the same 2202 // as a 32 bit reverse. 2203 // Exchange high and low. 2204 __ movl(temp, reg_low); 2205 __ movl(reg_low, reg_high); 2206 __ movl(reg_high, temp); 2207 2208 // bit-reverse low 2209 __ bswapl(reg_low); 2210 SwapBits(reg_low, temp, 1, 0x55555555, assembler); 2211 SwapBits(reg_low, temp, 2, 0x33333333, assembler); 2212 SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler); 2213 2214 // bit-reverse high 2215 __ bswapl(reg_high); 2216 SwapBits(reg_high, temp, 1, 0x55555555, assembler); 2217 SwapBits(reg_high, temp, 2, 0x33333333, assembler); 2218 SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler); 2219 } 2220 2221 static void CreateBitCountLocations( 2222 ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) { 2223 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) { 2224 // Do nothing if there is no popcnt support. This results in generating 2225 // a call for the intrinsic rather than direct code. 2226 return; 2227 } 2228 LocationSummary* locations = 2229 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2230 if (is_long) { 2231 locations->AddTemp(Location::RequiresRegister()); 2232 } 2233 locations->SetInAt(0, Location::Any()); 2234 locations->SetOut(Location::RequiresRegister()); 2235 } 2236 2237 static void GenBitCount(X86Assembler* assembler, 2238 CodeGeneratorX86* codegen, 2239 HInvoke* invoke, bool is_long) { 2240 LocationSummary* locations = invoke->GetLocations(); 2241 Location src = locations->InAt(0); 2242 Register out = locations->Out().AsRegister<Register>(); 2243 2244 if (invoke->InputAt(0)->IsConstant()) { 2245 // Evaluate this at compile time. 2246 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2247 int32_t result = is_long 2248 ? POPCOUNT(static_cast<uint64_t>(value)) 2249 : POPCOUNT(static_cast<uint32_t>(value)); 2250 codegen->Load32BitValue(out, result); 2251 return; 2252 } 2253 2254 // Handle the non-constant cases. 2255 if (!is_long) { 2256 if (src.IsRegister()) { 2257 __ popcntl(out, src.AsRegister<Register>()); 2258 } else { 2259 DCHECK(src.IsStackSlot()); 2260 __ popcntl(out, Address(ESP, src.GetStackIndex())); 2261 } 2262 } else { 2263 // The 64-bit case needs to worry about two parts. 
2264 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2265 if (src.IsRegisterPair()) { 2266 __ popcntl(temp, src.AsRegisterPairLow<Register>()); 2267 __ popcntl(out, src.AsRegisterPairHigh<Register>()); 2268 } else { 2269 DCHECK(src.IsDoubleStackSlot()); 2270 __ popcntl(temp, Address(ESP, src.GetStackIndex())); 2271 __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize))); 2272 } 2273 __ addl(out, temp); 2274 } 2275 } 2276 2277 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) { 2278 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false); 2279 } 2280 2281 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) { 2282 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false); 2283 } 2284 2285 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) { 2286 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true); 2287 } 2288 2289 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) { 2290 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true); 2291 } 2292 2293 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) { 2294 LocationSummary* locations = 2295 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2296 if (is_long) { 2297 locations->SetInAt(0, Location::RequiresRegister()); 2298 } else { 2299 locations->SetInAt(0, Location::Any()); 2300 } 2301 locations->SetOut(Location::RequiresRegister()); 2302 } 2303 2304 static void GenLeadingZeros(X86Assembler* assembler, 2305 CodeGeneratorX86* codegen, 2306 HInvoke* invoke, bool is_long) { 2307 LocationSummary* locations = invoke->GetLocations(); 2308 Location src = locations->InAt(0); 2309 Register out = locations->Out().AsRegister<Register>(); 2310 2311 if (invoke->InputAt(0)->IsConstant()) { 2312 // Evaluate this at compile time. 2313 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2314 if (value == 0) { 2315 value = is_long ? 64 : 32; 2316 } else { 2317 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value)); 2318 } 2319 codegen->Load32BitValue(out, value); 2320 return; 2321 } 2322 2323 // Handle the non-constant cases. 2324 if (!is_long) { 2325 if (src.IsRegister()) { 2326 __ bsrl(out, src.AsRegister<Register>()); 2327 } else { 2328 DCHECK(src.IsStackSlot()); 2329 __ bsrl(out, Address(ESP, src.GetStackIndex())); 2330 } 2331 2332 // BSR sets ZF if the input was zero, and the output is undefined. 2333 NearLabel all_zeroes, done; 2334 __ j(kEqual, &all_zeroes); 2335 2336 // Correct the result from BSR to get the final CLZ result. 2337 __ xorl(out, Immediate(31)); 2338 __ jmp(&done); 2339 2340 // Fix the zero case with the expected result. 2341 __ Bind(&all_zeroes); 2342 __ movl(out, Immediate(32)); 2343 2344 __ Bind(&done); 2345 return; 2346 } 2347 2348 // 64 bit case needs to worry about both parts of the register. 2349 DCHECK(src.IsRegisterPair()); 2350 Register src_lo = src.AsRegisterPairLow<Register>(); 2351 Register src_hi = src.AsRegisterPairHigh<Register>(); 2352 NearLabel handle_low, done, all_zeroes; 2353 2354 // Is the high word zero? 2355 __ testl(src_hi, src_hi); 2356 __ j(kEqual, &handle_low); 2357 2358 // High word is not zero. We know that the BSR result is defined in this case. 2359 __ bsrl(out, src_hi); 2360 2361 // Correct the result from BSR to get the final CLZ result. 2362 __ xorl(out, Immediate(31)); 2363 __ jmp(&done); 2364 2365 // High word was zero. 
We have to compute the low word count and add 32. 2366 __ Bind(&handle_low); 2367 __ bsrl(out, src_lo); 2368 __ j(kEqual, &all_zeroes); 2369 2370 // We had a valid result. Use an XOR to both correct the result and add 32. 2371 __ xorl(out, Immediate(63)); 2372 __ jmp(&done); 2373 2374 // All zero case. 2375 __ Bind(&all_zeroes); 2376 __ movl(out, Immediate(64)); 2377 2378 __ Bind(&done); 2379 } 2380 2381 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 2382 CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false); 2383 } 2384 2385 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 2386 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false); 2387 } 2388 2389 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 2390 CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true); 2391 } 2392 2393 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 2394 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true); 2395 } 2396 2397 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) { 2398 LocationSummary* locations = 2399 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2400 if (is_long) { 2401 locations->SetInAt(0, Location::RequiresRegister()); 2402 } else { 2403 locations->SetInAt(0, Location::Any()); 2404 } 2405 locations->SetOut(Location::RequiresRegister()); 2406 } 2407 2408 static void GenTrailingZeros(X86Assembler* assembler, 2409 CodeGeneratorX86* codegen, 2410 HInvoke* invoke, bool is_long) { 2411 LocationSummary* locations = invoke->GetLocations(); 2412 Location src = locations->InAt(0); 2413 Register out = locations->Out().AsRegister<Register>(); 2414 2415 if (invoke->InputAt(0)->IsConstant()) { 2416 // Evaluate this at compile time. 2417 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2418 if (value == 0) { 2419 value = is_long ? 64 : 32; 2420 } else { 2421 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value)); 2422 } 2423 codegen->Load32BitValue(out, value); 2424 return; 2425 } 2426 2427 // Handle the non-constant cases. 2428 if (!is_long) { 2429 if (src.IsRegister()) { 2430 __ bsfl(out, src.AsRegister<Register>()); 2431 } else { 2432 DCHECK(src.IsStackSlot()); 2433 __ bsfl(out, Address(ESP, src.GetStackIndex())); 2434 } 2435 2436 // BSF sets ZF if the input was zero, and the output is undefined. 2437 NearLabel done; 2438 __ j(kNotEqual, &done); 2439 2440 // Fix the zero case with the expected result. 2441 __ movl(out, Immediate(32)); 2442 2443 __ Bind(&done); 2444 return; 2445 } 2446 2447 // 64 bit case needs to worry about both parts of the register. 2448 DCHECK(src.IsRegisterPair()); 2449 Register src_lo = src.AsRegisterPairLow<Register>(); 2450 Register src_hi = src.AsRegisterPairHigh<Register>(); 2451 NearLabel done, all_zeroes; 2452 2453 // If the low word is zero, then ZF will be set. If not, we have the answer. 2454 __ bsfl(out, src_lo); 2455 __ j(kNotEqual, &done); 2456 2457 // Low word was zero. We have to compute the high word count and add 32. 2458 __ bsfl(out, src_hi); 2459 __ j(kEqual, &all_zeroes); 2460 2461 // We had a valid result. Add 32 to account for the low word being zero. 2462 __ addl(out, Immediate(32)); 2463 __ jmp(&done); 2464 2465 // All zero case. 
2466 __ Bind(&all_zeroes); 2467 __ movl(out, Immediate(64)); 2468 2469 __ Bind(&done); 2470 } 2471 2472 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { 2473 CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false); 2474 } 2475 2476 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { 2477 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false); 2478 } 2479 2480 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { 2481 CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true); 2482 } 2483 2484 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { 2485 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true); 2486 } 2487 2488 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) { 2489 return instruction->InputAt(input0) == instruction->InputAt(input1); 2490 } 2491 2492 // Compute base address for the System.arraycopy intrinsic in `base`. 2493 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler, 2494 DataType::Type type, 2495 const Register& array, 2496 const Location& pos, 2497 const Register& base) { 2498 // This routine is only used by the SystemArrayCopy intrinsic at the 2499 // moment. We can allow DataType::Type::kReference as `type` to implement 2500 // the SystemArrayCopyChar intrinsic. 2501 DCHECK_EQ(type, DataType::Type::kReference); 2502 const int32_t element_size = DataType::Size(type); 2503 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type)); 2504 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); 2505 2506 if (pos.IsConstant()) { 2507 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue(); 2508 __ leal(base, Address(array, element_size * constant + data_offset)); 2509 } else { 2510 __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset)); 2511 } 2512 } 2513 2514 // Compute end source address for the System.arraycopy intrinsic in `end`. 2515 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler, 2516 DataType::Type type, 2517 const Location& copy_length, 2518 const Register& base, 2519 const Register& end) { 2520 // This routine is only used by the SystemArrayCopy intrinsic at the 2521 // moment. We can allow DataType::Type::kReference as `type` to implement 2522 // the SystemArrayCopyChar intrinsic. 2523 DCHECK_EQ(type, DataType::Type::kReference); 2524 const int32_t element_size = DataType::Size(type); 2525 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type)); 2526 2527 if (copy_length.IsConstant()) { 2528 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue(); 2529 __ leal(end, Address(base, element_size * constant)); 2530 } else { 2531 __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0)); 2532 } 2533 } 2534 2535 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { 2536 // The only read barrier implementation supporting the 2537 // SystemArrayCopy intrinsic is the Baker-style read barriers. 2538 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { 2539 return; 2540 } 2541 2542 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); 2543 if (invoke->GetLocations() != nullptr) { 2544 // Need a byte register for marking. 
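// (Only EAX, EBX, ECX and EDX are byte-addressable on x86-32, and the card mark stores a
// single byte, so the second temporary is pinned to ECX here.)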
2545 invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX)); 2546 2547 static constexpr size_t kSrc = 0; 2548 static constexpr size_t kSrcPos = 1; 2549 static constexpr size_t kDest = 2; 2550 static constexpr size_t kDestPos = 3; 2551 static constexpr size_t kLength = 4; 2552 2553 if (!invoke->InputAt(kSrcPos)->IsIntConstant() && 2554 !invoke->InputAt(kDestPos)->IsIntConstant() && 2555 !invoke->InputAt(kLength)->IsIntConstant()) { 2556 if (!IsSameInput(invoke, kSrcPos, kDestPos) && 2557 !IsSameInput(invoke, kSrcPos, kLength) && 2558 !IsSameInput(invoke, kDestPos, kLength) && 2559 !IsSameInput(invoke, kSrc, kDest)) { 2560 // Not enough registers, make the length also take a stack slot. 2561 invoke->GetLocations()->SetInAt(kLength, Location::Any()); 2562 } 2563 } 2564 } 2565 } 2566 2567 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { 2568 // The only read barrier implementation supporting the 2569 // SystemArrayCopy intrinsic is the Baker-style read barriers. 2570 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 2571 2572 X86Assembler* assembler = GetAssembler(); 2573 LocationSummary* locations = invoke->GetLocations(); 2574 2575 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 2576 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 2577 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 2578 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 2579 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); 2580 2581 Register src = locations->InAt(0).AsRegister<Register>(); 2582 Location src_pos = locations->InAt(1); 2583 Register dest = locations->InAt(2).AsRegister<Register>(); 2584 Location dest_pos = locations->InAt(3); 2585 Location length_arg = locations->InAt(4); 2586 Location length = length_arg; 2587 Location temp1_loc = locations->GetTemp(0); 2588 Register temp1 = temp1_loc.AsRegister<Register>(); 2589 Location temp2_loc = locations->GetTemp(1); 2590 Register temp2 = temp2_loc.AsRegister<Register>(); 2591 2592 SlowPathCode* intrinsic_slow_path = 2593 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); 2594 codegen_->AddSlowPath(intrinsic_slow_path); 2595 2596 NearLabel conditions_on_positions_validated; 2597 SystemArrayCopyOptimizations optimizations(invoke); 2598 2599 // If source and destination are the same, we go to slow path if we need to do 2600 // forward copying. 2601 if (src_pos.IsConstant()) { 2602 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); 2603 if (dest_pos.IsConstant()) { 2604 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); 2605 if (optimizations.GetDestinationIsSource()) { 2606 // Checked when building locations. 
2607 DCHECK_GE(src_pos_constant, dest_pos_constant); 2608 } else if (src_pos_constant < dest_pos_constant) { 2609 __ cmpl(src, dest); 2610 __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); 2611 } 2612 } else { 2613 if (!optimizations.GetDestinationIsSource()) { 2614 __ cmpl(src, dest); 2615 __ j(kNotEqual, &conditions_on_positions_validated); 2616 } 2617 __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant)); 2618 __ j(kGreater, intrinsic_slow_path->GetEntryLabel()); 2619 } 2620 } else { 2621 if (!optimizations.GetDestinationIsSource()) { 2622 __ cmpl(src, dest); 2623 __ j(kNotEqual, &conditions_on_positions_validated); 2624 } 2625 if (dest_pos.IsConstant()) { 2626 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); 2627 __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant)); 2628 __ j(kLess, intrinsic_slow_path->GetEntryLabel()); 2629 } else { 2630 __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>()); 2631 __ j(kLess, intrinsic_slow_path->GetEntryLabel()); 2632 } 2633 } 2634 2635 __ Bind(&conditions_on_positions_validated); 2636 2637 if (!optimizations.GetSourceIsNotNull()) { 2638 // Bail out if the source is null. 2639 __ testl(src, src); 2640 __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); 2641 } 2642 2643 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { 2644 // Bail out if the destination is null. 2645 __ testl(dest, dest); 2646 __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); 2647 } 2648 2649 Location temp3_loc = locations->GetTemp(2); 2650 Register temp3 = temp3_loc.AsRegister<Register>(); 2651 if (length.IsStackSlot()) { 2652 __ movl(temp3, Address(ESP, length.GetStackIndex())); 2653 length = Location::RegisterLocation(temp3); 2654 } 2655 2656 // If the length is negative, bail out. 2657 // We have already checked in the LocationsBuilder for the constant case. 2658 if (!length.IsConstant() && 2659 !optimizations.GetCountIsSourceLength() && 2660 !optimizations.GetCountIsDestinationLength()) { 2661 __ testl(length.AsRegister<Register>(), length.AsRegister<Register>()); 2662 __ j(kLess, intrinsic_slow_path->GetEntryLabel()); 2663 } 2664 2665 // Validity checks: source. 2666 CheckPosition(assembler, 2667 src_pos, 2668 src, 2669 length, 2670 intrinsic_slow_path, 2671 temp1, 2672 optimizations.GetCountIsSourceLength()); 2673 2674 // Validity checks: dest. 2675 CheckPosition(assembler, 2676 dest_pos, 2677 dest, 2678 length, 2679 intrinsic_slow_path, 2680 temp1, 2681 optimizations.GetCountIsDestinationLength()); 2682 2683 if (!optimizations.GetDoesNotNeedTypeCheck()) { 2684 // Check whether all elements of the source array are assignable to the component 2685 // type of the destination array. We do two checks: the classes are the same, 2686 // or the destination is Object[]. If none of these checks succeed, we go to the 2687 // slow path. 2688 2689 if (!optimizations.GetSourceIsNonPrimitiveArray()) { 2690 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2691 // /* HeapReference<Class> */ temp1 = src->klass_ 2692 codegen_->GenerateFieldLoadWithBakerReadBarrier( 2693 invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); 2694 // Bail out if the source is not a non primitive array. 
2695 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2696 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2697 invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2698 __ testl(temp1, temp1);
2699 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2700 // If heap poisoning is enabled, `temp1` has been unpoisoned
2701 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2702 } else {
2703 // /* HeapReference<Class> */ temp1 = src->klass_
2704 __ movl(temp1, Address(src, class_offset));
2705 __ MaybeUnpoisonHeapReference(temp1);
2706 // Bail out if the source is not a non primitive array.
2707 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2708 __ movl(temp1, Address(temp1, component_offset));
2709 __ testl(temp1, temp1);
2710 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2711 __ MaybeUnpoisonHeapReference(temp1);
2712 }
2713 __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
2714 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2715 }
2716
2717 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2718 if (length.Equals(Location::RegisterLocation(temp3))) {
2719 // When Baker read barriers are enabled, register `temp3`,
2720 // which in the present case contains the `length` parameter,
2721 // will be overwritten below. Make the `length` location
2722 // reference the original stack location; it will be moved
2723 // back to `temp3` later if necessary.
2724 DCHECK(length_arg.IsStackSlot());
2725 length = length_arg;
2726 }
2727
2728 // /* HeapReference<Class> */ temp1 = dest->klass_
2729 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2730 invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
2731
2732 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2733 // Bail out if the destination is not a non primitive array.
2734 //
2735 // Register `temp1` is not trashed by the read barrier emitted
2736 // by GenerateFieldLoadWithBakerReadBarrier below, as that
2737 // method produces a call to a ReadBarrierMarkRegX entry point,
2738 // which saves all potentially live registers, including
2739 // temporaries such as `temp1`.
2740 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2741 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2742 invoke, temp2_loc, temp1, component_offset, /* needs_null_check= */ false);
2743 __ testl(temp2, temp2);
2744 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2745 // If heap poisoning is enabled, `temp2` has been unpoisoned
2746 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2747 __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
2748 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2749 }
2750
2751 // For the same reason given earlier, `temp1` is not trashed by the
2752 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2753 // /* HeapReference<Class> */ temp2 = src->klass_
2754 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2755 invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
2756 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2757 __ cmpl(temp1, temp2);
2758
2759 if (optimizations.GetDestinationIsTypedObjectArray()) {
2760 NearLabel do_copy;
2761 __ j(kEqual, &do_copy);
2762 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2763 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2764 invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2765 // We do not need to emit a read barrier for the following
2766 // heap reference load, as `temp1` is only used in a
2767 // comparison with null below, and this reference is not
2768 // kept afterwards.
2769 __ cmpl(Address(temp1, super_offset), Immediate(0));
2770 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2771 __ Bind(&do_copy);
2772 } else {
2773 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2774 }
2775 } else {
2776 // Non read barrier code.
2777
2778 // /* HeapReference<Class> */ temp1 = dest->klass_
2779 __ movl(temp1, Address(dest, class_offset));
2780 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2781 __ MaybeUnpoisonHeapReference(temp1);
2782 // Bail out if the destination is not a non primitive array.
2783 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2784 __ movl(temp2, Address(temp1, component_offset));
2785 __ testl(temp2, temp2);
2786 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2787 __ MaybeUnpoisonHeapReference(temp2);
2788 __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
2789 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2790 // Re-poison the heap reference to make the compare instruction below
2791 // compare two poisoned references.
2792 __ PoisonHeapReference(temp1);
2793 }
2794
2795 // Note: if heap poisoning is on, we are comparing two poisoned references here.
2796 __ cmpl(temp1, Address(src, class_offset));
2797
2798 if (optimizations.GetDestinationIsTypedObjectArray()) {
2799 NearLabel do_copy;
2800 __ j(kEqual, &do_copy);
2801 __ MaybeUnpoisonHeapReference(temp1);
2802 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2803 __ movl(temp1, Address(temp1, component_offset));
2804 __ MaybeUnpoisonHeapReference(temp1);
2805 __ cmpl(Address(temp1, super_offset), Immediate(0));
2806 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2807 __ Bind(&do_copy);
2808 } else {
2809 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2810 }
2811 }
2812 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2813 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2814 // Bail out if the source is not a non primitive array.
2815 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2816 // /* HeapReference<Class> */ temp1 = src->klass_
2817 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2818 invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
2819 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2820 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2821 invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2822 __ testl(temp1, temp1);
2823 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2824 // If heap poisoning is enabled, `temp1` has been unpoisoned
2825 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.

  const DataType::Type type = DataType::Type::kReference;
  const int32_t element_size = DataType::Size(type);

  // Compute the base source address in `temp1`.
  GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);

  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // If it is needed (in the case of the fast-path loop), the base
    // destination address is computed later, as `temp2` is used for
    // intermediate computations.

    // Compute the end source address in `temp3`.
    if (length.IsStackSlot()) {
      // Location `length` is again pointing at a stack slot, as
      // register `temp3` (which contained the length parameter
      // earlier) has been overwritten; restore it now.
      DCHECK(length.Equals(length_arg));
      __ movl(temp3, Address(ESP, length.GetStackIndex()));
      length = Location::RegisterLocation(temp3);
    }
    GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);

    // SystemArrayCopy implementation for Baker read barriers (see
    // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
    //
    //   if (src_ptr != end_ptr) {
    //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
    //     if (is_gray) {
    //       // Slow-path copy.
    //       for (size_t i = 0; i != length; ++i) {
    //         dest_array[dest_pos + i] =
    //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
    //       }
    //     } else {
    //       // Fast-path copy.
    //       do {
    //         *dest_ptr++ = *src_ptr++;
    //       } while (src_ptr != end_ptr);
    //     }
    //   }

    NearLabel loop, done;

    // Don't enter the copy loop if `length == 0`.
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);

    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);

    // if (rb_state == ReadBarrier::GrayState())
    //   goto slow_path;
    // At this point, just do the "if" and make sure that flags are preserved until the branch.
    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));

    // Load fence to prevent load-load reordering.
    // Note that this is a no-op, thanks to the x86 memory model.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
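
    // Worked example of the gray bit test above (illustrative only; the concrete numbers
    // depend on the LockWord layout): assuming LockWord::kReadBarrierStateShift == 28, as
    // in the current 32-bit lock word, gray_byte_position == 3, gray_bit_position == 4 and
    // test_value == 0x10, so the `testb` reads the most significant byte of src->monitor_
    // and tests bit 4 of it (bit 28 of the lock word). ZF is then set for a non-gray
    // object, and the `j(kNotZero, ...)` below takes the slow path for a gray one.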

    // Slow path used to copy the array when `src` is gray.
    SlowPathCode* read_barrier_slow_path =
        new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
    codegen_->AddSlowPath(read_barrier_slow_path);

    // We have done the "if" of the gray bit check above; now branch based on the flags.
    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());

    // Fast-path copy.
    // Compute the base destination address in `temp2`.
    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
    __ Bind(&loop);
    __ pushl(Address(temp1, 0));
    __ cfi().AdjustCFAOffset(4);
    __ popl(Address(temp2, 0));
    __ cfi().AdjustCFAOffset(-4);
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);

    __ Bind(read_barrier_slow_path->GetExitLabel());
    __ Bind(&done);
  } else {
    // Non read barrier code.
    // Compute the base destination address in `temp2`.
    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
    // Compute the end source address in `temp3`.
    GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
    NearLabel loop, done;
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);
    __ Bind(&loop);
    __ pushl(Address(temp1, 0));
    __ cfi().AdjustCFAOffset(4);
    __ popl(Address(temp2, 0));
    __ cfi().AdjustCFAOffset(-4);
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);
    __ Bind(&done);
  }
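
  // Note on both copy loops above: x86 has no general memory-to-memory move, so each
  // element is copied with a pushl/popl pair through the stack, which avoids the need
  // for yet another temporary register. The cfi().AdjustCFAOffset(+/-4) calls keep the
  // call frame information consistent while ESP is transiently moved by the push/pop.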

  // We only need one card marking on the destination array.
  codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null= */ false);

  __ Bind(intrinsic_slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
  DCHECK(invoke->IsInvokeStaticOrDirect());
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      Location::RegisterLocation(EAX),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));

  LocationSummary* locations = invoke->GetLocations();
  if (locations != nullptr) {
    HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
    if (invoke_static_or_direct->HasSpecialInput() &&
        invoke->InputAt(invoke_static_or_direct->GetSpecialInputIndex())
            ->IsX86ComputeBaseMethodAddress()) {
      locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(),
                         Location::RequiresRegister());
    }
  }
}

void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
  DCHECK(invoke->IsInvokeStaticOrDirect());
  IntrinsicVisitor::IntegerValueOfInfo info =
      IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
  LocationSummary* locations = invoke->GetLocations();
  X86Assembler* assembler = GetAssembler();

  Register out = locations->Out().AsRegister<Register>();
  InvokeRuntimeCallingConvention calling_convention;
  if (invoke->InputAt(0)->IsConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (static_cast<uint32_t>(value - info.low) < info.length) {
      // Just embed the j.l.Integer in the code.
      DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
      codegen_->LoadBootImageAddress(
          out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
    } else {
      DCHECK(locations->CanCall());
      // Allocate and initialize a new j.l.Integer.
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
      codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
                                             info.integer_boot_image_offset);
      __ movl(Address(out, info.value_offset), Immediate(value));
    }
  } else {
    DCHECK(locations->CanCall());
    Register in = locations->InAt(0).AsRegister<Register>();
    // Check bounds of our cache.
    __ leal(out, Address(in, -info.low));
    __ cmpl(out, Immediate(info.length));
    NearLabel allocate, done;
    __ j(kAboveEqual, &allocate);
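
    // The single unsigned compare above implements the cache range check: after the `leal`,
    // `out` holds (in - info.low), so `out < info.length` (unsigned) is equivalent to
    // info.low <= in && in < info.low + info.length. In Java terms, the intrinsic follows,
    // roughly, the Integer.valueOf() cache fast path (sketch only, not generated code):
    //
    //   if (value >= low && value < low + cache.length) return cache[value - low];
    //   return new Integer(value);  // the `allocate` path below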
    // If the value is within the bounds, load the j.l.Integer directly from the array.
    constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
    static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
                  "Check heap reference size.");
    if (codegen_->GetCompilerOptions().IsBootImage()) {
      DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
      size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
      HX86ComputeBaseMethodAddress* method_address =
          invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress();
      DCHECK(method_address != nullptr);
      Register method_address_reg =
          invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>();
      __ movl(out,
              Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kDummy32BitOffset));
      codegen_->RecordBootImageIntrinsicPatch(method_address,
                                              info.array_data_boot_image_reference);
    } else {
      // Note: We're about to clobber the index in `out`, so we need to use `in` and
      // adjust the offset accordingly.
      uint32_t mid_array_boot_image_offset =
          info.array_data_boot_image_reference - info.low * kElementSize;
      codegen_->LoadBootImageAddress(
          out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect());
      DCHECK_NE(out, in);
      __ movl(out, Address(out, in, TIMES_4, 0));
    }
    __ MaybeUnpoisonHeapReference(out);
    __ jmp(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
    codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
                                           info.integer_boot_image_offset);
    __ movl(Address(out, info.value_offset), in);
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
  Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
  NearLabel done;
  __ fs()->movl(out, address);
  __ testl(out, out);
  __ j(kEqual, &done);
  __ fs()->movl(address, Immediate(0));
  codegen_->MemoryFence();
  __ Bind(&done);
}
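
// The sequence emitted above for Thread.interrupted() is roughly equivalent to the
// following pseudo-code (an illustrative sketch, with `interrupted` standing for the
// thread-local flag reached through the fs:-based thread register):
//
//   out = self->interrupted;  // fs:[Thread::InterruptedOffset]
//   if (out != 0) {
//     self->interrupted = 0;  // clear the flag only when it was set
//     MemoryFence();          // keep the clear ordered with later accesses
//   }
//   return out;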

void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
}

void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }

UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes)
UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer)

UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppend);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(X86)

#undef __

}  // namespace x86
}  // namespace art