/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) {
}

X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
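
// Each intrinsic below is implemented as a pair of visitors: the
// IntrinsicLocationsBuilderX86_64 visitor records the register constraints in
// a LocationSummary, and the IntrinsicCodeGeneratorX86_64 visitor emits the
// x86-64 code for those locations. TryDispatch() above reports whether an
// intrinsified LocationSummary was created, i.e. whether the invoke will be
// expanded inline instead of going through a regular call.
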
// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
      : SlowPathCode(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = DataType::Size(DataType::Type::kReference);

    CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
    CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
    CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();

    __ Bind(GetEntryLabel());
    NearLabel loop;
    __ Bind(&loop);
    __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
    // TODO: Inline the mark bit check before calling the runtime?
    // TMP = ReadBarrier::Mark(TMP);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(CpuRegister(TMP));
    __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
    __ addl(src_curr_addr, Immediate(element_size));
    __ addl(dst_curr_addr, Immediate(element_size));
    __ cmpl(src_curr_addr, src_stop_addr);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
};

#undef __

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}
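
// Note: MoveFPToInt and MoveIntToFP (below) transfer the raw IEEE-754 bit
// pattern between an XMM register and a general-purpose register without any
// numeric conversion, which is exactly the contract of
// Float.floatToRawIntBits / Float.intBitsToFloat and their Double
// counterparts.
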
static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case DataType::Type::kInt16:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case DataType::Type::kInt32:
      __ bswapl(out);
      break;
    case DataType::Type::kInt64:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
// need is 64b.
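
// A note on the FP abs implementation below: for IEEE-754 values, Math.abs
// only needs to clear the sign bit, so the input is AND-ed with 0x7FFFFFFF
// (float) or 0x7FFFFFFFFFFFFFFF (double). This also turns -0.0 into +0.0 and
// leaves NaN payloads untouched, matching the Java semantics.
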
static void CreateFloatToFloatPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

  // TODO: Can mask directly with constant area using pand if we can guarantee
  // that the literal is aligned on a 16 byte boundary. This will avoid a
  // temporary.
  if (is64bit) {
    __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
    __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  } else {
    __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
    __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}
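
// The sequence above computes a branch-free absolute value: the arithmetic
// shift yields mask = (x < 0) ? -1 : 0, and (x + mask) ^ mask equals |x|.
// For example, with x = -5: mask = -1, x + mask = -6, and -6 ^ -1 = 5. For
// x = Integer.MIN_VALUE the result wraps back to MIN_VALUE, which is exactly
// what Math.abs specifies for that input.
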
void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}
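
// Why the or/and in the equal case above: operands that compare equal can
// still be +0.0 and -0.0, which Math.min/max must tell apart. OR-ing the bit
// patterns keeps the sign bit if either operand is -0.0 (so min returns -0.0),
// while AND-ing clears it unless both are -0.0 (so max returns +0.0). If
// either operand is NaN, the canonical quiet NaN constant is returned instead.
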
static void CreateFPFPToFP(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    // a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}
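
// The integer min/max above is branch-free: `out` already holds op1, so for a
// min we conditionally move op2 into `out` only when op1 > op2 (kGreater), and
// symmetrically with kLess for a max. The last cmov argument selects the
// 64-bit form when operating on longs.
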
static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(
      invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}
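
// InvokeOutOfLineIntrinsic is the fallback used when no fast inline expansion
// is possible (e.g. no SSE4.1 for the rounding intrinsics below): the
// arguments are moved into the standard calling convention registers, the
// original method is invoked as a regular static/direct call, and the return
// value is copied back into the intrinsic's output location.
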
static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(allocator, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* allocator,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations =
        new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}
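
// The round_mode immediates used above select the SSE4.1 rounding mode:
// 0 rounds to the nearest even value (Math.rint), 1 rounds toward negative
// infinity (Math.floor), and 2 rounds toward positive infinity (Math.ceil).
// Math.round cannot be expressed with a single rounding mode because it rounds
// halfway cases up (round(2.5) == 3, round(-2.5) == -2), so the visitors below
// compute floor(in) and then add one when in - floor(in) >= 0.5.
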
void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
  __ j(kBelow, &skip_incr);
  __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load32BitValue(out, kPrimIntMax);
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5)
  //    result = result + 1.0f;
  __ movsd(t2, in);
  __ roundsd(t1, in, Immediate(1));
  __ subsd(t2, t1);
  __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
  __ j(kBelow, &skip_incr);
  __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load64BitValue(out, kPrimLongMax);
  __ cvtsi2sd(t2, out, /* is64bit */ true);
  __ comisd(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags, implicit zero extension to 64-bit
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttsd2si(out, t1, /* is64bit */ true);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls. This will ensure that they are
  // saved in the prologue and properly restored.
  for (FloatRegister fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
                          QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());

  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls. This will ensure that they are
  // saved in the prologue and properly restored.
  for (FloatRegister fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickPow);
}

void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // Check to see if we have known failures that will cause us to have to bail out
  // to the runtime, and just generate the runtime call directly.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();

  // The positions must be non-negative.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // The length must be >= 0.
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(RSI));
  locations->AddTemp(Location::RegisterLocation(RDI));
  locations->AddTemp(Location::RegisterLocation(RCX));
}
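
// The fixed temporaries requested above match the implicit operands of the
// REP MOVSW emitted by the code generator below: RSI is the source pointer,
// RDI the destination pointer, and RCX the number of 16-bit chars to copy.
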
static void CheckPosition(X86_64Assembler* assembler,
                          Location pos,
                          CpuRegister input,
                          Location length,
                          SlowPathCode* slow_path,
                          CpuRegister temp,
                          bool length_is_input_length = false) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(temp, Address(input, length_offset));
      __ subl(temp, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<CpuRegister>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<CpuRegister>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
  DCHECK_EQ(src_base.AsRegister(), RSI);
  CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
  DCHECK_EQ(dest_base.AsRegister(), RDI);
  CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
  DCHECK_EQ(count.AsRegister(), RCX);

  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same.
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, slow_path->GetEntryLabel());
  }
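
  // The validity checks below mirror the bounds checks mandated by
  // System.arraycopy: both positions must be non-negative, lie within their
  // respective arrays, and leave at least `length` elements available; any
  // violation branches to the slow path, which performs the runtime call.
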
  // Validity checks: source. Use src_base as a temporary register.
  CheckPosition(assembler, src_pos, src, length, slow_path, src_base);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base);

  // We need the count in RCX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<CpuRegister>());
  }

  // Okay, everything checks out. Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = DataType::Size(DataType::Type::kUint16);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * src_pos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (dest_pos.IsConstant()) {
    int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest_base, Address(dest, char_size * dest_pos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, dest_pos.AsRegister<CpuRegister>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}


void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
}

// Compute base source address, base destination address, and end
// source address for the System.arraycopy intrinsic in `src_base`,
// `dst_base` and `src_end` respectively.
static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler,
                                        DataType::Type type,
                                        const CpuRegister& src,
                                        const Location& src_pos,
                                        const CpuRegister& dst,
                                        const Location& dst_pos,
                                        const Location& copy_length,
                                        const CpuRegister& src_base,
                                        const CpuRegister& dst_base,
                                        const CpuRegister& src_end) {
  // This routine is only used by the SystemArrayCopy intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, element_size * constant + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
  }

  if (dst_pos.IsConstant()) {
    int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dst_base, Address(dst, element_size * constant + data_offset));
  } else {
    __ leal(dst_base, Address(dst, dst_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
  }

  if (copy_length.IsConstant()) {
    int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_end, Address(src_base, element_size * constant));
  } else {
    __ leal(src_end, Address(src_base, copy_length.AsRegister<CpuRegister>(), scale_factor, 0));
  }
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);
  Location temp1_loc = locations->GetTemp(0);
  CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
  Location temp2_loc = locations->GetTemp(1);
  CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
  Location temp3_loc = locations->GetTemp(2);
  CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
  Location TMP_loc = Location::RegisterLocation(TMP);

  SlowPathCode* intrinsic_slow_path =
      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(intrinsic_slow_path);

  NearLabel conditions_on_positions_validated;
  SystemArrayCopyOptimizations optimizations(invoke);

  // If source and destination are the same, we go to slow path if we need to do
  // forward copying.
  if (src_pos.IsConstant()) {
    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      if (optimizations.GetDestinationIsSource()) {
        // Checked when building locations.
        DCHECK_GE(src_pos_constant, dest_pos_constant);
      } else if (src_pos_constant < dest_pos_constant) {
        __ cmpl(src, dest);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      }
    } else {
      if (!optimizations.GetDestinationIsSource()) {
        __ cmpl(src, dest);
        __ j(kNotEqual, &conditions_on_positions_validated);
      }
      __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
      __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
    }
  } else {
    if (!optimizations.GetDestinationIsSource()) {
      __ cmpl(src, dest);
      __ j(kNotEqual, &conditions_on_positions_validated);
    }
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    } else {
      __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    }
  }

  __ Bind(&conditions_on_positions_validated);

  if (!optimizations.GetSourceIsNotNull()) {
    // Bail out if the source is null.
    __ testl(src, src);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
    // Bail out if the destination is null.
    __ testl(dest, dest);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant() &&
      !optimizations.GetCountIsSourceLength() &&
      !optimizations.GetCountIsDestinationLength()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, intrinsic_slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckPosition(assembler,
                src_pos,
                src,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsSourceLength());

  // Validity checks: dest.
  CheckPosition(assembler,
                dest_pos,
                dest,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsDestinationLength());

  if (!optimizations.GetDoesNotNeedTypeCheck()) {
    // Check whether all elements of the source array are assignable to the component
    // type of the destination array. We do two checks: the classes are the same,
    // or the destination is Object[]. If none of these checks succeed, we go to the
    // slow path.

    bool did_unpoison = false;
    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // /* HeapReference<Class> */ temp1 = dest->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
      // Register `temp1` is not trashed by the read barrier emitted
      // by GenerateFieldLoadWithBakerReadBarrier below, as that
      // method produces a call to a ReadBarrierMarkRegX entry point,
      // which saves all potentially live registers, including
      // temporaries such as `temp1`.
      // /* HeapReference<Class> */ temp2 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
      // If heap poisoning is enabled, `temp1` and `temp2` have been
      // unpoisoned by the previous calls to
      // GenerateFieldLoadWithBakerReadBarrier.
    } else {
      // /* HeapReference<Class> */ temp1 = dest->klass_
      __ movl(temp1, Address(dest, class_offset));
      // /* HeapReference<Class> */ temp2 = src->klass_
      __ movl(temp2, Address(src, class_offset));
      if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
          !optimizations.GetSourceIsNonPrimitiveArray()) {
        // One or two of the references need to be unpoisoned. Unpoison them
        // both to make the identity check valid.
        __ MaybeUnpoisonHeapReference(temp1);
        __ MaybeUnpoisonHeapReference(temp2);
        did_unpoison = true;
      }
    }

    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
      // Bail out if the destination is not a non primitive array.
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // /* HeapReference<Class> */ TMP = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `TMP` has been unpoisoned by
        // the previous call to GenerateFieldLoadWithBakerReadBarrier.
      } else {
        // /* HeapReference<Class> */ TMP = temp1->component_type_
        __ movl(CpuRegister(TMP), Address(temp1, component_offset));
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
      }
      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }

    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
      // Bail out if the source is not a non primitive array.
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // For the same reason given earlier, `temp1` is not trashed by the
        // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
        // /* HeapReference<Class> */ TMP = temp2->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false);
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `TMP` has been unpoisoned by
        // the previous call to GenerateFieldLoadWithBakerReadBarrier.
      } else {
        // /* HeapReference<Class> */ TMP = temp2->component_type_
        __ movl(CpuRegister(TMP), Address(temp2, component_offset));
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
      }
      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }

    __ cmpl(temp1, temp2);

    if (optimizations.GetDestinationIsTypedObjectArray()) {
      NearLabel do_copy;
      __ j(kEqual, &do_copy);
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
        // We do not need to emit a read barrier for the following
        // heap reference load, as `temp1` is only used in a
        // comparison with null below, and this reference is not
        // kept afterwards.
        __ cmpl(Address(temp1, super_offset), Immediate(0));
      } else {
        if (!did_unpoison) {
          __ MaybeUnpoisonHeapReference(temp1);
        }
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        __ movl(temp1, Address(temp1, component_offset));
        __ MaybeUnpoisonHeapReference(temp1);
        // No need to unpoison the following heap reference load, as
        // we're comparing against null.
        __ cmpl(Address(temp1, super_offset), Immediate(0));
      }
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      __ Bind(&do_copy);
    } else {
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }
  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    // Bail out if the source is not a non primitive array.
    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // /* HeapReference<Class> */ temp1 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
      // /* HeapReference<Class> */ TMP = temp1->component_type_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
      __ testl(CpuRegister(TMP), CpuRegister(TMP));
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
    } else {
      // /* HeapReference<Class> */ temp1 = src->klass_
      __ movl(temp1, Address(src, class_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      // /* HeapReference<Class> */ TMP = temp1->component_type_
      __ movl(CpuRegister(TMP), Address(temp1, component_offset));
      // No need to unpoison `TMP` now, as we're comparing against null.
      __ testl(CpuRegister(TMP), CpuRegister(TMP));
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
    }
    __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
    __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
  }

  const DataType::Type type = DataType::Type::kReference;
  const int32_t element_size = DataType::Size(type);
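
  // What follows chooses between two copy loops. With Baker read barriers
  // enabled, the gray bit of the source object's lock word is tested once, up
  // front: if the source is gray, every reference copied must first be passed
  // through the ReadBarrier::Mark entry point, which is what
  // ReadBarrierSystemArrayCopySlowPathX86_64 at the top of this file does;
  // otherwise (and when read barriers are disabled) a plain word-by-word copy
  // of the references suffices.
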
  // Compute base source address, base destination address, and end
  // source address in `temp1`, `temp2` and `temp3` respectively.
  GenSystemArrayCopyAddresses(
      GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3);

  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // SystemArrayCopy implementation for Baker read barriers (see
    // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
    //
    //   if (src_ptr != end_ptr) {
    //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
    //     if (is_gray) {
    //       // Slow-path copy.
    //       do {
    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
    //       } while (src_ptr != end_ptr)
    //     } else {
    //       // Fast-path copy.
    //       do {
    //         *dest_ptr++ = *src_ptr++;
    //       } while (src_ptr != end_ptr)
    //     }
    //   }

    NearLabel loop, done;

    // Don't enter copy loop if `length == 0`.
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);

    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);

    // if (rb_state == ReadBarrier::GrayState())
    //   goto slow_path;
    // At this point, just do the "if" and make sure that flags are preserved until the branch.
    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));

    // Load fence to prevent load-load reordering.
    // Note that this is a no-op, thanks to the x86-64 memory model.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

    // Slow path used to copy array when `src` is gray.
    SlowPathCode* read_barrier_slow_path =
        new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
    codegen_->AddSlowPath(read_barrier_slow_path);

    // We have done the "if" of the gray bit check above, now branch based on the flags.
    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());

    // Fast-path copy.
    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
    __ Bind(&loop);
    __ movl(CpuRegister(TMP), Address(temp1, 0));
    __ movl(Address(temp2, 0), CpuRegister(TMP));
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);

    __ Bind(read_barrier_slow_path->GetExitLabel());
    __ Bind(&done);
  } else {
    // Non read barrier code.

    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
1485 NearLabel loop, done; 1486 __ cmpl(temp1, temp3); 1487 __ j(kEqual, &done); 1488 __ Bind(&loop); 1489 __ movl(CpuRegister(TMP), Address(temp1, 0)); 1490 __ movl(Address(temp2, 0), CpuRegister(TMP)); 1491 __ addl(temp1, Immediate(element_size)); 1492 __ addl(temp2, Immediate(element_size)); 1493 __ cmpl(temp1, temp3); 1494 __ j(kNotEqual, &loop); 1495 __ Bind(&done); 1496 } 1497 1498 // We only need one card marking on the destination array. 1499 codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null */ false); 1500 1501 __ Bind(intrinsic_slow_path->GetExitLabel()); 1502 } 1503 1504 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) { 1505 LocationSummary* locations = new (allocator_) LocationSummary( 1506 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1507 InvokeRuntimeCallingConvention calling_convention; 1508 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1509 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 1510 locations->SetOut(Location::RegisterLocation(RAX)); 1511 } 1512 1513 void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { 1514 X86_64Assembler* assembler = GetAssembler(); 1515 LocationSummary* locations = invoke->GetLocations(); 1516 1517 // Note that the null check must have been done earlier. 1518 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1519 1520 CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>(); 1521 __ testl(argument, argument); 1522 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); 1523 codegen_->AddSlowPath(slow_path); 1524 __ j(kEqual, slow_path->GetEntryLabel()); 1525 1526 codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path); 1527 __ Bind(slow_path->GetExitLabel()); 1528 } 1529 1530 void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) { 1531 if (kEmitCompilerReadBarrier && 1532 !StringEqualsOptimizations(invoke).GetArgumentIsString() && 1533 !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { 1534 // No support for this odd case (String class is moveable, not in the boot image). 1535 return; 1536 } 1537 1538 LocationSummary* locations = 1539 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1540 locations->SetInAt(0, Location::RequiresRegister()); 1541 locations->SetInAt(1, Location::RequiresRegister()); 1542 1543 // Request temporary registers, RCX and RDI needed for repe_cmpsq instruction. 1544 locations->AddTemp(Location::RegisterLocation(RCX)); 1545 locations->AddTemp(Location::RegisterLocation(RDI)); 1546 1547 // Set output, RSI needed for repe_cmpsq instruction anyways. 
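// RSI is clobbered while the inputs are still live, hence the overlapping output below.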
1548 locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap); 1549 } 1550 1551 void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) { 1552 X86_64Assembler* assembler = GetAssembler(); 1553 LocationSummary* locations = invoke->GetLocations(); 1554 1555 CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>(); 1556 CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>(); 1557 CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>(); 1558 CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>(); 1559 CpuRegister rsi = locations->Out().AsRegister<CpuRegister>(); 1560 1561 NearLabel end, return_true, return_false; 1562 1563 // Get offsets of count, value, and class fields within a string object. 1564 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 1565 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); 1566 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value(); 1567 1568 // Note that the null check must have been done earlier. 1569 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1570 1571 StringEqualsOptimizations optimizations(invoke); 1572 if (!optimizations.GetArgumentNotNull()) { 1573 // Check if input is null, return false if it is. 1574 __ testl(arg, arg); 1575 __ j(kEqual, &return_false); 1576 } 1577 1578 if (!optimizations.GetArgumentIsString()) { 1579 // Instanceof check for the argument by comparing class fields. 1580 // All string objects must have the same type since String cannot be subclassed. 1581 // Receiver must be a string object, so its class field is equal to all strings' class fields. 1582 // If the argument is a string object, its class field must be equal to the receiver's class field. 1583 __ movl(rcx, Address(str, class_offset)); 1584 __ cmpl(rcx, Address(arg, class_offset)); 1585 __ j(kNotEqual, &return_false); 1586 } 1587 1588 // Reference equality check, return true if same reference. 1589 __ cmpl(str, arg); 1590 __ j(kEqual, &return_true); 1591 1592 // Load length and compression flag of receiver string. 1593 __ movl(rcx, Address(str, count_offset)); 1594 // Check if lengths and compression flags are equal, return false if they're not. 1595 // Two identical strings will always have the same compression style since 1596 // compression style is decided on alloc. 1597 __ cmpl(rcx, Address(arg, count_offset)); 1598 __ j(kNotEqual, &return_false); 1599 // Return true if both strings are empty. Even with string compression `count == 0` means empty. 1600 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1601 "Expecting 0=compressed, 1=uncompressed"); 1602 __ jrcxz(&return_true); 1603 1604 if (mirror::kUseStringCompression) { 1605 NearLabel string_uncompressed; 1606 // Extract length and differentiate between both-compressed and both-uncompressed. 1607 // The case of differing compression styles was already rejected above. 1608 __ shrl(rcx, Immediate(1)); 1609 __ j(kCarrySet, &string_uncompressed); 1610 // Divide string length by 2, rounding up, and continue as if uncompressed. 1611 // Merge clearing the compression flag with +1 for rounding. 1612 __ addl(rcx, Immediate(1)); 1613 __ shrl(rcx, Immediate(1)); 1614 __ Bind(&string_uncompressed); 1615 } 1616 // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1617 __ leal(rsi, Address(str, value_offset)); 1618 __ leal(rdi, Address(arg, value_offset)); 1619 1620 // Divide string length by 4 and adjust for lengths not divisible by 4. 1621 __ addl(rcx, Immediate(3)); 1622 __ shrl(rcx, Immediate(2)); 1623 1624 // Assertions that must hold in order to compare strings 4 characters (uncompressed) 1625 // or 8 characters (compressed) at a time. 1626 DCHECK_ALIGNED(value_offset, 8); 1627 static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded"); 1628 1629 // Loop to compare strings four characters at a time starting at the beginning of the string. 1630 __ repe_cmpsq(); 1631 // If strings are not equal, zero flag will be cleared. 1632 __ j(kNotEqual, &return_false); 1633 1634 // Return true and exit the function. 1635 // If loop does not result in returning false, we return true. 1636 __ Bind(&return_true); 1637 __ movl(rsi, Immediate(1)); 1638 __ jmp(&end); 1639 1640 // Return false and exit the function. 1641 __ Bind(&return_false); 1642 __ xorl(rsi, rsi); 1643 __ Bind(&end); 1644 } 1645 1646 static void CreateStringIndexOfLocations(HInvoke* invoke, 1647 ArenaAllocator* allocator, 1648 bool start_at_zero) { 1649 LocationSummary* locations = new (allocator) LocationSummary(invoke, 1650 LocationSummary::kCallOnSlowPath, 1651 kIntrinsified); 1652 // The data needs to be in RDI for scasw. So request that the string is there, anyways. 1653 locations->SetInAt(0, Location::RegisterLocation(RDI)); 1654 // If we look for a constant char, we'll still have to copy it into RAX. So just request the 1655 // allocator to do that, anyways. We can still do the constant check by checking the parameter 1656 // of the instruction explicitly. 1657 // Note: This works as we don't clobber RAX anywhere. 1658 locations->SetInAt(1, Location::RegisterLocation(RAX)); 1659 if (!start_at_zero) { 1660 locations->SetInAt(2, Location::RequiresRegister()); // The starting index. 1661 } 1662 // As we clobber RDI during execution anyways, also use it as the output. 1663 locations->SetOut(Location::SameAsFirstInput()); 1664 1665 // repne scasw uses RCX as the counter. 1666 locations->AddTemp(Location::RegisterLocation(RCX)); 1667 // Need another temporary to be able to compute the result. 1668 locations->AddTemp(Location::RequiresRegister()); 1669 } 1670 1671 static void GenerateStringIndexOf(HInvoke* invoke, 1672 X86_64Assembler* assembler, 1673 CodeGeneratorX86_64* codegen, 1674 bool start_at_zero) { 1675 LocationSummary* locations = invoke->GetLocations(); 1676 1677 // Note that the null check must have been done earlier. 1678 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1679 1680 CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>(); 1681 CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>(); 1682 CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>(); 1683 CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>(); 1684 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 1685 1686 // Check our assumptions for registers. 1687 DCHECK_EQ(string_obj.AsRegister(), RDI); 1688 DCHECK_EQ(search_value.AsRegister(), RAX); 1689 DCHECK_EQ(counter.AsRegister(), RCX); 1690 DCHECK_EQ(out.AsRegister(), RDI); 1691 1692 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, 1693 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char. 
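// A code point above 0xFFFF cannot match a single UTF-16 unit, so such searches are left to the generic runtime implementation.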
1694 SlowPathCode* slow_path = nullptr; 1695 HInstruction* code_point = invoke->InputAt(1); 1696 if (code_point->IsIntConstant()) { 1697 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 1698 std::numeric_limits<uint16_t>::max()) { 1699 // Always needs the slow-path. We could directly dispatch to it, but this case should be 1700 // rare, so for simplicity just put the full slow-path down and branch unconditionally. 1701 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); 1702 codegen->AddSlowPath(slow_path); 1703 __ jmp(slow_path->GetEntryLabel()); 1704 __ Bind(slow_path->GetExitLabel()); 1705 return; 1706 } 1707 } else if (code_point->GetType() != DataType::Type::kUint16) { 1708 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max())); 1709 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); 1710 codegen->AddSlowPath(slow_path); 1711 __ j(kAbove, slow_path->GetEntryLabel()); 1712 } 1713 1714 // From here down, we know that we are looking for a char that fits in 1715 // 16 bits (uncompressed) or 8 bits (compressed). 1716 // Location of reference to data array within the String object. 1717 int32_t value_offset = mirror::String::ValueOffset().Int32Value(); 1718 // Location of count within the String object. 1719 int32_t count_offset = mirror::String::CountOffset().Int32Value(); 1720 1721 // Load the count field of the string containing the length and compression flag. 1722 __ movl(string_length, Address(string_obj, count_offset)); 1723 1724 // Do a zero-length check. Even with string compression `count == 0` means empty. 1725 // TODO: Support jecxz. 1726 NearLabel not_found_label; 1727 __ testl(string_length, string_length); 1728 __ j(kEqual, &not_found_label); 1729 1730 if (mirror::kUseStringCompression) { 1731 // Use TMP to keep string_length_flagged. 1732 __ movl(CpuRegister(TMP), string_length); 1733 // Mask out first bit used as compression flag. 1734 __ shrl(string_length, Immediate(1)); 1735 } 1736 1737 if (start_at_zero) { 1738 // Number of chars to scan is the same as the string length. 1739 __ movl(counter, string_length); 1740 // Move to the start of the string. 1741 __ addq(string_obj, Immediate(value_offset)); 1742 } else { 1743 CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>(); 1744 1745 // Do a start_index check. 1746 __ cmpl(start_index, string_length); 1747 __ j(kGreaterEqual, &not_found_label); 1748 1749 // Ensure we have a start index >= 0. 1750 __ xorl(counter, counter); 1751 __ cmpl(start_index, Immediate(0)); 1752 __ cmov(kGreater, counter, start_index, /* is64bit */ false); // 32-bit copy is enough. 1753 1754 if (mirror::kUseStringCompression) { 1755 NearLabel modify_counter, offset_uncompressed_label; 1756 __ testl(CpuRegister(TMP), Immediate(1)); 1757 __ j(kNotZero, &offset_uncompressed_label); 1758 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset)); 1759 __ jmp(&modify_counter); 1760 // Move to the start of the string: string_obj + value_offset + 2 * start_index. 1761 __ Bind(&offset_uncompressed_label); 1762 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); 1763 __ Bind(&modify_counter); 1764 } else { 1765 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); 1766 } 1767 // Now update ecx, the work counter: it will be string.length - start_index. 1768 __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
1769 __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0)); 1770 } 1771 1772 if (mirror::kUseStringCompression) { 1773 NearLabel uncompressed_string_comparison; 1774 NearLabel comparison_done; 1775 __ testl(CpuRegister(TMP), Immediate(1)); 1776 __ j(kNotZero, &uncompressed_string_comparison); 1777 // Check if RAX (search_value) is ASCII. 1778 __ cmpl(search_value, Immediate(127)); 1779 __ j(kGreater, &not_found_label); 1780 // Comparing byte-per-byte. 1781 __ repne_scasb(); 1782 __ jmp(&comparison_done); 1783 // Everything is set up for repne scasw: 1784 // * Comparison address in RDI. 1785 // * Counter in ECX. 1786 __ Bind(&uncompressed_string_comparison); 1787 __ repne_scasw(); 1788 __ Bind(&comparison_done); 1789 } else { 1790 __ repne_scasw(); 1791 } 1792 // Did we find a match? 1793 __ j(kNotEqual, &not_found_label); 1794 1795 // Yes, we matched. Compute the index of the result. 1796 __ subl(string_length, counter); 1797 __ leal(out, Address(string_length, -1)); 1798 1799 NearLabel done; 1800 __ jmp(&done); 1801 1802 // Failed to match; return -1. 1803 __ Bind(&not_found_label); 1804 __ movl(out, Immediate(-1)); 1805 1806 // And join up at the end. 1807 __ Bind(&done); 1808 if (slow_path != nullptr) { 1809 __ Bind(slow_path->GetExitLabel()); 1810 } 1811 } 1812 1813 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) { 1814 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ true); 1815 } 1816 1817 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) { 1818 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); 1819 } 1820 1821 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { 1822 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ false); 1823 } 1824 1825 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { 1826 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); 1827 } 1828 1829 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) { 1830 LocationSummary* locations = new (allocator_) LocationSummary( 1831 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1832 InvokeRuntimeCallingConvention calling_convention; 1833 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1834 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 1835 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); 1836 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); 1837 locations->SetOut(Location::RegisterLocation(RAX)); 1838 } 1839 1840 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) { 1841 X86_64Assembler* assembler = GetAssembler(); 1842 LocationSummary* locations = invoke->GetLocations(); 1843 1844 CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>(); 1845 __ testl(byte_array, byte_array); 1846 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); 1847 codegen_->AddSlowPath(slow_path); 1848 __ j(kEqual, slow_path->GetEntryLabel()); 1849 1850 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc()); 1851 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); 1852 __ Bind(slow_path->GetExitLabel()); 1853 } 1854 1855 void
IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) { 1856 LocationSummary* locations = 1857 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 1858 InvokeRuntimeCallingConvention calling_convention; 1859 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1860 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 1861 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); 1862 locations->SetOut(Location::RegisterLocation(RAX)); 1863 } 1864 1865 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) { 1866 // No need to emit code checking whether `locations->InAt(2)` is a null 1867 // pointer, as callers of the native method 1868 // 1869 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) 1870 // 1871 // all include a null check on `data` before calling that method. 1872 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); 1873 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); 1874 } 1875 1876 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) { 1877 LocationSummary* locations = new (allocator_) LocationSummary( 1878 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1879 InvokeRuntimeCallingConvention calling_convention; 1880 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1881 locations->SetOut(Location::RegisterLocation(RAX)); 1882 } 1883 1884 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) { 1885 X86_64Assembler* assembler = GetAssembler(); 1886 LocationSummary* locations = invoke->GetLocations(); 1887 1888 CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>(); 1889 __ testl(string_to_copy, string_to_copy); 1890 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); 1891 codegen_->AddSlowPath(slow_path); 1892 __ j(kEqual, slow_path->GetEntryLabel()); 1893 1894 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc()); 1895 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); 1896 __ Bind(slow_path->GetExitLabel()); 1897 } 1898 1899 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1900 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); 1901 LocationSummary* locations = 1902 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1903 locations->SetInAt(0, Location::RequiresRegister()); 1904 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); 1905 locations->SetInAt(2, Location::RequiresRegister()); 1906 locations->SetInAt(3, Location::RequiresRegister()); 1907 locations->SetInAt(4, Location::RequiresRegister()); 1908 1909 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers. 
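// REP MOVSW implicitly uses RSI as the source, RDI as the destination and RCX as the count.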
1910 locations->AddTemp(Location::RegisterLocation(RSI)); 1911 locations->AddTemp(Location::RegisterLocation(RDI)); 1912 locations->AddTemp(Location::RegisterLocation(RCX)); 1913 } 1914 1915 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1916 X86_64Assembler* assembler = GetAssembler(); 1917 LocationSummary* locations = invoke->GetLocations(); 1918 1919 size_t char_component_size = DataType::Size(DataType::Type::kUint16); 1920 // Location of data in char array buffer. 1921 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value(); 1922 // Location of char array data in string. 1923 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); 1924 1925 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); 1926 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); 1927 Location srcBegin = locations->InAt(1); 1928 int srcBegin_value = 1929 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; 1930 CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>(); 1931 CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>(); 1932 CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>(); 1933 1934 // Check assumption that sizeof(Char) is 2 (used in scaling below). 1935 const size_t char_size = DataType::Size(DataType::Type::kUint16); 1936 DCHECK_EQ(char_size, 2u); 1937 1938 NearLabel done; 1939 // Compute the number of chars (words) to move. 1940 __ movl(CpuRegister(RCX), srcEnd); 1941 if (srcBegin.IsConstant()) { 1942 __ subl(CpuRegister(RCX), Immediate(srcBegin_value)); 1943 } else { 1944 DCHECK(srcBegin.IsRegister()); 1945 __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>()); 1946 } 1947 if (mirror::kUseStringCompression) { 1948 NearLabel copy_uncompressed, copy_loop; 1949 const size_t c_char_size = DataType::Size(DataType::Type::kInt8); 1950 DCHECK_EQ(c_char_size, 1u); 1951 // Location of count in string. 1952 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 1953 1954 __ testl(Address(obj, count_offset), Immediate(1)); 1955 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1956 "Expecting 0=compressed, 1=uncompressed"); 1957 __ j(kNotZero, &copy_uncompressed); 1958 // Compute the address of the source string by adding the number of chars from 1959 // the source beginning to the value offset of a string. 1960 __ leaq(CpuRegister(RSI), 1961 CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset)); 1962 // Start the loop to copy String's value to Array of Char. 1963 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); 1964 1965 __ Bind(&copy_loop); 1966 __ jrcxz(&done); 1967 // Use TMP as temporary (convert byte from RSI to word). 1968 // TODO: Selecting RAX as the temporary and using LODSB/STOSW. 1969 __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0)); 1970 __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP)); 1971 __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size)); 1972 __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size)); 1973 // TODO: Add support for LOOP to X86_64Assembler. 1974 __ subl(CpuRegister(RCX), Immediate(1)); 1975 __ jmp(&copy_loop); 1976 1977 __ Bind(&copy_uncompressed); 1978 } 1979 1980 __ leaq(CpuRegister(RSI), 1981 CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset)); 1982 // Compute the address of the destination buffer.
1983 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); 1984 // Do the move. 1985 __ rep_movsw(); 1986 1987 __ Bind(&done); 1988 } 1989 1990 static void GenPeek(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) { 1991 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); 1992 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity. 1993 // x86 allows unaligned access. We do not have to check the input or use specific instructions 1994 // to avoid a SIGBUS. 1995 switch (size) { 1996 case DataType::Type::kInt8: 1997 __ movsxb(out, Address(address, 0)); 1998 break; 1999 case DataType::Type::kInt16: 2000 __ movsxw(out, Address(address, 0)); 2001 break; 2002 case DataType::Type::kInt32: 2003 __ movl(out, Address(address, 0)); 2004 break; 2005 case DataType::Type::kInt64: 2006 __ movq(out, Address(address, 0)); 2007 break; 2008 default: 2009 LOG(FATAL) << "Type not recognized for peek: " << size; 2010 UNREACHABLE(); 2011 } 2012 } 2013 2014 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) { 2015 CreateIntToIntLocations(allocator_, invoke); 2016 } 2017 2018 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) { 2019 GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler()); 2020 } 2021 2022 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) { 2023 CreateIntToIntLocations(allocator_, invoke); 2024 } 2025 2026 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) { 2027 GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); 2028 } 2029 2030 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) { 2031 CreateIntToIntLocations(allocator_, invoke); 2032 } 2033 2034 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) { 2035 GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); 2036 } 2037 2038 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) { 2039 CreateIntToIntLocations(allocator_, invoke); 2040 } 2041 2042 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) { 2043 GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); 2044 } 2045 2046 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { 2047 LocationSummary* locations = 2048 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2049 locations->SetInAt(0, Location::RequiresRegister()); 2050 locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1))); 2051 } 2052 2053 static void GenPoke(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) { 2054 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); 2055 Location value = locations->InAt(1); 2056 // x86 allows unaligned access. We do not have to check the input or use specific instructions 2057 // to avoid a SIGBUS. 
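// Constant values are emitted as immediates; note that x86-64 has no 64-bit immediate store, so a constant long poke is restricted to values that fit in a sign-extended 32-bit immediate (see the kInt64 case below).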
2058 switch (size) { 2059 case DataType::Type::kInt8: 2060 if (value.IsConstant()) { 2061 __ movb(Address(address, 0), 2062 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); 2063 } else { 2064 __ movb(Address(address, 0), value.AsRegister<CpuRegister>()); 2065 } 2066 break; 2067 case DataType::Type::kInt16: 2068 if (value.IsConstant()) { 2069 __ movw(Address(address, 0), 2070 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); 2071 } else { 2072 __ movw(Address(address, 0), value.AsRegister<CpuRegister>()); 2073 } 2074 break; 2075 case DataType::Type::kInt32: 2076 if (value.IsConstant()) { 2077 __ movl(Address(address, 0), 2078 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); 2079 } else { 2080 __ movl(Address(address, 0), value.AsRegister<CpuRegister>()); 2081 } 2082 break; 2083 case DataType::Type::kInt64: 2084 if (value.IsConstant()) { 2085 int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); 2086 DCHECK(IsInt<32>(v)); 2087 int32_t v_32 = v; 2088 __ movq(Address(address, 0), Immediate(v_32)); 2089 } else { 2090 __ movq(Address(address, 0), value.AsRegister<CpuRegister>()); 2091 } 2092 break; 2093 default: 2094 LOG(FATAL) << "Type not recognized for poke: " << size; 2095 UNREACHABLE(); 2096 } 2097 } 2098 2099 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) { 2100 CreateIntIntToVoidLocations(allocator_, invoke); 2101 } 2102 2103 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) { 2104 GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler()); 2105 } 2106 2107 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) { 2108 CreateIntIntToVoidLocations(allocator_, invoke); 2109 } 2110 2111 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) { 2112 GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); 2113 } 2114 2115 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) { 2116 CreateIntIntToVoidLocations(allocator_, invoke); 2117 } 2118 2119 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) { 2120 GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); 2121 } 2122 2123 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) { 2124 CreateIntIntToVoidLocations(allocator_, invoke); 2125 } 2126 2127 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) { 2128 GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); 2129 } 2130 2131 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) { 2132 LocationSummary* locations = 2133 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2134 locations->SetOut(Location::RequiresRegister()); 2135 } 2136 2137 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { 2138 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>(); 2139 GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(), 2140 /* no_rip */ true)); 2141 } 2142 2143 static void GenUnsafeGet(HInvoke* invoke, 2144 DataType::Type type, 2145 bool is_volatile ATTRIBUTE_UNUSED, 2146 CodeGeneratorX86_64* codegen) { 2147 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); 2148 LocationSummary* locations = invoke->GetLocations(); 2149 Location base_loc = locations->InAt(1); 2150 CpuRegister base = 
base_loc.AsRegister<CpuRegister>(); 2151 Location offset_loc = locations->InAt(2); 2152 CpuRegister offset = offset_loc.AsRegister<CpuRegister>(); 2153 Location output_loc = locations->Out(); 2154 CpuRegister output = output_loc.AsRegister<CpuRegister>(); 2155 2156 switch (type) { 2157 case DataType::Type::kInt32: 2158 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 2159 break; 2160 2161 case DataType::Type::kReference: { 2162 if (kEmitCompilerReadBarrier) { 2163 if (kUseBakerReadBarrier) { 2164 Address src(base, offset, ScaleFactor::TIMES_1, 0); 2165 codegen->GenerateReferenceLoadWithBakerReadBarrier( 2166 invoke, output_loc, base, src, /* needs_null_check */ false); 2167 } else { 2168 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 2169 codegen->GenerateReadBarrierSlow( 2170 invoke, output_loc, output_loc, base_loc, 0U, offset_loc); 2171 } 2172 } else { 2173 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 2174 __ MaybeUnpoisonHeapReference(output); 2175 } 2176 break; 2177 } 2178 2179 case DataType::Type::kInt64: 2180 __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 2181 break; 2182 2183 default: 2184 LOG(FATAL) << "Unsupported op size " << type; 2185 UNREACHABLE(); 2186 } 2187 } 2188 2189 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { 2190 bool can_call = kEmitCompilerReadBarrier && 2191 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || 2192 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); 2193 LocationSummary* locations = 2194 new (allocator) LocationSummary(invoke, 2195 can_call 2196 ? LocationSummary::kCallOnSlowPath 2197 : LocationSummary::kNoCall, 2198 kIntrinsified); 2199 if (can_call && kUseBakerReadBarrier) { 2200 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 2201 } 2202 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 2203 locations->SetInAt(1, Location::RequiresRegister()); 2204 locations->SetInAt(2, Location::RequiresRegister()); 2205 locations->SetOut(Location::RequiresRegister(), 2206 (can_call ? 
Location::kOutputOverlap : Location::kNoOutputOverlap)); 2207 } 2208 2209 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { 2210 CreateIntIntIntToIntLocations(allocator_, invoke); 2211 } 2212 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { 2213 CreateIntIntIntToIntLocations(allocator_, invoke); 2214 } 2215 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) { 2216 CreateIntIntIntToIntLocations(allocator_, invoke); 2217 } 2218 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 2219 CreateIntIntIntToIntLocations(allocator_, invoke); 2220 } 2221 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) { 2222 CreateIntIntIntToIntLocations(allocator_, invoke); 2223 } 2224 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 2225 CreateIntIntIntToIntLocations(allocator_, invoke); 2226 } 2227 2228 2229 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) { 2230 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); 2231 } 2232 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { 2233 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); 2234 } 2235 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) { 2236 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); 2237 } 2238 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 2239 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); 2240 } 2241 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) { 2242 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); 2243 } 2244 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 2245 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); 2246 } 2247 2248 2249 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator, 2250 DataType::Type type, 2251 HInvoke* invoke) { 2252 LocationSummary* locations = 2253 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2254 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 2255 locations->SetInAt(1, Location::RequiresRegister()); 2256 locations->SetInAt(2, Location::RequiresRegister()); 2257 locations->SetInAt(3, Location::RequiresRegister()); 2258 if (type == DataType::Type::kReference) { 2259 // Need temp registers for card-marking. 2260 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 
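// Second temp for card-marking: MarkGCCard uses one register for the card table base and one for the card address computation.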
2261 locations->AddTemp(Location::RequiresRegister()); 2262 } 2263 } 2264 2265 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) { 2266 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke); 2267 } 2268 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { 2269 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke); 2270 } 2271 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { 2272 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke); 2273 } 2274 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) { 2275 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke); 2276 } 2277 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 2278 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke); 2279 } 2280 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 2281 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke); 2282 } 2283 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) { 2284 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke); 2285 } 2286 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { 2287 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke); 2288 } 2289 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { 2290 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke); 2291 } 2292 2293 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 2294 // memory model. 2295 static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool is_volatile, 2296 CodeGeneratorX86_64* codegen) { 2297 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); 2298 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); 2299 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); 2300 CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>(); 2301 2302 if (type == DataType::Type::kInt64) { 2303 __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value); 2304 } else if (kPoisonHeapReferences && type == DataType::Type::kReference) { 2305 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); 2306 __ movl(temp, value); 2307 __ PoisonHeapReference(temp); 2308 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp); 2309 } else { 2310 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value); 2311 } 2312 2313 if (is_volatile) { 2314 codegen->MemoryFence(); 2315 } 2316 2317 if (type == DataType::Type::kReference) { 2318 bool value_can_be_null = true; // TODO: Worth finding out this information? 
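// A reference was stored into `base`, so mark its card to keep the GC's card table up to date.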
2319 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), 2320 locations->GetTemp(1).AsRegister<CpuRegister>(), 2321 base, 2322 value, 2323 value_can_be_null); 2324 } 2325 } 2326 2327 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) { 2328 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); 2329 } 2330 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { 2331 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); 2332 } 2333 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { 2334 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ true, codegen_); 2335 } 2336 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) { 2337 GenUnsafePut( 2338 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); 2339 } 2340 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 2341 GenUnsafePut( 2342 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); 2343 } 2344 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 2345 GenUnsafePut( 2346 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ true, codegen_); 2347 } 2348 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) { 2349 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); 2350 } 2351 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { 2352 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); 2353 } 2354 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { 2355 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ true, codegen_); 2356 } 2357 2358 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, 2359 DataType::Type type, 2360 HInvoke* invoke) { 2361 bool can_call = kEmitCompilerReadBarrier && 2362 kUseBakerReadBarrier && 2363 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); 2364 LocationSummary* locations = 2365 new (allocator) LocationSummary(invoke, 2366 can_call 2367 ? LocationSummary::kCallOnSlowPath 2368 : LocationSummary::kNoCall, 2369 kIntrinsified); 2370 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 2371 locations->SetInAt(1, Location::RequiresRegister()); 2372 locations->SetInAt(2, Location::RequiresRegister()); 2373 // expected value must be in EAX/RAX. 2374 locations->SetInAt(3, Location::RegisterLocation(RAX)); 2375 locations->SetInAt(4, Location::RequiresRegister()); 2376 2377 locations->SetOut(Location::RequiresRegister()); 2378 if (type == DataType::Type::kReference) { 2379 // Need temporary registers for card-marking, and possibly for 2380 // (Baker) read barrier. 2381 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 
2382 locations->AddTemp(Location::RequiresRegister()); 2383 } 2384 } 2385 2386 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) { 2387 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke); 2388 } 2389 2390 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { 2391 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke); 2392 } 2393 2394 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { 2395 // The only read barrier implementation supporting the 2396 // UnsafeCASObject intrinsic is the Baker-style read barriers. 2397 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { 2398 return; 2399 } 2400 2401 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke); 2402 } 2403 2404 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) { 2405 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); 2406 LocationSummary* locations = invoke->GetLocations(); 2407 2408 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); 2409 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); 2410 CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>(); 2411 // Ensure `expected` is in RAX (required by the CMPXCHG instruction). 2412 DCHECK_EQ(expected.AsRegister(), RAX); 2413 CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>(); 2414 Location out_loc = locations->Out(); 2415 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 2416 2417 if (type == DataType::Type::kReference) { 2418 // The only read barrier implementation supporting the 2419 // UnsafeCASObject intrinsic is the Baker-style read barriers. 2420 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 2421 2422 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); 2423 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); 2424 2425 // Mark card for object assuming new value is stored. 2426 bool value_can_be_null = true; // TODO: Worth finding out this information? 2427 codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null); 2428 2429 // The address of the field within the holding object. 2430 Address field_addr(base, offset, ScaleFactor::TIMES_1, 0); 2431 2432 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2433 // Need to make sure the reference stored in the field is a to-space 2434 // one before attempting the CAS or the CAS could fail incorrectly. 2435 codegen->GenerateReferenceLoadWithBakerReadBarrier( 2436 invoke, 2437 out_loc, // Unused, used only as a "temporary" within the read barrier. 2438 base, 2439 field_addr, 2440 /* needs_null_check */ false, 2441 /* always_update_field */ true, 2442 &temp1, 2443 &temp2); 2444 } 2445 2446 bool base_equals_value = (base.AsRegister() == value.AsRegister()); 2447 Register value_reg = value.AsRegister(); 2448 if (kPoisonHeapReferences) { 2449 if (base_equals_value) { 2450 // If `base` and `value` are the same register location, move 2451 // `value_reg` to a temporary register. This way, poisoning 2452 // `value_reg` won't invalidate `base`. 2453 value_reg = temp1.AsRegister(); 2454 __ movl(CpuRegister(value_reg), base); 2455 } 2456 2457 // Check that the register allocator did not assign the location 2458 // of `expected` (RAX) to `value` nor to `base`, so that heap 2459 // poisoning (when enabled) works as intended below. 
2460 // - If `value` were equal to `expected`, both references would 2461 // be poisoned twice, meaning they would not be poisoned at 2462 // all, as heap poisoning uses address negation. 2463 // - If `base` were equal to `expected`, poisoning `expected` 2464 // would invalidate `base`. 2465 DCHECK_NE(value_reg, expected.AsRegister()); 2466 DCHECK_NE(base.AsRegister(), expected.AsRegister()); 2467 2468 __ PoisonHeapReference(expected); 2469 __ PoisonHeapReference(CpuRegister(value_reg)); 2470 } 2471 2472 __ LockCmpxchgl(field_addr, CpuRegister(value_reg)); 2473 2474 // LOCK CMPXCHG has full barrier semantics, and we don't need 2475 // scheduling barriers at this time. 2476 2477 // Convert ZF into the Boolean result. 2478 __ setcc(kZero, out); 2479 __ movzxb(out, out); 2480 2481 // If heap poisoning is enabled, we need to unpoison the values 2482 // that were poisoned earlier. 2483 if (kPoisonHeapReferences) { 2484 if (base_equals_value) { 2485 // `value_reg` has been moved to a temporary register, no need 2486 // to unpoison it. 2487 } else { 2488 // Ensure `value` is different from `out`, so that unpoisoning 2489 // the former does not invalidate the latter. 2490 DCHECK_NE(value_reg, out.AsRegister()); 2491 __ UnpoisonHeapReference(CpuRegister(value_reg)); 2492 } 2493 // Ensure `expected` is different from `out`, so that unpoisoning 2494 // the former does not invalidate the latter. 2495 DCHECK_NE(expected.AsRegister(), out.AsRegister()); 2496 __ UnpoisonHeapReference(expected); 2497 } 2498 } else { 2499 if (type == DataType::Type::kInt32) { 2500 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); 2501 } else if (type == DataType::Type::kInt64) { 2502 __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value); 2503 } else { 2504 LOG(FATAL) << "Unexpected CAS type " << type; 2505 } 2506 2507 // LOCK CMPXCHG has full barrier semantics, and we don't need 2508 // scheduling barriers at this time. 2509 2510 // Convert ZF into the Boolean result. 2511 __ setcc(kZero, out); 2512 __ movzxb(out, out); 2513 } 2514 } 2515 2516 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) { 2517 GenCAS(DataType::Type::kInt32, invoke, codegen_); 2518 } 2519 2520 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { 2521 GenCAS(DataType::Type::kInt64, invoke, codegen_); 2522 } 2523 2524 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { 2525 // The only read barrier implementation supporting the 2526 // UnsafeCASObject intrinsic is the Baker-style read barriers. 
2527 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 2528 2529 GenCAS(DataType::Type::kReference, invoke, codegen_); 2530 } 2531 2532 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) { 2533 LocationSummary* locations = 2534 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2535 locations->SetInAt(0, Location::RequiresRegister()); 2536 locations->SetOut(Location::SameAsFirstInput()); 2537 locations->AddTemp(Location::RequiresRegister()); 2538 } 2539 2540 static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask, 2541 X86_64Assembler* assembler) { 2542 Immediate imm_shift(shift); 2543 Immediate imm_mask(mask); 2544 __ movl(temp, reg); 2545 __ shrl(reg, imm_shift); 2546 __ andl(temp, imm_mask); 2547 __ andl(reg, imm_mask); 2548 __ shll(temp, imm_shift); 2549 __ orl(reg, temp); 2550 } 2551 2552 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) { 2553 X86_64Assembler* assembler = GetAssembler(); 2554 LocationSummary* locations = invoke->GetLocations(); 2555 2556 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); 2557 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); 2558 2559 /* 2560 * Use one bswap instruction to reverse byte order first and then use 3 rounds of 2561 * swapping bits to reverse bits in a number x. Using bswap to save instructions 2562 * compared to generic luni implementation which has 5 rounds of swapping bits. 2563 * x = bswap x 2564 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; 2565 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; 2566 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; 2567 */ 2568 __ bswapl(reg); 2569 SwapBits(reg, temp, 1, 0x55555555, assembler); 2570 SwapBits(reg, temp, 2, 0x33333333, assembler); 2571 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); 2572 } 2573 2574 void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) { 2575 LocationSummary* locations = 2576 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2577 locations->SetInAt(0, Location::RequiresRegister()); 2578 locations->SetOut(Location::SameAsFirstInput()); 2579 locations->AddTemp(Location::RequiresRegister()); 2580 locations->AddTemp(Location::RequiresRegister()); 2581 } 2582 2583 static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask, 2584 int32_t shift, int64_t mask, X86_64Assembler* assembler) { 2585 Immediate imm_shift(shift); 2586 __ movq(temp_mask, Immediate(mask)); 2587 __ movq(temp, reg); 2588 __ shrq(reg, imm_shift); 2589 __ andq(temp, temp_mask); 2590 __ andq(reg, temp_mask); 2591 __ shlq(temp, imm_shift); 2592 __ orq(reg, temp); 2593 } 2594 2595 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) { 2596 X86_64Assembler* assembler = GetAssembler(); 2597 LocationSummary* locations = invoke->GetLocations(); 2598 2599 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); 2600 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); 2601 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); 2602 2603 /* 2604 * Use one bswap instruction to reverse byte order first and then use 3 rounds of 2605 * swapping bits to reverse bits in a long number x. Using bswap to save instructions 2606 * compared to generic luni implementation which has 5 rounds of swapping bits. 
2607 * x = bswap x 2608 * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555; 2609 * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333; 2610 * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F; 2611 */ 2612 __ bswapq(reg); 2613 SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler); 2614 SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler); 2615 SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler); 2616 } 2617 2618 static void CreateBitCountLocations( 2619 ArenaAllocator* allocator, CodeGeneratorX86_64* codegen, HInvoke* invoke) { 2620 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) { 2621 // Do nothing if there is no popcnt support. This results in generating 2622 // a call for the intrinsic rather than direct code. 2623 return; 2624 } 2625 LocationSummary* locations = 2626 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2627 locations->SetInAt(0, Location::Any()); 2628 locations->SetOut(Location::RequiresRegister()); 2629 } 2630 2631 static void GenBitCount(X86_64Assembler* assembler, 2632 CodeGeneratorX86_64* codegen, 2633 HInvoke* invoke, 2634 bool is_long) { 2635 LocationSummary* locations = invoke->GetLocations(); 2636 Location src = locations->InAt(0); 2637 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2638 2639 if (invoke->InputAt(0)->IsConstant()) { 2640 // Evaluate this at compile time. 2641 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2642 int32_t result = is_long 2643 ? POPCOUNT(static_cast<uint64_t>(value)) 2644 : POPCOUNT(static_cast<uint32_t>(value)); 2645 codegen->Load32BitValue(out, result); 2646 return; 2647 } 2648 2649 if (src.IsRegister()) { 2650 if (is_long) { 2651 __ popcntq(out, src.AsRegister<CpuRegister>()); 2652 } else { 2653 __ popcntl(out, src.AsRegister<CpuRegister>()); 2654 } 2655 } else if (is_long) { 2656 DCHECK(src.IsDoubleStackSlot()); 2657 __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex())); 2658 } else { 2659 DCHECK(src.IsStackSlot()); 2660 __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex())); 2661 } 2662 } 2663 2664 void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) { 2665 CreateBitCountLocations(allocator_, codegen_, invoke); 2666 } 2667 2668 void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) { 2669 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false); 2670 } 2671 2672 void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) { 2673 CreateBitCountLocations(allocator_, codegen_, invoke); 2674 } 2675 2676 void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) { 2677 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true); 2678 } 2679 2680 static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) { 2681 LocationSummary* locations = 2682 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2683 locations->SetInAt(0, Location::Any()); 2684 locations->SetOut(Location::RequiresRegister()); 2685 locations->AddTemp(is_high ? 
Location::RegisterLocation(RCX) // needs CL 2686 : Location::RequiresRegister()); // any will do 2687 } 2688 2689 static void GenOneBit(X86_64Assembler* assembler, 2690 CodeGeneratorX86_64* codegen, 2691 HInvoke* invoke, 2692 bool is_high, bool is_long) { 2693 LocationSummary* locations = invoke->GetLocations(); 2694 Location src = locations->InAt(0); 2695 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2696 2697 if (invoke->InputAt(0)->IsConstant()) { 2698 // Evaluate this at compile time. 2699 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2700 if (value == 0) { 2701 __ xorl(out, out); // Clears upper bits too. 2702 return; 2703 } 2704 // Nonzero value. 2705 if (is_high) { 2706 value = is_long ? 63 - CLZ(static_cast<uint64_t>(value)) 2707 : 31 - CLZ(static_cast<uint32_t>(value)); 2708 } else { 2709 value = is_long ? CTZ(static_cast<uint64_t>(value)) 2710 : CTZ(static_cast<uint32_t>(value)); 2711 } 2712 if (is_long) { 2713 codegen->Load64BitValue(out, 1ULL << value); 2714 } else { 2715 codegen->Load32BitValue(out, 1 << value); 2716 } 2717 return; 2718 } 2719 2720 // Handle the non-constant cases. 2721 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); 2722 if (is_high) { 2723 // Use architectural support: basically 1 << bsr. 2724 if (src.IsRegister()) { 2725 if (is_long) { 2726 __ bsrq(tmp, src.AsRegister<CpuRegister>()); 2727 } else { 2728 __ bsrl(tmp, src.AsRegister<CpuRegister>()); 2729 } 2730 } else if (is_long) { 2731 DCHECK(src.IsDoubleStackSlot()); 2732 __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); 2733 } else { 2734 DCHECK(src.IsStackSlot()); 2735 __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); 2736 } 2737 // BSR sets ZF if the input was zero. 2738 NearLabel is_zero, done; 2739 __ j(kEqual, &is_zero); 2740 __ movl(out, Immediate(1)); // Clears upper bits too. 2741 if (is_long) { 2742 __ shlq(out, tmp); 2743 } else { 2744 __ shll(out, tmp); 2745 } 2746 __ jmp(&done); 2747 __ Bind(&is_zero); 2748 __ xorl(out, out); // Clears upper bits too. 2749 __ Bind(&done); 2750 } else { 2751 // Copy input into temporary. 
2752 if (src.IsRegister()) { 2753 if (is_long) { 2754 __ movq(tmp, src.AsRegister<CpuRegister>()); 2755 } else { 2756 __ movl(tmp, src.AsRegister<CpuRegister>()); 2757 } 2758 } else if (is_long) { 2759 DCHECK(src.IsDoubleStackSlot()); 2760 __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); 2761 } else { 2762 DCHECK(src.IsStackSlot()); 2763 __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); 2764 } 2765 // Do the bit twiddling: basically tmp & -tmp; 2766 if (is_long) { 2767 __ movq(out, tmp); 2768 __ negq(tmp); 2769 __ andq(out, tmp); 2770 } else { 2771 __ movl(out, tmp); 2772 __ negl(tmp); 2773 __ andl(out, tmp); 2774 } 2775 } 2776 } 2777 2778 void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) { 2779 CreateOneBitLocations(allocator_, invoke, /* is_high */ true); 2780 } 2781 2782 void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) { 2783 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ false); 2784 } 2785 2786 void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) { 2787 CreateOneBitLocations(allocator_, invoke, /* is_high */ true); 2788 } 2789 2790 void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) { 2791 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ true); 2792 } 2793 2794 void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) { 2795 CreateOneBitLocations(allocator_, invoke, /* is_high */ false); 2796 } 2797 2798 void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) { 2799 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ false); 2800 } 2801 2802 void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) { 2803 CreateOneBitLocations(allocator_, invoke, /* is_high */ false); 2804 } 2805 2806 void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) { 2807 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ true); 2808 } 2809 2810 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) { 2811 LocationSummary* locations = 2812 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2813 locations->SetInAt(0, Location::Any()); 2814 locations->SetOut(Location::RequiresRegister()); 2815 } 2816 2817 static void GenLeadingZeros(X86_64Assembler* assembler, 2818 CodeGeneratorX86_64* codegen, 2819 HInvoke* invoke, bool is_long) { 2820 LocationSummary* locations = invoke->GetLocations(); 2821 Location src = locations->InAt(0); 2822 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2823 2824 int zero_value_result = is_long ? 64 : 32; 2825 if (invoke->InputAt(0)->IsConstant()) { 2826 // Evaluate this at compile time. 2827 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2828 if (value == 0) { 2829 value = zero_value_result; 2830 } else { 2831 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value)); 2832 } 2833 codegen->Load32BitValue(out, value); 2834 return; 2835 } 2836 2837 // Handle the non-constant cases. 
  if (src.IsRegister()) {
    if (is_long) {
      __ bsrq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsrl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSR sets ZF if the input was zero, and the output is undefined.
  NearLabel is_zero, done;
  __ j(kEqual, &is_zero);

  // Correct the result from BSR to get the CLZ result.
  __ xorl(out, Immediate(zero_value_result - 1));
  __ jmp(&done);

  // Fix the zero case with the expected result.
  __ Bind(&is_zero);
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenTrailingZeros(X86_64Assembler* assembler,
                             CodeGeneratorX86_64* codegen,
                             HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsfq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsfl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSF sets ZF if the input was zero, and the output is undefined.
  NearLabel done;
  __ j(kNotEqual, &done);

  // Fix the zero case with the expected result.
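  // (Unlike BSR, BSF already equals the trailing-zero count for nonzero
  // inputs, so the fall-through path needs no correction; only the zero case
  // has to be patched with 32/64 here.)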
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      Location::RegisterLocation(RAX),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
  LocationSummary* locations = invoke->GetLocations();
  X86_64Assembler* assembler = GetAssembler();

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  InvokeRuntimeCallingConvention calling_convention;
  if (invoke->InputAt(0)->IsConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (value >= info.low && value <= info.high) {
      // Just embed the j.l.Integer in the code.
      ScopedObjectAccess soa(Thread::Current());
      mirror::Object* boxed = info.cache->Get(value + (-info.low));
      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
      __ movl(out, Immediate(static_cast<int32_t>(address)));
    } else {
      // Allocate and initialize a new j.l.Integer.
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
      CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
      __ movl(argument, Immediate(static_cast<int32_t>(address)));
      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
      __ movl(Address(out, info.value_offset), Immediate(value));
    }
  } else {
    CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
    // Check bounds of our cache.
    __ leal(out, Address(in, -info.low));
    __ cmpl(out, Immediate(info.high - info.low + 1));
    NearLabel allocate, done;
    __ j(kAboveEqual, &allocate);
    // If the value is within the bounds, load the j.l.Integer directly from the array.
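    // At this point `out` holds the zero-based cache index (value - info.low);
    // the boxed object reference lives at cache + data_offset + index * 4
    // (4-byte heap references, matching the TIMES_4 scale used below).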
    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
    if (data_offset + address <= std::numeric_limits<int32_t>::max()) {
      __ movl(out, Address(out, TIMES_4, data_offset + address));
    } else {
      CpuRegister temp = CpuRegister(calling_convention.GetRegisterAt(0));
      __ movl(temp, Immediate(static_cast<int32_t>(data_offset + address)));
      __ movl(out, Address(temp, out, TIMES_4, 0));
    }
    __ MaybeUnpoisonHeapReference(out);
    __ jmp(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
    CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
    __ movl(argument, Immediate(static_cast<int32_t>(address)));
    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
    __ movl(Address(out, info.value_offset), in);
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
  Address address = Address::Absolute(
      Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip */ true);
  NearLabel done;
  __ gs()->movl(out, address);
  __ testl(out, out);
  __ j(kEqual, &done);
  __ gs()->movl(address, Immediate(0));
  codegen_->MemoryFence();
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
}

void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }

UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)

UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppend);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(X86_64)

#undef __

}  // namespace x86_64
}  // namespace art