/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  if (kEmitCompilerReadBarrier && res->CanCall()) {
    // Generating an intrinsic for this HInvoke may produce an
    // IntrinsicSlowPathX86_64 slow path. Currently this approach
    // does not work when using read barriers, as the emitted
    // calling sequence will make use of another slow path
    // (ReadBarrierForRootSlowPathX86_64 for HInvokeStaticOrDirect,
    // ReadBarrierSlowPathX86_64 for HInvokeVirtual). So we bail
    // out in this case.
    //
    // TODO: Find a way to have intrinsics work with read barriers.
    invoke->SetLocations(nullptr);
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(
      invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(
      invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(
      invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
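      // Descriptive note: bswapl reverses all four bytes of the 32-bit register,
      // leaving the two meaningful (now swapped) bytes in the upper half; the
      // arithmetic shift by 16 moves them back down and sign-extends, which is
      // what the short-typed result requires.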
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
// need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(
      invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

  // TODO: Can mask directly with constant area using pand if we can guarantee
  // that the literal is aligned on a 16 byte boundary. This will avoid a
  // temporary.
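  // Descriptive note: the absolute value is obtained by clearing the IEEE-754
  // sign bit. The masks below have every bit set except the top one, so the
  // AND leaves exponent and mantissa untouched.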
  if (is64bit) {
    __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
    __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  } else {
    __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
    __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(
      invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
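  //
  // Descriptive note on the -0/+0 step: ucomis* reports -0.0 and +0.0 as equal,
  // so neither ordering branch is taken for that pair. OR-ing the operands lets
  // a -0.0 input win for min (sign bit gets set), while AND-ing lets +0.0 win
  // for max (sign bit gets cleared), matching Math.min/Math.max semantics.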

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(
      invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    // a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(
      invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(
      invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(
      invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(
        invoke, LocationSummary::kNoCall, kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  // See intrinsics.h.
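  // Descriptive note: kRoundIsPlusPointFive (see intrinsics.h) indicates that
  // round(x) is specified as floor(x + 0.5), which is what the SSE4.1 sequence
  // below computes; when it does not hold we simply do not intrinsify.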
  if (kRoundIsPlusPointFive) {
    CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  NearLabel done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And truncate to an integer.
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxInt into out.
  codegen_->Load64BitValue(out, kPrimIntMax);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  // See intrinsics.h.
  if (kRoundIsPlusPointFive) {
    CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  NearLabel done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And truncate to an integer.
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxLong into out.
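  // Descriptive note: out is pre-loaded with the saturation value so that the
  // kAboveEqual branch below leaves Long.MAX_VALUE in place for the overflow
  // case, as Math.round requires.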
  codegen_->Load64BitValue(out, kPrimLongMax);

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, /* is64bit */ true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(
      invoke, LocationSummary::kCall, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls. This will ensure that they are
  // saved in the prologue and properly restored.
  for (auto fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
                          QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86_64Assembler* assembler = codegen->GetAssembler();

  __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64WordSize>(entry), true));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(
      invoke, LocationSummary::kCall, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls. This will ensure that they are
  // saved in the prologue and properly restored.
  for (auto fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(
      invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array.
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // Check to see if we have known failures that will cause us to have to bail out
  // to the runtime, and just generate the runtime call directly.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();

  // The positions must be non-negative.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // The length must be > 0.
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  LocationSummary* locations = new (arena_) LocationSummary(
      invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(RSI));
  locations->AddTemp(Location::RegisterLocation(RDI));
  locations->AddTemp(Location::RegisterLocation(RCX));
}

static void CheckPosition(X86_64Assembler* assembler,
                          Location pos,
                          CpuRegister input,
                          Location length,
                          SlowPathCode* slow_path,
                          CpuRegister input_len,
                          CpuRegister temp,
                          bool length_is_input_length = false) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(input_len, Address(input, length_offset));
      __ cmpl(input_len, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      __ leal(temp, Address(input_len, -pos_const));
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<CpuRegister>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<CpuRegister>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
  DCHECK_EQ(src_base.AsRegister(), RSI);
  CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
  DCHECK_EQ(dest_base.AsRegister(), RDI);
  CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
  DCHECK_EQ(count.AsRegister(), RCX);

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same.
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckPosition(assembler, src_pos, src, length, slow_path, src_base, dest_base);

  // Validity checks: dest.
  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base, dest_base);

  // We need the count in RCX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<CpuRegister>());
  }

  // Okay, everything checks out. Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * src_pos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (dest_pos.IsConstant()) {
    int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest_base, Address(dest, char_size * dest_pos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, dest_pos.AsRegister<CpuRegister>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
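  // Descriptive note: rep movsw copies RCX 16-bit units from [RSI] to [RDI],
  // advancing both pointers, which is why the temporaries were pinned to
  // RSI/RDI/RCX in the LocationsBuilder above.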
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}


void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
}

// TODO: Implement read barriers in the SystemArrayCopy intrinsic.
// Note that this code path is not used (yet) because we do not
// intrinsify methods that can go into the IntrinsicSlowPathX86_64
// slow path.
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  NearLabel conditions_on_positions_validated;
  SystemArrayCopyOptimizations optimizations(invoke);

  if (!optimizations.GetDestinationIsSource() &&
      (!src_pos.IsConstant() || !dest_pos.IsConstant())) {
    __ cmpl(src, dest);
  }
  // If source and destination are the same, we go to slow path if we need to do
  // forward copying.
  if (src_pos.IsConstant()) {
    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    if (dest_pos.IsConstant()) {
      // Checked when building locations.
      DCHECK(!optimizations.GetDestinationIsSource()
             || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      if (!optimizations.GetDestinationIsSource()) {
        __ j(kNotEqual, &conditions_on_positions_validated);
      }
      __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
      __ j(kGreater, slow_path->GetEntryLabel());
    }
  } else {
    if (!optimizations.GetDestinationIsSource()) {
      __ j(kNotEqual, &conditions_on_positions_validated);
    }
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
      __ j(kLess, slow_path->GetEntryLabel());
    } else {
      __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
      __ j(kLess, slow_path->GetEntryLabel());
    }
  }

  __ Bind(&conditions_on_positions_validated);

  if (!optimizations.GetSourceIsNotNull()) {
    // Bail out if the source is null.
    __ testl(src, src);
    __ j(kEqual, slow_path->GetEntryLabel());
  }

  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
    // Bail out if the destination is null.
    __ testl(dest, dest);
    __ j(kEqual, slow_path->GetEntryLabel());
  }

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant() &&
      !optimizations.GetCountIsSourceLength() &&
      !optimizations.GetCountIsDestinationLength()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckPosition(assembler, src_pos, src, length, slow_path, temp1, temp2,
                optimizations.GetCountIsSourceLength());

  // Validity checks: dest.
  CheckPosition(assembler, dest_pos, dest, length, slow_path, temp1, temp2,
                optimizations.GetCountIsDestinationLength());

  if (!optimizations.GetDoesNotNeedTypeCheck()) {
    // Check whether all elements of the source array are assignable to the component
    // type of the destination array. We do two checks: the classes are the same,
    // or the destination is Object[]. If none of these checks succeed, we go to the
    // slow path.
    __ movl(temp1, Address(dest, class_offset));
    __ movl(temp2, Address(src, class_offset));
    bool did_unpoison = false;
    if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
        !optimizations.GetSourceIsNonPrimitiveArray()) {
      // One or two of the references need to be unpoisoned. Unpoison them
      // both to make the identity check valid.
      __ MaybeUnpoisonHeapReference(temp1);
      __ MaybeUnpoisonHeapReference(temp2);
      did_unpoison = true;
    }

    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
      // Bail out if the destination is not a non primitive array.
      // /* HeapReference<Class> */ TMP = temp1->component_type_
      __ movl(CpuRegister(TMP), Address(temp1, component_offset));
      __ testl(CpuRegister(TMP), CpuRegister(TMP));
      __ j(kEqual, slow_path->GetEntryLabel());
      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, slow_path->GetEntryLabel());
    }

    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
      // Bail out if the source is not a non primitive array.
      // /* HeapReference<Class> */ TMP = temp2->component_type_
      __ movl(CpuRegister(TMP), Address(temp2, component_offset));
      __ testl(CpuRegister(TMP), CpuRegister(TMP));
      __ j(kEqual, slow_path->GetEntryLabel());
      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, slow_path->GetEntryLabel());
    }

    __ cmpl(temp1, temp2);

    if (optimizations.GetDestinationIsTypedObjectArray()) {
      NearLabel do_copy;
      __ j(kEqual, &do_copy);
      if (!did_unpoison) {
        __ MaybeUnpoisonHeapReference(temp1);
      }
      // /* HeapReference<Class> */ temp1 = temp1->component_type_
      __ movl(temp1, Address(temp1, component_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      // /* HeapReference<Class> */ temp1 = temp1->super_class_
      __ movl(temp1, Address(temp1, super_offset));
      // No need to unpoison the result, we're comparing against null.
      __ testl(temp1, temp1);
      __ j(kNotEqual, slow_path->GetEntryLabel());
      __ Bind(&do_copy);
    } else {
      __ j(kNotEqual, slow_path->GetEntryLabel());
    }
  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    // Bail out if the source is not a non primitive array.
    // /* HeapReference<Class> */ temp1 = src->klass_
    __ movl(temp1, Address(src, class_offset));
    __ MaybeUnpoisonHeapReference(temp1);
    // /* HeapReference<Class> */ TMP = temp1->component_type_
    __ movl(CpuRegister(TMP), Address(temp1, component_offset));
    __ testl(CpuRegister(TMP), CpuRegister(TMP));
    __ j(kEqual, slow_path->GetEntryLabel());
    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
    __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  }

  // Compute base source address, base destination address, and end source address.

  uint32_t element_size = sizeof(int32_t);
  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
  if (src_pos.IsConstant()) {
    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(temp1, Address(src, element_size * constant + offset));
  } else {
    __ leal(temp1, Address(src, src_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
  }

  if (dest_pos.IsConstant()) {
    int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(temp2, Address(dest, element_size * constant + offset));
  } else {
    __ leal(temp2, Address(dest, dest_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
  }

  if (length.IsConstant()) {
    int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
    __ leal(temp3, Address(temp1, element_size * constant));
  } else {
    __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
  }

  // Iterate over the arrays and do a raw copy of the objects. We don't need to
  // poison/unpoison, nor do any read barrier as the next uses of the destination
  // array will do it.
  NearLabel loop, done;
  __ cmpl(temp1, temp3);
  __ j(kEqual, &done);
  __ Bind(&loop);
  __ movl(CpuRegister(TMP), Address(temp1, 0));
  __ movl(Address(temp2, 0), CpuRegister(TMP));
  __ addl(temp1, Immediate(element_size));
  __ addl(temp2, Immediate(element_size));
  __ cmpl(temp1, temp3);
  __ j(kNotEqual, &loop);
  __ Bind(&done);

  // We only need one card marking on the destination array.
  codegen_->MarkGCCard(
      temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null */ false);

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(
      invoke, LocationSummary::kCall, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo),
                                  /* no_rip */ true));
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(
      invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Request temporary registers, RCX and RDI needed for repe_cmpsq instruction.
  locations->AddTemp(Location::RegisterLocation(RCX));
  locations->AddTemp(Location::RegisterLocation(RDI));

  // Set output, RSI needed for repe_cmpsq instruction anyways.
  locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();

  NearLabel end, return_true, return_false;

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Check if input is null, return false if it is.
  __ testl(arg, arg);
  __ j(kEqual, &return_false);

  // Instanceof check for the argument by comparing class fields.
  // All string objects must have the same type since String cannot be subclassed.
  // Receiver must be a string object, so its class field is equal to all strings' class fields.
  // If the argument is a string object, its class field must be equal to receiver's class field.
  __ movl(rcx, Address(str, class_offset));
  __ cmpl(rcx, Address(arg, class_offset));
  __ j(kNotEqual, &return_false);

  // Reference equality check, return true if same reference.
  __ cmpl(str, arg);
  __ j(kEqual, &return_true);

  // Load length of receiver string.
  __ movl(rcx, Address(str, count_offset));
  // Check if lengths are equal, return false if they're not.
  __ cmpl(rcx, Address(arg, count_offset));
  __ j(kNotEqual, &return_false);
  // Return true if both strings are empty.
  __ jrcxz(&return_true);

  // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
  __ leal(rsi, Address(str, value_offset));
  __ leal(rdi, Address(arg, value_offset));

  // Divide string length by 4 and adjust for lengths not divisible by 4.
  __ addl(rcx, Immediate(3));
  __ shrl(rcx, Immediate(2));

  // Assertions that must hold in order to compare strings 4 characters at a time.
  DCHECK_ALIGNED(value_offset, 8);
  static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");

  // Loop to compare strings four characters at a time starting at the beginning of the string.
  __ repe_cmpsq();
  // If strings are not equal, zero flag will be cleared.
  __ j(kNotEqual, &return_false);

  // Return true and exit the function.
  // If loop does not result in returning false, we return true.
  __ Bind(&return_true);
  __ movl(rsi, Immediate(1));
  __ jmp(&end);

  // Return false and exit the function.
  __ Bind(&return_false);
  __ xorl(rsi, rsi);
  __ Bind(&end);
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(
      invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // The data needs to be in RDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(RDI));
  // If we look for a constant char, we'll still have to copy it into RAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber RAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(RAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber RDI during execution anyways, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses RCX as the counter.
  locations->AddTemp(Location::RegisterLocation(RCX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86_64Assembler* assembler,
                                  CodeGeneratorX86_64* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
1505   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1506
1507   CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1508   CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1509   CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1510   CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1511   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1512
1513   // Check our assumptions for registers.
1514   DCHECK_EQ(string_obj.AsRegister(), RDI);
1515   DCHECK_EQ(search_value.AsRegister(), RAX);
1516   DCHECK_EQ(counter.AsRegister(), RCX);
1517   DCHECK_EQ(out.AsRegister(), RDI);
1518
1519   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1520   // or directly dispatch if we have a constant.
1521   SlowPathCode* slow_path = nullptr;
1522   if (invoke->InputAt(1)->IsIntConstant()) {
1523     if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
1524         std::numeric_limits<uint16_t>::max()) {
1525       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1526       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1527       slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
1528       codegen->AddSlowPath(slow_path);
1529       __ jmp(slow_path->GetEntryLabel());
1530       __ Bind(slow_path->GetExitLabel());
1531       return;
1532     }
1533   } else {
1534     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1535     slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
1536     codegen->AddSlowPath(slow_path);
1537     __ j(kAbove, slow_path->GetEntryLabel());
1538   }
1539
1540   // From here down, we know that we are looking for a char that fits in 16 bits.
1541   // Location of reference to data array within the String object.
1542   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1543   // Location of count within the String object.
1544   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1545
1546   // Load the string length, i.e., the count field of the string.
1547   __ movl(string_length, Address(string_obj, count_offset));
1548
1549   // Do a length check.
1550   // TODO: Support jecxz.
1551   NearLabel not_found_label;
1552   __ testl(string_length, string_length);
1553   __ j(kEqual, &not_found_label);
1554
1555   if (start_at_zero) {
1556     // The number of chars to scan is the same as the string length.
1557     __ movl(counter, string_length);
1558
1559     // Move to the start of the string.
1560     __ addq(string_obj, Immediate(value_offset));
1561   } else {
1562     CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1563
1564     // Do a start_index check.
1565     __ cmpl(start_index, string_length);
1566     __ j(kGreaterEqual, &not_found_label);
1567
1568     // Ensure we have a start index >= 0.
1569     __ xorl(counter, counter);
1570     __ cmpl(start_index, Immediate(0));
1571     __ cmov(kGreater, counter, start_index, /* is64bit */ false);  // 32-bit copy is enough.
1572
1573     // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1574     __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1575
1576     // Now update RCX, the work counter: it will be string.length - start_index.
1577     __ negq(counter);  // Needs to be a 64-bit negation, as the address computation is 64-bit.
1578     __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1579   }
1580
1581   // Everything is set up for repne scasw:
1582   // * Comparison address in RDI.
1583   // * Counter in ECX.
1584   __ repne_scasw();
1585
1586   // Did we find a match?
1587   __ j(kNotEqual, &not_found_label);
1588
1589   // Yes, we matched. Compute the index of the result.
1590   __ subl(string_length, counter);
1591   __ leal(out, Address(string_length, -1));
1592
1593   NearLabel done;
1594   __ jmp(&done);
1595
1596   // Failed to match; return -1.
1597   __ Bind(&not_found_label);
1598   __ movl(out, Immediate(-1));
1599
1600   // And join up at the end.
1601   __ Bind(&done);
1602   if (slow_path != nullptr) {
1603     __ Bind(slow_path->GetExitLabel());
1604   }
1605 }
1606
1607 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1608   CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
1609 }
1610
1611 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1612   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1613 }
1614
1615 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1616   CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
1617 }
1618
1619 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1620   GenerateStringIndexOf(
1621       invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1622 }
1623
1624 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1625   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1626                                                             LocationSummary::kCall,
1627                                                             kIntrinsified);
1628   InvokeRuntimeCallingConvention calling_convention;
1629   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1630   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1631   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1632   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1633   locations->SetOut(Location::RegisterLocation(RAX));
1634 }
1635
1636 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1637   X86_64Assembler* assembler = GetAssembler();
1638   LocationSummary* locations = invoke->GetLocations();
1639
1640   CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1641   __ testl(byte_array, byte_array);
1642   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1643   codegen_->AddSlowPath(slow_path);
1644   __ j(kEqual, slow_path->GetEntryLabel());
1645
1646   __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes),
1647                                   /* no_rip */ true));
1648   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1649   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1650   __ Bind(slow_path->GetExitLabel());
1651 }
1652
1653 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1654   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1655                                                             LocationSummary::kCall,
1656                                                             kIntrinsified);
1657   InvokeRuntimeCallingConvention calling_convention;
1658   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1659   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1660   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1661   locations->SetOut(Location::RegisterLocation(RAX));
1662 }
1663
1664 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke*
invoke) { 1665 X86_64Assembler* assembler = GetAssembler(); 1666 1667 // No need to emit code checking whether `locations->InAt(2)` is a null 1668 // pointer, as callers of the native method 1669 // 1670 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) 1671 // 1672 // all include a null check on `data` before calling that method. 1673 __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), 1674 /* no_rip */ true)); 1675 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); 1676 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 1677 } 1678 1679 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) { 1680 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1681 LocationSummary::kCall, 1682 kIntrinsified); 1683 InvokeRuntimeCallingConvention calling_convention; 1684 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1685 locations->SetOut(Location::RegisterLocation(RAX)); 1686 } 1687 1688 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) { 1689 X86_64Assembler* assembler = GetAssembler(); 1690 LocationSummary* locations = invoke->GetLocations(); 1691 1692 CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>(); 1693 __ testl(string_to_copy, string_to_copy); 1694 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); 1695 codegen_->AddSlowPath(slow_path); 1696 __ j(kEqual, slow_path->GetEntryLabel()); 1697 1698 __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), 1699 /* no_rip */ true)); 1700 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); 1701 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 1702 __ Bind(slow_path->GetExitLabel()); 1703 } 1704 1705 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1706 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); 1707 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1708 LocationSummary::kNoCall, 1709 kIntrinsified); 1710 locations->SetInAt(0, Location::RequiresRegister()); 1711 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); 1712 locations->SetInAt(2, Location::RequiresRegister()); 1713 locations->SetInAt(3, Location::RequiresRegister()); 1714 locations->SetInAt(4, Location::RequiresRegister()); 1715 1716 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers. 1717 locations->AddTemp(Location::RegisterLocation(RSI)); 1718 locations->AddTemp(Location::RegisterLocation(RDI)); 1719 locations->AddTemp(Location::RegisterLocation(RCX)); 1720 } 1721 1722 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1723 X86_64Assembler* assembler = GetAssembler(); 1724 LocationSummary* locations = invoke->GetLocations(); 1725 1726 size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar); 1727 // Location of data in char array buffer. 1728 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value(); 1729 // Location of char array data in string. 
1730 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); 1731 1732 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); 1733 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); 1734 Location srcBegin = locations->InAt(1); 1735 int srcBegin_value = 1736 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; 1737 CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>(); 1738 CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>(); 1739 CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>(); 1740 1741 // Check assumption that sizeof(Char) is 2 (used in scaling below). 1742 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); 1743 DCHECK_EQ(char_size, 2u); 1744 1745 // Compute the address of the destination buffer. 1746 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); 1747 1748 // Compute the address of the source string. 1749 if (srcBegin.IsConstant()) { 1750 // Compute the address of the source string by adding the number of chars from 1751 // the source beginning to the value offset of a string. 1752 __ leaq(CpuRegister(RSI), Address(obj, srcBegin_value * char_size + value_offset)); 1753 } else { 1754 __ leaq(CpuRegister(RSI), Address(obj, srcBegin.AsRegister<CpuRegister>(), 1755 ScaleFactor::TIMES_2, value_offset)); 1756 } 1757 1758 // Compute the number of chars (words) to move. 1759 __ movl(CpuRegister(RCX), srcEnd); 1760 if (srcBegin.IsConstant()) { 1761 if (srcBegin_value != 0) { 1762 __ subl(CpuRegister(RCX), Immediate(srcBegin_value)); 1763 } 1764 } else { 1765 DCHECK(srcBegin.IsRegister()); 1766 __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>()); 1767 } 1768 1769 // Do the move. 1770 __ rep_movsw(); 1771 } 1772 1773 static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { 1774 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); 1775 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity. 1776 // x86 allows unaligned access. We do not have to check the input or use specific instructions 1777 // to avoid a SIGBUS. 
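  // The switch below emits a single load of the requested width; byte and short loads are
  // sign-extended (movsxb/movsxw), matching the signed byte/short return types of the
  // corresponding peek methods.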
1778 switch (size) { 1779 case Primitive::kPrimByte: 1780 __ movsxb(out, Address(address, 0)); 1781 break; 1782 case Primitive::kPrimShort: 1783 __ movsxw(out, Address(address, 0)); 1784 break; 1785 case Primitive::kPrimInt: 1786 __ movl(out, Address(address, 0)); 1787 break; 1788 case Primitive::kPrimLong: 1789 __ movq(out, Address(address, 0)); 1790 break; 1791 default: 1792 LOG(FATAL) << "Type not recognized for peek: " << size; 1793 UNREACHABLE(); 1794 } 1795 } 1796 1797 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) { 1798 CreateIntToIntLocations(arena_, invoke); 1799 } 1800 1801 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) { 1802 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); 1803 } 1804 1805 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) { 1806 CreateIntToIntLocations(arena_, invoke); 1807 } 1808 1809 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) { 1810 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); 1811 } 1812 1813 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) { 1814 CreateIntToIntLocations(arena_, invoke); 1815 } 1816 1817 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) { 1818 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); 1819 } 1820 1821 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) { 1822 CreateIntToIntLocations(arena_, invoke); 1823 } 1824 1825 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) { 1826 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); 1827 } 1828 1829 static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { 1830 LocationSummary* locations = new (arena) LocationSummary(invoke, 1831 LocationSummary::kNoCall, 1832 kIntrinsified); 1833 locations->SetInAt(0, Location::RequiresRegister()); 1834 locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1))); 1835 } 1836 1837 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { 1838 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); 1839 Location value = locations->InAt(1); 1840 // x86 allows unaligned access. We do not have to check the input or use specific instructions 1841 // to avoid a SIGBUS. 
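  // The value operand may be a register or a constant. Constants are emitted as immediates;
  // in the 64-bit case only constants representable in 32 bits are expected (see the DCHECK
  // below), since the 64-bit store to memory takes a sign-extended 32-bit immediate.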
1842 switch (size) { 1843 case Primitive::kPrimByte: 1844 if (value.IsConstant()) { 1845 __ movb(Address(address, 0), 1846 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); 1847 } else { 1848 __ movb(Address(address, 0), value.AsRegister<CpuRegister>()); 1849 } 1850 break; 1851 case Primitive::kPrimShort: 1852 if (value.IsConstant()) { 1853 __ movw(Address(address, 0), 1854 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); 1855 } else { 1856 __ movw(Address(address, 0), value.AsRegister<CpuRegister>()); 1857 } 1858 break; 1859 case Primitive::kPrimInt: 1860 if (value.IsConstant()) { 1861 __ movl(Address(address, 0), 1862 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); 1863 } else { 1864 __ movl(Address(address, 0), value.AsRegister<CpuRegister>()); 1865 } 1866 break; 1867 case Primitive::kPrimLong: 1868 if (value.IsConstant()) { 1869 int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); 1870 DCHECK(IsInt<32>(v)); 1871 int32_t v_32 = v; 1872 __ movq(Address(address, 0), Immediate(v_32)); 1873 } else { 1874 __ movq(Address(address, 0), value.AsRegister<CpuRegister>()); 1875 } 1876 break; 1877 default: 1878 LOG(FATAL) << "Type not recognized for poke: " << size; 1879 UNREACHABLE(); 1880 } 1881 } 1882 1883 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) { 1884 CreateIntIntToVoidLocations(arena_, invoke); 1885 } 1886 1887 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) { 1888 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); 1889 } 1890 1891 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) { 1892 CreateIntIntToVoidLocations(arena_, invoke); 1893 } 1894 1895 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) { 1896 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); 1897 } 1898 1899 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) { 1900 CreateIntIntToVoidLocations(arena_, invoke); 1901 } 1902 1903 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) { 1904 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); 1905 } 1906 1907 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) { 1908 CreateIntIntToVoidLocations(arena_, invoke); 1909 } 1910 1911 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) { 1912 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); 1913 } 1914 1915 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) { 1916 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1917 LocationSummary::kNoCall, 1918 kIntrinsified); 1919 locations->SetOut(Location::RequiresRegister()); 1920 } 1921 1922 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { 1923 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>(); 1924 GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), 1925 /* no_rip */ true)); 1926 } 1927 1928 static void GenUnsafeGet(HInvoke* invoke, 1929 Primitive::Type type, 1930 bool is_volatile ATTRIBUTE_UNUSED, 1931 CodeGeneratorX86_64* codegen) { 1932 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); 1933 LocationSummary* locations = invoke->GetLocations(); 1934 Location base_loc = locations->InAt(1); 1935 CpuRegister base = base_loc.AsRegister<CpuRegister>(); 1936 Location offset_loc = 
locations->InAt(2); 1937 CpuRegister offset = offset_loc.AsRegister<CpuRegister>(); 1938 Location output_loc = locations->Out(); 1939 CpuRegister output = output_loc.AsRegister<CpuRegister>(); 1940 1941 switch (type) { 1942 case Primitive::kPrimInt: 1943 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 1944 break; 1945 1946 case Primitive::kPrimNot: { 1947 if (kEmitCompilerReadBarrier) { 1948 if (kUseBakerReadBarrier) { 1949 Location temp = locations->GetTemp(0); 1950 codegen->GenerateArrayLoadWithBakerReadBarrier( 1951 invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); 1952 } else { 1953 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 1954 codegen->GenerateReadBarrierSlow( 1955 invoke, output_loc, output_loc, base_loc, 0U, offset_loc); 1956 } 1957 } else { 1958 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 1959 __ MaybeUnpoisonHeapReference(output); 1960 } 1961 break; 1962 } 1963 1964 case Primitive::kPrimLong: 1965 __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 1966 break; 1967 1968 default: 1969 LOG(FATAL) << "Unsupported op size " << type; 1970 UNREACHABLE(); 1971 } 1972 } 1973 1974 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, 1975 HInvoke* invoke, 1976 Primitive::Type type) { 1977 bool can_call = kEmitCompilerReadBarrier && 1978 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || 1979 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); 1980 LocationSummary* locations = new (arena) LocationSummary(invoke, 1981 can_call ? 1982 LocationSummary::kCallOnSlowPath : 1983 LocationSummary::kNoCall, 1984 kIntrinsified); 1985 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 1986 locations->SetInAt(1, Location::RequiresRegister()); 1987 locations->SetInAt(2, Location::RequiresRegister()); 1988 locations->SetOut(Location::RequiresRegister()); 1989 if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 1990 // We need a temporary register for the read barrier marking slow 1991 // path in InstructionCodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier. 
1992 locations->AddTemp(Location::RequiresRegister()); 1993 } 1994 } 1995 1996 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { 1997 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); 1998 } 1999 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { 2000 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); 2001 } 2002 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) { 2003 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); 2004 } 2005 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 2006 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); 2007 } 2008 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) { 2009 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); 2010 } 2011 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 2012 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); 2013 } 2014 2015 2016 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) { 2017 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); 2018 } 2019 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { 2020 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); 2021 } 2022 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) { 2023 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); 2024 } 2025 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 2026 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); 2027 } 2028 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) { 2029 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); 2030 } 2031 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 2032 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); 2033 } 2034 2035 2036 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena, 2037 Primitive::Type type, 2038 HInvoke* invoke) { 2039 LocationSummary* locations = new (arena) LocationSummary(invoke, 2040 LocationSummary::kNoCall, 2041 kIntrinsified); 2042 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 2043 locations->SetInAt(1, Location::RequiresRegister()); 2044 locations->SetInAt(2, Location::RequiresRegister()); 2045 locations->SetInAt(3, Location::RequiresRegister()); 2046 if (type == Primitive::kPrimNot) { 2047 // Need temp registers for card-marking. 2048 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 
2049 locations->AddTemp(Location::RequiresRegister()); 2050 } 2051 } 2052 2053 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) { 2054 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); 2055 } 2056 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { 2057 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); 2058 } 2059 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { 2060 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); 2061 } 2062 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) { 2063 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); 2064 } 2065 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 2066 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); 2067 } 2068 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 2069 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); 2070 } 2071 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) { 2072 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); 2073 } 2074 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { 2075 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); 2076 } 2077 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { 2078 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); 2079 } 2080 2081 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 2082 // memory model. 2083 static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile, 2084 CodeGeneratorX86_64* codegen) { 2085 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); 2086 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); 2087 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); 2088 CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>(); 2089 2090 if (type == Primitive::kPrimLong) { 2091 __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value); 2092 } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) { 2093 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); 2094 __ movl(temp, value); 2095 __ PoisonHeapReference(temp); 2096 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp); 2097 } else { 2098 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value); 2099 } 2100 2101 if (is_volatile) { 2102 codegen->MemoryFence(); 2103 } 2104 2105 if (type == Primitive::kPrimNot) { 2106 bool value_can_be_null = true; // TODO: Worth finding out this information? 
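    // Mark the GC card for `base`: the reference store may create a pointer that the garbage
    // collector must later be able to find via the card table.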
2107 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), 2108 locations->GetTemp(1).AsRegister<CpuRegister>(), 2109 base, 2110 value, 2111 value_can_be_null); 2112 } 2113 } 2114 2115 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) { 2116 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); 2117 } 2118 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { 2119 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); 2120 } 2121 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { 2122 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_); 2123 } 2124 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) { 2125 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); 2126 } 2127 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 2128 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); 2129 } 2130 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 2131 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_); 2132 } 2133 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) { 2134 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); 2135 } 2136 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { 2137 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); 2138 } 2139 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { 2140 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_); 2141 } 2142 2143 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, 2144 HInvoke* invoke) { 2145 LocationSummary* locations = new (arena) LocationSummary(invoke, 2146 LocationSummary::kNoCall, 2147 kIntrinsified); 2148 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 2149 locations->SetInAt(1, Location::RequiresRegister()); 2150 locations->SetInAt(2, Location::RequiresRegister()); 2151 // expected value must be in EAX/RAX. 2152 locations->SetInAt(3, Location::RegisterLocation(RAX)); 2153 locations->SetInAt(4, Location::RequiresRegister()); 2154 2155 locations->SetOut(Location::RequiresRegister()); 2156 if (type == Primitive::kPrimNot) { 2157 // Need temp registers for card-marking. 2158 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 2159 locations->AddTemp(Location::RequiresRegister()); 2160 } 2161 } 2162 2163 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) { 2164 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); 2165 } 2166 2167 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { 2168 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); 2169 } 2170 2171 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { 2172 // The UnsafeCASObject intrinsic is missing a read barrier, and 2173 // therefore sometimes does not work as expected (b/25883050). 2174 // Turn it off temporarily as a quick fix, until the read barrier is 2175 // implemented. 2176 // 2177 // TODO(rpl): Implement a read barrier in GenCAS below and re-enable 2178 // this intrinsic. 
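  // Without such a read barrier, the reference loaded from the field and the expected reference
  // may point to different copies of the same object (from-space vs. to-space), so the compare
  // part of the CAS could fail spuriously.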
2179 if (kEmitCompilerReadBarrier) { 2180 return; 2181 } 2182 2183 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); 2184 } 2185 2186 static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) { 2187 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); 2188 LocationSummary* locations = invoke->GetLocations(); 2189 2190 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); 2191 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); 2192 CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>(); 2193 // Ensure `expected` is in RAX (required by the CMPXCHG instruction). 2194 DCHECK_EQ(expected.AsRegister(), RAX); 2195 CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>(); 2196 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2197 2198 if (type == Primitive::kPrimNot) { 2199 // Mark card for object assuming new value is stored. 2200 bool value_can_be_null = true; // TODO: Worth finding out this information? 2201 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), 2202 locations->GetTemp(1).AsRegister<CpuRegister>(), 2203 base, 2204 value, 2205 value_can_be_null); 2206 2207 bool base_equals_value = (base.AsRegister() == value.AsRegister()); 2208 Register value_reg = value.AsRegister(); 2209 if (kPoisonHeapReferences) { 2210 if (base_equals_value) { 2211 // If `base` and `value` are the same register location, move 2212 // `value_reg` to a temporary register. This way, poisoning 2213 // `value_reg` won't invalidate `base`. 2214 value_reg = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister(); 2215 __ movl(CpuRegister(value_reg), base); 2216 } 2217 2218 // Check that the register allocator did not assign the location 2219 // of `expected` (RAX) to `value` nor to `base`, so that heap 2220 // poisoning (when enabled) works as intended below. 2221 // - If `value` were equal to `expected`, both references would 2222 // be poisoned twice, meaning they would not be poisoned at 2223 // all, as heap poisoning uses address negation. 2224 // - If `base` were equal to `expected`, poisoning `expected` 2225 // would invalidate `base`. 2226 DCHECK_NE(value_reg, expected.AsRegister()); 2227 DCHECK_NE(base.AsRegister(), expected.AsRegister()); 2228 2229 __ PoisonHeapReference(expected); 2230 __ PoisonHeapReference(CpuRegister(value_reg)); 2231 } 2232 2233 // TODO: Add a read barrier for the reference stored in the object 2234 // before attempting the CAS, similar to the one in the 2235 // art::Unsafe_compareAndSwapObject JNI implementation. 2236 // 2237 // Note that this code is not (yet) used when read barriers are 2238 // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject). 2239 DCHECK(!kEmitCompilerReadBarrier); 2240 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg)); 2241 2242 // LOCK CMPXCHG has full barrier semantics, and we don't need 2243 // scheduling barriers at this time. 2244 2245 // Convert ZF into the boolean result. 2246 __ setcc(kZero, out); 2247 __ movzxb(out, out); 2248 2249 // If heap poisoning is enabled, we need to unpoison the values 2250 // that were poisoned earlier. 2251 if (kPoisonHeapReferences) { 2252 if (base_equals_value) { 2253 // `value_reg` has been moved to a temporary register, no need 2254 // to unpoison it. 2255 } else { 2256 // Ensure `value` is different from `out`, so that unpoisoning 2257 // the former does not invalidate the latter. 
2258 DCHECK_NE(value_reg, out.AsRegister()); 2259 __ UnpoisonHeapReference(CpuRegister(value_reg)); 2260 } 2261 // Ensure `expected` is different from `out`, so that unpoisoning 2262 // the former does not invalidate the latter. 2263 DCHECK_NE(expected.AsRegister(), out.AsRegister()); 2264 __ UnpoisonHeapReference(expected); 2265 } 2266 } else { 2267 if (type == Primitive::kPrimInt) { 2268 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); 2269 } else if (type == Primitive::kPrimLong) { 2270 __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value); 2271 } else { 2272 LOG(FATAL) << "Unexpected CAS type " << type; 2273 } 2274 2275 // LOCK CMPXCHG has full barrier semantics, and we don't need 2276 // scheduling barriers at this time. 2277 2278 // Convert ZF into the boolean result. 2279 __ setcc(kZero, out); 2280 __ movzxb(out, out); 2281 } 2282 } 2283 2284 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) { 2285 GenCAS(Primitive::kPrimInt, invoke, codegen_); 2286 } 2287 2288 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { 2289 GenCAS(Primitive::kPrimLong, invoke, codegen_); 2290 } 2291 2292 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { 2293 GenCAS(Primitive::kPrimNot, invoke, codegen_); 2294 } 2295 2296 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) { 2297 LocationSummary* locations = new (arena_) LocationSummary(invoke, 2298 LocationSummary::kNoCall, 2299 kIntrinsified); 2300 locations->SetInAt(0, Location::RequiresRegister()); 2301 locations->SetOut(Location::SameAsFirstInput()); 2302 locations->AddTemp(Location::RequiresRegister()); 2303 } 2304 2305 static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask, 2306 X86_64Assembler* assembler) { 2307 Immediate imm_shift(shift); 2308 Immediate imm_mask(mask); 2309 __ movl(temp, reg); 2310 __ shrl(reg, imm_shift); 2311 __ andl(temp, imm_mask); 2312 __ andl(reg, imm_mask); 2313 __ shll(temp, imm_shift); 2314 __ orl(reg, temp); 2315 } 2316 2317 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) { 2318 X86_64Assembler* assembler = GetAssembler(); 2319 LocationSummary* locations = invoke->GetLocations(); 2320 2321 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); 2322 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); 2323 2324 /* 2325 * Use one bswap instruction to reverse byte order first and then use 3 rounds of 2326 * swapping bits to reverse bits in a number x. Using bswap to save instructions 2327 * compared to generic luni implementation which has 5 rounds of swapping bits. 
2328 * x = bswap x 2329 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; 2330 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; 2331 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; 2332 */ 2333 __ bswapl(reg); 2334 SwapBits(reg, temp, 1, 0x55555555, assembler); 2335 SwapBits(reg, temp, 2, 0x33333333, assembler); 2336 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); 2337 } 2338 2339 void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) { 2340 LocationSummary* locations = new (arena_) LocationSummary(invoke, 2341 LocationSummary::kNoCall, 2342 kIntrinsified); 2343 locations->SetInAt(0, Location::RequiresRegister()); 2344 locations->SetOut(Location::SameAsFirstInput()); 2345 locations->AddTemp(Location::RequiresRegister()); 2346 locations->AddTemp(Location::RequiresRegister()); 2347 } 2348 2349 static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask, 2350 int32_t shift, int64_t mask, X86_64Assembler* assembler) { 2351 Immediate imm_shift(shift); 2352 __ movq(temp_mask, Immediate(mask)); 2353 __ movq(temp, reg); 2354 __ shrq(reg, imm_shift); 2355 __ andq(temp, temp_mask); 2356 __ andq(reg, temp_mask); 2357 __ shlq(temp, imm_shift); 2358 __ orq(reg, temp); 2359 } 2360 2361 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) { 2362 X86_64Assembler* assembler = GetAssembler(); 2363 LocationSummary* locations = invoke->GetLocations(); 2364 2365 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); 2366 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); 2367 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); 2368 2369 /* 2370 * Use one bswap instruction to reverse byte order first and then use 3 rounds of 2371 * swapping bits to reverse bits in a long number x. Using bswap to save instructions 2372 * compared to generic luni implementation which has 5 rounds of swapping bits. 2373 * x = bswap x 2374 * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555; 2375 * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333; 2376 * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F; 2377 */ 2378 __ bswapq(reg); 2379 SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler); 2380 SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler); 2381 SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler); 2382 } 2383 2384 static void CreateBitCountLocations( 2385 ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) { 2386 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) { 2387 // Do nothing if there is no popcnt support. This results in generating 2388 // a call for the intrinsic rather than direct code. 2389 return; 2390 } 2391 LocationSummary* locations = new (arena) LocationSummary(invoke, 2392 LocationSummary::kNoCall, 2393 kIntrinsified); 2394 locations->SetInAt(0, Location::Any()); 2395 locations->SetOut(Location::RequiresRegister()); 2396 } 2397 2398 static void GenBitCount(X86_64Assembler* assembler, 2399 CodeGeneratorX86_64* codegen, 2400 HInvoke* invoke, 2401 bool is_long) { 2402 LocationSummary* locations = invoke->GetLocations(); 2403 Location src = locations->InAt(0); 2404 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2405 2406 if (invoke->InputAt(0)->IsConstant()) { 2407 // Evaluate this at compile time. 2408 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2409 int32_t result = is_long 2410 ? 
POPCOUNT(static_cast<uint64_t>(value)) 2411 : POPCOUNT(static_cast<uint32_t>(value)); 2412 codegen->Load32BitValue(out, result); 2413 return; 2414 } 2415 2416 if (src.IsRegister()) { 2417 if (is_long) { 2418 __ popcntq(out, src.AsRegister<CpuRegister>()); 2419 } else { 2420 __ popcntl(out, src.AsRegister<CpuRegister>()); 2421 } 2422 } else if (is_long) { 2423 DCHECK(src.IsDoubleStackSlot()); 2424 __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex())); 2425 } else { 2426 DCHECK(src.IsStackSlot()); 2427 __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex())); 2428 } 2429 } 2430 2431 void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) { 2432 CreateBitCountLocations(arena_, codegen_, invoke); 2433 } 2434 2435 void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) { 2436 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false); 2437 } 2438 2439 void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) { 2440 CreateBitCountLocations(arena_, codegen_, invoke); 2441 } 2442 2443 void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) { 2444 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true); 2445 } 2446 2447 static void CreateOneBitLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_high) { 2448 LocationSummary* locations = new (arena) LocationSummary(invoke, 2449 LocationSummary::kNoCall, 2450 kIntrinsified); 2451 locations->SetInAt(0, Location::Any()); 2452 locations->SetOut(Location::RequiresRegister()); 2453 locations->AddTemp(is_high ? Location::RegisterLocation(RCX) // needs CL 2454 : Location::RequiresRegister()); // any will do 2455 } 2456 2457 static void GenOneBit(X86_64Assembler* assembler, 2458 CodeGeneratorX86_64* codegen, 2459 HInvoke* invoke, 2460 bool is_high, bool is_long) { 2461 LocationSummary* locations = invoke->GetLocations(); 2462 Location src = locations->InAt(0); 2463 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2464 2465 if (invoke->InputAt(0)->IsConstant()) { 2466 // Evaluate this at compile time. 2467 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2468 if (value == 0) { 2469 __ xorl(out, out); // Clears upper bits too. 2470 return; 2471 } 2472 // Nonzero value. 2473 if (is_high) { 2474 value = is_long ? 63 - CLZ(static_cast<uint64_t>(value)) 2475 : 31 - CLZ(static_cast<uint32_t>(value)); 2476 } else { 2477 value = is_long ? CTZ(static_cast<uint64_t>(value)) 2478 : CTZ(static_cast<uint32_t>(value)); 2479 } 2480 if (is_long) { 2481 codegen->Load64BitValue(out, 1L << value); 2482 } else { 2483 codegen->Load32BitValue(out, 1 << value); 2484 } 2485 return; 2486 } 2487 2488 // Handle the non-constant cases. 2489 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); 2490 if (is_high) { 2491 // Use architectural support: basically 1 << bsr. 2492 if (src.IsRegister()) { 2493 if (is_long) { 2494 __ bsrq(tmp, src.AsRegister<CpuRegister>()); 2495 } else { 2496 __ bsrl(tmp, src.AsRegister<CpuRegister>()); 2497 } 2498 } else if (is_long) { 2499 DCHECK(src.IsDoubleStackSlot()); 2500 __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); 2501 } else { 2502 DCHECK(src.IsStackSlot()); 2503 __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); 2504 } 2505 // BSR sets ZF if the input was zero. 2506 NearLabel is_zero, done; 2507 __ j(kEqual, &is_zero); 2508 __ movl(out, Immediate(1)); // Clears upper bits too. 
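    // Shift the 1 left by the bit index that BSR found, producing the highest one bit.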
2509 if (is_long) { 2510 __ shlq(out, tmp); 2511 } else { 2512 __ shll(out, tmp); 2513 } 2514 __ jmp(&done); 2515 __ Bind(&is_zero); 2516 __ xorl(out, out); // Clears upper bits too. 2517 __ Bind(&done); 2518 } else { 2519 // Copy input into temporary. 2520 if (src.IsRegister()) { 2521 if (is_long) { 2522 __ movq(tmp, src.AsRegister<CpuRegister>()); 2523 } else { 2524 __ movl(tmp, src.AsRegister<CpuRegister>()); 2525 } 2526 } else if (is_long) { 2527 DCHECK(src.IsDoubleStackSlot()); 2528 __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); 2529 } else { 2530 DCHECK(src.IsStackSlot()); 2531 __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); 2532 } 2533 // Do the bit twiddling: basically tmp & -tmp; 2534 if (is_long) { 2535 __ movq(out, tmp); 2536 __ negq(tmp); 2537 __ andq(out, tmp); 2538 } else { 2539 __ movl(out, tmp); 2540 __ negl(tmp); 2541 __ andl(out, tmp); 2542 } 2543 } 2544 } 2545 2546 void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) { 2547 CreateOneBitLocations(arena_, invoke, /* is_high */ true); 2548 } 2549 2550 void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) { 2551 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ false); 2552 } 2553 2554 void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) { 2555 CreateOneBitLocations(arena_, invoke, /* is_high */ true); 2556 } 2557 2558 void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) { 2559 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ true); 2560 } 2561 2562 void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) { 2563 CreateOneBitLocations(arena_, invoke, /* is_high */ false); 2564 } 2565 2566 void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) { 2567 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ false); 2568 } 2569 2570 void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) { 2571 CreateOneBitLocations(arena_, invoke, /* is_high */ false); 2572 } 2573 2574 void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) { 2575 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ true); 2576 } 2577 2578 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) { 2579 LocationSummary* locations = new (arena) LocationSummary(invoke, 2580 LocationSummary::kNoCall, 2581 kIntrinsified); 2582 locations->SetInAt(0, Location::Any()); 2583 locations->SetOut(Location::RequiresRegister()); 2584 } 2585 2586 static void GenLeadingZeros(X86_64Assembler* assembler, 2587 CodeGeneratorX86_64* codegen, 2588 HInvoke* invoke, bool is_long) { 2589 LocationSummary* locations = invoke->GetLocations(); 2590 Location src = locations->InAt(0); 2591 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2592 2593 int zero_value_result = is_long ? 64 : 32; 2594 if (invoke->InputAt(0)->IsConstant()) { 2595 // Evaluate this at compile time. 2596 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2597 if (value == 0) { 2598 value = zero_value_result; 2599 } else { 2600 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value)); 2601 } 2602 codegen->Load32BitValue(out, value); 2603 return; 2604 } 2605 2606 // Handle the non-constant cases. 
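  // BSR yields the index of the most significant set bit; for a nonzero input the leading-zero
  // count is (width - 1) - index, which the XOR with (width - 1) below computes.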
2607 if (src.IsRegister()) { 2608 if (is_long) { 2609 __ bsrq(out, src.AsRegister<CpuRegister>()); 2610 } else { 2611 __ bsrl(out, src.AsRegister<CpuRegister>()); 2612 } 2613 } else if (is_long) { 2614 DCHECK(src.IsDoubleStackSlot()); 2615 __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex())); 2616 } else { 2617 DCHECK(src.IsStackSlot()); 2618 __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex())); 2619 } 2620 2621 // BSR sets ZF if the input was zero, and the output is undefined. 2622 NearLabel is_zero, done; 2623 __ j(kEqual, &is_zero); 2624 2625 // Correct the result from BSR to get the CLZ result. 2626 __ xorl(out, Immediate(zero_value_result - 1)); 2627 __ jmp(&done); 2628 2629 // Fix the zero case with the expected result. 2630 __ Bind(&is_zero); 2631 __ movl(out, Immediate(zero_value_result)); 2632 2633 __ Bind(&done); 2634 } 2635 2636 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 2637 CreateLeadingZeroLocations(arena_, invoke); 2638 } 2639 2640 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 2641 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); 2642 } 2643 2644 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 2645 CreateLeadingZeroLocations(arena_, invoke); 2646 } 2647 2648 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 2649 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); 2650 } 2651 2652 static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) { 2653 LocationSummary* locations = new (arena) LocationSummary(invoke, 2654 LocationSummary::kNoCall, 2655 kIntrinsified); 2656 locations->SetInAt(0, Location::Any()); 2657 locations->SetOut(Location::RequiresRegister()); 2658 } 2659 2660 static void GenTrailingZeros(X86_64Assembler* assembler, 2661 CodeGeneratorX86_64* codegen, 2662 HInvoke* invoke, bool is_long) { 2663 LocationSummary* locations = invoke->GetLocations(); 2664 Location src = locations->InAt(0); 2665 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2666 2667 int zero_value_result = is_long ? 64 : 32; 2668 if (invoke->InputAt(0)->IsConstant()) { 2669 // Evaluate this at compile time. 2670 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2671 if (value == 0) { 2672 value = zero_value_result; 2673 } else { 2674 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value)); 2675 } 2676 codegen->Load32BitValue(out, value); 2677 return; 2678 } 2679 2680 // Handle the non-constant cases. 2681 if (src.IsRegister()) { 2682 if (is_long) { 2683 __ bsfq(out, src.AsRegister<CpuRegister>()); 2684 } else { 2685 __ bsfl(out, src.AsRegister<CpuRegister>()); 2686 } 2687 } else if (is_long) { 2688 DCHECK(src.IsDoubleStackSlot()); 2689 __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex())); 2690 } else { 2691 DCHECK(src.IsStackSlot()); 2692 __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex())); 2693 } 2694 2695 // BSF sets ZF if the input was zero, and the output is undefined. 2696 NearLabel done; 2697 __ j(kNotEqual, &done); 2698 2699 // Fix the zero case with the expected result. 
2700   __ movl(out, Immediate(zero_value_result));
2701
2702   __ Bind(&done);
2703 }
2704
2705 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2706   CreateTrailingZeroLocations(arena_, invoke);
2707 }
2708
2709 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2710   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
2711 }
2712
2713 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2714   CreateTrailingZeroLocations(arena_, invoke);
2715 }
2716
2717 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2718   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
2719 }
2720
2721 UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
2722 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
2723 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
2724
2725 // Java 1.8 intrinsics.
2726 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
2727 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
2728 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
2729 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
2730 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)
2731
2732 UNREACHABLE_INTRINSICS(X86_64)
2733
2734 #undef __
2735
2736 }  // namespace x86_64
2737 }  // namespace art
2738