/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86.h"

#include <limits>

#include "arch/x86/instruction_set_features_x86.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"

namespace art {

namespace x86 {

static constexpr int kDoubleNaNHigh = 0x7FF80000;
static constexpr int kDoubleNaNLow = 0x00000000;
static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);

IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
    : arena_(codegen->GetGraph()->GetArena()),
      codegen_(codegen) {
}


X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
  return down_cast<X86Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  if (kEmitCompilerReadBarrier && res->CanCall()) {
    // Generating an intrinsic for this HInvoke may produce an
    // IntrinsicSlowPathX86 slow path. Currently this approach
    // does not work when using read barriers, as the emitted
    // calling sequence will make use of another slow path
    // (ReadBarrierForRootSlowPathX86 for HInvokeStaticOrDirect,
    // ReadBarrierSlowPathX86 for HInvokeVirtual). So we bail
    // out in this case.
    //
    // TODO: Find a way to have intrinsics work with read barriers.
    invoke->SetLocations(nullptr);
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    __ movd(output.AsRegisterPairLow<Register>(), temp);
    __ psrlq(temp, Immediate(32));
    __ movd(output.AsRegisterPairHigh<Register>(), temp);
  } else {
    __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    __ movd(temp1, input.AsRegisterPairLow<Register>());
    __ movd(temp2, input.AsRegisterPairHigh<Register>());
    __ punpckldq(temp1, temp2);
    __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
  } else {
    __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
  }
}

void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, /* is64bit */ true);
}
void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, /* is64bit */ true);
}

void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, /* is64bit */ false);
}
void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, /* is64bit */ false);
}

void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86Assembler* assembler) {
  Register out = locations->Out().AsRegister<Register>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
  CreateLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();

  X86Assembler* assembler = GetAssembler();
  // Assign the inputs to the outputs, mixing low/high.
  __ movl(output_lo, input_hi);
  __ movl(output_hi, input_lo);
  __ bswapl(output_lo);
  __ bswapl(output_hi);
}

void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    // We need addressability for the constant area.
    locations->SetInAt(1, Location::RequiresRegister());
    // We need a temporary to hold the constant.
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86Assembler* assembler,
                      CodeGeneratorX86* codegen) {
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    DCHECK(locations->InAt(1).IsRegister());
    // We also have a constant area pointer.
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    if (is64bit) {
      __ movsd(temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF), constant_area));
      __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
    } else {
      __ movss(temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF), constant_area));
      __ andps(output.AsFpuRegister<XmmRegister>(), temp);
    }
  } else {
    // Create the right constant on an aligned stack.
    if (is64bit) {
      __ subl(ESP, Immediate(8));
      __ pushl(Immediate(0x7FFFFFFF));
      __ pushl(Immediate(0xFFFFFFFF));
      __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    } else {
      __ subl(ESP, Immediate(12));
      __ pushl(Immediate(0x7FFFFFFF));
      __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    }
    __ addl(ESP, Immediate(16));
  }
}

void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RegisterLocation(EAX));
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RegisterLocation(EDX));
}

static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
  Location output = locations->Out();
  Register out = output.AsRegister<Register>();
  DCHECK_EQ(out, EAX);
  Register temp = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(temp, EDX);

  // Sign extend EAX into EDX.
  __ cdq();

  // XOR EAX with sign.
  __ xorl(EAX, EDX);

  // Subtract out sign to correct.
  __ subl(EAX, EDX);

  // The result is in EAX.
}

static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

  // Compute the sign into the temporary.
  __ movl(temp, input_hi);
  __ sarl(temp, Immediate(31));

  // Store the sign into the output.
  __ movl(output_lo, temp);
  __ movl(output_hi, temp);

  // XOR the input to the output.
  __ xorl(output_lo, input_lo);
  __ xorl(output_hi, input_hi);

  // Subtract the sign.
  __ subl(output_lo, temp);
  __ sbbl(output_hi, temp);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
  CreateAbsIntLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
  CreateAbsLongLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsLong(invoke->GetLocations(), GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86Assembler* assembler,
                        CodeGeneratorX86* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  // Do we have a constant area pointer?
  if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
    DCHECK(locations->InAt(2).IsRegister());
    Register constant_area = locations->InAt(2).AsRegister<Register>();
    if (is_double) {
      __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, constant_area));
    } else {
      __ movss(out, codegen->LiteralInt32Address(kFloatNaN, constant_area));
    }
  } else {
    if (is_double) {
      __ pushl(Immediate(kDoubleNaNHigh));
      __ pushl(Immediate(kDoubleNaNLow));
      __ movsd(out, Address(ESP, 0));
      __ addl(ESP, Immediate(8));
    } else {
      __ pushl(Immediate(kFloatNaN));
      __ movss(out, Address(ESP, 0));
      __ addl(ESP, Immediate(4));
    }
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    locations->SetInAt(2, Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(),
              /* is_min */ true,
              /* is_double */ true,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(),
              /* is_min */ true,
              /* is_double */ false,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(),
              /* is_min */ false,
              /* is_double */ true,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(),
              /* is_min */ false,
              /* is_double */ false,
              GetAssembler(),
              codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  if (is_long) {
    // Need to perform a subtract to get the sign right.
    // op1 is already in the same location as the output.
    Location output = locations->Out();
    Register output_lo = output.AsRegisterPairLow<Register>();
    Register output_hi = output.AsRegisterPairHigh<Register>();

    Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
    Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();

    // Spare register to compute the subtraction to set condition code.
    Register temp = locations->GetTemp(0).AsRegister<Register>();

    // Subtract off op2_low.
    __ movl(temp, output_lo);
    __ subl(temp, op2_lo);

    // Now use the same temporary and the borrow to finish the subtraction of op2_hi.
    __ movl(temp, output_hi);
    __ sbbl(temp, op2_hi);

    // Now the condition code is correct.
    Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
    __ cmovl(cond, output_lo, op2_lo);
    __ cmovl(cond, output_hi, op2_hi);
  } else {
    Register out = locations->Out().AsRegister<Register>();
    Register op2 = op2_loc.AsRegister<Register>();

    //  (out := op1)
    //  out <=? op2
    //  if out is min jmp done
    //  out := op2
    // done:

    __ cmpl(out, op2);
    Condition cond = is_min ? Condition::kGreater : Condition::kLess;
    __ cmovl(cond, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  // Register to use to perform a long subtract to set cc.
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
                                      Location::RegisterLocation(EAX));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
                                    HInvoke* invoke,
                                    X86Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble,
// as it needs 64 bit instructions.
void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
  // See intrinsics.h.
  if (!kRoundIsPlusPointFive) {
    return;
  }

  // Do we have instruction support?
  if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                              LocationSummary::kNoCall,
                                                              kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(EAX));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f);  convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  Register out = locations->Out().AsRegister<Register>();
  XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel done, nan;
  X86Assembler* assembler = GetAssembler();

  // Generate 0.5 into inPlusPointFive.
  __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
  __ movd(inPlusPointFive, out);

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And floor to an integer (round toward negative infinity).
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  __ movl(out, Immediate(kPrimIntMax));
  // maxInt = int-to-float(out)
  __ cvtsi2ss(maxInt, out);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, maxInt);
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                      HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86Assembler* assembler = codegen->GetAssembler();

  // We need some place to pass the parameters.
  __ subl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(16);

  // Pass the parameters at the bottom of the stack.
  __ movsd(Address(ESP, 0), XMM0);

  // If we have a second parameter, pass it next.
  if (invoke->GetNumberOfArguments() == 2) {
    __ movsd(Address(ESP, 8), XMM1);
  }

  // Now do the actual call.
  __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(entry)));

  // Extract the return value from the FP stack.
  __ fstpl(Address(ESP, 0));
  __ movsd(XMM0, Address(ESP, 0));

  // And clean up the stack.
  __ addl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(-16);

  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                        HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array.
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  Register obj = locations->InAt(0).AsRegister<Register>();
  Register idx = locations->InAt(1).AsRegister<Register>();
  Register out = locations->Out().AsRegister<Register>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);

  X86Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // We need at least two of the positions or length to be an integer constant,
  // or else we won't have enough free registers.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();

  int num_constants =
      ((src_pos != nullptr) ? 1 : 0)
      + ((dest_pos != nullptr) ? 1 : 0)
      + ((length != nullptr) ? 1 : 0);

  if (num_constants < 2) {
    // Not enough free registers.
    return;
  }

  // As long as we are checking, we might as well check to see if the src and dest
  // positions are >= 0.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // And since we are already checking, check the length too.
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  // Okay, it is safe to generate inline code.
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(ESI));
  locations->AddTemp(Location::RegisterLocation(EDI));
  locations->AddTemp(Location::RegisterLocation(ECX));
}

static void CheckPosition(X86Assembler* assembler,
                          Location pos,
                          Register input,
                          Register length,
                          SlowPathCode* slow_path,
                          Register input_len,
                          Register temp) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      // Check that length(input) >= length.
      __ cmpl(Address(input, length_offset), length);
      __ j(kLess, slow_path->GetEntryLabel());
    } else {
      // Check that length(input) >= pos.
      __ movl(input_len, Address(input, length_offset));
      __ cmpl(input_len, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      __ leal(temp, Address(input_len, -pos_const));
      __ cmpl(temp, length);
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else {
    // Check that pos >= 0.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    __ cmpl(temp, length);
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register src = locations->InAt(0).AsRegister<Register>();
  Location srcPos = locations->InAt(1);
  Register dest = locations->InAt(2).AsRegister<Register>();
  Location destPos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  Register src_base = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(src_base, ESI);
  Register dest_base = locations->GetTemp(1).AsRegister<Register>();
  DCHECK_EQ(dest_base, EDI);
  Register count = locations->GetTemp(2).AsRegister<Register>();
  DCHECK_EQ(count, ECX);

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same (to handle overlap).
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // We need the count in ECX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<Register>());
  }

  // Validity checks: source.
  CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base);

  // Validity checks: dest.
  CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base);

  // Okay, everything checks out. Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (srcPos.IsConstant()) {
    int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (destPos.IsConstant()) {
    int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();

    __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  Register argument = locations->InAt(1).AsRegister<Register>();
  __ testl(argument, argument);
  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pStringCompareTo)));
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
  locations->AddTemp(Location::RegisterLocation(ECX));
  locations->AddTemp(Location::RegisterLocation(EDI));

  // Set output, ESI needed for repe_cmpsl instruction anyways.
  locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register str = locations->InAt(0).AsRegister<Register>();
  Register arg = locations->InAt(1).AsRegister<Register>();
  Register ecx = locations->GetTemp(0).AsRegister<Register>();
  Register edi = locations->GetTemp(1).AsRegister<Register>();
  Register esi = locations->Out().AsRegister<Register>();

  NearLabel end, return_true, return_false;

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  StringEqualsOptimizations optimizations(invoke);
  if (!optimizations.GetArgumentNotNull()) {
    // Check if input is null, return false if it is.
    __ testl(arg, arg);
    __ j(kEqual, &return_false);
  }

  // Instanceof check for the argument by comparing class fields.
  // All string objects must have the same type since String cannot be subclassed.
  // Receiver must be a string object, so its class field is equal to all strings' class fields.
  // If the argument is a string object, its class field must be equal to receiver's class field.
  if (!optimizations.GetArgumentIsString()) {
    __ movl(ecx, Address(str, class_offset));
    __ cmpl(ecx, Address(arg, class_offset));
    __ j(kNotEqual, &return_false);
  }

  // Reference equality check, return true if same reference.
  __ cmpl(str, arg);
  __ j(kEqual, &return_true);

  // Load length of receiver string.
  __ movl(ecx, Address(str, count_offset));
  // Check if lengths are equal, return false if they're not.
  __ cmpl(ecx, Address(arg, count_offset));
  __ j(kNotEqual, &return_false);
  // Return true if both strings are empty.
  __ jecxz(&return_true);

  // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
  __ leal(esi, Address(str, value_offset));
  __ leal(edi, Address(arg, value_offset));

  // Divide string length by 2 to compare characters 2 at a time and adjust for odd lengths.
  __ addl(ecx, Immediate(1));
  __ shrl(ecx, Immediate(1));

  // Assertions that must hold in order to compare strings 2 characters at a time.
  DCHECK_ALIGNED(value_offset, 4);
  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");

  // Loop to compare strings two characters at a time starting at the beginning of the string.
  __ repe_cmpsl();
  // If strings are not equal, zero flag will be cleared.
  __ j(kNotEqual, &return_false);

  // Return true and exit the function.
  // If loop does not result in returning false, we return true.
  __ Bind(&return_true);
  __ movl(esi, Immediate(1));
  __ jmp(&end);

  // Return false and exit the function.
  __ Bind(&return_false);
  __ xorl(esi, esi);
  __ Bind(&end);
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in EDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(EDI));
  // If we look for a constant char, we'll still have to copy it into EAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber EAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(EAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber EDI during execution anyways, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses ECX as the counter.
  locations->AddTemp(Location::RegisterLocation(ECX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86Assembler* assembler,
                                  CodeGeneratorX86* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  Register string_obj = locations->InAt(0).AsRegister<Register>();
  Register search_value = locations->InAt(1).AsRegister<Register>();
  Register counter = locations->GetTemp(0).AsRegister<Register>();
  Register string_length = locations->GetTemp(1).AsRegister<Register>();
  Register out = locations->Out().AsRegister<Register>();

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj, EDI);
  DCHECK_EQ(search_value, EAX);
  DCHECK_EQ(counter, ECX);
  DCHECK_EQ(out, EDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch if we have a constant.
  SlowPathCode* slow_path = nullptr;
  if (invoke->InputAt(1)->IsIntConstant()) {
    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load string length, i.e., the count field of the string.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a zero-length check.
  // TODO: Support jecxz.
  NearLabel not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
    __ addl(string_obj, Immediate(value_offset));
  } else {
    Register start_index = locations->InAt(2).AsRegister<Register>();

    // Do a start_index check.
    __ cmpl(start_index, string_length);
    __ j(kGreaterEqual, &not_found_label);

    // Ensure we have a start index >= 0;
    __ xorl(counter, counter);
    __ cmpl(start_index, Immediate(0));
    __ cmovl(kGreater, counter, start_index);

    // Move to the start of the string: string_obj + value_offset + 2 * start_index.
    __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));

    // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
    // compare.
    __ negl(counter);
    __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
  }

  // Everything is set up for repne scasw:
  //   * Comparison address in EDI.
  //   * Counter in ECX.
  __ repne_scasw();

  // Did we find a match?
  __ j(kNotEqual, &not_found_label);

  // Yes, we matched. Compute the index of the result.
  __ subl(string_length, counter);
  __ leal(out, Address(string_length, -1));

  NearLabel done;
  __ jmp(&done);

  // Failed to match; return -1.
  __ Bind(&not_found_label);
  __ movl(out, Immediate(-1));

  // And join up at the end.
  __ Bind(&done);
  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
}

void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
}

void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
}

void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
  GenerateStringIndexOf(
      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
}

void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register byte_array = locations->InAt(0).AsRegister<Register>();
  __ testl(byte_array, byte_array);
  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromBytes)));
  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();

  // No need to emit code checking whether `locations->InAt(2)` is a null
  // pointer, as callers of the native method
  //
java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) 1572 // 1573 // all include a null check on `data` before calling that method. 1574 __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromChars))); 1575 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); 1576 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 1577 } 1578 1579 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) { 1580 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1581 LocationSummary::kCall, 1582 kIntrinsified); 1583 InvokeRuntimeCallingConvention calling_convention; 1584 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1585 locations->SetOut(Location::RegisterLocation(EAX)); 1586 } 1587 1588 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) { 1589 X86Assembler* assembler = GetAssembler(); 1590 LocationSummary* locations = invoke->GetLocations(); 1591 1592 Register string_to_copy = locations->InAt(0).AsRegister<Register>(); 1593 __ testl(string_to_copy, string_to_copy); 1594 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); 1595 codegen_->AddSlowPath(slow_path); 1596 __ j(kEqual, slow_path->GetEntryLabel()); 1597 1598 __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromString))); 1599 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); 1600 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 1601 __ Bind(slow_path->GetExitLabel()); 1602 } 1603 1604 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1605 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); 1606 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1607 LocationSummary::kNoCall, 1608 kIntrinsified); 1609 locations->SetInAt(0, Location::RequiresRegister()); 1610 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); 1611 // Place srcEnd in ECX to save a move below. 1612 locations->SetInAt(2, Location::RegisterLocation(ECX)); 1613 locations->SetInAt(3, Location::RequiresRegister()); 1614 locations->SetInAt(4, Location::RequiresRegister()); 1615 1616 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers. 1617 // We don't have enough registers to also grab ECX, so handle below. 1618 locations->AddTemp(Location::RegisterLocation(ESI)); 1619 locations->AddTemp(Location::RegisterLocation(EDI)); 1620 } 1621 1622 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1623 X86Assembler* assembler = GetAssembler(); 1624 LocationSummary* locations = invoke->GetLocations(); 1625 1626 size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar); 1627 // Location of data in char array buffer. 1628 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value(); 1629 // Location of char array data in string. 1630 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); 1631 1632 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); 1633 Register obj = locations->InAt(0).AsRegister<Register>(); 1634 Location srcBegin = locations->InAt(1); 1635 int srcBegin_value = 1636 srcBegin.IsConstant() ? 
srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; 1637 Register srcEnd = locations->InAt(2).AsRegister<Register>(); 1638 Register dst = locations->InAt(3).AsRegister<Register>(); 1639 Register dstBegin = locations->InAt(4).AsRegister<Register>(); 1640 1641 // Check assumption that sizeof(Char) is 2 (used in scaling below). 1642 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); 1643 DCHECK_EQ(char_size, 2u); 1644 1645 // Compute the address of the destination buffer. 1646 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); 1647 1648 // Compute the address of the source string. 1649 if (srcBegin.IsConstant()) { 1650 // Compute the address of the source string by adding the number of chars from 1651 // the source beginning to the value offset of a string. 1652 __ leal(ESI, Address(obj, srcBegin_value * char_size + value_offset)); 1653 } else { 1654 __ leal(ESI, Address(obj, srcBegin.AsRegister<Register>(), 1655 ScaleFactor::TIMES_2, value_offset)); 1656 } 1657 1658 // Compute the number of chars (words) to move. 1659 // Now is the time to save ECX, since we don't know if it will be used later. 1660 __ pushl(ECX); 1661 int stack_adjust = kX86WordSize; 1662 __ cfi().AdjustCFAOffset(stack_adjust); 1663 DCHECK_EQ(srcEnd, ECX); 1664 if (srcBegin.IsConstant()) { 1665 if (srcBegin_value != 0) { 1666 __ subl(ECX, Immediate(srcBegin_value)); 1667 } 1668 } else { 1669 DCHECK(srcBegin.IsRegister()); 1670 __ subl(ECX, srcBegin.AsRegister<Register>()); 1671 } 1672 1673 // Do the move. 1674 __ rep_movsw(); 1675 1676 // And restore ECX. 1677 __ popl(ECX); 1678 __ cfi().AdjustCFAOffset(-stack_adjust); 1679 } 1680 1681 static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) { 1682 Register address = locations->InAt(0).AsRegisterPairLow<Register>(); 1683 Location out_loc = locations->Out(); 1684 // x86 allows unaligned access. We do not have to check the input or use specific instructions 1685 // to avoid a SIGBUS. 
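// Note: the 64-bit (kPrimLong) peek below is assembled from two 32-bit loads, the low word
// from offset 0 and the high word from offset 4, which matches x86's little-endian layout for
// 8-byte values in memory.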
1686 switch (size) { 1687 case Primitive::kPrimByte: 1688 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0)); 1689 break; 1690 case Primitive::kPrimShort: 1691 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0)); 1692 break; 1693 case Primitive::kPrimInt: 1694 __ movl(out_loc.AsRegister<Register>(), Address(address, 0)); 1695 break; 1696 case Primitive::kPrimLong: 1697 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0)); 1698 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4)); 1699 break; 1700 default: 1701 LOG(FATAL) << "Type not recognized for peek: " << size; 1702 UNREACHABLE(); 1703 } 1704 } 1705 1706 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) { 1707 CreateLongToIntLocations(arena_, invoke); 1708 } 1709 1710 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) { 1711 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); 1712 } 1713 1714 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) { 1715 CreateLongToIntLocations(arena_, invoke); 1716 } 1717 1718 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) { 1719 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); 1720 } 1721 1722 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) { 1723 CreateLongToLongLocations(arena_, invoke); 1724 } 1725 1726 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) { 1727 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); 1728 } 1729 1730 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) { 1731 CreateLongToIntLocations(arena_, invoke); 1732 } 1733 1734 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) { 1735 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); 1736 } 1737 1738 static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size, 1739 HInvoke* invoke) { 1740 LocationSummary* locations = new (arena) LocationSummary(invoke, 1741 LocationSummary::kNoCall, 1742 kIntrinsified); 1743 locations->SetInAt(0, Location::RequiresRegister()); 1744 HInstruction* value = invoke->InputAt(1); 1745 if (size == Primitive::kPrimByte) { 1746 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value)); 1747 } else { 1748 locations->SetInAt(1, Location::RegisterOrConstant(value)); 1749 } 1750 } 1751 1752 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) { 1753 Register address = locations->InAt(0).AsRegisterPairLow<Register>(); 1754 Location value_loc = locations->InAt(1); 1755 // x86 allows unaligned access. We do not have to check the input or use specific instructions 1756 // to avoid a SIGBUS. 
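// As an illustrative example (value not taken from the code): poking the long constant
// 0x1122334455667788 would store 0x55667788 at [address + 0] and 0x11223344 at [address + 4]
// via Low32Bits()/High32Bits() in the kPrimLong case below.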
1757 switch (size) { 1758 case Primitive::kPrimByte: 1759 if (value_loc.IsConstant()) { 1760 __ movb(Address(address, 0), 1761 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); 1762 } else { 1763 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>()); 1764 } 1765 break; 1766 case Primitive::kPrimShort: 1767 if (value_loc.IsConstant()) { 1768 __ movw(Address(address, 0), 1769 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); 1770 } else { 1771 __ movw(Address(address, 0), value_loc.AsRegister<Register>()); 1772 } 1773 break; 1774 case Primitive::kPrimInt: 1775 if (value_loc.IsConstant()) { 1776 __ movl(Address(address, 0), 1777 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); 1778 } else { 1779 __ movl(Address(address, 0), value_loc.AsRegister<Register>()); 1780 } 1781 break; 1782 case Primitive::kPrimLong: 1783 if (value_loc.IsConstant()) { 1784 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue(); 1785 __ movl(Address(address, 0), Immediate(Low32Bits(value))); 1786 __ movl(Address(address, 4), Immediate(High32Bits(value))); 1787 } else { 1788 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>()); 1789 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>()); 1790 } 1791 break; 1792 default: 1793 LOG(FATAL) << "Type not recognized for poke: " << size; 1794 UNREACHABLE(); 1795 } 1796 } 1797 1798 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) { 1799 CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke); 1800 } 1801 1802 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) { 1803 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); 1804 } 1805 1806 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) { 1807 CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke); 1808 } 1809 1810 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) { 1811 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); 1812 } 1813 1814 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) { 1815 CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke); 1816 } 1817 1818 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) { 1819 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); 1820 } 1821 1822 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) { 1823 CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke); 1824 } 1825 1826 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) { 1827 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); 1828 } 1829 1830 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) { 1831 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1832 LocationSummary::kNoCall, 1833 kIntrinsified); 1834 locations->SetOut(Location::RequiresRegister()); 1835 } 1836 1837 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) { 1838 Register out = invoke->GetLocations()->Out().AsRegister<Register>(); 1839 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>())); 1840 } 1841 1842 static void GenUnsafeGet(HInvoke* invoke, 1843 Primitive::Type type, 1844 bool is_volatile, 1845 CodeGeneratorX86* codegen) { 1846 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); 1847 LocationSummary* locations = 
invoke->GetLocations(); 1848 Location base_loc = locations->InAt(1); 1849 Register base = base_loc.AsRegister<Register>(); 1850 Location offset_loc = locations->InAt(2); 1851 Register offset = offset_loc.AsRegisterPairLow<Register>(); 1852 Location output_loc = locations->Out(); 1853 1854 switch (type) { 1855 case Primitive::kPrimInt: { 1856 Register output = output_loc.AsRegister<Register>(); 1857 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 1858 break; 1859 } 1860 1861 case Primitive::kPrimNot: { 1862 Register output = output_loc.AsRegister<Register>(); 1863 if (kEmitCompilerReadBarrier) { 1864 if (kUseBakerReadBarrier) { 1865 Location temp = locations->GetTemp(0); 1866 codegen->GenerateArrayLoadWithBakerReadBarrier( 1867 invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); 1868 } else { 1869 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 1870 codegen->GenerateReadBarrierSlow( 1871 invoke, output_loc, output_loc, base_loc, 0U, offset_loc); 1872 } 1873 } else { 1874 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 1875 __ MaybeUnpoisonHeapReference(output); 1876 } 1877 break; 1878 } 1879 1880 case Primitive::kPrimLong: { 1881 Register output_lo = output_loc.AsRegisterPairLow<Register>(); 1882 Register output_hi = output_loc.AsRegisterPairHigh<Register>(); 1883 if (is_volatile) { 1884 // Need to use a XMM to read atomically. 1885 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 1886 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0)); 1887 __ movd(output_lo, temp); 1888 __ psrlq(temp, Immediate(32)); 1889 __ movd(output_hi, temp); 1890 } else { 1891 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0)); 1892 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4)); 1893 } 1894 } 1895 break; 1896 1897 default: 1898 LOG(FATAL) << "Unsupported op size " << type; 1899 UNREACHABLE(); 1900 } 1901 } 1902 1903 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, 1904 HInvoke* invoke, 1905 Primitive::Type type, 1906 bool is_volatile) { 1907 bool can_call = kEmitCompilerReadBarrier && 1908 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || 1909 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); 1910 LocationSummary* locations = new (arena) LocationSummary(invoke, 1911 can_call ? 1912 LocationSummary::kCallOnSlowPath : 1913 LocationSummary::kNoCall, 1914 kIntrinsified); 1915 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 1916 locations->SetInAt(1, Location::RequiresRegister()); 1917 locations->SetInAt(2, Location::RequiresRegister()); 1918 if (type == Primitive::kPrimLong) { 1919 if (is_volatile) { 1920 // Need to use XMM to read volatile. 1921 locations->AddTemp(Location::RequiresFpuRegister()); 1922 locations->SetOut(Location::RequiresRegister()); 1923 } else { 1924 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 1925 } 1926 } else { 1927 locations->SetOut(Location::RequiresRegister()); 1928 } 1929 if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 1930 // We need a temporary register for the read barrier marking slow 1931 // path in InstructionCodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier. 
1932 locations->AddTemp(Location::RequiresRegister()); 1933 } 1934 } 1935 1936 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) { 1937 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false); 1938 } 1939 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) { 1940 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true); 1941 } 1942 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) { 1943 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false); 1944 } 1945 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 1946 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true); 1947 } 1948 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) { 1949 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false); 1950 } 1951 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 1952 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true); 1953 } 1954 1955 1956 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) { 1957 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); 1958 } 1959 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) { 1960 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); 1961 } 1962 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) { 1963 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); 1964 } 1965 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 1966 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); 1967 } 1968 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) { 1969 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); 1970 } 1971 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 1972 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); 1973 } 1974 1975 1976 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena, 1977 Primitive::Type type, 1978 HInvoke* invoke, 1979 bool is_volatile) { 1980 LocationSummary* locations = new (arena) LocationSummary(invoke, 1981 LocationSummary::kNoCall, 1982 kIntrinsified); 1983 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 1984 locations->SetInAt(1, Location::RequiresRegister()); 1985 locations->SetInAt(2, Location::RequiresRegister()); 1986 locations->SetInAt(3, Location::RequiresRegister()); 1987 if (type == Primitive::kPrimNot) { 1988 // Need temp registers for card-marking. 1989 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 1990 // Ensure the value is in a byte register. 
1991 locations->AddTemp(Location::RegisterLocation(ECX)); 1992 } else if (type == Primitive::kPrimLong && is_volatile) { 1993 locations->AddTemp(Location::RequiresFpuRegister()); 1994 locations->AddTemp(Location::RequiresFpuRegister()); 1995 } 1996 } 1997 1998 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) { 1999 CreateIntIntIntIntToVoidPlusTempsLocations( 2000 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false); 2001 } 2002 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) { 2003 CreateIntIntIntIntToVoidPlusTempsLocations( 2004 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false); 2005 } 2006 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) { 2007 CreateIntIntIntIntToVoidPlusTempsLocations( 2008 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true); 2009 } 2010 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) { 2011 CreateIntIntIntIntToVoidPlusTempsLocations( 2012 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false); 2013 } 2014 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 2015 CreateIntIntIntIntToVoidPlusTempsLocations( 2016 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false); 2017 } 2018 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 2019 CreateIntIntIntIntToVoidPlusTempsLocations( 2020 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true); 2021 } 2022 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) { 2023 CreateIntIntIntIntToVoidPlusTempsLocations( 2024 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false); 2025 } 2026 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { 2027 CreateIntIntIntIntToVoidPlusTempsLocations( 2028 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false); 2029 } 2030 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { 2031 CreateIntIntIntIntToVoidPlusTempsLocations( 2032 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true); 2033 } 2034 2035 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 2036 // memory model. 
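// For a volatile 64-bit put, GenUnsafePut below packs the two 32-bit halves into one XMM
// register (movd lo, movd hi, punpckldq) so that a single movsd writes all 8 bytes atomically.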
2037 static void GenUnsafePut(LocationSummary* locations, 2038 Primitive::Type type, 2039 bool is_volatile, 2040 CodeGeneratorX86* codegen) { 2041 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); 2042 Register base = locations->InAt(1).AsRegister<Register>(); 2043 Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); 2044 Location value_loc = locations->InAt(3); 2045 2046 if (type == Primitive::kPrimLong) { 2047 Register value_lo = value_loc.AsRegisterPairLow<Register>(); 2048 Register value_hi = value_loc.AsRegisterPairHigh<Register>(); 2049 if (is_volatile) { 2050 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 2051 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); 2052 __ movd(temp1, value_lo); 2053 __ movd(temp2, value_hi); 2054 __ punpckldq(temp1, temp2); 2055 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1); 2056 } else { 2057 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo); 2058 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi); 2059 } 2060 } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) { 2061 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2062 __ movl(temp, value_loc.AsRegister<Register>()); 2063 __ PoisonHeapReference(temp); 2064 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp); 2065 } else { 2066 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>()); 2067 } 2068 2069 if (is_volatile) { 2070 codegen->MemoryFence(); 2071 } 2072 2073 if (type == Primitive::kPrimNot) { 2074 bool value_can_be_null = true; // TODO: Worth finding out this information? 2075 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), 2076 locations->GetTemp(1).AsRegister<Register>(), 2077 base, 2078 value_loc.AsRegister<Register>(), 2079 value_can_be_null); 2080 } 2081 } 2082 2083 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) { 2084 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); 2085 } 2086 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) { 2087 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); 2088 } 2089 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) { 2090 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_); 2091 } 2092 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) { 2093 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); 2094 } 2095 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 2096 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); 2097 } 2098 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 2099 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_); 2100 } 2101 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) { 2102 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); 2103 } 2104 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { 2105 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); 2106 } 2107 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { 2108 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile 
*/ true, codegen_); 2109 } 2110 2111 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, 2112 HInvoke* invoke) { 2113 LocationSummary* locations = new (arena) LocationSummary(invoke, 2114 LocationSummary::kNoCall, 2115 kIntrinsified); 2116 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 2117 locations->SetInAt(1, Location::RequiresRegister()); 2118 // Offset is a long, but in 32 bit mode, we only need the low word. 2119 // Can we update the invoke here to remove a TypeConvert to Long? 2120 locations->SetInAt(2, Location::RequiresRegister()); 2121 // Expected value must be in EAX or EDX:EAX. 2122 // For long, new value must be in ECX:EBX. 2123 if (type == Primitive::kPrimLong) { 2124 locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX)); 2125 locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX)); 2126 } else { 2127 locations->SetInAt(3, Location::RegisterLocation(EAX)); 2128 locations->SetInAt(4, Location::RequiresRegister()); 2129 } 2130 2131 // Force a byte register for the output. 2132 locations->SetOut(Location::RegisterLocation(EAX)); 2133 if (type == Primitive::kPrimNot) { 2134 // Need temp registers for card-marking. 2135 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 2136 // Need a byte register for marking. 2137 locations->AddTemp(Location::RegisterLocation(ECX)); 2138 } 2139 } 2140 2141 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) { 2142 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); 2143 } 2144 2145 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { 2146 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); 2147 } 2148 2149 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { 2150 // The UnsafeCASObject intrinsic is missing a read barrier, and 2151 // therefore sometimes does not work as expected (b/25883050). 2152 // Turn it off temporarily as a quick fix, until the read barrier is 2153 // implemented. 2154 // 2155 // TODO(rpl): Implement a read barrier in GenCAS below and re-enable 2156 // this intrinsic. 2157 if (kEmitCompilerReadBarrier) { 2158 return; 2159 } 2160 2161 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); 2162 } 2163 2164 static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) { 2165 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); 2166 LocationSummary* locations = invoke->GetLocations(); 2167 2168 Register base = locations->InAt(1).AsRegister<Register>(); 2169 Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); 2170 Location out = locations->Out(); 2171 DCHECK_EQ(out.AsRegister<Register>(), EAX); 2172 2173 if (type == Primitive::kPrimNot) { 2174 Register expected = locations->InAt(3).AsRegister<Register>(); 2175 // Ensure `expected` is in EAX (required by the CMPXCHG instruction). 2176 DCHECK_EQ(expected, EAX); 2177 Register value = locations->InAt(4).AsRegister<Register>(); 2178 2179 // Mark card for object assuming new value is stored. 2180 bool value_can_be_null = true; // TODO: Worth finding out this information? 
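    // Note: the card is marked before the CAS below is attempted, i.e. even if the swap ends up
    // failing; an unnecessary card mark is safe, at worst it costs the GC an extra card scan.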
2181 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), 2182 locations->GetTemp(1).AsRegister<Register>(), 2183 base, 2184 value, 2185 value_can_be_null); 2186 2187 bool base_equals_value = (base == value); 2188 if (kPoisonHeapReferences) { 2189 if (base_equals_value) { 2190 // If `base` and `value` are the same register location, move 2191 // `value` to a temporary register. This way, poisoning 2192 // `value` won't invalidate `base`. 2193 value = locations->GetTemp(0).AsRegister<Register>(); 2194 __ movl(value, base); 2195 } 2196 2197 // Check that the register allocator did not assign the location 2198 // of `expected` (EAX) to `value` nor to `base`, so that heap 2199 // poisoning (when enabled) works as intended below. 2200 // - If `value` were equal to `expected`, both references would 2201 // be poisoned twice, meaning they would not be poisoned at 2202 // all, as heap poisoning uses address negation. 2203 // - If `base` were equal to `expected`, poisoning `expected` 2204 // would invalidate `base`. 2205 DCHECK_NE(value, expected); 2206 DCHECK_NE(base, expected); 2207 2208 __ PoisonHeapReference(expected); 2209 __ PoisonHeapReference(value); 2210 } 2211 2212 // TODO: Add a read barrier for the reference stored in the object 2213 // before attempting the CAS, similar to the one in the 2214 // art::Unsafe_compareAndSwapObject JNI implementation. 2215 // 2216 // Note that this code is not (yet) used when read barriers are 2217 // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject). 2218 DCHECK(!kEmitCompilerReadBarrier); 2219 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); 2220 2221 // LOCK CMPXCHG has full barrier semantics, and we don't need 2222 // scheduling barriers at this time. 2223 2224 // Convert ZF into the boolean result. 2225 __ setb(kZero, out.AsRegister<Register>()); 2226 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); 2227 2228 // If heap poisoning is enabled, we need to unpoison the values 2229 // that were poisoned earlier. 2230 if (kPoisonHeapReferences) { 2231 if (base_equals_value) { 2232 // `value` has been moved to a temporary register, no need to 2233 // unpoison it. 2234 } else { 2235 // Ensure `value` is different from `out`, so that unpoisoning 2236 // the former does not invalidate the latter. 2237 DCHECK_NE(value, out.AsRegister<Register>()); 2238 __ UnpoisonHeapReference(value); 2239 } 2240 // Do not unpoison the reference contained in register 2241 // `expected`, as it is the same as register `out` (EAX). 2242 } 2243 } else { 2244 if (type == Primitive::kPrimInt) { 2245 // Ensure the expected value is in EAX (required by the CMPXCHG 2246 // instruction). 2247 DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX); 2248 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), 2249 locations->InAt(4).AsRegister<Register>()); 2250 } else if (type == Primitive::kPrimLong) { 2251 // Ensure the expected value is in EAX:EDX and that the new 2252 // value is in EBX:ECX (required by the CMPXCHG8B instruction). 
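      // (CMPXCHG8B compares EDX:EAX with the 8-byte memory operand; on equality it stores
      // ECX:EBX into memory, otherwise it loads the memory value into EDX:EAX. ZF reports
      // the outcome either way.)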
2253 DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX); 2254 DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX); 2255 DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX); 2256 DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX); 2257 __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0)); 2258 } else { 2259 LOG(FATAL) << "Unexpected CAS type " << type; 2260 } 2261 2262 // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we 2263 // don't need scheduling barriers at this time. 2264 2265 // Convert ZF into the boolean result. 2266 __ setb(kZero, out.AsRegister<Register>()); 2267 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); 2268 } 2269 } 2270 2271 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) { 2272 GenCAS(Primitive::kPrimInt, invoke, codegen_); 2273 } 2274 2275 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { 2276 GenCAS(Primitive::kPrimLong, invoke, codegen_); 2277 } 2278 2279 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { 2280 GenCAS(Primitive::kPrimNot, invoke, codegen_); 2281 } 2282 2283 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) { 2284 LocationSummary* locations = new (arena_) LocationSummary(invoke, 2285 LocationSummary::kNoCall, 2286 kIntrinsified); 2287 locations->SetInAt(0, Location::RequiresRegister()); 2288 locations->SetOut(Location::SameAsFirstInput()); 2289 locations->AddTemp(Location::RequiresRegister()); 2290 } 2291 2292 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask, 2293 X86Assembler* assembler) { 2294 Immediate imm_shift(shift); 2295 Immediate imm_mask(mask); 2296 __ movl(temp, reg); 2297 __ shrl(reg, imm_shift); 2298 __ andl(temp, imm_mask); 2299 __ andl(reg, imm_mask); 2300 __ shll(temp, imm_shift); 2301 __ orl(reg, temp); 2302 } 2303 2304 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) { 2305 X86Assembler* assembler = GetAssembler(); 2306 LocationSummary* locations = invoke->GetLocations(); 2307 2308 Register reg = locations->InAt(0).AsRegister<Register>(); 2309 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2310 2311 /* 2312 * Use one bswap instruction to reverse byte order first and then use 3 rounds of 2313 * swapping bits to reverse bits in a number x. Using bswap to save instructions 2314 * compared to generic luni implementation which has 5 rounds of swapping bits. 
2315 * x = bswap x 2316 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; 2317 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; 2318 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; 2319 */ 2320 __ bswapl(reg); 2321 SwapBits(reg, temp, 1, 0x55555555, assembler); 2322 SwapBits(reg, temp, 2, 0x33333333, assembler); 2323 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); 2324 } 2325 2326 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) { 2327 LocationSummary* locations = new (arena_) LocationSummary(invoke, 2328 LocationSummary::kNoCall, 2329 kIntrinsified); 2330 locations->SetInAt(0, Location::RequiresRegister()); 2331 locations->SetOut(Location::SameAsFirstInput()); 2332 locations->AddTemp(Location::RequiresRegister()); 2333 } 2334 2335 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) { 2336 X86Assembler* assembler = GetAssembler(); 2337 LocationSummary* locations = invoke->GetLocations(); 2338 2339 Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>(); 2340 Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>(); 2341 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2342 2343 // We want to swap high/low, then bswap each one, and then do the same 2344 // as a 32 bit reverse. 2345 // Exchange high and low. 2346 __ movl(temp, reg_low); 2347 __ movl(reg_low, reg_high); 2348 __ movl(reg_high, temp); 2349 2350 // bit-reverse low 2351 __ bswapl(reg_low); 2352 SwapBits(reg_low, temp, 1, 0x55555555, assembler); 2353 SwapBits(reg_low, temp, 2, 0x33333333, assembler); 2354 SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler); 2355 2356 // bit-reverse high 2357 __ bswapl(reg_high); 2358 SwapBits(reg_high, temp, 1, 0x55555555, assembler); 2359 SwapBits(reg_high, temp, 2, 0x33333333, assembler); 2360 SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler); 2361 } 2362 2363 static void CreateBitCountLocations( 2364 ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) { 2365 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) { 2366 // Do nothing if there is no popcnt support. This results in generating 2367 // a call for the intrinsic rather than direct code. 2368 return; 2369 } 2370 LocationSummary* locations = new (arena) LocationSummary(invoke, 2371 LocationSummary::kNoCall, 2372 kIntrinsified); 2373 if (is_long) { 2374 locations->AddTemp(Location::RequiresRegister()); 2375 } 2376 locations->SetInAt(0, Location::Any()); 2377 locations->SetOut(Location::RequiresRegister()); 2378 } 2379 2380 static void GenBitCount(X86Assembler* assembler, 2381 CodeGeneratorX86* codegen, 2382 HInvoke* invoke, bool is_long) { 2383 LocationSummary* locations = invoke->GetLocations(); 2384 Location src = locations->InAt(0); 2385 Register out = locations->Out().AsRegister<Register>(); 2386 2387 if (invoke->InputAt(0)->IsConstant()) { 2388 // Evaluate this at compile time. 2389 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2390 int32_t result = is_long 2391 ? POPCOUNT(static_cast<uint64_t>(value)) 2392 : POPCOUNT(static_cast<uint32_t>(value)); 2393 codegen->Load32BitValue(out, result); 2394 return; 2395 } 2396 2397 // Handle the non-constant cases. 2398 if (!is_long) { 2399 if (src.IsRegister()) { 2400 __ popcntl(out, src.AsRegister<Register>()); 2401 } else { 2402 DCHECK(src.IsStackSlot()); 2403 __ popcntl(out, Address(ESP, src.GetStackIndex())); 2404 } 2405 } else { 2406 // The 64-bit case needs to worry about two parts. 
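  // The population count of a 64-bit value is the sum of the population counts of its two
  // 32-bit halves, so two 32-bit POPCNTs plus an addl produce the long result below.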
2407 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2408 if (src.IsRegisterPair()) { 2409 __ popcntl(temp, src.AsRegisterPairLow<Register>()); 2410 __ popcntl(out, src.AsRegisterPairHigh<Register>()); 2411 } else { 2412 DCHECK(src.IsDoubleStackSlot()); 2413 __ popcntl(temp, Address(ESP, src.GetStackIndex())); 2414 __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize))); 2415 } 2416 __ addl(out, temp); 2417 } 2418 } 2419 2420 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) { 2421 CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false); 2422 } 2423 2424 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) { 2425 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false); 2426 } 2427 2428 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) { 2429 CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true); 2430 } 2431 2432 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) { 2433 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true); 2434 } 2435 2436 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) { 2437 LocationSummary* locations = new (arena) LocationSummary(invoke, 2438 LocationSummary::kNoCall, 2439 kIntrinsified); 2440 if (is_long) { 2441 locations->SetInAt(0, Location::RequiresRegister()); 2442 } else { 2443 locations->SetInAt(0, Location::Any()); 2444 } 2445 locations->SetOut(Location::RequiresRegister()); 2446 } 2447 2448 static void GenLeadingZeros(X86Assembler* assembler, 2449 CodeGeneratorX86* codegen, 2450 HInvoke* invoke, bool is_long) { 2451 LocationSummary* locations = invoke->GetLocations(); 2452 Location src = locations->InAt(0); 2453 Register out = locations->Out().AsRegister<Register>(); 2454 2455 if (invoke->InputAt(0)->IsConstant()) { 2456 // Evaluate this at compile time. 2457 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2458 if (value == 0) { 2459 value = is_long ? 64 : 32; 2460 } else { 2461 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value)); 2462 } 2463 codegen->Load32BitValue(out, value); 2464 return; 2465 } 2466 2467 // Handle the non-constant cases. 2468 if (!is_long) { 2469 if (src.IsRegister()) { 2470 __ bsrl(out, src.AsRegister<Register>()); 2471 } else { 2472 DCHECK(src.IsStackSlot()); 2473 __ bsrl(out, Address(ESP, src.GetStackIndex())); 2474 } 2475 2476 // BSR sets ZF if the input was zero, and the output is undefined. 2477 NearLabel all_zeroes, done; 2478 __ j(kEqual, &all_zeroes); 2479 2480 // Correct the result from BSR to get the final CLZ result. 2481 __ xorl(out, Immediate(31)); 2482 __ jmp(&done); 2483 2484 // Fix the zero case with the expected result. 2485 __ Bind(&all_zeroes); 2486 __ movl(out, Immediate(32)); 2487 2488 __ Bind(&done); 2489 return; 2490 } 2491 2492 // 64 bit case needs to worry about both parts of the register. 2493 DCHECK(src.IsRegisterPair()); 2494 Register src_lo = src.AsRegisterPairLow<Register>(); 2495 Register src_hi = src.AsRegisterPairHigh<Register>(); 2496 NearLabel handle_low, done, all_zeroes; 2497 2498 // Is the high word zero? 2499 __ testl(src_hi, src_hi); 2500 __ j(kEqual, &handle_low); 2501 2502 // High word is not zero. We know that the BSR result is defined in this case. 2503 __ bsrl(out, src_hi); 2504 2505 // Correct the result from BSR to get the final CLZ result. 2506 __ xorl(out, Immediate(31)); 2507 __ jmp(&done); 2508 2509 // High word was zero. 
We have to compute the low word count and add 32. 2510 __ Bind(&handle_low); 2511 __ bsrl(out, src_lo); 2512 __ j(kEqual, &all_zeroes); 2513 2514 // We had a valid result. Use an XOR to both correct the result and add 32. 2515 __ xorl(out, Immediate(63)); 2516 __ jmp(&done); 2517 2518 // All zero case. 2519 __ Bind(&all_zeroes); 2520 __ movl(out, Immediate(64)); 2521 2522 __ Bind(&done); 2523 } 2524 2525 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 2526 CreateLeadingZeroLocations(arena_, invoke, /* is_long */ false); 2527 } 2528 2529 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 2530 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); 2531 } 2532 2533 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 2534 CreateLeadingZeroLocations(arena_, invoke, /* is_long */ true); 2535 } 2536 2537 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 2538 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); 2539 } 2540 2541 static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) { 2542 LocationSummary* locations = new (arena) LocationSummary(invoke, 2543 LocationSummary::kNoCall, 2544 kIntrinsified); 2545 if (is_long) { 2546 locations->SetInAt(0, Location::RequiresRegister()); 2547 } else { 2548 locations->SetInAt(0, Location::Any()); 2549 } 2550 locations->SetOut(Location::RequiresRegister()); 2551 } 2552 2553 static void GenTrailingZeros(X86Assembler* assembler, 2554 CodeGeneratorX86* codegen, 2555 HInvoke* invoke, bool is_long) { 2556 LocationSummary* locations = invoke->GetLocations(); 2557 Location src = locations->InAt(0); 2558 Register out = locations->Out().AsRegister<Register>(); 2559 2560 if (invoke->InputAt(0)->IsConstant()) { 2561 // Evaluate this at compile time. 2562 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2563 if (value == 0) { 2564 value = is_long ? 64 : 32; 2565 } else { 2566 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value)); 2567 } 2568 codegen->Load32BitValue(out, value); 2569 return; 2570 } 2571 2572 // Handle the non-constant cases. 2573 if (!is_long) { 2574 if (src.IsRegister()) { 2575 __ bsfl(out, src.AsRegister<Register>()); 2576 } else { 2577 DCHECK(src.IsStackSlot()); 2578 __ bsfl(out, Address(ESP, src.GetStackIndex())); 2579 } 2580 2581 // BSF sets ZF if the input was zero, and the output is undefined. 2582 NearLabel done; 2583 __ j(kNotEqual, &done); 2584 2585 // Fix the zero case with the expected result. 2586 __ movl(out, Immediate(32)); 2587 2588 __ Bind(&done); 2589 return; 2590 } 2591 2592 // 64 bit case needs to worry about both parts of the register. 2593 DCHECK(src.IsRegisterPair()); 2594 Register src_lo = src.AsRegisterPairLow<Register>(); 2595 Register src_hi = src.AsRegisterPairHigh<Register>(); 2596 NearLabel done, all_zeroes; 2597 2598 // If the low word is zero, then ZF will be set. If not, we have the answer. 2599 __ bsfl(out, src_lo); 2600 __ j(kNotEqual, &done); 2601 2602 // Low word was zero. We have to compute the high word count and add 32. 2603 __ bsfl(out, src_hi); 2604 __ j(kEqual, &all_zeroes); 2605 2606 // We had a valid result. Add 32 to account for the low word being zero. 2607 __ addl(out, Immediate(32)); 2608 __ jmp(&done); 2609 2610 // All zero case. 
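  // Both words were zero, so the trailing-zero count is the full 64 bits of the long.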
2611 __ Bind(&all_zeroes); 2612 __ movl(out, Immediate(64)); 2613 2614 __ Bind(&done); 2615 } 2616 2617 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { 2618 CreateTrailingZeroLocations(arena_, invoke, /* is_long */ false); 2619 } 2620 2621 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { 2622 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); 2623 } 2624 2625 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { 2626 CreateTrailingZeroLocations(arena_, invoke, /* is_long */ true); 2627 } 2628 2629 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { 2630 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); 2631 } 2632 2633 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) 2634 UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent) 2635 UNIMPLEMENTED_INTRINSIC(X86, SystemArrayCopy) 2636 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite) 2637 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite) 2638 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit) 2639 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit) 2640 UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit) 2641 UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit) 2642 2643 // 1.8. 2644 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt) 2645 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong) 2646 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt) 2647 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong) 2648 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject) 2649 2650 UNREACHABLE_INTRINSICS(X86) 2651 2652 #undef __ 2653 2654 } // namespace x86 2655 } // namespace art 2656