/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86.h"

#include <limits>

#include "arch/x86/instruction_set_features_x86.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"

namespace art {

namespace x86 {

static constexpr int kDoubleNaNHigh = 0x7FF80000;
static constexpr int kDoubleNaNLow = 0x00000000;
static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);

IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
  : arena_(codegen->GetGraph()->GetArena()),
    codegen_(codegen) {
}


X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
  return down_cast<X86Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
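// Each reference loaded from the source array is passed through the Baker
// read barrier mark entrypoint before being stored into the destination,
// keeping the copy coherent with a concurrent copying collection.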
class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
      : SlowPathCode(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();

    Register src = locations->InAt(0).AsRegister<Register>();
    Location src_pos = locations->InAt(1);
    Register dest = locations->InAt(2).AsRegister<Register>();
    Location dest_pos = locations->InAt(3);
    Location length = locations->InAt(4);
    Location temp1_loc = locations->GetTemp(0);
    Register temp1 = temp1_loc.AsRegister<Register>();
    Register temp2 = locations->GetTemp(1).AsRegister<Register>();
    Register temp3 = locations->GetTemp(2).AsRegister<Register>();

    __ Bind(GetEntryLabel());
    // In this code path, registers `temp1`, `temp2`, and `temp3`
    // (resp.) are not used for the base source address, the base
    // destination address, and the end source address (resp.), as in
    // other SystemArrayCopy intrinsic code paths. Instead they are
    // (resp.) used for:
    // - the loop index (`i`);
    // - the source index (`src_index`) and the loaded (source)
    //   reference (`value`); and
    // - the destination index (`dest_index`).

    // i = 0
    __ xorl(temp1, temp1);
    NearLabel loop;
    __ Bind(&loop);
    // value = src_array[i + src_pos]
    if (src_pos.IsConstant()) {
      int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
      int32_t adjusted_offset = offset + constant * element_size;
      __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
    } else {
      __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
      __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
    }
    __ MaybeUnpoisonHeapReference(temp2);
    // TODO: Inline the mark bit check before calling the runtime?
    // value = ReadBarrier::Mark(value)
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(temp2, ESP);
    DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(temp2);
    // dest_array[i + dest_pos] = value
    if (dest_pos.IsConstant()) {
      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      int32_t adjusted_offset = offset + constant * element_size;
      __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
    } else {
      __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
      __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
    }
    // ++i
    __ addl(temp1, Immediate(1));
    // if (i != length) goto loop
    x86_codegen->GenerateIntCompare(temp1_loc, length);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
};

#undef __

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    __ movd(output.AsRegisterPairLow<Register>(), temp);
    __ psrlq(temp, Immediate(32));
    __ movd(output.AsRegisterPairHigh<Register>(), temp);
  } else {
    __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
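    // The 64-bit value arrives as a low/high register pair; each half is
    // moved into its own XMM register and the two are interleaved with
    // punpckldq, leaving the full 64-bit value in the low quadword of temp1.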
    XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    __ movd(temp1, input.AsRegisterPairLow<Register>());
    __ movd(temp2, input.AsRegisterPairHigh<Register>());
    __ punpckldq(temp1, temp2);
    __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
  } else {
    __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
  }
}

void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, /* is64bit */ true);
}
void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, /* is64bit */ true);
}

void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, /* is64bit */ false);
}
void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, /* is64bit */ false);
}

void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86Assembler* assembler) {
  Register out = locations->Out().AsRegister<Register>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
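      // bswapl reverses all four bytes, leaving the two bytes of interest in
      // the upper half; the arithmetic right shift then moves them back down
      // while sign-extending the 16-bit result.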
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
  CreateLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();

  X86Assembler* assembler = GetAssembler();
  // Assign the inputs to the outputs, mixing low/high.
  __ movl(output_lo, input_hi);
  __ movl(output_hi, input_lo);
  __ bswapl(output_lo);
  __ bswapl(output_hi);
}

void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    // We need addressability for the constant area.
    locations->SetInAt(1, Location::RequiresRegister());
    // We need a temporary to hold the constant.
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MathAbsFP(HInvoke* invoke,
                      bool is64bit,
                      X86Assembler* assembler,
                      CodeGeneratorX86* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    DCHECK(locations->InAt(1).IsRegister());
    // We also have a constant area pointer.
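    // FP abs() amounts to clearing the sign bit: AND the value with
    // 0x7FFFFFFFFFFFFFFF (double) or 0x7FFFFFFF (float), loaded from the
    // constant area through the cached method-address register.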
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    if (is64bit) {
      __ movsd(temp, codegen->LiteralInt64Address(
          INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area));
      __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
    } else {
      __ movss(temp, codegen->LiteralInt32Address(
          INT32_C(0x7FFFFFFF), method_address, constant_area));
      __ andps(output.AsFpuRegister<XmmRegister>(), temp);
    }
  } else {
    // Create the right constant on an aligned stack.
    if (is64bit) {
      __ subl(ESP, Immediate(8));
      __ pushl(Immediate(0x7FFFFFFF));
      __ pushl(Immediate(0xFFFFFFFF));
      __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    } else {
      __ subl(ESP, Immediate(12));
      __ pushl(Immediate(0x7FFFFFFF));
      __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    }
    __ addl(ESP, Immediate(16));
  }
}

void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RegisterLocation(EAX));
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RegisterLocation(EDX));
}

static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
  Location output = locations->Out();
  Register out = output.AsRegister<Register>();
  DCHECK_EQ(out, EAX);
  Register temp = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(temp, EDX);

  // Sign extend EAX into EDX.
  __ cdq();

  // XOR EAX with sign.
  __ xorl(EAX, EDX);

  // Subtract out sign to correct.
  __ subl(EAX, EDX);

  // The result is in EAX.
}

static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

  // Compute the sign into the temporary.
  __ movl(temp, input_hi);
  __ sarl(temp, Immediate(31));

  // Store the sign into the output.
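  // Taken together, the sequence computes abs(x) = (x ^ sign) - sign, where
  // sign is 0 for non-negative inputs and -1 for negative ones; the xorl pair
  // conditionally flips all bits and the subl/sbbl pair then subtracts the
  // sign with borrow propagation across the register pair.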
  __ movl(output_lo, temp);
  __ movl(output_hi, temp);

  // XOR the input to the output.
  __ xorl(output_lo, input_lo);
  __ xorl(output_hi, input_hi);

  // Subtract the sign.
  __ subl(output_lo, temp);
  __ sbbl(output_hi, temp);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
  CreateAbsIntLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
  CreateAbsLongLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsLong(invoke->GetLocations(), GetAssembler());
}

static void GenMinMaxFP(HInvoke* invoke,
                        bool is_min,
                        bool is_double,
                        X86Assembler* assembler,
                        CodeGeneratorX86* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  // Do we have a constant area pointer?
  if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(2)->AsX86ComputeBaseMethodAddress();
    DCHECK(locations->InAt(2).IsRegister());
    Register constant_area = locations->InAt(2).AsRegister<Register>();
    if (is_double) {
      __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area));
    } else {
      __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area));
    }
  } else {
    if (is_double) {
      __ pushl(Immediate(kDoubleNaNHigh));
      __ pushl(Immediate(kDoubleNaNLow));
      __ movsd(out, Address(ESP, 0));
      __ addl(ESP, Immediate(8));
    } else {
      __ pushl(Immediate(kFloatNaN));
      __ movss(out, Address(ESP, 0));
      __ addl(ESP, Immediate(4));
    }
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    locations->SetInAt(2, Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke,
              /* is_min */ true,
              /* is_double */ true,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke,
              /* is_min */ true,
              /* is_double */ false,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke,
              /* is_min */ false,
              /* is_double */ true,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke,
              /* is_min */ false,
              /* is_double */ false,
              GetAssembler(),
              codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  if (is_long) {
    // Need to perform a subtract to get the sign right.
    // op1 is already in the same location as the output.
    Location output = locations->Out();
    Register output_lo = output.AsRegisterPairLow<Register>();
    Register output_hi = output.AsRegisterPairHigh<Register>();

    Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
    Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();

    // Spare register to compute the subtraction to set condition code.
    Register temp = locations->GetTemp(0).AsRegister<Register>();

    // Subtract off op2_low.
    __ movl(temp, output_lo);
    __ subl(temp, op2_lo);

    // Now use the same temp and the borrow to finish the subtraction of op2_hi.
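    // Only the flags matter here: after the sbbl, the sign and overflow flags
    // reflect the full 64-bit result of op1 - op2, so a single signed cmov
    // condition can select the correct register pair below.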
    __ movl(temp, output_hi);
    __ sbbl(temp, op2_hi);

    // Now the condition code is correct.
    Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
    __ cmovl(cond, output_lo, op2_lo);
    __ cmovl(cond, output_hi, op2_hi);
  } else {
    Register out = locations->Out().AsRegister<Register>();
    Register op2 = op2_loc.AsRegister<Register>();

    //  (out := op1)
    //  out <=? op2
    //  if out is min jmp done
    //  out := op2
    // done:

    __ cmpl(out, op2);
    Condition cond = is_min ? Condition::kGreater : Condition::kLess;
    __ cmovl(cond, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  // Register to use to perform a long subtract to set cc.
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static
void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
                                      Location::RegisterLocation(EAX));

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
                                    HInvoke* invoke,
                                    X86Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
  // Do we have instruction support?
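  // roundss is an SSE4.1 instruction, so without that feature we fall back
  // to an out-of-line runtime call, as in CreateSSE41FPToFPLocations above.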
  if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
    DCHECK(static_or_direct != nullptr);
    LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                              LocationSummary::kNoCall,
                                                              kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    if (static_or_direct->HasSpecialInput() &&
        invoke->InputAt(
            static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
      locations->SetInAt(1, Location::RequiresRegister());
    }
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(EAX));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {  // TODO: can we reach this?
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Register out = locations->Out().AsRegister<Register>();
  NearLabel skip_incr, done;
  X86Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    // Direct constant area available.
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
                                                method_address,
                                                constant_area));
    __ j(kBelow, &skip_incr);
    __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
                                               method_address,
                                               constant_area));
    __ Bind(&skip_incr);
  } else {
    // No constant area: go through stack.
    __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
    __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
    __ comiss(t2, Address(ESP, 4));
    __ j(kBelow, &skip_incr);
    __ addss(t1, Address(ESP, 0));
    __ Bind(&skip_incr);
    __ addl(ESP, Immediate(8));
  }

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
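  // (cvttss2si alone would return 0x80000000, i.e. kPrimIntMin, for NaN and
  // out-of-range inputs, so the large-positive and NaN cases are filtered
  // with explicit compares before the conversion.)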
  __ movl(out, Immediate(kPrimIntMax));
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                      HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86Assembler* assembler = codegen->GetAssembler();

  // We need some place to pass the parameters.
  __ subl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(16);

  // Pass the parameters at the bottom of the stack.
  __ movsd(Address(ESP, 0), XMM0);

  // If we have a second parameter, pass it next.
  if (invoke->GetNumberOfArguments() == 2) {
    __ movsd(Address(ESP, 8), XMM1);
  }

  // Now do the actual call.
  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());

  // Extract the return value from the FP stack.
  __ fstpl(Address(ESP, 0));
  __ movsd(XMM0, Address(ESP, 0));

  // And clean up the stack.
  __ addl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(-16);
}

void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void
IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                        HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // We need at least two of the positions or length to be an integer constant,
  // or else we won't have enough free
  // registers.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();

  int num_constants =
      ((src_pos != nullptr) ? 1 : 0)
      + ((dest_pos != nullptr) ? 1 : 0)
      + ((length != nullptr) ? 1 : 0);

  if (num_constants < 2) {
    // Not enough free registers.
    return;
  }

  // As long as we are checking, we might as well check to see if the src and dest
  // positions are >= 0.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // And since we are already checking, check the length too.
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  // Okay, it is safe to generate inline code.
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(ESI));
  locations->AddTemp(Location::RegisterLocation(EDI));
  locations->AddTemp(Location::RegisterLocation(ECX));
}

static void CheckPosition(X86Assembler* assembler,
                          Location pos,
                          Register input,
                          Location length,
                          SlowPathCode* slow_path,
                          Register temp,
                          bool length_is_input_length = false) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(temp, Address(input, length_offset));
      __ subl(temp, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<Register>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<Register>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register src = locations->InAt(0).AsRegister<Register>();
  Location srcPos = locations->InAt(1);
  Register dest = locations->InAt(2).AsRegister<Register>();
  Location destPos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  Register src_base = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(src_base, ESI);
  Register dest_base = locations->GetTemp(1).AsRegister<Register>();
  DCHECK_EQ(dest_base, EDI);
  Register count = locations->GetTemp(2).AsRegister<Register>();
  DCHECK_EQ(count, ECX);

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same (to handle overlap).
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  // Note: testl (not cmpl of a register with itself) is needed here, as only
  // testl sets the sign flag from the value so that kLess catches negatives.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // We need the count in ECX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<Register>());
  }

  // Validity checks: source. Use src_base as a temporary register.
  CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);

  // Okay, everything checks out. Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
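  // (The copy itself is a single rep movsw below, with ESI/EDI pointing at
  // the first source/destination chars and the char count in ECX.)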
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (srcPos.IsConstant()) {
    int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (destPos.IsConstant()) {
    int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();

    __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  Register argument = locations->InAt(1).AsRegister<Register>();
  __ testl(argument, argument);
  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
  locations->AddTemp(Location::RegisterLocation(ECX));
  locations->AddTemp(Location::RegisterLocation(EDI));

  // Set output, ESI needed for repe_cmpsl instruction anyways.
  locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register str = locations->InAt(0).AsRegister<Register>();
  Register arg = locations->InAt(1).AsRegister<Register>();
  Register ecx = locations->GetTemp(0).AsRegister<Register>();
  Register edi = locations->GetTemp(1).AsRegister<Register>();
  Register esi = locations->Out().AsRegister<Register>();

  NearLabel end, return_true, return_false;

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  StringEqualsOptimizations optimizations(invoke);
  if (!optimizations.GetArgumentNotNull()) {
    // Check if input is null, return false if it is.
    __ testl(arg, arg);
    __ j(kEqual, &return_false);
  }

  if (!optimizations.GetArgumentIsString()) {
    // Instanceof check for the argument by comparing class fields.
    // All string objects must have the same type since String cannot be subclassed.
    // Receiver must be a string object, so its class field is equal to all strings' class fields.
    // If the argument is a string object, its class field must be equal to receiver's class field.
    __ movl(ecx, Address(str, class_offset));
    __ cmpl(ecx, Address(arg, class_offset));
    __ j(kNotEqual, &return_false);
  }

  // Reference equality check, return true if same reference.
  __ cmpl(str, arg);
  __ j(kEqual, &return_true);

  // Load length and compression flag of receiver string.
  __ movl(ecx, Address(str, count_offset));
  // Check if lengths and compression flags are equal, return false if they're not.
  // Two identical strings will always have same compression style since
  // compression style is decided on alloc.
  __ cmpl(ecx, Address(arg, count_offset));
  __ j(kNotEqual, &return_false);
  // Return true if strings are empty. Even with string compression `count == 0` means empty.
  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                "Expecting 0=compressed, 1=uncompressed");
  __ jecxz(&return_true);

  if (mirror::kUseStringCompression) {
    NearLabel string_uncompressed;
    // Extract length and differentiate between both compressed or both uncompressed.
    // Different compression style is cut above.
    __ shrl(ecx, Immediate(1));
    __ j(kCarrySet, &string_uncompressed);
    // Divide string length by 2, rounding up, and continue as if uncompressed.
    __ addl(ecx, Immediate(1));
    __ shrl(ecx, Immediate(1));
    __ Bind(&string_uncompressed);
  }
  // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
  __ leal(esi, Address(str, value_offset));
  __ leal(edi, Address(arg, value_offset));

  // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
  // divisible by 2.
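  // (The addl/shrl pair rounds the division up; reading past an odd trailing
  // character is safe because string data is zero padded up to the object
  // alignment, per the static_assert below.)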
  __ addl(ecx, Immediate(1));
  __ shrl(ecx, Immediate(1));

  // Assertions that must hold in order to compare strings 2 characters (uncompressed)
  // or 4 characters (compressed) at a time.
  DCHECK_ALIGNED(value_offset, 4);
  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");

  // Loop to compare strings two characters at a time starting at the beginning of the string.
  __ repe_cmpsl();
  // If strings are not equal, zero flag will be cleared.
  __ j(kNotEqual, &return_false);

  // Return true and exit the function.
  // If loop does not result in returning false, we return true.
  __ Bind(&return_true);
  __ movl(esi, Immediate(1));
  __ jmp(&end);

  // Return false and exit the function.
  __ Bind(&return_false);
  __ xorl(esi, esi);
  __ Bind(&end);
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in EDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(EDI));
  // If we look for a constant char, we'll still have to copy it into EAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber EAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(EAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber EDI during execution anyways, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses ECX as the counter.
  locations->AddTemp(Location::RegisterLocation(ECX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
  if (mirror::kUseStringCompression) {
    // Need another temporary to be able to save unflagged string length.
    locations->AddTemp(Location::RequiresRegister());
  }
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86Assembler* assembler,
                                  CodeGeneratorX86* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  Register string_obj = locations->InAt(0).AsRegister<Register>();
  Register search_value = locations->InAt(1).AsRegister<Register>();
  Register counter = locations->GetTemp(0).AsRegister<Register>();
  Register string_length = locations->GetTemp(1).AsRegister<Register>();
  Register out = locations->Out().AsRegister<Register>();
  // Only used when string compression feature is on.
  Register string_length_flagged;

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj, EDI);
  DCHECK_EQ(search_value, EAX);
  DCHECK_EQ(counter, ECX);
  DCHECK_EQ(out, EDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1529   SlowPathCode* slow_path = nullptr;
1530   HInstruction* code_point = invoke->InputAt(1);
1531   if (code_point->IsIntConstant()) {
1532     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1533         std::numeric_limits<uint16_t>::max()) {
1534       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1535       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1536       slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
1537       codegen->AddSlowPath(slow_path);
1538       __ jmp(slow_path->GetEntryLabel());
1539       __ Bind(slow_path->GetExitLabel());
1540       return;
1541     }
1542   } else if (code_point->GetType() != Primitive::kPrimChar) {
1543     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1544     slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
1545     codegen->AddSlowPath(slow_path);
1546     __ j(kAbove, slow_path->GetEntryLabel());
1547   }
1548 
1549   // From here down, we know that we are looking for a char that fits in 16 bits.
1550   // Location of reference to data array within the String object.
1551   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1552   // Location of count within the String object.
1553   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1554 
1555   // Load the count field of the string containing the length and compression flag.
1556   __ movl(string_length, Address(string_obj, count_offset));
1557 
1558   // Do a zero-length check. Even with string compression `count == 0` means empty.
1559   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1560                 "Expecting 0=compressed, 1=uncompressed");
1561   // TODO: Support jecxz.
1562   NearLabel not_found_label;
1563   __ testl(string_length, string_length);
1564   __ j(kEqual, &not_found_label);
1565 
1566   if (mirror::kUseStringCompression) {
1567     string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1568     __ movl(string_length_flagged, string_length);
1569     // Extract the length and shift out the least significant bit used as compression flag.
1570     __ shrl(string_length, Immediate(1));
1571   }
1572 
1573   if (start_at_zero) {
1574     // Number of chars to scan is the same as the string length.
1575     __ movl(counter, string_length);
1576 
1577     // Move to the start of the string.
1578     __ addl(string_obj, Immediate(value_offset));
1579   } else {
1580     Register start_index = locations->InAt(2).AsRegister<Register>();
1581 
1582     // Do a start_index check.
1583     __ cmpl(start_index, string_length);
1584     __ j(kGreaterEqual, &not_found_label);
1585 
1586     // Ensure we have a start index >= 0.
1587     __ xorl(counter, counter);
1588     __ cmpl(start_index, Immediate(0));
1589     __ cmovl(kGreater, counter, start_index);
1590 
1591     if (mirror::kUseStringCompression) {
1592       NearLabel modify_counter, offset_uncompressed_label;
1593       __ testl(string_length_flagged, Immediate(1));
1594       __ j(kNotZero, &offset_uncompressed_label);
1595       // Move to the start of the string: string_obj + value_offset + start_index.
1596       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1597       __ jmp(&modify_counter);
1598 
1599       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1600       __ Bind(&offset_uncompressed_label);
1601       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1602 
1603       // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1604       // compare.
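      // The negl/leal pair below computes counter = string_length - counter, i.e.
      // string_length - max(start_index, 0), without needing an extra register.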
1605       __ Bind(&modify_counter);
1606     } else {
1607       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1608     }
1609     __ negl(counter);
1610     __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1611   }
1612 
1613   if (mirror::kUseStringCompression) {
1614     NearLabel uncompressed_string_comparison;
1615     NearLabel comparison_done;
1616     __ testl(string_length_flagged, Immediate(1));
1617     __ j(kNotZero, &uncompressed_string_comparison);
1618 
1619     // Check if EAX (search_value) is ASCII.
1620     __ cmpl(search_value, Immediate(127));
1621     __ j(kGreater, &not_found_label);
1622     // Comparing byte-per-byte.
1623     __ repne_scasb();
1624     __ jmp(&comparison_done);
1625 
1626     // Everything is set up for repne scasw:
1627     //   * Comparison address in EDI.
1628     //   * Counter in ECX.
1629     __ Bind(&uncompressed_string_comparison);
1630     __ repne_scasw();
1631     __ Bind(&comparison_done);
1632   } else {
1633     __ repne_scasw();
1634   }
1635   // Did we find a match?
1636   __ j(kNotEqual, &not_found_label);
1637 
1638   // Yes, we matched. Compute the index of the result.
1639   __ subl(string_length, counter);
1640   __ leal(out, Address(string_length, -1));
1641 
1642   NearLabel done;
1643   __ jmp(&done);
1644 
1645   // Failed to match; return -1.
1646   __ Bind(&not_found_label);
1647   __ movl(out, Immediate(-1));
1648 
1649   // And join up at the end.
1650   __ Bind(&done);
1651   if (slow_path != nullptr) {
1652     __ Bind(slow_path->GetExitLabel());
1653   }
1654 }
1655 
1656 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1657   CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
1658 }
1659 
1660 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1661   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1662 }
1663 
1664 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1665   CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
1666 }
1667 
1668 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1669   GenerateStringIndexOf(
1670       invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1671 }
1672 
1673 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1674   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1675                                                             LocationSummary::kCallOnMainAndSlowPath,
1676                                                             kIntrinsified);
1677   InvokeRuntimeCallingConvention calling_convention;
1678   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1679   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1680   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1681   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1682   locations->SetOut(Location::RegisterLocation(EAX));
1683 }
1684 
1685 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1686   X86Assembler* assembler = GetAssembler();
1687   LocationSummary* locations = invoke->GetLocations();
1688 
1689   Register byte_array = locations->InAt(0).AsRegister<Register>();
1690   __ testl(byte_array, byte_array);
1691   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
1692   codegen_->AddSlowPath(slow_path);
1693   __ j(kEqual, slow_path->GetEntryLabel());
1694 
1695   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1696
CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); 1697 __ Bind(slow_path->GetExitLabel()); 1698 } 1699 1700 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) { 1701 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1702 LocationSummary::kCallOnMainOnly, 1703 kIntrinsified); 1704 InvokeRuntimeCallingConvention calling_convention; 1705 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1706 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 1707 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); 1708 locations->SetOut(Location::RegisterLocation(EAX)); 1709 } 1710 1711 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) { 1712 // No need to emit code checking whether `locations->InAt(2)` is a null 1713 // pointer, as callers of the native method 1714 // 1715 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) 1716 // 1717 // all include a null check on `data` before calling that method. 1718 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); 1719 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); 1720 } 1721 1722 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) { 1723 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1724 LocationSummary::kCallOnMainAndSlowPath, 1725 kIntrinsified); 1726 InvokeRuntimeCallingConvention calling_convention; 1727 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1728 locations->SetOut(Location::RegisterLocation(EAX)); 1729 } 1730 1731 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) { 1732 X86Assembler* assembler = GetAssembler(); 1733 LocationSummary* locations = invoke->GetLocations(); 1734 1735 Register string_to_copy = locations->InAt(0).AsRegister<Register>(); 1736 __ testl(string_to_copy, string_to_copy); 1737 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); 1738 codegen_->AddSlowPath(slow_path); 1739 __ j(kEqual, slow_path->GetEntryLabel()); 1740 1741 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc()); 1742 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); 1743 __ Bind(slow_path->GetExitLabel()); 1744 } 1745 1746 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1747 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); 1748 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1749 LocationSummary::kNoCall, 1750 kIntrinsified); 1751 locations->SetInAt(0, Location::RequiresRegister()); 1752 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); 1753 // Place srcEnd in ECX to save a move below. 1754 locations->SetInAt(2, Location::RegisterLocation(ECX)); 1755 locations->SetInAt(3, Location::RequiresRegister()); 1756 locations->SetInAt(4, Location::RequiresRegister()); 1757 1758 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers. 1759 // We don't have enough registers to also grab ECX, so handle below. 
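  // REP MOVSW implicitly uses fixed registers: ESI (source pointer), EDI (destination
  // pointer) and ECX (word count), which is why these locations are pinned here rather
  // than left to the register allocator.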
1760   locations->AddTemp(Location::RegisterLocation(ESI));
1761   locations->AddTemp(Location::RegisterLocation(EDI));
1762 }
1763 
1764 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1765   X86Assembler* assembler = GetAssembler();
1766   LocationSummary* locations = invoke->GetLocations();
1767 
1768   size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
1769   // Location of data in char array buffer.
1770   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1771   // Location of char array data in string.
1772   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1773 
1774   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1775   Register obj = locations->InAt(0).AsRegister<Register>();
1776   Location srcBegin = locations->InAt(1);
1777   int srcBegin_value =
1778       srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1779   Register srcEnd = locations->InAt(2).AsRegister<Register>();
1780   Register dst = locations->InAt(3).AsRegister<Register>();
1781   Register dstBegin = locations->InAt(4).AsRegister<Register>();
1782 
1783   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1784   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1785   DCHECK_EQ(char_size, 2u);
1786 
1787   // Compute the number of chars (words) to move.
1788   // Save ECX, since we don't know if it will be used later.
1789   __ pushl(ECX);
1790   int stack_adjust = kX86WordSize;
1791   __ cfi().AdjustCFAOffset(stack_adjust);
1792   DCHECK_EQ(srcEnd, ECX);
1793   if (srcBegin.IsConstant()) {
1794     __ subl(ECX, Immediate(srcBegin_value));
1795   } else {
1796     DCHECK(srcBegin.IsRegister());
1797     __ subl(ECX, srcBegin.AsRegister<Register>());
1798   }
1799 
1800   NearLabel done;
1801   if (mirror::kUseStringCompression) {
1802     // Location of count in string.
1803     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1804     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1805     DCHECK_EQ(c_char_size, 1u);
1806     __ pushl(EAX);
1807     __ cfi().AdjustCFAOffset(stack_adjust);
1808 
1809     NearLabel copy_loop, copy_uncompressed;
1810     __ testl(Address(obj, count_offset), Immediate(1));
1811     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1812                   "Expecting 0=compressed, 1=uncompressed");
1813     __ j(kNotZero, &copy_uncompressed);
1814     // Compute the address of the source string data by adding the byte offset of the
1815     // source beginning (one byte per char in a compressed string) to the value offset.
1816     __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1817 
1818     // Start the loop that copies the String's bytes into the char[] array.
1819     __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1820     __ Bind(&copy_loop);
1821     __ jecxz(&done);
1822     // Use EAX temporary (convert byte from ESI to word).
1823     // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1824     __ movzxb(EAX, Address(ESI, 0));
1825     __ movw(Address(EDI, 0), EAX);
1826     __ leal(EDI, Address(EDI, char_size));
1827     __ leal(ESI, Address(ESI, c_char_size));
1828     // TODO: Add support for LOOP to X86Assembler.
1829     __ subl(ECX, Immediate(1));
1830     __ jmp(&copy_loop);
1831     __ Bind(&copy_uncompressed);
1832   }
1833 
1834   // Do the copy for an uncompressed string.
1835   // Compute the address of the destination buffer.
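  // Below, EDI = &dst[dstBegin] and ESI = &obj.value[srcBegin]; rep movsw then copies
  // ECX 16-bit chars from ESI to EDI, advancing both pointers as it goes.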
1836 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); 1837 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset)); 1838 __ rep_movsw(); 1839 1840 __ Bind(&done); 1841 if (mirror::kUseStringCompression) { 1842 // Restore EAX. 1843 __ popl(EAX); 1844 __ cfi().AdjustCFAOffset(-stack_adjust); 1845 } 1846 // Restore ECX. 1847 __ popl(ECX); 1848 __ cfi().AdjustCFAOffset(-stack_adjust); 1849 } 1850 1851 static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) { 1852 Register address = locations->InAt(0).AsRegisterPairLow<Register>(); 1853 Location out_loc = locations->Out(); 1854 // x86 allows unaligned access. We do not have to check the input or use specific instructions 1855 // to avoid a SIGBUS. 1856 switch (size) { 1857 case Primitive::kPrimByte: 1858 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0)); 1859 break; 1860 case Primitive::kPrimShort: 1861 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0)); 1862 break; 1863 case Primitive::kPrimInt: 1864 __ movl(out_loc.AsRegister<Register>(), Address(address, 0)); 1865 break; 1866 case Primitive::kPrimLong: 1867 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0)); 1868 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4)); 1869 break; 1870 default: 1871 LOG(FATAL) << "Type not recognized for peek: " << size; 1872 UNREACHABLE(); 1873 } 1874 } 1875 1876 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) { 1877 CreateLongToIntLocations(arena_, invoke); 1878 } 1879 1880 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) { 1881 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); 1882 } 1883 1884 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) { 1885 CreateLongToIntLocations(arena_, invoke); 1886 } 1887 1888 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) { 1889 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); 1890 } 1891 1892 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) { 1893 CreateLongToLongLocations(arena_, invoke); 1894 } 1895 1896 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) { 1897 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); 1898 } 1899 1900 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) { 1901 CreateLongToIntLocations(arena_, invoke); 1902 } 1903 1904 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) { 1905 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); 1906 } 1907 1908 static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size, 1909 HInvoke* invoke) { 1910 LocationSummary* locations = new (arena) LocationSummary(invoke, 1911 LocationSummary::kNoCall, 1912 kIntrinsified); 1913 locations->SetInAt(0, Location::RequiresRegister()); 1914 HInstruction* value = invoke->InputAt(1); 1915 if (size == Primitive::kPrimByte) { 1916 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value)); 1917 } else { 1918 locations->SetInAt(1, Location::RegisterOrConstant(value)); 1919 } 1920 } 1921 1922 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) { 1923 Register address = locations->InAt(0).AsRegisterPairLow<Register>(); 1924 Location value_loc = locations->InAt(1); 1925 // x86 allows unaligned access. 
We do not have to check the input or use specific instructions 1926 // to avoid a SIGBUS. 1927 switch (size) { 1928 case Primitive::kPrimByte: 1929 if (value_loc.IsConstant()) { 1930 __ movb(Address(address, 0), 1931 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); 1932 } else { 1933 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>()); 1934 } 1935 break; 1936 case Primitive::kPrimShort: 1937 if (value_loc.IsConstant()) { 1938 __ movw(Address(address, 0), 1939 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); 1940 } else { 1941 __ movw(Address(address, 0), value_loc.AsRegister<Register>()); 1942 } 1943 break; 1944 case Primitive::kPrimInt: 1945 if (value_loc.IsConstant()) { 1946 __ movl(Address(address, 0), 1947 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); 1948 } else { 1949 __ movl(Address(address, 0), value_loc.AsRegister<Register>()); 1950 } 1951 break; 1952 case Primitive::kPrimLong: 1953 if (value_loc.IsConstant()) { 1954 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue(); 1955 __ movl(Address(address, 0), Immediate(Low32Bits(value))); 1956 __ movl(Address(address, 4), Immediate(High32Bits(value))); 1957 } else { 1958 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>()); 1959 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>()); 1960 } 1961 break; 1962 default: 1963 LOG(FATAL) << "Type not recognized for poke: " << size; 1964 UNREACHABLE(); 1965 } 1966 } 1967 1968 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) { 1969 CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke); 1970 } 1971 1972 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) { 1973 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); 1974 } 1975 1976 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) { 1977 CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke); 1978 } 1979 1980 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) { 1981 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); 1982 } 1983 1984 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) { 1985 CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke); 1986 } 1987 1988 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) { 1989 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); 1990 } 1991 1992 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) { 1993 CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke); 1994 } 1995 1996 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) { 1997 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); 1998 } 1999 2000 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) { 2001 LocationSummary* locations = new (arena_) LocationSummary(invoke, 2002 LocationSummary::kNoCall, 2003 kIntrinsified); 2004 locations->SetOut(Location::RequiresRegister()); 2005 } 2006 2007 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) { 2008 Register out = invoke->GetLocations()->Out().AsRegister<Register>(); 2009 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>())); 2010 } 2011 2012 static void GenUnsafeGet(HInvoke* invoke, 2013 Primitive::Type type, 2014 bool is_volatile, 2015 CodeGeneratorX86* codegen) { 2016 X86Assembler* assembler = 
down_cast<X86Assembler*>(codegen->GetAssembler());
2017   LocationSummary* locations = invoke->GetLocations();
2018   Location base_loc = locations->InAt(1);
2019   Register base = base_loc.AsRegister<Register>();
2020   Location offset_loc = locations->InAt(2);
2021   Register offset = offset_loc.AsRegisterPairLow<Register>();
2022   Location output_loc = locations->Out();
2023 
2024   switch (type) {
2025     case Primitive::kPrimInt: {
2026       Register output = output_loc.AsRegister<Register>();
2027       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2028       break;
2029     }
2030 
2031     case Primitive::kPrimNot: {
2032       Register output = output_loc.AsRegister<Register>();
2033       if (kEmitCompilerReadBarrier) {
2034         if (kUseBakerReadBarrier) {
2035           Address src(base, offset, ScaleFactor::TIMES_1, 0);
2036           codegen->GenerateReferenceLoadWithBakerReadBarrier(
2037               invoke, output_loc, base, src, /* needs_null_check */ false);
2038         } else {
2039           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2040           codegen->GenerateReadBarrierSlow(
2041               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
2042         }
2043       } else {
2044         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2045         __ MaybeUnpoisonHeapReference(output);
2046       }
2047       break;
2048     }
2049 
2050     case Primitive::kPrimLong: {
2051       Register output_lo = output_loc.AsRegisterPairLow<Register>();
2052       Register output_hi = output_loc.AsRegisterPairHigh<Register>();
2053       if (is_volatile) {
2054         // Need to use an XMM register to read the 64-bit value atomically.
2055         XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2056         __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
2057         __ movd(output_lo, temp);
2058         __ psrlq(temp, Immediate(32));
2059         __ movd(output_hi, temp);
2060       } else {
2061         __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
2062         __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
2063       }
2064     }
2065     break;
2066 
2067     default:
2068       LOG(FATAL) << "Unsupported op size " << type;
2069       UNREACHABLE();
2070   }
2071 }
2072 
2073 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
2074                                           HInvoke* invoke,
2075                                           Primitive::Type type,
2076                                           bool is_volatile) {
2077   bool can_call = kEmitCompilerReadBarrier &&
2078       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
2079        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
2080   LocationSummary* locations = new (arena) LocationSummary(invoke,
2081                                                            (can_call
2082                                                                 ? LocationSummary::kCallOnSlowPath
2083                                                                 : LocationSummary::kNoCall),
2084                                                            kIntrinsified);
2085   if (can_call && kUseBakerReadBarrier) {
2086     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2087   }
2088   locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
2089   locations->SetInAt(1, Location::RequiresRegister());
2090   locations->SetInAt(2, Location::RequiresRegister());
2091   if (type == Primitive::kPrimLong) {
2092     if (is_volatile) {
2093       // Need an XMM temporary to read the volatile long atomically.
2094       locations->AddTemp(Location::RequiresFpuRegister());
2095       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2096     } else {
2097       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2098     }
2099   } else {
2100     locations->SetOut(Location::RequiresRegister(),
2101                       (can_call ?
Location::kOutputOverlap : Location::kNoOutputOverlap)); 2102 } 2103 } 2104 2105 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) { 2106 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false); 2107 } 2108 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) { 2109 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true); 2110 } 2111 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) { 2112 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false); 2113 } 2114 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 2115 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true); 2116 } 2117 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) { 2118 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false); 2119 } 2120 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 2121 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true); 2122 } 2123 2124 2125 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) { 2126 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); 2127 } 2128 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) { 2129 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); 2130 } 2131 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) { 2132 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); 2133 } 2134 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 2135 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); 2136 } 2137 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) { 2138 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); 2139 } 2140 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 2141 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); 2142 } 2143 2144 2145 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena, 2146 Primitive::Type type, 2147 HInvoke* invoke, 2148 bool is_volatile) { 2149 LocationSummary* locations = new (arena) LocationSummary(invoke, 2150 LocationSummary::kNoCall, 2151 kIntrinsified); 2152 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 2153 locations->SetInAt(1, Location::RequiresRegister()); 2154 locations->SetInAt(2, Location::RequiresRegister()); 2155 locations->SetInAt(3, Location::RequiresRegister()); 2156 if (type == Primitive::kPrimNot) { 2157 // Need temp registers for card-marking. 2158 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 2159 // Ensure the value is in a byte register. 
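    // (On x86-32 only EAX, EBX, ECX and EDX are byte-addressable; MarkGCCard writes the
    // card with a byte store from this temp, hence the fixed ECX.)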
2160     locations->AddTemp(Location::RegisterLocation(ECX));
2161   } else if (type == Primitive::kPrimLong && is_volatile) {
2162     locations->AddTemp(Location::RequiresFpuRegister());
2163     locations->AddTemp(Location::RequiresFpuRegister());
2164   }
2165 }
2166 
2167 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
2168   CreateIntIntIntIntToVoidPlusTempsLocations(
2169       arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
2170 }
2171 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2172   CreateIntIntIntIntToVoidPlusTempsLocations(
2173       arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
2174 }
2175 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2176   CreateIntIntIntIntToVoidPlusTempsLocations(
2177       arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true);
2178 }
2179 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
2180   CreateIntIntIntIntToVoidPlusTempsLocations(
2181       arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
2182 }
2183 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2184   CreateIntIntIntIntToVoidPlusTempsLocations(
2185       arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
2186 }
2187 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2188   CreateIntIntIntIntToVoidPlusTempsLocations(
2189       arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true);
2190 }
2191 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
2192   CreateIntIntIntIntToVoidPlusTempsLocations(
2193       arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
2194 }
2195 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2196   CreateIntIntIntIntToVoidPlusTempsLocations(
2197       arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
2198 }
2199 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2200   CreateIntIntIntIntToVoidPlusTempsLocations(
2201       arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true);
2202 }
2203 
2204 // Ordered puts need no special handling here: ordered only requires an AnyStore barrier,
2205 // which the x86 memory model already provides.
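// Volatile puts, by contrast, also need an AnyAny (StoreLoad) barrier after the store;
// that is what the codegen->MemoryFence() call in GenUnsafePut below emits when
// `is_volatile` is set.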
2206 static void GenUnsafePut(LocationSummary* locations, 2207 Primitive::Type type, 2208 bool is_volatile, 2209 CodeGeneratorX86* codegen) { 2210 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); 2211 Register base = locations->InAt(1).AsRegister<Register>(); 2212 Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); 2213 Location value_loc = locations->InAt(3); 2214 2215 if (type == Primitive::kPrimLong) { 2216 Register value_lo = value_loc.AsRegisterPairLow<Register>(); 2217 Register value_hi = value_loc.AsRegisterPairHigh<Register>(); 2218 if (is_volatile) { 2219 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 2220 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); 2221 __ movd(temp1, value_lo); 2222 __ movd(temp2, value_hi); 2223 __ punpckldq(temp1, temp2); 2224 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1); 2225 } else { 2226 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo); 2227 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi); 2228 } 2229 } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) { 2230 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2231 __ movl(temp, value_loc.AsRegister<Register>()); 2232 __ PoisonHeapReference(temp); 2233 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp); 2234 } else { 2235 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>()); 2236 } 2237 2238 if (is_volatile) { 2239 codegen->MemoryFence(); 2240 } 2241 2242 if (type == Primitive::kPrimNot) { 2243 bool value_can_be_null = true; // TODO: Worth finding out this information? 2244 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), 2245 locations->GetTemp(1).AsRegister<Register>(), 2246 base, 2247 value_loc.AsRegister<Register>(), 2248 value_can_be_null); 2249 } 2250 } 2251 2252 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) { 2253 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); 2254 } 2255 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) { 2256 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); 2257 } 2258 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) { 2259 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_); 2260 } 2261 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) { 2262 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); 2263 } 2264 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 2265 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); 2266 } 2267 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 2268 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_); 2269 } 2270 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) { 2271 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); 2272 } 2273 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { 2274 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); 2275 } 2276 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { 2277 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile 
*/ true, codegen_);
2278 }
2279 
2280 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
2281                                        Primitive::Type type,
2282                                        HInvoke* invoke) {
2283   bool can_call = kEmitCompilerReadBarrier &&
2284       kUseBakerReadBarrier &&
2285       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
2286   LocationSummary* locations = new (arena) LocationSummary(invoke,
2287                                                            (can_call
2288                                                                 ? LocationSummary::kCallOnSlowPath
2289                                                                 : LocationSummary::kNoCall),
2290                                                            kIntrinsified);
2291   locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
2292   locations->SetInAt(1, Location::RequiresRegister());
2293   // Offset is a long, but in 32-bit mode, we only need the low word.
2294   // Can we update the invoke here to remove a TypeConvert to Long?
2295   locations->SetInAt(2, Location::RequiresRegister());
2296   // Expected value must be in EAX or EDX:EAX.
2297   // For long, new value must be in ECX:EBX.
2298   if (type == Primitive::kPrimLong) {
2299     locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
2300     locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
2301   } else {
2302     locations->SetInAt(3, Location::RegisterLocation(EAX));
2303     locations->SetInAt(4, Location::RequiresRegister());
2304   }
2305 
2306   // Force a byte register for the output.
2307   locations->SetOut(Location::RegisterLocation(EAX));
2308   if (type == Primitive::kPrimNot) {
2309     // Need temporary registers for card-marking, and possibly for
2310     // (Baker) read barrier.
2311     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2312     // Need a byte register for marking.
2313     locations->AddTemp(Location::RegisterLocation(ECX));
2314   }
2315 }
2316 
2317 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
2318   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
2319 }
2320 
2321 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
2322   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
2323 }
2324 
2325 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
2326   // The only read barrier implementation supporting the
2327   // UnsafeCASObject intrinsic is the Baker-style read barrier.
2328   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2329     return;
2330   }
2331 
2332   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
2333 }
2334 
2335 static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
2336   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2337   LocationSummary* locations = invoke->GetLocations();
2338 
2339   Register base = locations->InAt(1).AsRegister<Register>();
2340   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2341   Location out = locations->Out();
2342   DCHECK_EQ(out.AsRegister<Register>(), EAX);
2343 
2344   // The address of the field within the holding object.
2345   Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
2346 
2347   if (type == Primitive::kPrimNot) {
2348     // The only read barrier implementation supporting the
2349     // UnsafeCASObject intrinsic is the Baker-style read barrier.
2350     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2351 
2352     Location temp1_loc = locations->GetTemp(0);
2353     Register temp1 = temp1_loc.AsRegister<Register>();
2354     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2355 
2356     Register expected = locations->InAt(3).AsRegister<Register>();
2357     // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
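    // CMPXCHG semantics: if (EAX == *dest) { ZF = 1; *dest = src; }
    //                    else              { ZF = 0; EAX = *dest; }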
2358     DCHECK_EQ(expected, EAX);
2359     Register value = locations->InAt(4).AsRegister<Register>();
2360 
2361     // Mark card for object assuming new value is stored.
2362     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2363     codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
2364 
2365     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2366       // Need to make sure the reference stored in the field is a to-space
2367       // one before attempting the CAS, or the CAS could fail incorrectly.
2368       codegen->GenerateReferenceLoadWithBakerReadBarrier(
2369           invoke,
2370           temp1_loc,  // Unused, used only as a "temporary" within the read barrier.
2371           base,
2372           field_addr,
2373           /* needs_null_check */ false,
2374           /* always_update_field */ true,
2375           &temp2);
2376     }
2377 
2378     bool base_equals_value = (base == value);
2379     if (kPoisonHeapReferences) {
2380       if (base_equals_value) {
2381         // If `base` and `value` are the same register location, move
2382         // `value` to a temporary register. This way, poisoning
2383         // `value` won't invalidate `base`.
2384         value = temp1;
2385         __ movl(value, base);
2386       }
2387 
2388       // Check that the register allocator did not assign the location
2389       // of `expected` (EAX) to `value` or to `base`, so that heap
2390       // poisoning (when enabled) works as intended below.
2391       // - If `value` were equal to `expected`, both references would
2392       //   be poisoned twice, meaning they would not be poisoned at
2393       //   all, as heap poisoning uses address negation.
2394       // - If `base` were equal to `expected`, poisoning `expected`
2395       //   would invalidate `base`.
2396       DCHECK_NE(value, expected);
2397       DCHECK_NE(base, expected);
2398 
2399       __ PoisonHeapReference(expected);
2400       __ PoisonHeapReference(value);
2401     }
2402 
2403     __ LockCmpxchgl(field_addr, value);
2404 
2405     // LOCK CMPXCHG has full barrier semantics, and we don't need
2406     // scheduling barriers at this time.
2407 
2408     // Convert ZF into the Boolean result.
2409     __ setb(kZero, out.AsRegister<Register>());
2410     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2411 
2412     // If heap poisoning is enabled, we need to unpoison the values
2413     // that were poisoned earlier.
2414     if (kPoisonHeapReferences) {
2415       if (base_equals_value) {
2416         // `value` has been moved to a temporary register, no need to
2417         // unpoison it.
2418       } else {
2419         // Ensure `value` is different from `out`, so that unpoisoning
2420         // the former does not invalidate the latter.
2421         DCHECK_NE(value, out.AsRegister<Register>());
2422         __ UnpoisonHeapReference(value);
2423       }
2424       // Do not unpoison the reference contained in register
2425       // `expected`, as it is the same as register `out` (EAX).
2426     }
2427   } else {
2428     if (type == Primitive::kPrimInt) {
2429       // Ensure the expected value is in EAX (required by the CMPXCHG
2430       // instruction).
2431       DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
2432       __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>());
2433     } else if (type == Primitive::kPrimLong) {
2434       // Ensure the expected value is in EAX:EDX and that the new
2435       // value is in EBX:ECX (required by the CMPXCHG8B instruction).
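      // CMPXCHG8B semantics: if (EDX:EAX == *m64) { ZF = 1; *m64 = ECX:EBX; }
      //                      else                 { ZF = 0; EDX:EAX = *m64; }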
2436       DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
2437       DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
2438       DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
2439       DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
2440       __ LockCmpxchg8b(field_addr);
2441     } else {
2442       LOG(FATAL) << "Unexpected CAS type " << type;
2443     }
2444 
2445     // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2446     // don't need scheduling barriers at this time.
2447 
2448     // Convert ZF into the Boolean result.
2449     __ setb(kZero, out.AsRegister<Register>());
2450     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2451   }
2452 }
2453 
2454 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2455   GenCAS(Primitive::kPrimInt, invoke, codegen_);
2456 }
2457 
2458 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2459   GenCAS(Primitive::kPrimLong, invoke, codegen_);
2460 }
2461 
2462 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2463   // The only read barrier implementation supporting the
2464   // UnsafeCASObject intrinsic is the Baker-style read barrier.
2465   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2466 
2467   GenCAS(Primitive::kPrimNot, invoke, codegen_);
2468 }
2469 
2470 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2471   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2472                                                             LocationSummary::kNoCall,
2473                                                             kIntrinsified);
2474   locations->SetInAt(0, Location::RequiresRegister());
2475   locations->SetOut(Location::SameAsFirstInput());
2476   locations->AddTemp(Location::RequiresRegister());
2477 }
2478 
2479 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2480                      X86Assembler* assembler) {
2481   Immediate imm_shift(shift);
2482   Immediate imm_mask(mask);
2483   __ movl(temp, reg);
2484   __ shrl(reg, imm_shift);
2485   __ andl(temp, imm_mask);
2486   __ andl(reg, imm_mask);
2487   __ shll(temp, imm_shift);
2488   __ orl(reg, temp);
2489 }
2490 
2491 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2492   X86Assembler* assembler = GetAssembler();
2493   LocationSummary* locations = invoke->GetLocations();
2494 
2495   Register reg = locations->InAt(0).AsRegister<Register>();
2496   Register temp = locations->GetTemp(0).AsRegister<Register>();
2497 
2498   /*
2499    * Use one bswap instruction to reverse the byte order first, then use 3 rounds of
2500    * bit swapping to reverse the bits of x. Using bswap saves instructions compared
2501    * to the generic luni implementation, which needs 5 rounds of bit swapping.
2502 * x = bswap x 2503 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; 2504 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; 2505 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; 2506 */ 2507 __ bswapl(reg); 2508 SwapBits(reg, temp, 1, 0x55555555, assembler); 2509 SwapBits(reg, temp, 2, 0x33333333, assembler); 2510 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); 2511 } 2512 2513 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) { 2514 LocationSummary* locations = new (arena_) LocationSummary(invoke, 2515 LocationSummary::kNoCall, 2516 kIntrinsified); 2517 locations->SetInAt(0, Location::RequiresRegister()); 2518 locations->SetOut(Location::SameAsFirstInput()); 2519 locations->AddTemp(Location::RequiresRegister()); 2520 } 2521 2522 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) { 2523 X86Assembler* assembler = GetAssembler(); 2524 LocationSummary* locations = invoke->GetLocations(); 2525 2526 Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>(); 2527 Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>(); 2528 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2529 2530 // We want to swap high/low, then bswap each one, and then do the same 2531 // as a 32 bit reverse. 2532 // Exchange high and low. 2533 __ movl(temp, reg_low); 2534 __ movl(reg_low, reg_high); 2535 __ movl(reg_high, temp); 2536 2537 // bit-reverse low 2538 __ bswapl(reg_low); 2539 SwapBits(reg_low, temp, 1, 0x55555555, assembler); 2540 SwapBits(reg_low, temp, 2, 0x33333333, assembler); 2541 SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler); 2542 2543 // bit-reverse high 2544 __ bswapl(reg_high); 2545 SwapBits(reg_high, temp, 1, 0x55555555, assembler); 2546 SwapBits(reg_high, temp, 2, 0x33333333, assembler); 2547 SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler); 2548 } 2549 2550 static void CreateBitCountLocations( 2551 ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) { 2552 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) { 2553 // Do nothing if there is no popcnt support. This results in generating 2554 // a call for the intrinsic rather than direct code. 2555 return; 2556 } 2557 LocationSummary* locations = new (arena) LocationSummary(invoke, 2558 LocationSummary::kNoCall, 2559 kIntrinsified); 2560 if (is_long) { 2561 locations->AddTemp(Location::RequiresRegister()); 2562 } 2563 locations->SetInAt(0, Location::Any()); 2564 locations->SetOut(Location::RequiresRegister()); 2565 } 2566 2567 static void GenBitCount(X86Assembler* assembler, 2568 CodeGeneratorX86* codegen, 2569 HInvoke* invoke, bool is_long) { 2570 LocationSummary* locations = invoke->GetLocations(); 2571 Location src = locations->InAt(0); 2572 Register out = locations->Out().AsRegister<Register>(); 2573 2574 if (invoke->InputAt(0)->IsConstant()) { 2575 // Evaluate this at compile time. 2576 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2577 int32_t result = is_long 2578 ? POPCOUNT(static_cast<uint64_t>(value)) 2579 : POPCOUNT(static_cast<uint32_t>(value)); 2580 codegen->Load32BitValue(out, result); 2581 return; 2582 } 2583 2584 // Handle the non-constant cases. 2585 if (!is_long) { 2586 if (src.IsRegister()) { 2587 __ popcntl(out, src.AsRegister<Register>()); 2588 } else { 2589 DCHECK(src.IsStackSlot()); 2590 __ popcntl(out, Address(ESP, src.GetStackIndex())); 2591 } 2592 } else { 2593 // The 64-bit case needs to worry about two parts. 
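    // popcount(hi:lo) == popcount(hi) + popcount(lo), so emit one popcntl per 32-bit
    // half and add the results; the sum cannot overflow (at most 32 + 32 = 64).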
2594 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2595 if (src.IsRegisterPair()) { 2596 __ popcntl(temp, src.AsRegisterPairLow<Register>()); 2597 __ popcntl(out, src.AsRegisterPairHigh<Register>()); 2598 } else { 2599 DCHECK(src.IsDoubleStackSlot()); 2600 __ popcntl(temp, Address(ESP, src.GetStackIndex())); 2601 __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize))); 2602 } 2603 __ addl(out, temp); 2604 } 2605 } 2606 2607 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) { 2608 CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false); 2609 } 2610 2611 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) { 2612 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false); 2613 } 2614 2615 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) { 2616 CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true); 2617 } 2618 2619 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) { 2620 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true); 2621 } 2622 2623 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) { 2624 LocationSummary* locations = new (arena) LocationSummary(invoke, 2625 LocationSummary::kNoCall, 2626 kIntrinsified); 2627 if (is_long) { 2628 locations->SetInAt(0, Location::RequiresRegister()); 2629 } else { 2630 locations->SetInAt(0, Location::Any()); 2631 } 2632 locations->SetOut(Location::RequiresRegister()); 2633 } 2634 2635 static void GenLeadingZeros(X86Assembler* assembler, 2636 CodeGeneratorX86* codegen, 2637 HInvoke* invoke, bool is_long) { 2638 LocationSummary* locations = invoke->GetLocations(); 2639 Location src = locations->InAt(0); 2640 Register out = locations->Out().AsRegister<Register>(); 2641 2642 if (invoke->InputAt(0)->IsConstant()) { 2643 // Evaluate this at compile time. 2644 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2645 if (value == 0) { 2646 value = is_long ? 64 : 32; 2647 } else { 2648 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value)); 2649 } 2650 codegen->Load32BitValue(out, value); 2651 return; 2652 } 2653 2654 // Handle the non-constant cases. 2655 if (!is_long) { 2656 if (src.IsRegister()) { 2657 __ bsrl(out, src.AsRegister<Register>()); 2658 } else { 2659 DCHECK(src.IsStackSlot()); 2660 __ bsrl(out, Address(ESP, src.GetStackIndex())); 2661 } 2662 2663 // BSR sets ZF if the input was zero, and the output is undefined. 2664 NearLabel all_zeroes, done; 2665 __ j(kEqual, &all_zeroes); 2666 2667 // Correct the result from BSR to get the final CLZ result. 2668 __ xorl(out, Immediate(31)); 2669 __ jmp(&done); 2670 2671 // Fix the zero case with the expected result. 2672 __ Bind(&all_zeroes); 2673 __ movl(out, Immediate(32)); 2674 2675 __ Bind(&done); 2676 return; 2677 } 2678 2679 // 64 bit case needs to worry about both parts of the register. 2680 DCHECK(src.IsRegisterPair()); 2681 Register src_lo = src.AsRegisterPairLow<Register>(); 2682 Register src_hi = src.AsRegisterPairHigh<Register>(); 2683 NearLabel handle_low, done, all_zeroes; 2684 2685 // Is the high word zero? 2686 __ testl(src_hi, src_hi); 2687 __ j(kEqual, &handle_low); 2688 2689 // High word is not zero. We know that the BSR result is defined in this case. 2690 __ bsrl(out, src_hi); 2691 2692 // Correct the result from BSR to get the final CLZ result. 2693 __ xorl(out, Immediate(31)); 2694 __ jmp(&done); 2695 2696 // High word was zero. 
We have to compute the low word count and add 32. 2697 __ Bind(&handle_low); 2698 __ bsrl(out, src_lo); 2699 __ j(kEqual, &all_zeroes); 2700 2701 // We had a valid result. Use an XOR to both correct the result and add 32. 2702 __ xorl(out, Immediate(63)); 2703 __ jmp(&done); 2704 2705 // All zero case. 2706 __ Bind(&all_zeroes); 2707 __ movl(out, Immediate(64)); 2708 2709 __ Bind(&done); 2710 } 2711 2712 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 2713 CreateLeadingZeroLocations(arena_, invoke, /* is_long */ false); 2714 } 2715 2716 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 2717 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); 2718 } 2719 2720 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 2721 CreateLeadingZeroLocations(arena_, invoke, /* is_long */ true); 2722 } 2723 2724 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 2725 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); 2726 } 2727 2728 static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) { 2729 LocationSummary* locations = new (arena) LocationSummary(invoke, 2730 LocationSummary::kNoCall, 2731 kIntrinsified); 2732 if (is_long) { 2733 locations->SetInAt(0, Location::RequiresRegister()); 2734 } else { 2735 locations->SetInAt(0, Location::Any()); 2736 } 2737 locations->SetOut(Location::RequiresRegister()); 2738 } 2739 2740 static void GenTrailingZeros(X86Assembler* assembler, 2741 CodeGeneratorX86* codegen, 2742 HInvoke* invoke, bool is_long) { 2743 LocationSummary* locations = invoke->GetLocations(); 2744 Location src = locations->InAt(0); 2745 Register out = locations->Out().AsRegister<Register>(); 2746 2747 if (invoke->InputAt(0)->IsConstant()) { 2748 // Evaluate this at compile time. 2749 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2750 if (value == 0) { 2751 value = is_long ? 64 : 32; 2752 } else { 2753 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value)); 2754 } 2755 codegen->Load32BitValue(out, value); 2756 return; 2757 } 2758 2759 // Handle the non-constant cases. 2760 if (!is_long) { 2761 if (src.IsRegister()) { 2762 __ bsfl(out, src.AsRegister<Register>()); 2763 } else { 2764 DCHECK(src.IsStackSlot()); 2765 __ bsfl(out, Address(ESP, src.GetStackIndex())); 2766 } 2767 2768 // BSF sets ZF if the input was zero, and the output is undefined. 2769 NearLabel done; 2770 __ j(kNotEqual, &done); 2771 2772 // Fix the zero case with the expected result. 2773 __ movl(out, Immediate(32)); 2774 2775 __ Bind(&done); 2776 return; 2777 } 2778 2779 // 64 bit case needs to worry about both parts of the register. 2780 DCHECK(src.IsRegisterPair()); 2781 Register src_lo = src.AsRegisterPairLow<Register>(); 2782 Register src_hi = src.AsRegisterPairHigh<Register>(); 2783 NearLabel done, all_zeroes; 2784 2785 // If the low word is zero, then ZF will be set. If not, we have the answer. 2786 __ bsfl(out, src_lo); 2787 __ j(kNotEqual, &done); 2788 2789 // Low word was zero. We have to compute the high word count and add 32. 2790 __ bsfl(out, src_hi); 2791 __ j(kEqual, &all_zeroes); 2792 2793 // We had a valid result. Add 32 to account for the low word being zero. 2794 __ addl(out, Immediate(32)); 2795 __ jmp(&done); 2796 2797 // All zero case. 
2798   __ Bind(&all_zeroes);
2799   __ movl(out, Immediate(64));
2800 
2801   __ Bind(&done);
2802 }
2803 
2804 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2805   CreateTrailingZeroLocations(arena_, invoke, /* is_long */ false);
2806 }
2807 
2808 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2809   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
2810 }
2811 
2812 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2813   CreateTrailingZeroLocations(arena_, invoke, /* is_long */ true);
2814 }
2815 
2816 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2817   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
2818 }
2819 
2820 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
2821   return instruction->InputAt(input0) == instruction->InputAt(input1);
2822 }
2823 
2824 // Compute base address for the System.arraycopy intrinsic in `base`.
2825 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
2826                                           Primitive::Type type,
2827                                           const Register& array,
2828                                           const Location& pos,
2829                                           const Register& base) {
2830   // This routine is only used by the SystemArrayCopy intrinsic at the
2831   // moment. We could additionally allow Primitive::kPrimChar as `type`
2832   // to implement the SystemArrayCopyChar intrinsic.
2833   DCHECK_EQ(type, Primitive::kPrimNot);
2834   const int32_t element_size = Primitive::ComponentSize(type);
2835   const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
2836   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2837 
2838   if (pos.IsConstant()) {
2839     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
2840     __ leal(base, Address(array, element_size * constant + data_offset));
2841   } else {
2842     __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
2843   }
2844 }
2845 
2846 // Compute end source address for the System.arraycopy intrinsic in `end`.
2847 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
2848                                          Primitive::Type type,
2849                                          const Location& copy_length,
2850                                          const Register& base,
2851                                          const Register& end) {
2852   // This routine is only used by the SystemArrayCopy intrinsic at the
2853   // moment. We could additionally allow Primitive::kPrimChar as `type`
2854   // to implement the SystemArrayCopyChar intrinsic.
2855   DCHECK_EQ(type, Primitive::kPrimNot);
2856   const int32_t element_size = Primitive::ComponentSize(type);
2857   const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
2858 
2859   if (copy_length.IsConstant()) {
2860     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2861     __ leal(end, Address(base, element_size * constant));
2862   } else {
2863     __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
2864   }
2865 }
2866 
2867 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
2868   // The only read barrier implementation supporting the
2869   // SystemArrayCopy intrinsic is the Baker-style read barrier.
2870   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2871     return;
2872   }
2873 
2874   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2875   if (invoke->GetLocations() != nullptr) {
2876     // Need a byte register for marking.
2877     invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
2878 
2879     static constexpr size_t kSrc = 0;
2880     static constexpr size_t kSrcPos = 1;
2881     static constexpr size_t kDest = 2;
2882     static constexpr size_t kDestPos = 3;
2883     static constexpr size_t kLength = 4;
2884 
2885     if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
2886         !invoke->InputAt(kDestPos)->IsIntConstant() &&
2887         !invoke->InputAt(kLength)->IsIntConstant()) {
2888       if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
2889           !IsSameInput(invoke, kSrcPos, kLength) &&
2890           !IsSameInput(invoke, kDestPos, kLength) &&
2891           !IsSameInput(invoke, kSrc, kDest)) {
2892         // Not enough registers, make the length also take a stack slot.
2893         invoke->GetLocations()->SetInAt(kLength, Location::Any());
2894       }
2895     }
2896   }
2897 }
2898 
2899 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
2900   // The only read barrier implementation supporting the
2901   // SystemArrayCopy intrinsic is the Baker-style read barrier.
2902   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2903 
2904   X86Assembler* assembler = GetAssembler();
2905   LocationSummary* locations = invoke->GetLocations();
2906 
2907   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2908   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2909   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2910   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2911   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2912 
2913   Register src = locations->InAt(0).AsRegister<Register>();
2914   Location src_pos = locations->InAt(1);
2915   Register dest = locations->InAt(2).AsRegister<Register>();
2916   Location dest_pos = locations->InAt(3);
2917   Location length_arg = locations->InAt(4);
2918   Location length = length_arg;
2919   Location temp1_loc = locations->GetTemp(0);
2920   Register temp1 = temp1_loc.AsRegister<Register>();
2921   Location temp2_loc = locations->GetTemp(1);
2922   Register temp2 = temp2_loc.AsRegister<Register>();
2923 
2924   SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
2925   codegen_->AddSlowPath(intrinsic_slow_path);
2926 
2927   NearLabel conditions_on_positions_validated;
2928   SystemArrayCopyOptimizations optimizations(invoke);
2929 
2930   // If source and destination are the same, we go to the slow path whenever a forward
2931   // copy could clobber not-yet-copied elements, i.e. when src_pos < dest_pos.
2932   if (src_pos.IsConstant()) {
2933     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2934     if (dest_pos.IsConstant()) {
2935       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2936       if (optimizations.GetDestinationIsSource()) {
2937         // Checked when building locations.
        DCHECK_GE(src_pos_constant, dest_pos_constant);
      } else if (src_pos_constant < dest_pos_constant) {
        __ cmpl(src, dest);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      }
    } else {
      if (!optimizations.GetDestinationIsSource()) {
        __ cmpl(src, dest);
        __ j(kNotEqual, &conditions_on_positions_validated);
      }
      __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
      __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
    }
  } else {
    if (!optimizations.GetDestinationIsSource()) {
      __ cmpl(src, dest);
      __ j(kNotEqual, &conditions_on_positions_validated);
    }
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    } else {
      __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    }
  }

  __ Bind(&conditions_on_positions_validated);

  if (!optimizations.GetSourceIsNotNull()) {
    // Bail out if the source is null.
    __ testl(src, src);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
    // Bail out if the destination is null.
    __ testl(dest, dest);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  Location temp3_loc = locations->GetTemp(2);
  Register temp3 = temp3_loc.AsRegister<Register>();
  if (length.IsStackSlot()) {
    __ movl(temp3, Address(ESP, length.GetStackIndex()));
    length = Location::RegisterLocation(temp3);
  }

  // If the length is negative, bail out.
  // The constant case has already been checked in the LocationsBuilder.
  if (!length.IsConstant() &&
      !optimizations.GetCountIsSourceLength() &&
      !optimizations.GetCountIsDestinationLength()) {
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, intrinsic_slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckPosition(assembler,
                src_pos,
                src,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsSourceLength());

  // Validity checks: dest.
  CheckPosition(assembler,
                dest_pos,
                dest,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsDestinationLength());

  if (!optimizations.GetDoesNotNeedTypeCheck()) {
    // Check whether all elements of the source array are assignable to the component
    // type of the destination array. We do two checks: the classes are the same,
    // or the destination is Object[]. If neither check succeeds, we go to the
    // slow path.

    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // /* HeapReference<Class> */ temp1 = src->klass_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
        // Bail out if the source is not a non-primitive array.
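        // (A null component type means `src` is not an array class at all; a
        // component type whose primitive type is not kPrimNot means `src` is
        // a primitive array. Both cases are rejected below.)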
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
        __ testl(temp1, temp1);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `temp1` has been unpoisoned
        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
      } else {
        // /* HeapReference<Class> */ temp1 = src->klass_
        __ movl(temp1, Address(src, class_offset));
        __ MaybeUnpoisonHeapReference(temp1);
        // Bail out if the source is not a non-primitive array.
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        __ movl(temp1, Address(temp1, component_offset));
        __ testl(temp1, temp1);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(temp1);
      }
      __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      if (length.Equals(Location::RegisterLocation(temp3))) {
        // When Baker read barriers are enabled, register `temp3`,
        // which in the present case contains the `length` parameter,
        // will be overwritten below. Make the `length` location
        // reference the original stack location; it will be moved
        // back to `temp3` later if necessary.
        DCHECK(length_arg.IsStackSlot());
        length = length_arg;
      }

      // /* HeapReference<Class> */ temp1 = dest->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);

      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
        // Bail out if the destination is not a non-primitive array.
        //
        // Register `temp1` is not trashed by the read barrier emitted
        // by GenerateFieldLoadWithBakerReadBarrier below, as that
        // method produces a call to a ReadBarrierMarkRegX entry point,
        // which saves all potentially live registers, including
        // temporaries such as `temp1`.
        // /* HeapReference<Class> */ temp2 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
        __ testl(temp2, temp2);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `temp2` has been unpoisoned
        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
        __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      }

      // For the same reason given earlier, `temp1` is not trashed by the
      // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
      // /* HeapReference<Class> */ temp2 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
      // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
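      // If the classes are not exactly the same, the copy may still be legal
      // when the destination is Object[]; that case is handled just below.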
      __ cmpl(temp1, temp2);

      if (optimizations.GetDestinationIsTypedObjectArray()) {
        NearLabel do_copy;
        __ j(kEqual, &do_copy);
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
        // We do not need to emit a read barrier for the following
        // heap reference load, as `temp1` is only used in a
        // comparison with null below, and this reference is not
        // kept afterwards.
        // If the component type's super class is null, the component type is
        // java.lang.Object and the destination is Object[].
        __ cmpl(Address(temp1, super_offset), Immediate(0));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
        __ Bind(&do_copy);
      } else {
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      }
    } else {
      // Non-read-barrier code.

      // /* HeapReference<Class> */ temp1 = dest->klass_
      __ movl(temp1, Address(dest, class_offset));
      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
        __ MaybeUnpoisonHeapReference(temp1);
        // Bail out if the destination is not a non-primitive array.
        // /* HeapReference<Class> */ temp2 = temp1->component_type_
        __ movl(temp2, Address(temp1, component_offset));
        __ testl(temp2, temp2);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(temp2);
        __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
        // Re-poison the heap reference to make the compare instruction below
        // compare two poisoned references.
        __ PoisonHeapReference(temp1);
      }

      // Note: if heap poisoning is on, we are comparing two poisoned references here.
      __ cmpl(temp1, Address(src, class_offset));

      if (optimizations.GetDestinationIsTypedObjectArray()) {
        NearLabel do_copy;
        __ j(kEqual, &do_copy);
        __ MaybeUnpoisonHeapReference(temp1);
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        __ movl(temp1, Address(temp1, component_offset));
        __ MaybeUnpoisonHeapReference(temp1);
        __ cmpl(Address(temp1, super_offset), Immediate(0));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
        __ Bind(&do_copy);
      } else {
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      }
    }
  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    // Bail out if the source is not a non-primitive array.
    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // /* HeapReference<Class> */ temp1 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
      // /* HeapReference<Class> */ temp1 = temp1->component_type_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
      __ testl(temp1, temp1);
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      // If heap poisoning is enabled, `temp1` has been unpoisoned
      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
    } else {
      // /* HeapReference<Class> */ temp1 = src->klass_
      __ movl(temp1, Address(src, class_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      // /* HeapReference<Class> */ temp1 = temp1->component_type_
      __ movl(temp1, Address(temp1, component_offset));
      __ testl(temp1, temp1);
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      __ MaybeUnpoisonHeapReference(temp1);
    }
    __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
    __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
  }

  const Primitive::Type type = Primitive::kPrimNot;
  const int32_t element_size = Primitive::ComponentSize(type);

  // Compute the base source address in `temp1`.
  GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);

  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // If needed (i.e. for the fast-path loop), the base destination
    // address is computed later, as `temp2` is used for intermediate
    // computations.

    // Compute the end source address in `temp3`.
    if (length.IsStackSlot()) {
      // Location `length` is again pointing at a stack slot, as
      // register `temp3` (which contained the length parameter
      // earlier) has been overwritten; restore it now.
      DCHECK(length.Equals(length_arg));
      __ movl(temp3, Address(ESP, length.GetStackIndex()));
      length = Location::RegisterLocation(temp3);
    }
    GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);

    // SystemArrayCopy implementation for Baker read barriers (see
    // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
    //
    //   if (src_ptr != end_ptr) {
    //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
    //     if (is_gray) {
    //       // Slow-path copy.
    //       for (size_t i = 0; i != length; ++i) {
    //         dest_array[dest_pos + i] =
    //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
    //       }
    //     } else {
    //       // Fast-path copy.
    //       do {
    //         *dest_ptr++ = *src_ptr++;
    //       } while (src_ptr != end_ptr);
    //     }
    //   }

    NearLabel loop, done;

    // Don't enter the copy loop if `length == 0`.
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);

    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);

    // if (rb_state == ReadBarrier::GrayState())
    //   goto slow_path;
    // At this point, just do the "if" and make sure that flags are preserved until the branch.
    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));

    // Load fence to prevent load-load reordering.
    // Note that this is a no-op, thanks to the x86 memory model.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

    // Slow path used to copy the array when `src` is gray.
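    // (As an illustration of the gray-bit arithmetic above -- assuming, for
    // the sake of the example, a kReadBarrierStateShift of 28; the real value
    // is defined in lock_word.h -- gray_byte_position would be 3,
    // gray_bit_position 4, and test_value 0x10. The flags set by the testb
    // survive the barrier, which emits no code on x86, and are consumed by
    // the branch below.)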
    SlowPathCode* read_barrier_slow_path =
        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
    codegen_->AddSlowPath(read_barrier_slow_path);

    // We have done the "if" of the gray bit check above, now branch based on the flags.
    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());

    // Fast-path copy.
    // Compute the base destination address in `temp2`.
    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
    // Iterate over the arrays and do a raw copy of the objects. We don't need
    // to poison/unpoison. (x86 has no memory-to-memory move, so each element
    // is copied through the stack with a push/pop pair; the CFA offset is
    // adjusted around the push to keep the CFI information correct.)
    __ Bind(&loop);
    __ pushl(Address(temp1, 0));
    __ cfi().AdjustCFAOffset(4);
    __ popl(Address(temp2, 0));
    __ cfi().AdjustCFAOffset(-4);
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);

    __ Bind(read_barrier_slow_path->GetExitLabel());
    __ Bind(&done);
  } else {
    // Non-read-barrier code.
    // Compute the base destination address in `temp2`.
    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
    // Compute the end source address in `temp3`.
    GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
    // Iterate over the arrays and do a raw copy of the objects. We don't need
    // to poison/unpoison.
    NearLabel loop, done;
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);
    __ Bind(&loop);
    __ pushl(Address(temp1, 0));
    __ cfi().AdjustCFAOffset(4);
    __ popl(Address(temp2, 0));
    __ cfi().AdjustCFAOffset(-4);
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);
    __ Bind(&done);
  }

  // We only need one card marking on the destination array.
  codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false);

  __ Bind(intrinsic_slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      Location::RegisterLocation(EAX),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}

void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
  LocationSummary* locations = invoke->GetLocations();
  X86Assembler* assembler = GetAssembler();

  Register out = locations->Out().AsRegister<Register>();
  InvokeRuntimeCallingConvention calling_convention;
  if (invoke->InputAt(0)->IsConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (value >= info.low && value <= info.high) {
      // Just embed the j.l.Integer in the code.
      ScopedObjectAccess soa(Thread::Current());
      mirror::Object* boxed = info.cache->Get(value + (-info.low));
      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
      __ movl(out, Immediate(address));
    } else {
      // Allocate and initialize a new j.l.Integer.
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
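      // `info.integer` is the java.lang.Integer class, which lives in the
      // boot image; its address is passed to the allocation entrypoint below.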
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
      __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
      CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
      __ movl(Address(out, info.value_offset), Immediate(value));
    }
  } else {
    Register in = locations->InAt(0).AsRegister<Register>();
    // Check the bounds of our cache. A single unsigned comparison suffices:
    // after the subtraction below, inputs smaller than `info.low` wrap around
    // to large unsigned values, so kAboveEqual catches both ends of the range.
    __ leal(out, Address(in, -info.low));
    __ cmpl(out, Immediate(info.high - info.low + 1));
    NearLabel allocate, done;
    __ j(kAboveEqual, &allocate);
    // If the value is within the bounds, load the j.l.Integer directly from the array.
    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
    __ movl(out, Address(out, TIMES_4, data_offset + address));
    __ MaybeUnpoisonHeapReference(out);
    __ jmp(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
    __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
    __ movl(Address(out, info.value_offset), in);
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
  Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
  NearLabel done;
  // Load the interrupted flag of the current thread; on x86 the Thread
  // object is reached through the FS segment.
  __ fs()->movl(out, address);
  __ testl(out, out);
  __ j(kEqual, &done);
  // The flag was set: clear it, with a full memory fence to keep the clear
  // ordered with subsequent memory operations.
  __ fs()->movl(address, Immediate(0));
  codegen_->MemoryFence();
  __ Bind(&done);
}

UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)

UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf)
UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend)
UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength)
UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString)
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppend)
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength)
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString)

// 1.8.
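// The sun.misc.Unsafe get-and-add/get-and-set operations below were added in
// Java 8 and are not intrinsified on x86: calls to them fall back to the
// original method implementations.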
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(X86)

#undef __

}  // namespace x86
}  // namespace art