1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "intrinsics_x86.h" 18 19 #include <limits> 20 21 #include "arch/x86/instruction_set_features_x86.h" 22 #include "art_method.h" 23 #include "base/bit_utils.h" 24 #include "code_generator_x86.h" 25 #include "entrypoints/quick/quick_entrypoints.h" 26 #include "heap_poisoning.h" 27 #include "intrinsics.h" 28 #include "intrinsics_utils.h" 29 #include "lock_word.h" 30 #include "mirror/array-inl.h" 31 #include "mirror/object_array-inl.h" 32 #include "mirror/reference.h" 33 #include "mirror/string.h" 34 #include "scoped_thread_state_change-inl.h" 35 #include "thread-current-inl.h" 36 #include "utils/x86/assembler_x86.h" 37 #include "utils/x86/constants_x86.h" 38 39 namespace art { 40 41 namespace x86 { 42 43 static constexpr int kDoubleNaNHigh = 0x7FF80000; 44 static constexpr int kDoubleNaNLow = 0x00000000; 45 static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); 46 static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); 47 48 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen) 49 : allocator_(codegen->GetGraph()->GetAllocator()), 50 codegen_(codegen) { 51 } 52 53 54 X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() { 55 return down_cast<X86Assembler*>(codegen_->GetAssembler()); 56 } 57 58 ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() { 59 return codegen_->GetGraph()->GetAllocator(); 60 } 61 62 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) { 63 Dispatch(invoke); 64 LocationSummary* res = invoke->GetLocations(); 65 if (res == nullptr) { 66 return false; 67 } 68 return res->Intrinsified(); 69 } 70 71 static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) { 72 InvokeDexCallingConventionVisitorX86 calling_convention_visitor; 73 IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); 74 } 75 76 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>; 77 78 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 79 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT 80 81 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. 82 class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode { 83 public: 84 explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction) 85 : SlowPathCode(instruction) { 86 DCHECK(kEmitCompilerReadBarrier); 87 DCHECK(kUseBakerReadBarrier); 88 } 89 90 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 91 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); 92 LocationSummary* locations = instruction_->GetLocations(); 93 DCHECK(locations->CanCall()); 94 DCHECK(instruction_->IsInvokeStaticOrDirect()) 95 << "Unexpected instruction in read barrier arraycopy slow path: " 96 << instruction_->DebugName(); 97 DCHECK(instruction_->GetLocations()->Intrinsified()); 98 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); 99 100 int32_t element_size = DataType::Size(DataType::Type::kReference); 101 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); 102 103 Register src = locations->InAt(0).AsRegister<Register>(); 104 Location src_pos = locations->InAt(1); 105 Register dest = locations->InAt(2).AsRegister<Register>(); 106 Location dest_pos = locations->InAt(3); 107 Location length = locations->InAt(4); 108 Location temp1_loc = locations->GetTemp(0); 109 Register temp1 = temp1_loc.AsRegister<Register>(); 110 Register temp2 = locations->GetTemp(1).AsRegister<Register>(); 111 Register temp3 = locations->GetTemp(2).AsRegister<Register>(); 112 113 __ Bind(GetEntryLabel()); 114 // In this code path, registers `temp1`, `temp2`, and `temp3` 115 // (resp.) are not used for the base source address, the base 116 // destination address, and the end source address (resp.), as in 117 // other SystemArrayCopy intrinsic code paths. Instead they are 118 // (resp.) used for: 119 // - the loop index (`i`); 120 // - the source index (`src_index`) and the loaded (source) 121 // reference (`value`); and 122 // - the destination index (`dest_index`). 123 124 // i = 0 125 __ xorl(temp1, temp1); 126 NearLabel loop; 127 __ Bind(&loop); 128 // value = src_array[i + src_pos] 129 if (src_pos.IsConstant()) { 130 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); 131 int32_t adjusted_offset = offset + constant * element_size; 132 __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset)); 133 } else { 134 __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0)); 135 __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset)); 136 } 137 __ MaybeUnpoisonHeapReference(temp2); 138 // TODO: Inline the mark bit check before calling the runtime? 139 // value = ReadBarrier::Mark(value) 140 // No need to save live registers; it's taken care of by the 141 // entrypoint. Also, there is no need to update the stack mask, 142 // as this runtime call will not trigger a garbage collection. 143 // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more 144 // explanations.) 145 DCHECK_NE(temp2, ESP); 146 DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2; 147 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2); 148 // This runtime call does not require a stack map. 149 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); 150 __ MaybePoisonHeapReference(temp2); 151 // dest_array[i + dest_pos] = value 152 if (dest_pos.IsConstant()) { 153 int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); 154 int32_t adjusted_offset = offset + constant * element_size; 155 __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2); 156 } else { 157 __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0)); 158 __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2); 159 } 160 // ++i 161 __ addl(temp1, Immediate(1)); 162 // if (i != length) goto loop 163 x86_codegen->GenerateIntCompare(temp1_loc, length); 164 __ j(kNotEqual, &loop); 165 __ jmp(GetExitLabel()); 166 } 167 168 const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; } 169 170 private: 171 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86); 172 }; 173 174 #undef __ 175 176 #define __ assembler-> 177 178 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) { 179 LocationSummary* locations = 180 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 181 locations->SetInAt(0, Location::RequiresFpuRegister()); 182 locations->SetOut(Location::RequiresRegister()); 183 if (is64bit) { 184 locations->AddTemp(Location::RequiresFpuRegister()); 185 } 186 } 187 188 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) { 189 LocationSummary* locations = 190 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 191 locations->SetInAt(0, Location::RequiresRegister()); 192 locations->SetOut(Location::RequiresFpuRegister()); 193 if (is64bit) { 194 locations->AddTemp(Location::RequiresFpuRegister()); 195 locations->AddTemp(Location::RequiresFpuRegister()); 196 } 197 } 198 199 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) { 200 Location input = locations->InAt(0); 201 Location output = locations->Out(); 202 if (is64bit) { 203 // Need to use the temporary. 204 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 205 __ movsd(temp, input.AsFpuRegister<XmmRegister>()); 206 __ movd(output.AsRegisterPairLow<Register>(), temp); 207 __ psrlq(temp, Immediate(32)); 208 __ movd(output.AsRegisterPairHigh<Register>(), temp); 209 } else { 210 __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>()); 211 } 212 } 213 214 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) { 215 Location input = locations->InAt(0); 216 Location output = locations->Out(); 217 if (is64bit) { 218 // Need to use the temporary. 219 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 220 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); 221 __ movd(temp1, input.AsRegisterPairLow<Register>()); 222 __ movd(temp2, input.AsRegisterPairHigh<Register>()); 223 __ punpckldq(temp1, temp2); 224 __ movsd(output.AsFpuRegister<XmmRegister>(), temp1); 225 } else { 226 __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>()); 227 } 228 } 229 230 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { 231 CreateFPToIntLocations(allocator_, invoke, /* is64bit */ true); 232 } 233 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) { 234 CreateIntToFPLocations(allocator_, invoke, /* is64bit */ true); 235 } 236 237 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { 238 MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); 239 } 240 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) { 241 MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); 242 } 243 244 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) { 245 CreateFPToIntLocations(allocator_, invoke, /* is64bit */ false); 246 } 247 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) { 248 CreateIntToFPLocations(allocator_, invoke, /* is64bit */ false); 249 } 250 251 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) { 252 MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); 253 } 254 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) { 255 MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); 256 } 257 258 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { 259 LocationSummary* locations = 260 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 261 locations->SetInAt(0, Location::RequiresRegister()); 262 locations->SetOut(Location::SameAsFirstInput()); 263 } 264 265 static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { 266 LocationSummary* locations = 267 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 268 locations->SetInAt(0, Location::RequiresRegister()); 269 locations->SetOut(Location::RequiresRegister()); 270 } 271 272 static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { 273 LocationSummary* locations = 274 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 275 locations->SetInAt(0, Location::RequiresRegister()); 276 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 277 } 278 279 static void GenReverseBytes(LocationSummary* locations, 280 DataType::Type size, 281 X86Assembler* assembler) { 282 Register out = locations->Out().AsRegister<Register>(); 283 284 switch (size) { 285 case DataType::Type::kInt16: 286 // TODO: Can be done with an xchg of 8b registers. This is straight from Quick. 287 __ bswapl(out); 288 __ sarl(out, Immediate(16)); 289 break; 290 case DataType::Type::kInt32: 291 __ bswapl(out); 292 break; 293 default: 294 LOG(FATAL) << "Unexpected size for reverse-bytes: " << size; 295 UNREACHABLE(); 296 } 297 } 298 299 void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) { 300 CreateIntToIntLocations(allocator_, invoke); 301 } 302 303 void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) { 304 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); 305 } 306 307 void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) { 308 CreateLongToLongLocations(allocator_, invoke); 309 } 310 311 void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) { 312 LocationSummary* locations = invoke->GetLocations(); 313 Location input = locations->InAt(0); 314 Register input_lo = input.AsRegisterPairLow<Register>(); 315 Register input_hi = input.AsRegisterPairHigh<Register>(); 316 Location output = locations->Out(); 317 Register output_lo = output.AsRegisterPairLow<Register>(); 318 Register output_hi = output.AsRegisterPairHigh<Register>(); 319 320 X86Assembler* assembler = GetAssembler(); 321 // Assign the inputs to the outputs, mixing low/high. 322 __ movl(output_lo, input_hi); 323 __ movl(output_hi, input_lo); 324 __ bswapl(output_lo); 325 __ bswapl(output_hi); 326 } 327 328 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) { 329 CreateIntToIntLocations(allocator_, invoke); 330 } 331 332 void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) { 333 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); 334 } 335 336 337 // TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we 338 // need is 64b. 339 340 static void CreateFloatToFloat(ArenaAllocator* allocator, HInvoke* invoke) { 341 // TODO: Enable memory operations when the assembler supports them. 342 LocationSummary* locations = 343 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 344 locations->SetInAt(0, Location::RequiresFpuRegister()); 345 locations->SetOut(Location::SameAsFirstInput()); 346 HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); 347 DCHECK(static_or_direct != nullptr); 348 if (static_or_direct->HasSpecialInput() && 349 invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { 350 // We need addressibility for the constant area. 351 locations->SetInAt(1, Location::RequiresRegister()); 352 // We need a temporary to hold the constant. 353 locations->AddTemp(Location::RequiresFpuRegister()); 354 } 355 } 356 357 static void MathAbsFP(HInvoke* invoke, 358 bool is64bit, 359 X86Assembler* assembler, 360 CodeGeneratorX86* codegen) { 361 LocationSummary* locations = invoke->GetLocations(); 362 Location output = locations->Out(); 363 364 DCHECK(output.IsFpuRegister()); 365 if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) { 366 HX86ComputeBaseMethodAddress* method_address = 367 invoke->InputAt(1)->AsX86ComputeBaseMethodAddress(); 368 DCHECK(locations->InAt(1).IsRegister()); 369 // We also have a constant area pointer. 370 Register constant_area = locations->InAt(1).AsRegister<Register>(); 371 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 372 if (is64bit) { 373 __ movsd(temp, codegen->LiteralInt64Address( 374 INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area)); 375 __ andpd(output.AsFpuRegister<XmmRegister>(), temp); 376 } else { 377 __ movss(temp, codegen->LiteralInt32Address( 378 INT32_C(0x7FFFFFFF), method_address, constant_area)); 379 __ andps(output.AsFpuRegister<XmmRegister>(), temp); 380 } 381 } else { 382 // Create the right constant on an aligned stack. 383 if (is64bit) { 384 __ subl(ESP, Immediate(8)); 385 __ pushl(Immediate(0x7FFFFFFF)); 386 __ pushl(Immediate(0xFFFFFFFF)); 387 __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); 388 } else { 389 __ subl(ESP, Immediate(12)); 390 __ pushl(Immediate(0x7FFFFFFF)); 391 __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); 392 } 393 __ addl(ESP, Immediate(16)); 394 } 395 } 396 397 void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) { 398 CreateFloatToFloat(allocator_, invoke); 399 } 400 401 void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) { 402 MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_); 403 } 404 405 void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) { 406 CreateFloatToFloat(allocator_, invoke); 407 } 408 409 void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) { 410 MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_); 411 } 412 413 static void CreateAbsIntLocation(ArenaAllocator* allocator, HInvoke* invoke) { 414 LocationSummary* locations = 415 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 416 locations->SetInAt(0, Location::RegisterLocation(EAX)); 417 locations->SetOut(Location::SameAsFirstInput()); 418 locations->AddTemp(Location::RegisterLocation(EDX)); 419 } 420 421 static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) { 422 Location output = locations->Out(); 423 Register out = output.AsRegister<Register>(); 424 DCHECK_EQ(out, EAX); 425 Register temp = locations->GetTemp(0).AsRegister<Register>(); 426 DCHECK_EQ(temp, EDX); 427 428 // Sign extend EAX into EDX. 429 __ cdq(); 430 431 // XOR EAX with sign. 432 __ xorl(EAX, EDX); 433 434 // Subtract out sign to correct. 435 __ subl(EAX, EDX); 436 437 // The result is in EAX. 438 } 439 440 static void CreateAbsLongLocation(ArenaAllocator* allocator, HInvoke* invoke) { 441 LocationSummary* locations = 442 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 443 locations->SetInAt(0, Location::RequiresRegister()); 444 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 445 locations->AddTemp(Location::RequiresRegister()); 446 } 447 448 static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) { 449 Location input = locations->InAt(0); 450 Register input_lo = input.AsRegisterPairLow<Register>(); 451 Register input_hi = input.AsRegisterPairHigh<Register>(); 452 Location output = locations->Out(); 453 Register output_lo = output.AsRegisterPairLow<Register>(); 454 Register output_hi = output.AsRegisterPairHigh<Register>(); 455 Register temp = locations->GetTemp(0).AsRegister<Register>(); 456 457 // Compute the sign into the temporary. 458 __ movl(temp, input_hi); 459 __ sarl(temp, Immediate(31)); 460 461 // Store the sign into the output. 462 __ movl(output_lo, temp); 463 __ movl(output_hi, temp); 464 465 // XOR the input to the output. 466 __ xorl(output_lo, input_lo); 467 __ xorl(output_hi, input_hi); 468 469 // Subtract the sign. 470 __ subl(output_lo, temp); 471 __ sbbl(output_hi, temp); 472 } 473 474 void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) { 475 CreateAbsIntLocation(allocator_, invoke); 476 } 477 478 void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) { 479 GenAbsInteger(invoke->GetLocations(), GetAssembler()); 480 } 481 482 void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) { 483 CreateAbsLongLocation(allocator_, invoke); 484 } 485 486 void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) { 487 GenAbsLong(invoke->GetLocations(), GetAssembler()); 488 } 489 490 static void GenMinMaxFP(HInvoke* invoke, 491 bool is_min, 492 bool is_double, 493 X86Assembler* assembler, 494 CodeGeneratorX86* codegen) { 495 LocationSummary* locations = invoke->GetLocations(); 496 Location op1_loc = locations->InAt(0); 497 Location op2_loc = locations->InAt(1); 498 Location out_loc = locations->Out(); 499 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); 500 501 // Shortcut for same input locations. 502 if (op1_loc.Equals(op2_loc)) { 503 DCHECK(out_loc.Equals(op1_loc)); 504 return; 505 } 506 507 // (out := op1) 508 // out <=? op2 509 // if Nan jmp Nan_label 510 // if out is min jmp done 511 // if op2 is min jmp op2_label 512 // handle -0/+0 513 // jmp done 514 // Nan_label: 515 // out := NaN 516 // op2_label: 517 // out := op2 518 // done: 519 // 520 // This removes one jmp, but needs to copy one input (op1) to out. 521 // 522 // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? 523 524 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); 525 526 NearLabel nan, done, op2_label; 527 if (is_double) { 528 __ ucomisd(out, op2); 529 } else { 530 __ ucomiss(out, op2); 531 } 532 533 __ j(Condition::kParityEven, &nan); 534 535 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); 536 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); 537 538 // Handle 0.0/-0.0. 539 if (is_min) { 540 if (is_double) { 541 __ orpd(out, op2); 542 } else { 543 __ orps(out, op2); 544 } 545 } else { 546 if (is_double) { 547 __ andpd(out, op2); 548 } else { 549 __ andps(out, op2); 550 } 551 } 552 __ jmp(&done); 553 554 // NaN handling. 555 __ Bind(&nan); 556 // Do we have a constant area pointer? 557 if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) { 558 HX86ComputeBaseMethodAddress* method_address = 559 invoke->InputAt(2)->AsX86ComputeBaseMethodAddress(); 560 DCHECK(locations->InAt(2).IsRegister()); 561 Register constant_area = locations->InAt(2).AsRegister<Register>(); 562 if (is_double) { 563 __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area)); 564 } else { 565 __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area)); 566 } 567 } else { 568 if (is_double) { 569 __ pushl(Immediate(kDoubleNaNHigh)); 570 __ pushl(Immediate(kDoubleNaNLow)); 571 __ movsd(out, Address(ESP, 0)); 572 __ addl(ESP, Immediate(8)); 573 } else { 574 __ pushl(Immediate(kFloatNaN)); 575 __ movss(out, Address(ESP, 0)); 576 __ addl(ESP, Immediate(4)); 577 } 578 } 579 __ jmp(&done); 580 581 // out := op2; 582 __ Bind(&op2_label); 583 if (is_double) { 584 __ movsd(out, op2); 585 } else { 586 __ movss(out, op2); 587 } 588 589 // Done. 590 __ Bind(&done); 591 } 592 593 static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { 594 LocationSummary* locations = 595 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 596 locations->SetInAt(0, Location::RequiresFpuRegister()); 597 locations->SetInAt(1, Location::RequiresFpuRegister()); 598 // The following is sub-optimal, but all we can do for now. It would be fine to also accept 599 // the second input to be the output (we can simply swap inputs). 600 locations->SetOut(Location::SameAsFirstInput()); 601 HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); 602 DCHECK(static_or_direct != nullptr); 603 if (static_or_direct->HasSpecialInput() && 604 invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { 605 locations->SetInAt(2, Location::RequiresRegister()); 606 } 607 } 608 609 void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) { 610 CreateFPFPToFPLocations(allocator_, invoke); 611 } 612 613 void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) { 614 GenMinMaxFP(invoke, 615 /* is_min */ true, 616 /* is_double */ true, 617 GetAssembler(), 618 codegen_); 619 } 620 621 void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) { 622 CreateFPFPToFPLocations(allocator_, invoke); 623 } 624 625 void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) { 626 GenMinMaxFP(invoke, 627 /* is_min */ true, 628 /* is_double */ false, 629 GetAssembler(), 630 codegen_); 631 } 632 633 void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { 634 CreateFPFPToFPLocations(allocator_, invoke); 635 } 636 637 void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { 638 GenMinMaxFP(invoke, 639 /* is_min */ false, 640 /* is_double */ true, 641 GetAssembler(), 642 codegen_); 643 } 644 645 void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) { 646 CreateFPFPToFPLocations(allocator_, invoke); 647 } 648 649 void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) { 650 GenMinMaxFP(invoke, 651 /* is_min */ false, 652 /* is_double */ false, 653 GetAssembler(), 654 codegen_); 655 } 656 657 static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, 658 X86Assembler* assembler) { 659 Location op1_loc = locations->InAt(0); 660 Location op2_loc = locations->InAt(1); 661 662 // Shortcut for same input locations. 663 if (op1_loc.Equals(op2_loc)) { 664 // Can return immediately, as op1_loc == out_loc. 665 // Note: if we ever support separate registers, e.g., output into memory, we need to check for 666 // a copy here. 667 DCHECK(locations->Out().Equals(op1_loc)); 668 return; 669 } 670 671 if (is_long) { 672 // Need to perform a subtract to get the sign right. 673 // op1 is already in the same location as the output. 674 Location output = locations->Out(); 675 Register output_lo = output.AsRegisterPairLow<Register>(); 676 Register output_hi = output.AsRegisterPairHigh<Register>(); 677 678 Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); 679 Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); 680 681 // Spare register to compute the subtraction to set condition code. 682 Register temp = locations->GetTemp(0).AsRegister<Register>(); 683 684 // Subtract off op2_low. 685 __ movl(temp, output_lo); 686 __ subl(temp, op2_lo); 687 688 // Now use the same tempo and the borrow to finish the subtraction of op2_hi. 689 __ movl(temp, output_hi); 690 __ sbbl(temp, op2_hi); 691 692 // Now the condition code is correct. 693 Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; 694 __ cmovl(cond, output_lo, op2_lo); 695 __ cmovl(cond, output_hi, op2_hi); 696 } else { 697 Register out = locations->Out().AsRegister<Register>(); 698 Register op2 = op2_loc.AsRegister<Register>(); 699 700 // (out := op1) 701 // out <=? op2 702 // if out is min jmp done 703 // out := op2 704 // done: 705 706 __ cmpl(out, op2); 707 Condition cond = is_min ? Condition::kGreater : Condition::kLess; 708 __ cmovl(cond, out, op2); 709 } 710 } 711 712 static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { 713 LocationSummary* locations = 714 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 715 locations->SetInAt(0, Location::RequiresRegister()); 716 locations->SetInAt(1, Location::RequiresRegister()); 717 locations->SetOut(Location::SameAsFirstInput()); 718 } 719 720 static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { 721 LocationSummary* locations = 722 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 723 locations->SetInAt(0, Location::RequiresRegister()); 724 locations->SetInAt(1, Location::RequiresRegister()); 725 locations->SetOut(Location::SameAsFirstInput()); 726 // Register to use to perform a long subtract to set cc. 727 locations->AddTemp(Location::RequiresRegister()); 728 } 729 730 void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) { 731 CreateIntIntToIntLocations(allocator_, invoke); 732 } 733 734 void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) { 735 GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); 736 } 737 738 void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) { 739 CreateLongLongToLongLocations(allocator_, invoke); 740 } 741 742 void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) { 743 GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); 744 } 745 746 void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) { 747 CreateIntIntToIntLocations(allocator_, invoke); 748 } 749 750 void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) { 751 GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); 752 } 753 754 void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) { 755 CreateLongLongToLongLocations(allocator_, invoke); 756 } 757 758 void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) { 759 GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); 760 } 761 762 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { 763 LocationSummary* locations = 764 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 765 locations->SetInAt(0, Location::RequiresFpuRegister()); 766 locations->SetOut(Location::RequiresFpuRegister()); 767 } 768 769 void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) { 770 CreateFPToFPLocations(allocator_, invoke); 771 } 772 773 void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) { 774 LocationSummary* locations = invoke->GetLocations(); 775 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); 776 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); 777 778 GetAssembler()->sqrtsd(out, in); 779 } 780 781 static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) { 782 MoveArguments(invoke, codegen); 783 784 DCHECK(invoke->IsInvokeStaticOrDirect()); 785 codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), 786 Location::RegisterLocation(EAX)); 787 788 // Copy the result back to the expected output. 789 Location out = invoke->GetLocations()->Out(); 790 if (out.IsValid()) { 791 DCHECK(out.IsRegister()); 792 codegen->MoveFromReturnRegister(out, invoke->GetType()); 793 } 794 } 795 796 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator, 797 HInvoke* invoke, 798 CodeGeneratorX86* codegen) { 799 // Do we have instruction support? 800 if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { 801 CreateFPToFPLocations(allocator, invoke); 802 return; 803 } 804 805 // We have to fall back to a call to the intrinsic. 806 LocationSummary* locations = 807 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly); 808 InvokeRuntimeCallingConvention calling_convention; 809 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); 810 locations->SetOut(Location::FpuRegisterLocation(XMM0)); 811 // Needs to be EAX for the invoke. 812 locations->AddTemp(Location::RegisterLocation(EAX)); 813 } 814 815 static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen, 816 HInvoke* invoke, 817 X86Assembler* assembler, 818 int round_mode) { 819 LocationSummary* locations = invoke->GetLocations(); 820 if (locations->WillCall()) { 821 InvokeOutOfLineIntrinsic(codegen, invoke); 822 } else { 823 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); 824 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); 825 __ roundsd(out, in, Immediate(round_mode)); 826 } 827 } 828 829 void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) { 830 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_); 831 } 832 833 void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) { 834 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2); 835 } 836 837 void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) { 838 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_); 839 } 840 841 void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) { 842 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1); 843 } 844 845 void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) { 846 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_); 847 } 848 849 void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) { 850 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0); 851 } 852 853 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { 854 // Do we have instruction support? 855 if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) { 856 HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); 857 DCHECK(static_or_direct != nullptr); 858 LocationSummary* locations = 859 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 860 locations->SetInAt(0, Location::RequiresFpuRegister()); 861 if (static_or_direct->HasSpecialInput() && 862 invoke->InputAt( 863 static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { 864 locations->SetInAt(1, Location::RequiresRegister()); 865 } 866 locations->SetOut(Location::RequiresRegister()); 867 locations->AddTemp(Location::RequiresFpuRegister()); 868 locations->AddTemp(Location::RequiresFpuRegister()); 869 return; 870 } 871 872 // We have to fall back to a call to the intrinsic. 873 LocationSummary* locations = 874 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly); 875 InvokeRuntimeCallingConvention calling_convention; 876 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); 877 locations->SetOut(Location::RegisterLocation(EAX)); 878 // Needs to be EAX for the invoke. 879 locations->AddTemp(Location::RegisterLocation(EAX)); 880 } 881 882 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { 883 LocationSummary* locations = invoke->GetLocations(); 884 if (locations->WillCall()) { // TODO: can we reach this? 885 InvokeOutOfLineIntrinsic(codegen_, invoke); 886 return; 887 } 888 889 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); 890 XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 891 XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); 892 Register out = locations->Out().AsRegister<Register>(); 893 NearLabel skip_incr, done; 894 X86Assembler* assembler = GetAssembler(); 895 896 // Since no direct x86 rounding instruction matches the required semantics, 897 // this intrinsic is implemented as follows: 898 // result = floor(in); 899 // if (in - result >= 0.5f) 900 // result = result + 1.0f; 901 __ movss(t2, in); 902 __ roundss(t1, in, Immediate(1)); 903 __ subss(t2, t1); 904 if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) { 905 // Direct constant area available. 906 HX86ComputeBaseMethodAddress* method_address = 907 invoke->InputAt(1)->AsX86ComputeBaseMethodAddress(); 908 Register constant_area = locations->InAt(1).AsRegister<Register>(); 909 __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), 910 method_address, 911 constant_area)); 912 __ j(kBelow, &skip_incr); 913 __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), 914 method_address, 915 constant_area)); 916 __ Bind(&skip_incr); 917 } else { 918 // No constant area: go through stack. 919 __ pushl(Immediate(bit_cast<int32_t, float>(0.5f))); 920 __ pushl(Immediate(bit_cast<int32_t, float>(1.0f))); 921 __ comiss(t2, Address(ESP, 4)); 922 __ j(kBelow, &skip_incr); 923 __ addss(t1, Address(ESP, 0)); 924 __ Bind(&skip_incr); 925 __ addl(ESP, Immediate(8)); 926 } 927 928 // Final conversion to an integer. Unfortunately this also does not have a 929 // direct x86 instruction, since NaN should map to 0 and large positive 930 // values need to be clipped to the extreme value. 931 __ movl(out, Immediate(kPrimIntMax)); 932 __ cvtsi2ss(t2, out); 933 __ comiss(t1, t2); 934 __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered 935 __ movl(out, Immediate(0)); // does not change flags 936 __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out) 937 __ cvttss2si(out, t1); 938 __ Bind(&done); 939 } 940 941 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { 942 LocationSummary* locations = 943 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 944 InvokeRuntimeCallingConvention calling_convention; 945 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); 946 locations->SetOut(Location::FpuRegisterLocation(XMM0)); 947 } 948 949 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) { 950 LocationSummary* locations = invoke->GetLocations(); 951 DCHECK(locations->WillCall()); 952 DCHECK(invoke->IsInvokeStaticOrDirect()); 953 X86Assembler* assembler = codegen->GetAssembler(); 954 955 // We need some place to pass the parameters. 956 __ subl(ESP, Immediate(16)); 957 __ cfi().AdjustCFAOffset(16); 958 959 // Pass the parameters at the bottom of the stack. 960 __ movsd(Address(ESP, 0), XMM0); 961 962 // If we have a second parameter, pass it next. 963 if (invoke->GetNumberOfArguments() == 2) { 964 __ movsd(Address(ESP, 8), XMM1); 965 } 966 967 // Now do the actual call. 968 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); 969 970 // Extract the return value from the FP stack. 971 __ fstpl(Address(ESP, 0)); 972 __ movsd(XMM0, Address(ESP, 0)); 973 974 // And clean up the stack. 975 __ addl(ESP, Immediate(16)); 976 __ cfi().AdjustCFAOffset(-16); 977 } 978 979 void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) { 980 CreateFPToFPCallLocations(allocator_, invoke); 981 } 982 983 void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) { 984 GenFPToFPCall(invoke, codegen_, kQuickCos); 985 } 986 987 void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) { 988 CreateFPToFPCallLocations(allocator_, invoke); 989 } 990 991 void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) { 992 GenFPToFPCall(invoke, codegen_, kQuickSin); 993 } 994 995 void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) { 996 CreateFPToFPCallLocations(allocator_, invoke); 997 } 998 999 void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) { 1000 GenFPToFPCall(invoke, codegen_, kQuickAcos); 1001 } 1002 1003 void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) { 1004 CreateFPToFPCallLocations(allocator_, invoke); 1005 } 1006 1007 void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) { 1008 GenFPToFPCall(invoke, codegen_, kQuickAsin); 1009 } 1010 1011 void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) { 1012 CreateFPToFPCallLocations(allocator_, invoke); 1013 } 1014 1015 void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) { 1016 GenFPToFPCall(invoke, codegen_, kQuickAtan); 1017 } 1018 1019 void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) { 1020 CreateFPToFPCallLocations(allocator_, invoke); 1021 } 1022 1023 void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) { 1024 GenFPToFPCall(invoke, codegen_, kQuickCbrt); 1025 } 1026 1027 void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) { 1028 CreateFPToFPCallLocations(allocator_, invoke); 1029 } 1030 1031 void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) { 1032 GenFPToFPCall(invoke, codegen_, kQuickCosh); 1033 } 1034 1035 void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) { 1036 CreateFPToFPCallLocations(allocator_, invoke); 1037 } 1038 1039 void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) { 1040 GenFPToFPCall(invoke, codegen_, kQuickExp); 1041 } 1042 1043 void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) { 1044 CreateFPToFPCallLocations(allocator_, invoke); 1045 } 1046 1047 void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) { 1048 GenFPToFPCall(invoke, codegen_, kQuickExpm1); 1049 } 1050 1051 void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) { 1052 CreateFPToFPCallLocations(allocator_, invoke); 1053 } 1054 1055 void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) { 1056 GenFPToFPCall(invoke, codegen_, kQuickLog); 1057 } 1058 1059 void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) { 1060 CreateFPToFPCallLocations(allocator_, invoke); 1061 } 1062 1063 void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) { 1064 GenFPToFPCall(invoke, codegen_, kQuickLog10); 1065 } 1066 1067 void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) { 1068 CreateFPToFPCallLocations(allocator_, invoke); 1069 } 1070 1071 void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) { 1072 GenFPToFPCall(invoke, codegen_, kQuickSinh); 1073 } 1074 1075 void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) { 1076 CreateFPToFPCallLocations(allocator_, invoke); 1077 } 1078 1079 void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) { 1080 GenFPToFPCall(invoke, codegen_, kQuickTan); 1081 } 1082 1083 void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) { 1084 CreateFPToFPCallLocations(allocator_, invoke); 1085 } 1086 1087 void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) { 1088 GenFPToFPCall(invoke, codegen_, kQuickTanh); 1089 } 1090 1091 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { 1092 LocationSummary* locations = 1093 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 1094 InvokeRuntimeCallingConvention calling_convention; 1095 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); 1096 locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); 1097 locations->SetOut(Location::FpuRegisterLocation(XMM0)); 1098 } 1099 1100 void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) { 1101 CreateFPFPToFPCallLocations(allocator_, invoke); 1102 } 1103 1104 void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) { 1105 GenFPToFPCall(invoke, codegen_, kQuickAtan2); 1106 } 1107 1108 void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) { 1109 CreateFPFPToFPCallLocations(allocator_, invoke); 1110 } 1111 1112 void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) { 1113 GenFPToFPCall(invoke, codegen_, kQuickPow); 1114 } 1115 1116 void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) { 1117 CreateFPFPToFPCallLocations(allocator_, invoke); 1118 } 1119 1120 void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) { 1121 GenFPToFPCall(invoke, codegen_, kQuickHypot); 1122 } 1123 1124 void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) { 1125 CreateFPFPToFPCallLocations(allocator_, invoke); 1126 } 1127 1128 void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) { 1129 GenFPToFPCall(invoke, codegen_, kQuickNextAfter); 1130 } 1131 1132 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) { 1133 // We need at least two of the positions or length to be an integer constant, 1134 // or else we won't have enough free registers. 1135 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); 1136 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); 1137 HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); 1138 1139 int num_constants = 1140 ((src_pos != nullptr) ? 1 : 0) 1141 + ((dest_pos != nullptr) ? 1 : 0) 1142 + ((length != nullptr) ? 1 : 0); 1143 1144 if (num_constants < 2) { 1145 // Not enough free registers. 1146 return; 1147 } 1148 1149 // As long as we are checking, we might as well check to see if the src and dest 1150 // positions are >= 0. 1151 if ((src_pos != nullptr && src_pos->GetValue() < 0) || 1152 (dest_pos != nullptr && dest_pos->GetValue() < 0)) { 1153 // We will have to fail anyways. 1154 return; 1155 } 1156 1157 // And since we are already checking, check the length too. 1158 if (length != nullptr) { 1159 int32_t len = length->GetValue(); 1160 if (len < 0) { 1161 // Just call as normal. 1162 return; 1163 } 1164 } 1165 1166 // Okay, it is safe to generate inline code. 1167 LocationSummary* locations = 1168 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); 1169 // arraycopy(Object src, int srcPos, Object dest, int destPos, int length). 1170 locations->SetInAt(0, Location::RequiresRegister()); 1171 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); 1172 locations->SetInAt(2, Location::RequiresRegister()); 1173 locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3))); 1174 locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4))); 1175 1176 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers. 1177 locations->AddTemp(Location::RegisterLocation(ESI)); 1178 locations->AddTemp(Location::RegisterLocation(EDI)); 1179 locations->AddTemp(Location::RegisterLocation(ECX)); 1180 } 1181 1182 static void CheckPosition(X86Assembler* assembler, 1183 Location pos, 1184 Register input, 1185 Location length, 1186 SlowPathCode* slow_path, 1187 Register temp, 1188 bool length_is_input_length = false) { 1189 // Where is the length in the Array? 1190 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value(); 1191 1192 if (pos.IsConstant()) { 1193 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue(); 1194 if (pos_const == 0) { 1195 if (!length_is_input_length) { 1196 // Check that length(input) >= length. 1197 if (length.IsConstant()) { 1198 __ cmpl(Address(input, length_offset), 1199 Immediate(length.GetConstant()->AsIntConstant()->GetValue())); 1200 } else { 1201 __ cmpl(Address(input, length_offset), length.AsRegister<Register>()); 1202 } 1203 __ j(kLess, slow_path->GetEntryLabel()); 1204 } 1205 } else { 1206 // Check that length(input) >= pos. 1207 __ movl(temp, Address(input, length_offset)); 1208 __ subl(temp, Immediate(pos_const)); 1209 __ j(kLess, slow_path->GetEntryLabel()); 1210 1211 // Check that (length(input) - pos) >= length. 1212 if (length.IsConstant()) { 1213 __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue())); 1214 } else { 1215 __ cmpl(temp, length.AsRegister<Register>()); 1216 } 1217 __ j(kLess, slow_path->GetEntryLabel()); 1218 } 1219 } else if (length_is_input_length) { 1220 // The only way the copy can succeed is if pos is zero. 1221 Register pos_reg = pos.AsRegister<Register>(); 1222 __ testl(pos_reg, pos_reg); 1223 __ j(kNotEqual, slow_path->GetEntryLabel()); 1224 } else { 1225 // Check that pos >= 0. 1226 Register pos_reg = pos.AsRegister<Register>(); 1227 __ testl(pos_reg, pos_reg); 1228 __ j(kLess, slow_path->GetEntryLabel()); 1229 1230 // Check that pos <= length(input). 1231 __ cmpl(Address(input, length_offset), pos_reg); 1232 __ j(kLess, slow_path->GetEntryLabel()); 1233 1234 // Check that (length(input) - pos) >= length. 1235 __ movl(temp, Address(input, length_offset)); 1236 __ subl(temp, pos_reg); 1237 if (length.IsConstant()) { 1238 __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue())); 1239 } else { 1240 __ cmpl(temp, length.AsRegister<Register>()); 1241 } 1242 __ j(kLess, slow_path->GetEntryLabel()); 1243 } 1244 } 1245 1246 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) { 1247 X86Assembler* assembler = GetAssembler(); 1248 LocationSummary* locations = invoke->GetLocations(); 1249 1250 Register src = locations->InAt(0).AsRegister<Register>(); 1251 Location srcPos = locations->InAt(1); 1252 Register dest = locations->InAt(2).AsRegister<Register>(); 1253 Location destPos = locations->InAt(3); 1254 Location length = locations->InAt(4); 1255 1256 // Temporaries that we need for MOVSW. 1257 Register src_base = locations->GetTemp(0).AsRegister<Register>(); 1258 DCHECK_EQ(src_base, ESI); 1259 Register dest_base = locations->GetTemp(1).AsRegister<Register>(); 1260 DCHECK_EQ(dest_base, EDI); 1261 Register count = locations->GetTemp(2).AsRegister<Register>(); 1262 DCHECK_EQ(count, ECX); 1263 1264 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); 1265 codegen_->AddSlowPath(slow_path); 1266 1267 // Bail out if the source and destination are the same (to handle overlap). 1268 __ cmpl(src, dest); 1269 __ j(kEqual, slow_path->GetEntryLabel()); 1270 1271 // Bail out if the source is null. 1272 __ testl(src, src); 1273 __ j(kEqual, slow_path->GetEntryLabel()); 1274 1275 // Bail out if the destination is null. 1276 __ testl(dest, dest); 1277 __ j(kEqual, slow_path->GetEntryLabel()); 1278 1279 // If the length is negative, bail out. 1280 // We have already checked in the LocationsBuilder for the constant case. 1281 if (!length.IsConstant()) { 1282 __ cmpl(length.AsRegister<Register>(), length.AsRegister<Register>()); 1283 __ j(kLess, slow_path->GetEntryLabel()); 1284 } 1285 1286 // We need the count in ECX. 1287 if (length.IsConstant()) { 1288 __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue())); 1289 } else { 1290 __ movl(count, length.AsRegister<Register>()); 1291 } 1292 1293 // Validity checks: source. Use src_base as a temporary register. 1294 CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base); 1295 1296 // Validity checks: dest. Use src_base as a temporary register. 1297 CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base); 1298 1299 // Okay, everything checks out. Finally time to do the copy. 1300 // Check assumption that sizeof(Char) is 2 (used in scaling below). 1301 const size_t char_size = DataType::Size(DataType::Type::kUint16); 1302 DCHECK_EQ(char_size, 2u); 1303 1304 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); 1305 1306 if (srcPos.IsConstant()) { 1307 int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue(); 1308 __ leal(src_base, Address(src, char_size * srcPos_const + data_offset)); 1309 } else { 1310 __ leal(src_base, Address(src, srcPos.AsRegister<Register>(), 1311 ScaleFactor::TIMES_2, data_offset)); 1312 } 1313 if (destPos.IsConstant()) { 1314 int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue(); 1315 1316 __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset)); 1317 } else { 1318 __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(), 1319 ScaleFactor::TIMES_2, data_offset)); 1320 } 1321 1322 // Do the move. 1323 __ rep_movsw(); 1324 1325 __ Bind(slow_path->GetExitLabel()); 1326 } 1327 1328 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) { 1329 // The inputs plus one temp. 1330 LocationSummary* locations = new (allocator_) LocationSummary( 1331 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1332 InvokeRuntimeCallingConvention calling_convention; 1333 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1334 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 1335 locations->SetOut(Location::RegisterLocation(EAX)); 1336 } 1337 1338 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { 1339 X86Assembler* assembler = GetAssembler(); 1340 LocationSummary* locations = invoke->GetLocations(); 1341 1342 // Note that the null check must have been done earlier. 1343 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1344 1345 Register argument = locations->InAt(1).AsRegister<Register>(); 1346 __ testl(argument, argument); 1347 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); 1348 codegen_->AddSlowPath(slow_path); 1349 __ j(kEqual, slow_path->GetEntryLabel()); 1350 1351 codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path); 1352 __ Bind(slow_path->GetExitLabel()); 1353 } 1354 1355 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) { 1356 if (kEmitCompilerReadBarrier && 1357 !StringEqualsOptimizations(invoke).GetArgumentIsString() && 1358 !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { 1359 // No support for this odd case (String class is moveable, not in the boot image). 1360 return; 1361 } 1362 1363 LocationSummary* locations = 1364 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1365 locations->SetInAt(0, Location::RequiresRegister()); 1366 locations->SetInAt(1, Location::RequiresRegister()); 1367 1368 // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction. 1369 locations->AddTemp(Location::RegisterLocation(ECX)); 1370 locations->AddTemp(Location::RegisterLocation(EDI)); 1371 1372 // Set output, ESI needed for repe_cmpsl instruction anyways. 1373 locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap); 1374 } 1375 1376 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) { 1377 X86Assembler* assembler = GetAssembler(); 1378 LocationSummary* locations = invoke->GetLocations(); 1379 1380 Register str = locations->InAt(0).AsRegister<Register>(); 1381 Register arg = locations->InAt(1).AsRegister<Register>(); 1382 Register ecx = locations->GetTemp(0).AsRegister<Register>(); 1383 Register edi = locations->GetTemp(1).AsRegister<Register>(); 1384 Register esi = locations->Out().AsRegister<Register>(); 1385 1386 NearLabel end, return_true, return_false; 1387 1388 // Get offsets of count, value, and class fields within a string object. 1389 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 1390 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); 1391 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value(); 1392 1393 // Note that the null check must have been done earlier. 1394 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1395 1396 StringEqualsOptimizations optimizations(invoke); 1397 if (!optimizations.GetArgumentNotNull()) { 1398 // Check if input is null, return false if it is. 1399 __ testl(arg, arg); 1400 __ j(kEqual, &return_false); 1401 } 1402 1403 if (!optimizations.GetArgumentIsString()) { 1404 // Instanceof check for the argument by comparing class fields. 1405 // All string objects must have the same type since String cannot be subclassed. 1406 // Receiver must be a string object, so its class field is equal to all strings' class fields. 1407 // If the argument is a string object, its class field must be equal to receiver's class field. 1408 __ movl(ecx, Address(str, class_offset)); 1409 __ cmpl(ecx, Address(arg, class_offset)); 1410 __ j(kNotEqual, &return_false); 1411 } 1412 1413 // Reference equality check, return true if same reference. 1414 __ cmpl(str, arg); 1415 __ j(kEqual, &return_true); 1416 1417 // Load length and compression flag of receiver string. 1418 __ movl(ecx, Address(str, count_offset)); 1419 // Check if lengths and compression flags are equal, return false if they're not. 1420 // Two identical strings will always have same compression style since 1421 // compression style is decided on alloc. 1422 __ cmpl(ecx, Address(arg, count_offset)); 1423 __ j(kNotEqual, &return_false); 1424 // Return true if strings are empty. Even with string compression `count == 0` means empty. 1425 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1426 "Expecting 0=compressed, 1=uncompressed"); 1427 __ jecxz(&return_true); 1428 1429 if (mirror::kUseStringCompression) { 1430 NearLabel string_uncompressed; 1431 // Extract length and differentiate between both compressed or both uncompressed. 1432 // Different compression style is cut above. 1433 __ shrl(ecx, Immediate(1)); 1434 __ j(kCarrySet, &string_uncompressed); 1435 // Divide string length by 2, rounding up, and continue as if uncompressed. 1436 __ addl(ecx, Immediate(1)); 1437 __ shrl(ecx, Immediate(1)); 1438 __ Bind(&string_uncompressed); 1439 } 1440 // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction. 1441 __ leal(esi, Address(str, value_offset)); 1442 __ leal(edi, Address(arg, value_offset)); 1443 1444 // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not 1445 // divisible by 2. 1446 __ addl(ecx, Immediate(1)); 1447 __ shrl(ecx, Immediate(1)); 1448 1449 // Assertions that must hold in order to compare strings 2 characters (uncompressed) 1450 // or 4 characters (compressed) at a time. 1451 DCHECK_ALIGNED(value_offset, 4); 1452 static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded"); 1453 1454 // Loop to compare strings two characters at a time starting at the beginning of the string. 1455 __ repe_cmpsl(); 1456 // If strings are not equal, zero flag will be cleared. 1457 __ j(kNotEqual, &return_false); 1458 1459 // Return true and exit the function. 1460 // If loop does not result in returning false, we return true. 1461 __ Bind(&return_true); 1462 __ movl(esi, Immediate(1)); 1463 __ jmp(&end); 1464 1465 // Return false and exit the function. 1466 __ Bind(&return_false); 1467 __ xorl(esi, esi); 1468 __ Bind(&end); 1469 } 1470 1471 static void CreateStringIndexOfLocations(HInvoke* invoke, 1472 ArenaAllocator* allocator, 1473 bool start_at_zero) { 1474 LocationSummary* locations = new (allocator) LocationSummary(invoke, 1475 LocationSummary::kCallOnSlowPath, 1476 kIntrinsified); 1477 // The data needs to be in EDI for scasw. So request that the string is there, anyways. 1478 locations->SetInAt(0, Location::RegisterLocation(EDI)); 1479 // If we look for a constant char, we'll still have to copy it into EAX. So just request the 1480 // allocator to do that, anyways. We can still do the constant check by checking the parameter 1481 // of the instruction explicitly. 1482 // Note: This works as we don't clobber EAX anywhere. 1483 locations->SetInAt(1, Location::RegisterLocation(EAX)); 1484 if (!start_at_zero) { 1485 locations->SetInAt(2, Location::RequiresRegister()); // The starting index. 1486 } 1487 // As we clobber EDI during execution anyways, also use it as the output. 1488 locations->SetOut(Location::SameAsFirstInput()); 1489 1490 // repne scasw uses ECX as the counter. 1491 locations->AddTemp(Location::RegisterLocation(ECX)); 1492 // Need another temporary to be able to compute the result. 1493 locations->AddTemp(Location::RequiresRegister()); 1494 if (mirror::kUseStringCompression) { 1495 // Need another temporary to be able to save unflagged string length. 1496 locations->AddTemp(Location::RequiresRegister()); 1497 } 1498 } 1499 1500 static void GenerateStringIndexOf(HInvoke* invoke, 1501 X86Assembler* assembler, 1502 CodeGeneratorX86* codegen, 1503 bool start_at_zero) { 1504 LocationSummary* locations = invoke->GetLocations(); 1505 1506 // Note that the null check must have been done earlier. 1507 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1508 1509 Register string_obj = locations->InAt(0).AsRegister<Register>(); 1510 Register search_value = locations->InAt(1).AsRegister<Register>(); 1511 Register counter = locations->GetTemp(0).AsRegister<Register>(); 1512 Register string_length = locations->GetTemp(1).AsRegister<Register>(); 1513 Register out = locations->Out().AsRegister<Register>(); 1514 // Only used when string compression feature is on. 1515 Register string_length_flagged; 1516 1517 // Check our assumptions for registers. 1518 DCHECK_EQ(string_obj, EDI); 1519 DCHECK_EQ(search_value, EAX); 1520 DCHECK_EQ(counter, ECX); 1521 DCHECK_EQ(out, EDI); 1522 1523 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, 1524 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char. 1525 SlowPathCode* slow_path = nullptr; 1526 HInstruction* code_point = invoke->InputAt(1); 1527 if (code_point->IsIntConstant()) { 1528 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 1529 std::numeric_limits<uint16_t>::max()) { 1530 // Always needs the slow-path. We could directly dispatch to it, but this case should be 1531 // rare, so for simplicity just put the full slow-path down and branch unconditionally. 1532 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); 1533 codegen->AddSlowPath(slow_path); 1534 __ jmp(slow_path->GetEntryLabel()); 1535 __ Bind(slow_path->GetExitLabel()); 1536 return; 1537 } 1538 } else if (code_point->GetType() != DataType::Type::kUint16) { 1539 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max())); 1540 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); 1541 codegen->AddSlowPath(slow_path); 1542 __ j(kAbove, slow_path->GetEntryLabel()); 1543 } 1544 1545 // From here down, we know that we are looking for a char that fits in 16 bits. 1546 // Location of reference to data array within the String object. 1547 int32_t value_offset = mirror::String::ValueOffset().Int32Value(); 1548 // Location of count within the String object. 1549 int32_t count_offset = mirror::String::CountOffset().Int32Value(); 1550 1551 // Load the count field of the string containing the length and compression flag. 1552 __ movl(string_length, Address(string_obj, count_offset)); 1553 1554 // Do a zero-length check. Even with string compression `count == 0` means empty. 1555 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1556 "Expecting 0=compressed, 1=uncompressed"); 1557 // TODO: Support jecxz. 1558 NearLabel not_found_label; 1559 __ testl(string_length, string_length); 1560 __ j(kEqual, ¬_found_label); 1561 1562 if (mirror::kUseStringCompression) { 1563 string_length_flagged = locations->GetTemp(2).AsRegister<Register>(); 1564 __ movl(string_length_flagged, string_length); 1565 // Extract the length and shift out the least significant bit used as compression flag. 1566 __ shrl(string_length, Immediate(1)); 1567 } 1568 1569 if (start_at_zero) { 1570 // Number of chars to scan is the same as the string length. 1571 __ movl(counter, string_length); 1572 1573 // Move to the start of the string. 1574 __ addl(string_obj, Immediate(value_offset)); 1575 } else { 1576 Register start_index = locations->InAt(2).AsRegister<Register>(); 1577 1578 // Do a start_index check. 1579 __ cmpl(start_index, string_length); 1580 __ j(kGreaterEqual, ¬_found_label); 1581 1582 // Ensure we have a start index >= 0; 1583 __ xorl(counter, counter); 1584 __ cmpl(start_index, Immediate(0)); 1585 __ cmovl(kGreater, counter, start_index); 1586 1587 if (mirror::kUseStringCompression) { 1588 NearLabel modify_counter, offset_uncompressed_label; 1589 __ testl(string_length_flagged, Immediate(1)); 1590 __ j(kNotZero, &offset_uncompressed_label); 1591 // Move to the start of the string: string_obj + value_offset + start_index. 1592 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset)); 1593 __ jmp(&modify_counter); 1594 1595 // Move to the start of the string: string_obj + value_offset + 2 * start_index. 1596 __ Bind(&offset_uncompressed_label); 1597 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); 1598 1599 // Now update ecx (the repne scasw work counter). We have string.length - start_index left to 1600 // compare. 1601 __ Bind(&modify_counter); 1602 } else { 1603 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); 1604 } 1605 __ negl(counter); 1606 __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0)); 1607 } 1608 1609 if (mirror::kUseStringCompression) { 1610 NearLabel uncompressed_string_comparison; 1611 NearLabel comparison_done; 1612 __ testl(string_length_flagged, Immediate(1)); 1613 __ j(kNotZero, &uncompressed_string_comparison); 1614 1615 // Check if EAX (search_value) is ASCII. 1616 __ cmpl(search_value, Immediate(127)); 1617 __ j(kGreater, ¬_found_label); 1618 // Comparing byte-per-byte. 1619 __ repne_scasb(); 1620 __ jmp(&comparison_done); 1621 1622 // Everything is set up for repne scasw: 1623 // * Comparison address in EDI. 1624 // * Counter in ECX. 1625 __ Bind(&uncompressed_string_comparison); 1626 __ repne_scasw(); 1627 __ Bind(&comparison_done); 1628 } else { 1629 __ repne_scasw(); 1630 } 1631 // Did we find a match? 1632 __ j(kNotEqual, ¬_found_label); 1633 1634 // Yes, we matched. Compute the index of the result. 1635 __ subl(string_length, counter); 1636 __ leal(out, Address(string_length, -1)); 1637 1638 NearLabel done; 1639 __ jmp(&done); 1640 1641 // Failed to match; return -1. 1642 __ Bind(¬_found_label); 1643 __ movl(out, Immediate(-1)); 1644 1645 // And join up at the end. 1646 __ Bind(&done); 1647 if (slow_path != nullptr) { 1648 __ Bind(slow_path->GetExitLabel()); 1649 } 1650 } 1651 1652 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) { 1653 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ true); 1654 } 1655 1656 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) { 1657 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); 1658 } 1659 1660 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) { 1661 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ false); 1662 } 1663 1664 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) { 1665 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); 1666 } 1667 1668 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) { 1669 LocationSummary* locations = new (allocator_) LocationSummary( 1670 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1671 InvokeRuntimeCallingConvention calling_convention; 1672 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1673 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 1674 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); 1675 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); 1676 locations->SetOut(Location::RegisterLocation(EAX)); 1677 } 1678 1679 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) { 1680 X86Assembler* assembler = GetAssembler(); 1681 LocationSummary* locations = invoke->GetLocations(); 1682 1683 Register byte_array = locations->InAt(0).AsRegister<Register>(); 1684 __ testl(byte_array, byte_array); 1685 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); 1686 codegen_->AddSlowPath(slow_path); 1687 __ j(kEqual, slow_path->GetEntryLabel()); 1688 1689 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc()); 1690 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); 1691 __ Bind(slow_path->GetExitLabel()); 1692 } 1693 1694 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) { 1695 LocationSummary* locations = 1696 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 1697 InvokeRuntimeCallingConvention calling_convention; 1698 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1699 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 1700 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); 1701 locations->SetOut(Location::RegisterLocation(EAX)); 1702 } 1703 1704 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) { 1705 // No need to emit code checking whether `locations->InAt(2)` is a null 1706 // pointer, as callers of the native method 1707 // 1708 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) 1709 // 1710 // all include a null check on `data` before calling that method. 1711 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); 1712 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); 1713 } 1714 1715 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) { 1716 LocationSummary* locations = new (allocator_) LocationSummary( 1717 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1718 InvokeRuntimeCallingConvention calling_convention; 1719 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1720 locations->SetOut(Location::RegisterLocation(EAX)); 1721 } 1722 1723 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) { 1724 X86Assembler* assembler = GetAssembler(); 1725 LocationSummary* locations = invoke->GetLocations(); 1726 1727 Register string_to_copy = locations->InAt(0).AsRegister<Register>(); 1728 __ testl(string_to_copy, string_to_copy); 1729 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); 1730 codegen_->AddSlowPath(slow_path); 1731 __ j(kEqual, slow_path->GetEntryLabel()); 1732 1733 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc()); 1734 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); 1735 __ Bind(slow_path->GetExitLabel()); 1736 } 1737 1738 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1739 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); 1740 LocationSummary* locations = 1741 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1742 locations->SetInAt(0, Location::RequiresRegister()); 1743 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); 1744 // Place srcEnd in ECX to save a move below. 1745 locations->SetInAt(2, Location::RegisterLocation(ECX)); 1746 locations->SetInAt(3, Location::RequiresRegister()); 1747 locations->SetInAt(4, Location::RequiresRegister()); 1748 1749 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers. 1750 // We don't have enough registers to also grab ECX, so handle below. 1751 locations->AddTemp(Location::RegisterLocation(ESI)); 1752 locations->AddTemp(Location::RegisterLocation(EDI)); 1753 } 1754 1755 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1756 X86Assembler* assembler = GetAssembler(); 1757 LocationSummary* locations = invoke->GetLocations(); 1758 1759 size_t char_component_size = DataType::Size(DataType::Type::kUint16); 1760 // Location of data in char array buffer. 1761 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value(); 1762 // Location of char array data in string. 1763 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); 1764 1765 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); 1766 Register obj = locations->InAt(0).AsRegister<Register>(); 1767 Location srcBegin = locations->InAt(1); 1768 int srcBegin_value = 1769 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; 1770 Register srcEnd = locations->InAt(2).AsRegister<Register>(); 1771 Register dst = locations->InAt(3).AsRegister<Register>(); 1772 Register dstBegin = locations->InAt(4).AsRegister<Register>(); 1773 1774 // Check assumption that sizeof(Char) is 2 (used in scaling below). 1775 const size_t char_size = DataType::Size(DataType::Type::kUint16); 1776 DCHECK_EQ(char_size, 2u); 1777 1778 // Compute the number of chars (words) to move. 1779 // Save ECX, since we don't know if it will be used later. 1780 __ pushl(ECX); 1781 int stack_adjust = kX86WordSize; 1782 __ cfi().AdjustCFAOffset(stack_adjust); 1783 DCHECK_EQ(srcEnd, ECX); 1784 if (srcBegin.IsConstant()) { 1785 __ subl(ECX, Immediate(srcBegin_value)); 1786 } else { 1787 DCHECK(srcBegin.IsRegister()); 1788 __ subl(ECX, srcBegin.AsRegister<Register>()); 1789 } 1790 1791 NearLabel done; 1792 if (mirror::kUseStringCompression) { 1793 // Location of count in string 1794 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 1795 const size_t c_char_size = DataType::Size(DataType::Type::kInt8); 1796 DCHECK_EQ(c_char_size, 1u); 1797 __ pushl(EAX); 1798 __ cfi().AdjustCFAOffset(stack_adjust); 1799 1800 NearLabel copy_loop, copy_uncompressed; 1801 __ testl(Address(obj, count_offset), Immediate(1)); 1802 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1803 "Expecting 0=compressed, 1=uncompressed"); 1804 __ j(kNotZero, ©_uncompressed); 1805 // Compute the address of the source string by adding the number of chars from 1806 // the source beginning to the value offset of a string. 1807 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset)); 1808 1809 // Start the loop to copy String's value to Array of Char. 1810 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); 1811 __ Bind(©_loop); 1812 __ jecxz(&done); 1813 // Use EAX temporary (convert byte from ESI to word). 1814 // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0. 1815 __ movzxb(EAX, Address(ESI, 0)); 1816 __ movw(Address(EDI, 0), EAX); 1817 __ leal(EDI, Address(EDI, char_size)); 1818 __ leal(ESI, Address(ESI, c_char_size)); 1819 // TODO: Add support for LOOP to X86Assembler. 1820 __ subl(ECX, Immediate(1)); 1821 __ jmp(©_loop); 1822 __ Bind(©_uncompressed); 1823 } 1824 1825 // Do the copy for uncompressed string. 1826 // Compute the address of the destination buffer. 1827 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); 1828 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset)); 1829 __ rep_movsw(); 1830 1831 __ Bind(&done); 1832 if (mirror::kUseStringCompression) { 1833 // Restore EAX. 1834 __ popl(EAX); 1835 __ cfi().AdjustCFAOffset(-stack_adjust); 1836 } 1837 // Restore ECX. 1838 __ popl(ECX); 1839 __ cfi().AdjustCFAOffset(-stack_adjust); 1840 } 1841 1842 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) { 1843 Register address = locations->InAt(0).AsRegisterPairLow<Register>(); 1844 Location out_loc = locations->Out(); 1845 // x86 allows unaligned access. We do not have to check the input or use specific instructions 1846 // to avoid a SIGBUS. 1847 switch (size) { 1848 case DataType::Type::kInt8: 1849 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0)); 1850 break; 1851 case DataType::Type::kInt16: 1852 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0)); 1853 break; 1854 case DataType::Type::kInt32: 1855 __ movl(out_loc.AsRegister<Register>(), Address(address, 0)); 1856 break; 1857 case DataType::Type::kInt64: 1858 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0)); 1859 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4)); 1860 break; 1861 default: 1862 LOG(FATAL) << "Type not recognized for peek: " << size; 1863 UNREACHABLE(); 1864 } 1865 } 1866 1867 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) { 1868 CreateLongToIntLocations(allocator_, invoke); 1869 } 1870 1871 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) { 1872 GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler()); 1873 } 1874 1875 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) { 1876 CreateLongToIntLocations(allocator_, invoke); 1877 } 1878 1879 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) { 1880 GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); 1881 } 1882 1883 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) { 1884 CreateLongToLongLocations(allocator_, invoke); 1885 } 1886 1887 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) { 1888 GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); 1889 } 1890 1891 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) { 1892 CreateLongToIntLocations(allocator_, invoke); 1893 } 1894 1895 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) { 1896 GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); 1897 } 1898 1899 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator, 1900 DataType::Type size, 1901 HInvoke* invoke) { 1902 LocationSummary* locations = 1903 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1904 locations->SetInAt(0, Location::RequiresRegister()); 1905 HInstruction* value = invoke->InputAt(1); 1906 if (size == DataType::Type::kInt8) { 1907 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value)); 1908 } else { 1909 locations->SetInAt(1, Location::RegisterOrConstant(value)); 1910 } 1911 } 1912 1913 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) { 1914 Register address = locations->InAt(0).AsRegisterPairLow<Register>(); 1915 Location value_loc = locations->InAt(1); 1916 // x86 allows unaligned access. We do not have to check the input or use specific instructions 1917 // to avoid a SIGBUS. 1918 switch (size) { 1919 case DataType::Type::kInt8: 1920 if (value_loc.IsConstant()) { 1921 __ movb(Address(address, 0), 1922 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); 1923 } else { 1924 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>()); 1925 } 1926 break; 1927 case DataType::Type::kInt16: 1928 if (value_loc.IsConstant()) { 1929 __ movw(Address(address, 0), 1930 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); 1931 } else { 1932 __ movw(Address(address, 0), value_loc.AsRegister<Register>()); 1933 } 1934 break; 1935 case DataType::Type::kInt32: 1936 if (value_loc.IsConstant()) { 1937 __ movl(Address(address, 0), 1938 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); 1939 } else { 1940 __ movl(Address(address, 0), value_loc.AsRegister<Register>()); 1941 } 1942 break; 1943 case DataType::Type::kInt64: 1944 if (value_loc.IsConstant()) { 1945 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue(); 1946 __ movl(Address(address, 0), Immediate(Low32Bits(value))); 1947 __ movl(Address(address, 4), Immediate(High32Bits(value))); 1948 } else { 1949 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>()); 1950 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>()); 1951 } 1952 break; 1953 default: 1954 LOG(FATAL) << "Type not recognized for poke: " << size; 1955 UNREACHABLE(); 1956 } 1957 } 1958 1959 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) { 1960 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke); 1961 } 1962 1963 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) { 1964 GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler()); 1965 } 1966 1967 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) { 1968 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke); 1969 } 1970 1971 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) { 1972 GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); 1973 } 1974 1975 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) { 1976 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke); 1977 } 1978 1979 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) { 1980 GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); 1981 } 1982 1983 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) { 1984 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke); 1985 } 1986 1987 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) { 1988 GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); 1989 } 1990 1991 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) { 1992 LocationSummary* locations = 1993 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1994 locations->SetOut(Location::RequiresRegister()); 1995 } 1996 1997 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) { 1998 Register out = invoke->GetLocations()->Out().AsRegister<Register>(); 1999 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>())); 2000 } 2001 2002 static void GenUnsafeGet(HInvoke* invoke, 2003 DataType::Type type, 2004 bool is_volatile, 2005 CodeGeneratorX86* codegen) { 2006 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); 2007 LocationSummary* locations = invoke->GetLocations(); 2008 Location base_loc = locations->InAt(1); 2009 Register base = base_loc.AsRegister<Register>(); 2010 Location offset_loc = locations->InAt(2); 2011 Register offset = offset_loc.AsRegisterPairLow<Register>(); 2012 Location output_loc = locations->Out(); 2013 2014 switch (type) { 2015 case DataType::Type::kInt32: { 2016 Register output = output_loc.AsRegister<Register>(); 2017 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 2018 break; 2019 } 2020 2021 case DataType::Type::kReference: { 2022 Register output = output_loc.AsRegister<Register>(); 2023 if (kEmitCompilerReadBarrier) { 2024 if (kUseBakerReadBarrier) { 2025 Address src(base, offset, ScaleFactor::TIMES_1, 0); 2026 codegen->GenerateReferenceLoadWithBakerReadBarrier( 2027 invoke, output_loc, base, src, /* needs_null_check */ false); 2028 } else { 2029 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 2030 codegen->GenerateReadBarrierSlow( 2031 invoke, output_loc, output_loc, base_loc, 0U, offset_loc); 2032 } 2033 } else { 2034 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); 2035 __ MaybeUnpoisonHeapReference(output); 2036 } 2037 break; 2038 } 2039 2040 case DataType::Type::kInt64: { 2041 Register output_lo = output_loc.AsRegisterPairLow<Register>(); 2042 Register output_hi = output_loc.AsRegisterPairHigh<Register>(); 2043 if (is_volatile) { 2044 // Need to use a XMM to read atomically. 2045 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 2046 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0)); 2047 __ movd(output_lo, temp); 2048 __ psrlq(temp, Immediate(32)); 2049 __ movd(output_hi, temp); 2050 } else { 2051 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0)); 2052 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4)); 2053 } 2054 } 2055 break; 2056 2057 default: 2058 LOG(FATAL) << "Unsupported op size " << type; 2059 UNREACHABLE(); 2060 } 2061 } 2062 2063 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, 2064 HInvoke* invoke, 2065 DataType::Type type, 2066 bool is_volatile) { 2067 bool can_call = kEmitCompilerReadBarrier && 2068 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || 2069 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); 2070 LocationSummary* locations = 2071 new (allocator) LocationSummary(invoke, 2072 can_call 2073 ? LocationSummary::kCallOnSlowPath 2074 : LocationSummary::kNoCall, 2075 kIntrinsified); 2076 if (can_call && kUseBakerReadBarrier) { 2077 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 2078 } 2079 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 2080 locations->SetInAt(1, Location::RequiresRegister()); 2081 locations->SetInAt(2, Location::RequiresRegister()); 2082 if (type == DataType::Type::kInt64) { 2083 if (is_volatile) { 2084 // Need to use XMM to read volatile. 2085 locations->AddTemp(Location::RequiresFpuRegister()); 2086 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2087 } else { 2088 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 2089 } 2090 } else { 2091 locations->SetOut(Location::RequiresRegister(), 2092 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); 2093 } 2094 } 2095 2096 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) { 2097 CreateIntIntIntToIntLocations( 2098 allocator_, invoke, DataType::Type::kInt32, /* is_volatile */ false); 2099 } 2100 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) { 2101 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile */ true); 2102 } 2103 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) { 2104 CreateIntIntIntToIntLocations( 2105 allocator_, invoke, DataType::Type::kInt64, /* is_volatile */ false); 2106 } 2107 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 2108 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile */ true); 2109 } 2110 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) { 2111 CreateIntIntIntToIntLocations( 2112 allocator_, invoke, DataType::Type::kReference, /* is_volatile */ false); 2113 } 2114 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 2115 CreateIntIntIntToIntLocations( 2116 allocator_, invoke, DataType::Type::kReference, /* is_volatile */ true); 2117 } 2118 2119 2120 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) { 2121 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); 2122 } 2123 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) { 2124 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); 2125 } 2126 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) { 2127 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); 2128 } 2129 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 2130 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); 2131 } 2132 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) { 2133 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); 2134 } 2135 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 2136 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); 2137 } 2138 2139 2140 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator, 2141 DataType::Type type, 2142 HInvoke* invoke, 2143 bool is_volatile) { 2144 LocationSummary* locations = 2145 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2146 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 2147 locations->SetInAt(1, Location::RequiresRegister()); 2148 locations->SetInAt(2, Location::RequiresRegister()); 2149 locations->SetInAt(3, Location::RequiresRegister()); 2150 if (type == DataType::Type::kReference) { 2151 // Need temp registers for card-marking. 2152 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 2153 // Ensure the value is in a byte register. 2154 locations->AddTemp(Location::RegisterLocation(ECX)); 2155 } else if (type == DataType::Type::kInt64 && is_volatile) { 2156 locations->AddTemp(Location::RequiresFpuRegister()); 2157 locations->AddTemp(Location::RequiresFpuRegister()); 2158 } 2159 } 2160 2161 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) { 2162 CreateIntIntIntIntToVoidPlusTempsLocations( 2163 allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ false); 2164 } 2165 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) { 2166 CreateIntIntIntIntToVoidPlusTempsLocations( 2167 allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ false); 2168 } 2169 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) { 2170 CreateIntIntIntIntToVoidPlusTempsLocations( 2171 allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ true); 2172 } 2173 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) { 2174 CreateIntIntIntIntToVoidPlusTempsLocations( 2175 allocator_, DataType::Type::kReference, invoke, /* is_volatile */ false); 2176 } 2177 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 2178 CreateIntIntIntIntToVoidPlusTempsLocations( 2179 allocator_, DataType::Type::kReference, invoke, /* is_volatile */ false); 2180 } 2181 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 2182 CreateIntIntIntIntToVoidPlusTempsLocations( 2183 allocator_, DataType::Type::kReference, invoke, /* is_volatile */ true); 2184 } 2185 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) { 2186 CreateIntIntIntIntToVoidPlusTempsLocations( 2187 allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ false); 2188 } 2189 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { 2190 CreateIntIntIntIntToVoidPlusTempsLocations( 2191 allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ false); 2192 } 2193 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { 2194 CreateIntIntIntIntToVoidPlusTempsLocations( 2195 allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ true); 2196 } 2197 2198 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 2199 // memory model. 2200 static void GenUnsafePut(LocationSummary* locations, 2201 DataType::Type type, 2202 bool is_volatile, 2203 CodeGeneratorX86* codegen) { 2204 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); 2205 Register base = locations->InAt(1).AsRegister<Register>(); 2206 Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); 2207 Location value_loc = locations->InAt(3); 2208 2209 if (type == DataType::Type::kInt64) { 2210 Register value_lo = value_loc.AsRegisterPairLow<Register>(); 2211 Register value_hi = value_loc.AsRegisterPairHigh<Register>(); 2212 if (is_volatile) { 2213 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 2214 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); 2215 __ movd(temp1, value_lo); 2216 __ movd(temp2, value_hi); 2217 __ punpckldq(temp1, temp2); 2218 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1); 2219 } else { 2220 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo); 2221 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi); 2222 } 2223 } else if (kPoisonHeapReferences && type == DataType::Type::kReference) { 2224 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2225 __ movl(temp, value_loc.AsRegister<Register>()); 2226 __ PoisonHeapReference(temp); 2227 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp); 2228 } else { 2229 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>()); 2230 } 2231 2232 if (is_volatile) { 2233 codegen->MemoryFence(); 2234 } 2235 2236 if (type == DataType::Type::kReference) { 2237 bool value_can_be_null = true; // TODO: Worth finding out this information? 2238 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), 2239 locations->GetTemp(1).AsRegister<Register>(), 2240 base, 2241 value_loc.AsRegister<Register>(), 2242 value_can_be_null); 2243 } 2244 } 2245 2246 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) { 2247 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); 2248 } 2249 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) { 2250 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); 2251 } 2252 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) { 2253 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ true, codegen_); 2254 } 2255 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) { 2256 GenUnsafePut( 2257 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); 2258 } 2259 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 2260 GenUnsafePut( 2261 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); 2262 } 2263 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 2264 GenUnsafePut( 2265 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ true, codegen_); 2266 } 2267 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) { 2268 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); 2269 } 2270 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { 2271 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); 2272 } 2273 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { 2274 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ true, codegen_); 2275 } 2276 2277 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, 2278 DataType::Type type, 2279 HInvoke* invoke) { 2280 bool can_call = kEmitCompilerReadBarrier && 2281 kUseBakerReadBarrier && 2282 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); 2283 LocationSummary* locations = 2284 new (allocator) LocationSummary(invoke, 2285 can_call 2286 ? LocationSummary::kCallOnSlowPath 2287 : LocationSummary::kNoCall, 2288 kIntrinsified); 2289 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 2290 locations->SetInAt(1, Location::RequiresRegister()); 2291 // Offset is a long, but in 32 bit mode, we only need the low word. 2292 // Can we update the invoke here to remove a TypeConvert to Long? 2293 locations->SetInAt(2, Location::RequiresRegister()); 2294 // Expected value must be in EAX or EDX:EAX. 2295 // For long, new value must be in ECX:EBX. 2296 if (type == DataType::Type::kInt64) { 2297 locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX)); 2298 locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX)); 2299 } else { 2300 locations->SetInAt(3, Location::RegisterLocation(EAX)); 2301 locations->SetInAt(4, Location::RequiresRegister()); 2302 } 2303 2304 // Force a byte register for the output. 2305 locations->SetOut(Location::RegisterLocation(EAX)); 2306 if (type == DataType::Type::kReference) { 2307 // Need temporary registers for card-marking, and possibly for 2308 // (Baker) read barrier. 2309 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 2310 // Need a byte register for marking. 2311 locations->AddTemp(Location::RegisterLocation(ECX)); 2312 } 2313 } 2314 2315 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) { 2316 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke); 2317 } 2318 2319 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { 2320 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke); 2321 } 2322 2323 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { 2324 // The only read barrier implementation supporting the 2325 // UnsafeCASObject intrinsic is the Baker-style read barriers. 2326 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { 2327 return; 2328 } 2329 2330 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke); 2331 } 2332 2333 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) { 2334 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); 2335 LocationSummary* locations = invoke->GetLocations(); 2336 2337 Register base = locations->InAt(1).AsRegister<Register>(); 2338 Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); 2339 Location out = locations->Out(); 2340 DCHECK_EQ(out.AsRegister<Register>(), EAX); 2341 2342 // The address of the field within the holding object. 2343 Address field_addr(base, offset, ScaleFactor::TIMES_1, 0); 2344 2345 if (type == DataType::Type::kReference) { 2346 // The only read barrier implementation supporting the 2347 // UnsafeCASObject intrinsic is the Baker-style read barriers. 2348 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 2349 2350 Location temp1_loc = locations->GetTemp(0); 2351 Register temp1 = temp1_loc.AsRegister<Register>(); 2352 Register temp2 = locations->GetTemp(1).AsRegister<Register>(); 2353 2354 Register expected = locations->InAt(3).AsRegister<Register>(); 2355 // Ensure `expected` is in EAX (required by the CMPXCHG instruction). 2356 DCHECK_EQ(expected, EAX); 2357 Register value = locations->InAt(4).AsRegister<Register>(); 2358 2359 // Mark card for object assuming new value is stored. 2360 bool value_can_be_null = true; // TODO: Worth finding out this information? 2361 codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null); 2362 2363 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2364 // Need to make sure the reference stored in the field is a to-space 2365 // one before attempting the CAS or the CAS could fail incorrectly. 2366 codegen->GenerateReferenceLoadWithBakerReadBarrier( 2367 invoke, 2368 temp1_loc, // Unused, used only as a "temporary" within the read barrier. 2369 base, 2370 field_addr, 2371 /* needs_null_check */ false, 2372 /* always_update_field */ true, 2373 &temp2); 2374 } 2375 2376 bool base_equals_value = (base == value); 2377 if (kPoisonHeapReferences) { 2378 if (base_equals_value) { 2379 // If `base` and `value` are the same register location, move 2380 // `value` to a temporary register. This way, poisoning 2381 // `value` won't invalidate `base`. 2382 value = temp1; 2383 __ movl(value, base); 2384 } 2385 2386 // Check that the register allocator did not assign the location 2387 // of `expected` (EAX) to `value` nor to `base`, so that heap 2388 // poisoning (when enabled) works as intended below. 2389 // - If `value` were equal to `expected`, both references would 2390 // be poisoned twice, meaning they would not be poisoned at 2391 // all, as heap poisoning uses address negation. 2392 // - If `base` were equal to `expected`, poisoning `expected` 2393 // would invalidate `base`. 2394 DCHECK_NE(value, expected); 2395 DCHECK_NE(base, expected); 2396 2397 __ PoisonHeapReference(expected); 2398 __ PoisonHeapReference(value); 2399 } 2400 2401 __ LockCmpxchgl(field_addr, value); 2402 2403 // LOCK CMPXCHG has full barrier semantics, and we don't need 2404 // scheduling barriers at this time. 2405 2406 // Convert ZF into the Boolean result. 2407 __ setb(kZero, out.AsRegister<Register>()); 2408 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); 2409 2410 // If heap poisoning is enabled, we need to unpoison the values 2411 // that were poisoned earlier. 2412 if (kPoisonHeapReferences) { 2413 if (base_equals_value) { 2414 // `value` has been moved to a temporary register, no need to 2415 // unpoison it. 2416 } else { 2417 // Ensure `value` is different from `out`, so that unpoisoning 2418 // the former does not invalidate the latter. 2419 DCHECK_NE(value, out.AsRegister<Register>()); 2420 __ UnpoisonHeapReference(value); 2421 } 2422 // Do not unpoison the reference contained in register 2423 // `expected`, as it is the same as register `out` (EAX). 2424 } 2425 } else { 2426 if (type == DataType::Type::kInt32) { 2427 // Ensure the expected value is in EAX (required by the CMPXCHG 2428 // instruction). 2429 DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX); 2430 __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>()); 2431 } else if (type == DataType::Type::kInt64) { 2432 // Ensure the expected value is in EAX:EDX and that the new 2433 // value is in EBX:ECX (required by the CMPXCHG8B instruction). 2434 DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX); 2435 DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX); 2436 DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX); 2437 DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX); 2438 __ LockCmpxchg8b(field_addr); 2439 } else { 2440 LOG(FATAL) << "Unexpected CAS type " << type; 2441 } 2442 2443 // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we 2444 // don't need scheduling barriers at this time. 2445 2446 // Convert ZF into the Boolean result. 2447 __ setb(kZero, out.AsRegister<Register>()); 2448 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); 2449 } 2450 } 2451 2452 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) { 2453 GenCAS(DataType::Type::kInt32, invoke, codegen_); 2454 } 2455 2456 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { 2457 GenCAS(DataType::Type::kInt64, invoke, codegen_); 2458 } 2459 2460 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { 2461 // The only read barrier implementation supporting the 2462 // UnsafeCASObject intrinsic is the Baker-style read barriers. 2463 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 2464 2465 GenCAS(DataType::Type::kReference, invoke, codegen_); 2466 } 2467 2468 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) { 2469 LocationSummary* locations = 2470 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2471 locations->SetInAt(0, Location::RequiresRegister()); 2472 locations->SetOut(Location::SameAsFirstInput()); 2473 locations->AddTemp(Location::RequiresRegister()); 2474 } 2475 2476 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask, 2477 X86Assembler* assembler) { 2478 Immediate imm_shift(shift); 2479 Immediate imm_mask(mask); 2480 __ movl(temp, reg); 2481 __ shrl(reg, imm_shift); 2482 __ andl(temp, imm_mask); 2483 __ andl(reg, imm_mask); 2484 __ shll(temp, imm_shift); 2485 __ orl(reg, temp); 2486 } 2487 2488 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) { 2489 X86Assembler* assembler = GetAssembler(); 2490 LocationSummary* locations = invoke->GetLocations(); 2491 2492 Register reg = locations->InAt(0).AsRegister<Register>(); 2493 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2494 2495 /* 2496 * Use one bswap instruction to reverse byte order first and then use 3 rounds of 2497 * swapping bits to reverse bits in a number x. Using bswap to save instructions 2498 * compared to generic luni implementation which has 5 rounds of swapping bits. 2499 * x = bswap x 2500 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; 2501 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; 2502 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; 2503 */ 2504 __ bswapl(reg); 2505 SwapBits(reg, temp, 1, 0x55555555, assembler); 2506 SwapBits(reg, temp, 2, 0x33333333, assembler); 2507 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); 2508 } 2509 2510 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) { 2511 LocationSummary* locations = 2512 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2513 locations->SetInAt(0, Location::RequiresRegister()); 2514 locations->SetOut(Location::SameAsFirstInput()); 2515 locations->AddTemp(Location::RequiresRegister()); 2516 } 2517 2518 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) { 2519 X86Assembler* assembler = GetAssembler(); 2520 LocationSummary* locations = invoke->GetLocations(); 2521 2522 Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>(); 2523 Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>(); 2524 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2525 2526 // We want to swap high/low, then bswap each one, and then do the same 2527 // as a 32 bit reverse. 2528 // Exchange high and low. 2529 __ movl(temp, reg_low); 2530 __ movl(reg_low, reg_high); 2531 __ movl(reg_high, temp); 2532 2533 // bit-reverse low 2534 __ bswapl(reg_low); 2535 SwapBits(reg_low, temp, 1, 0x55555555, assembler); 2536 SwapBits(reg_low, temp, 2, 0x33333333, assembler); 2537 SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler); 2538 2539 // bit-reverse high 2540 __ bswapl(reg_high); 2541 SwapBits(reg_high, temp, 1, 0x55555555, assembler); 2542 SwapBits(reg_high, temp, 2, 0x33333333, assembler); 2543 SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler); 2544 } 2545 2546 static void CreateBitCountLocations( 2547 ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) { 2548 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) { 2549 // Do nothing if there is no popcnt support. This results in generating 2550 // a call for the intrinsic rather than direct code. 2551 return; 2552 } 2553 LocationSummary* locations = 2554 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2555 if (is_long) { 2556 locations->AddTemp(Location::RequiresRegister()); 2557 } 2558 locations->SetInAt(0, Location::Any()); 2559 locations->SetOut(Location::RequiresRegister()); 2560 } 2561 2562 static void GenBitCount(X86Assembler* assembler, 2563 CodeGeneratorX86* codegen, 2564 HInvoke* invoke, bool is_long) { 2565 LocationSummary* locations = invoke->GetLocations(); 2566 Location src = locations->InAt(0); 2567 Register out = locations->Out().AsRegister<Register>(); 2568 2569 if (invoke->InputAt(0)->IsConstant()) { 2570 // Evaluate this at compile time. 2571 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2572 int32_t result = is_long 2573 ? POPCOUNT(static_cast<uint64_t>(value)) 2574 : POPCOUNT(static_cast<uint32_t>(value)); 2575 codegen->Load32BitValue(out, result); 2576 return; 2577 } 2578 2579 // Handle the non-constant cases. 2580 if (!is_long) { 2581 if (src.IsRegister()) { 2582 __ popcntl(out, src.AsRegister<Register>()); 2583 } else { 2584 DCHECK(src.IsStackSlot()); 2585 __ popcntl(out, Address(ESP, src.GetStackIndex())); 2586 } 2587 } else { 2588 // The 64-bit case needs to worry about two parts. 2589 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2590 if (src.IsRegisterPair()) { 2591 __ popcntl(temp, src.AsRegisterPairLow<Register>()); 2592 __ popcntl(out, src.AsRegisterPairHigh<Register>()); 2593 } else { 2594 DCHECK(src.IsDoubleStackSlot()); 2595 __ popcntl(temp, Address(ESP, src.GetStackIndex())); 2596 __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize))); 2597 } 2598 __ addl(out, temp); 2599 } 2600 } 2601 2602 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) { 2603 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long */ false); 2604 } 2605 2606 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) { 2607 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false); 2608 } 2609 2610 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) { 2611 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long */ true); 2612 } 2613 2614 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) { 2615 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true); 2616 } 2617 2618 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) { 2619 LocationSummary* locations = 2620 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2621 if (is_long) { 2622 locations->SetInAt(0, Location::RequiresRegister()); 2623 } else { 2624 locations->SetInAt(0, Location::Any()); 2625 } 2626 locations->SetOut(Location::RequiresRegister()); 2627 } 2628 2629 static void GenLeadingZeros(X86Assembler* assembler, 2630 CodeGeneratorX86* codegen, 2631 HInvoke* invoke, bool is_long) { 2632 LocationSummary* locations = invoke->GetLocations(); 2633 Location src = locations->InAt(0); 2634 Register out = locations->Out().AsRegister<Register>(); 2635 2636 if (invoke->InputAt(0)->IsConstant()) { 2637 // Evaluate this at compile time. 2638 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2639 if (value == 0) { 2640 value = is_long ? 64 : 32; 2641 } else { 2642 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value)); 2643 } 2644 codegen->Load32BitValue(out, value); 2645 return; 2646 } 2647 2648 // Handle the non-constant cases. 2649 if (!is_long) { 2650 if (src.IsRegister()) { 2651 __ bsrl(out, src.AsRegister<Register>()); 2652 } else { 2653 DCHECK(src.IsStackSlot()); 2654 __ bsrl(out, Address(ESP, src.GetStackIndex())); 2655 } 2656 2657 // BSR sets ZF if the input was zero, and the output is undefined. 2658 NearLabel all_zeroes, done; 2659 __ j(kEqual, &all_zeroes); 2660 2661 // Correct the result from BSR to get the final CLZ result. 2662 __ xorl(out, Immediate(31)); 2663 __ jmp(&done); 2664 2665 // Fix the zero case with the expected result. 2666 __ Bind(&all_zeroes); 2667 __ movl(out, Immediate(32)); 2668 2669 __ Bind(&done); 2670 return; 2671 } 2672 2673 // 64 bit case needs to worry about both parts of the register. 2674 DCHECK(src.IsRegisterPair()); 2675 Register src_lo = src.AsRegisterPairLow<Register>(); 2676 Register src_hi = src.AsRegisterPairHigh<Register>(); 2677 NearLabel handle_low, done, all_zeroes; 2678 2679 // Is the high word zero? 2680 __ testl(src_hi, src_hi); 2681 __ j(kEqual, &handle_low); 2682 2683 // High word is not zero. We know that the BSR result is defined in this case. 2684 __ bsrl(out, src_hi); 2685 2686 // Correct the result from BSR to get the final CLZ result. 2687 __ xorl(out, Immediate(31)); 2688 __ jmp(&done); 2689 2690 // High word was zero. We have to compute the low word count and add 32. 2691 __ Bind(&handle_low); 2692 __ bsrl(out, src_lo); 2693 __ j(kEqual, &all_zeroes); 2694 2695 // We had a valid result. Use an XOR to both correct the result and add 32. 2696 __ xorl(out, Immediate(63)); 2697 __ jmp(&done); 2698 2699 // All zero case. 2700 __ Bind(&all_zeroes); 2701 __ movl(out, Immediate(64)); 2702 2703 __ Bind(&done); 2704 } 2705 2706 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 2707 CreateLeadingZeroLocations(allocator_, invoke, /* is_long */ false); 2708 } 2709 2710 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 2711 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); 2712 } 2713 2714 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 2715 CreateLeadingZeroLocations(allocator_, invoke, /* is_long */ true); 2716 } 2717 2718 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 2719 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); 2720 } 2721 2722 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) { 2723 LocationSummary* locations = 2724 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 2725 if (is_long) { 2726 locations->SetInAt(0, Location::RequiresRegister()); 2727 } else { 2728 locations->SetInAt(0, Location::Any()); 2729 } 2730 locations->SetOut(Location::RequiresRegister()); 2731 } 2732 2733 static void GenTrailingZeros(X86Assembler* assembler, 2734 CodeGeneratorX86* codegen, 2735 HInvoke* invoke, bool is_long) { 2736 LocationSummary* locations = invoke->GetLocations(); 2737 Location src = locations->InAt(0); 2738 Register out = locations->Out().AsRegister<Register>(); 2739 2740 if (invoke->InputAt(0)->IsConstant()) { 2741 // Evaluate this at compile time. 2742 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2743 if (value == 0) { 2744 value = is_long ? 64 : 32; 2745 } else { 2746 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value)); 2747 } 2748 codegen->Load32BitValue(out, value); 2749 return; 2750 } 2751 2752 // Handle the non-constant cases. 2753 if (!is_long) { 2754 if (src.IsRegister()) { 2755 __ bsfl(out, src.AsRegister<Register>()); 2756 } else { 2757 DCHECK(src.IsStackSlot()); 2758 __ bsfl(out, Address(ESP, src.GetStackIndex())); 2759 } 2760 2761 // BSF sets ZF if the input was zero, and the output is undefined. 2762 NearLabel done; 2763 __ j(kNotEqual, &done); 2764 2765 // Fix the zero case with the expected result. 2766 __ movl(out, Immediate(32)); 2767 2768 __ Bind(&done); 2769 return; 2770 } 2771 2772 // 64 bit case needs to worry about both parts of the register. 2773 DCHECK(src.IsRegisterPair()); 2774 Register src_lo = src.AsRegisterPairLow<Register>(); 2775 Register src_hi = src.AsRegisterPairHigh<Register>(); 2776 NearLabel done, all_zeroes; 2777 2778 // If the low word is zero, then ZF will be set. If not, we have the answer. 2779 __ bsfl(out, src_lo); 2780 __ j(kNotEqual, &done); 2781 2782 // Low word was zero. We have to compute the high word count and add 32. 2783 __ bsfl(out, src_hi); 2784 __ j(kEqual, &all_zeroes); 2785 2786 // We had a valid result. Add 32 to account for the low word being zero. 2787 __ addl(out, Immediate(32)); 2788 __ jmp(&done); 2789 2790 // All zero case. 2791 __ Bind(&all_zeroes); 2792 __ movl(out, Immediate(64)); 2793 2794 __ Bind(&done); 2795 } 2796 2797 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { 2798 CreateTrailingZeroLocations(allocator_, invoke, /* is_long */ false); 2799 } 2800 2801 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { 2802 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); 2803 } 2804 2805 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { 2806 CreateTrailingZeroLocations(allocator_, invoke, /* is_long */ true); 2807 } 2808 2809 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { 2810 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); 2811 } 2812 2813 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) { 2814 return instruction->InputAt(input0) == instruction->InputAt(input1); 2815 } 2816 2817 // Compute base address for the System.arraycopy intrinsic in `base`. 2818 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler, 2819 DataType::Type type, 2820 const Register& array, 2821 const Location& pos, 2822 const Register& base) { 2823 // This routine is only used by the SystemArrayCopy intrinsic at the 2824 // moment. We can allow DataType::Type::kReference as `type` to implement 2825 // the SystemArrayCopyChar intrinsic. 2826 DCHECK_EQ(type, DataType::Type::kReference); 2827 const int32_t element_size = DataType::Size(type); 2828 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type)); 2829 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); 2830 2831 if (pos.IsConstant()) { 2832 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue(); 2833 __ leal(base, Address(array, element_size * constant + data_offset)); 2834 } else { 2835 __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset)); 2836 } 2837 } 2838 2839 // Compute end source address for the System.arraycopy intrinsic in `end`. 2840 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler, 2841 DataType::Type type, 2842 const Location& copy_length, 2843 const Register& base, 2844 const Register& end) { 2845 // This routine is only used by the SystemArrayCopy intrinsic at the 2846 // moment. We can allow DataType::Type::kReference as `type` to implement 2847 // the SystemArrayCopyChar intrinsic. 2848 DCHECK_EQ(type, DataType::Type::kReference); 2849 const int32_t element_size = DataType::Size(type); 2850 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type)); 2851 2852 if (copy_length.IsConstant()) { 2853 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue(); 2854 __ leal(end, Address(base, element_size * constant)); 2855 } else { 2856 __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0)); 2857 } 2858 } 2859 2860 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { 2861 // The only read barrier implementation supporting the 2862 // SystemArrayCopy intrinsic is the Baker-style read barriers. 2863 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { 2864 return; 2865 } 2866 2867 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); 2868 if (invoke->GetLocations() != nullptr) { 2869 // Need a byte register for marking. 2870 invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX)); 2871 2872 static constexpr size_t kSrc = 0; 2873 static constexpr size_t kSrcPos = 1; 2874 static constexpr size_t kDest = 2; 2875 static constexpr size_t kDestPos = 3; 2876 static constexpr size_t kLength = 4; 2877 2878 if (!invoke->InputAt(kSrcPos)->IsIntConstant() && 2879 !invoke->InputAt(kDestPos)->IsIntConstant() && 2880 !invoke->InputAt(kLength)->IsIntConstant()) { 2881 if (!IsSameInput(invoke, kSrcPos, kDestPos) && 2882 !IsSameInput(invoke, kSrcPos, kLength) && 2883 !IsSameInput(invoke, kDestPos, kLength) && 2884 !IsSameInput(invoke, kSrc, kDest)) { 2885 // Not enough registers, make the length also take a stack slot. 2886 invoke->GetLocations()->SetInAt(kLength, Location::Any()); 2887 } 2888 } 2889 } 2890 } 2891 2892 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { 2893 // The only read barrier implementation supporting the 2894 // SystemArrayCopy intrinsic is the Baker-style read barriers. 2895 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 2896 2897 X86Assembler* assembler = GetAssembler(); 2898 LocationSummary* locations = invoke->GetLocations(); 2899 2900 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 2901 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 2902 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 2903 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 2904 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); 2905 2906 Register src = locations->InAt(0).AsRegister<Register>(); 2907 Location src_pos = locations->InAt(1); 2908 Register dest = locations->InAt(2).AsRegister<Register>(); 2909 Location dest_pos = locations->InAt(3); 2910 Location length_arg = locations->InAt(4); 2911 Location length = length_arg; 2912 Location temp1_loc = locations->GetTemp(0); 2913 Register temp1 = temp1_loc.AsRegister<Register>(); 2914 Location temp2_loc = locations->GetTemp(1); 2915 Register temp2 = temp2_loc.AsRegister<Register>(); 2916 2917 SlowPathCode* intrinsic_slow_path = 2918 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); 2919 codegen_->AddSlowPath(intrinsic_slow_path); 2920 2921 NearLabel conditions_on_positions_validated; 2922 SystemArrayCopyOptimizations optimizations(invoke); 2923 2924 // If source and destination are the same, we go to slow path if we need to do 2925 // forward copying. 2926 if (src_pos.IsConstant()) { 2927 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); 2928 if (dest_pos.IsConstant()) { 2929 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); 2930 if (optimizations.GetDestinationIsSource()) { 2931 // Checked when building locations. 2932 DCHECK_GE(src_pos_constant, dest_pos_constant); 2933 } else if (src_pos_constant < dest_pos_constant) { 2934 __ cmpl(src, dest); 2935 __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); 2936 } 2937 } else { 2938 if (!optimizations.GetDestinationIsSource()) { 2939 __ cmpl(src, dest); 2940 __ j(kNotEqual, &conditions_on_positions_validated); 2941 } 2942 __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant)); 2943 __ j(kGreater, intrinsic_slow_path->GetEntryLabel()); 2944 } 2945 } else { 2946 if (!optimizations.GetDestinationIsSource()) { 2947 __ cmpl(src, dest); 2948 __ j(kNotEqual, &conditions_on_positions_validated); 2949 } 2950 if (dest_pos.IsConstant()) { 2951 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); 2952 __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant)); 2953 __ j(kLess, intrinsic_slow_path->GetEntryLabel()); 2954 } else { 2955 __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>()); 2956 __ j(kLess, intrinsic_slow_path->GetEntryLabel()); 2957 } 2958 } 2959 2960 __ Bind(&conditions_on_positions_validated); 2961 2962 if (!optimizations.GetSourceIsNotNull()) { 2963 // Bail out if the source is null. 2964 __ testl(src, src); 2965 __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); 2966 } 2967 2968 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { 2969 // Bail out if the destination is null. 2970 __ testl(dest, dest); 2971 __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); 2972 } 2973 2974 Location temp3_loc = locations->GetTemp(2); 2975 Register temp3 = temp3_loc.AsRegister<Register>(); 2976 if (length.IsStackSlot()) { 2977 __ movl(temp3, Address(ESP, length.GetStackIndex())); 2978 length = Location::RegisterLocation(temp3); 2979 } 2980 2981 // If the length is negative, bail out. 2982 // We have already checked in the LocationsBuilder for the constant case. 2983 if (!length.IsConstant() && 2984 !optimizations.GetCountIsSourceLength() && 2985 !optimizations.GetCountIsDestinationLength()) { 2986 __ testl(length.AsRegister<Register>(), length.AsRegister<Register>()); 2987 __ j(kLess, intrinsic_slow_path->GetEntryLabel()); 2988 } 2989 2990 // Validity checks: source. 2991 CheckPosition(assembler, 2992 src_pos, 2993 src, 2994 length, 2995 intrinsic_slow_path, 2996 temp1, 2997 optimizations.GetCountIsSourceLength()); 2998 2999 // Validity checks: dest. 3000 CheckPosition(assembler, 3001 dest_pos, 3002 dest, 3003 length, 3004 intrinsic_slow_path, 3005 temp1, 3006 optimizations.GetCountIsDestinationLength()); 3007 3008 if (!optimizations.GetDoesNotNeedTypeCheck()) { 3009 // Check whether all elements of the source array are assignable to the component 3010 // type of the destination array. We do two checks: the classes are the same, 3011 // or the destination is Object[]. If none of these checks succeed, we go to the 3012 // slow path. 3013 3014 if (!optimizations.GetSourceIsNonPrimitiveArray()) { 3015 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 3016 // /* HeapReference<Class> */ temp1 = src->klass_ 3017 codegen_->GenerateFieldLoadWithBakerReadBarrier( 3018 invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); 3019 // Bail out if the source is not a non primitive array. 3020 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 3021 codegen_->GenerateFieldLoadWithBakerReadBarrier( 3022 invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); 3023 __ testl(temp1, temp1); 3024 __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); 3025 // If heap poisoning is enabled, `temp1` has been unpoisoned 3026 // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. 3027 } else { 3028 // /* HeapReference<Class> */ temp1 = src->klass_ 3029 __ movl(temp1, Address(src, class_offset)); 3030 __ MaybeUnpoisonHeapReference(temp1); 3031 // Bail out if the source is not a non primitive array. 3032 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 3033 __ movl(temp1, Address(temp1, component_offset)); 3034 __ testl(temp1, temp1); 3035 __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); 3036 __ MaybeUnpoisonHeapReference(temp1); 3037 } 3038 __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot)); 3039 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); 3040 } 3041 3042 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 3043 if (length.Equals(Location::RegisterLocation(temp3))) { 3044 // When Baker read barriers are enabled, register `temp3`, 3045 // which in the present case contains the `length` parameter, 3046 // will be overwritten below. Make the `length` location 3047 // reference the original stack location; it will be moved 3048 // back to `temp3` later if necessary. 3049 DCHECK(length_arg.IsStackSlot()); 3050 length = length_arg; 3051 } 3052 3053 // /* HeapReference<Class> */ temp1 = dest->klass_ 3054 codegen_->GenerateFieldLoadWithBakerReadBarrier( 3055 invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false); 3056 3057 if (!optimizations.GetDestinationIsNonPrimitiveArray()) { 3058 // Bail out if the destination is not a non primitive array. 3059 // 3060 // Register `temp1` is not trashed by the read barrier emitted 3061 // by GenerateFieldLoadWithBakerReadBarrier below, as that 3062 // method produces a call to a ReadBarrierMarkRegX entry point, 3063 // which saves all potentially live registers, including 3064 // temporaries such a `temp1`. 3065 // /* HeapReference<Class> */ temp2 = temp1->component_type_ 3066 codegen_->GenerateFieldLoadWithBakerReadBarrier( 3067 invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false); 3068 __ testl(temp2, temp2); 3069 __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); 3070 // If heap poisoning is enabled, `temp2` has been unpoisoned 3071 // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. 3072 __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot)); 3073 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); 3074 } 3075 3076 // For the same reason given earlier, `temp1` is not trashed by the 3077 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. 3078 // /* HeapReference<Class> */ temp2 = src->klass_ 3079 codegen_->GenerateFieldLoadWithBakerReadBarrier( 3080 invoke, temp2_loc, src, class_offset, /* needs_null_check */ false); 3081 // Note: if heap poisoning is on, we are comparing two unpoisoned references here. 3082 __ cmpl(temp1, temp2); 3083 3084 if (optimizations.GetDestinationIsTypedObjectArray()) { 3085 NearLabel do_copy; 3086 __ j(kEqual, &do_copy); 3087 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 3088 codegen_->GenerateFieldLoadWithBakerReadBarrier( 3089 invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); 3090 // We do not need to emit a read barrier for the following 3091 // heap reference load, as `temp1` is only used in a 3092 // comparison with null below, and this reference is not 3093 // kept afterwards. 3094 __ cmpl(Address(temp1, super_offset), Immediate(0)); 3095 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); 3096 __ Bind(&do_copy); 3097 } else { 3098 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); 3099 } 3100 } else { 3101 // Non read barrier code. 3102 3103 // /* HeapReference<Class> */ temp1 = dest->klass_ 3104 __ movl(temp1, Address(dest, class_offset)); 3105 if (!optimizations.GetDestinationIsNonPrimitiveArray()) { 3106 __ MaybeUnpoisonHeapReference(temp1); 3107 // Bail out if the destination is not a non primitive array. 3108 // /* HeapReference<Class> */ temp2 = temp1->component_type_ 3109 __ movl(temp2, Address(temp1, component_offset)); 3110 __ testl(temp2, temp2); 3111 __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); 3112 __ MaybeUnpoisonHeapReference(temp2); 3113 __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot)); 3114 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); 3115 // Re-poison the heap reference to make the compare instruction below 3116 // compare two poisoned references. 3117 __ PoisonHeapReference(temp1); 3118 } 3119 3120 // Note: if heap poisoning is on, we are comparing two poisoned references here. 3121 __ cmpl(temp1, Address(src, class_offset)); 3122 3123 if (optimizations.GetDestinationIsTypedObjectArray()) { 3124 NearLabel do_copy; 3125 __ j(kEqual, &do_copy); 3126 __ MaybeUnpoisonHeapReference(temp1); 3127 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 3128 __ movl(temp1, Address(temp1, component_offset)); 3129 __ MaybeUnpoisonHeapReference(temp1); 3130 __ cmpl(Address(temp1, super_offset), Immediate(0)); 3131 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); 3132 __ Bind(&do_copy); 3133 } else { 3134 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); 3135 } 3136 } 3137 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { 3138 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); 3139 // Bail out if the source is not a non primitive array. 3140 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 3141 // /* HeapReference<Class> */ temp1 = src->klass_ 3142 codegen_->GenerateFieldLoadWithBakerReadBarrier( 3143 invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); 3144 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 3145 codegen_->GenerateFieldLoadWithBakerReadBarrier( 3146 invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); 3147 __ testl(temp1, temp1); 3148 __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); 3149 // If heap poisoning is enabled, `temp1` has been unpoisoned 3150 // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. 3151 } else { 3152 // /* HeapReference<Class> */ temp1 = src->klass_ 3153 __ movl(temp1, Address(src, class_offset)); 3154 __ MaybeUnpoisonHeapReference(temp1); 3155 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 3156 __ movl(temp1, Address(temp1, component_offset)); 3157 __ testl(temp1, temp1); 3158 __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); 3159 __ MaybeUnpoisonHeapReference(temp1); 3160 } 3161 __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot)); 3162 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); 3163 } 3164 3165 const DataType::Type type = DataType::Type::kReference; 3166 const int32_t element_size = DataType::Size(type); 3167 3168 // Compute the base source address in `temp1`. 3169 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1); 3170 3171 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 3172 // If it is needed (in the case of the fast-path loop), the base 3173 // destination address is computed later, as `temp2` is used for 3174 // intermediate computations. 3175 3176 // Compute the end source address in `temp3`. 3177 if (length.IsStackSlot()) { 3178 // Location `length` is again pointing at a stack slot, as 3179 // register `temp3` (which was containing the length parameter 3180 // earlier) has been overwritten; restore it now 3181 DCHECK(length.Equals(length_arg)); 3182 __ movl(temp3, Address(ESP, length.GetStackIndex())); 3183 length = Location::RegisterLocation(temp3); 3184 } 3185 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3); 3186 3187 // SystemArrayCopy implementation for Baker read barriers (see 3188 // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier): 3189 // 3190 // if (src_ptr != end_ptr) { 3191 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); 3192 // lfence; // Load fence or artificial data dependency to prevent load-load reordering 3193 // bool is_gray = (rb_state == ReadBarrier::GrayState()); 3194 // if (is_gray) { 3195 // // Slow-path copy. 3196 // for (size_t i = 0; i != length; ++i) { 3197 // dest_array[dest_pos + i] = 3198 // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i]))); 3199 // } 3200 // } else { 3201 // // Fast-path copy. 3202 // do { 3203 // *dest_ptr++ = *src_ptr++; 3204 // } while (src_ptr != end_ptr) 3205 // } 3206 // } 3207 3208 NearLabel loop, done; 3209 3210 // Don't enter copy loop if `length == 0`. 3211 __ cmpl(temp1, temp3); 3212 __ j(kEqual, &done); 3213 3214 // Given the numeric representation, it's enough to check the low bit of the rb_state. 3215 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); 3216 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); 3217 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; 3218 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; 3219 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); 3220 3221 // if (rb_state == ReadBarrier::GrayState()) 3222 // goto slow_path; 3223 // At this point, just do the "if" and make sure that flags are preserved until the branch. 3224 __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value)); 3225 3226 // Load fence to prevent load-load reordering. 3227 // Note that this is a no-op, thanks to the x86 memory model. 3228 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 3229 3230 // Slow path used to copy array when `src` is gray. 3231 SlowPathCode* read_barrier_slow_path = 3232 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke); 3233 codegen_->AddSlowPath(read_barrier_slow_path); 3234 3235 // We have done the "if" of the gray bit check above, now branch based on the flags. 3236 __ j(kNotZero, read_barrier_slow_path->GetEntryLabel()); 3237 3238 // Fast-path copy. 3239 // Compute the base destination address in `temp2`. 3240 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2); 3241 // Iterate over the arrays and do a raw copy of the objects. We don't need to 3242 // poison/unpoison. 3243 __ Bind(&loop); 3244 __ pushl(Address(temp1, 0)); 3245 __ cfi().AdjustCFAOffset(4); 3246 __ popl(Address(temp2, 0)); 3247 __ cfi().AdjustCFAOffset(-4); 3248 __ addl(temp1, Immediate(element_size)); 3249 __ addl(temp2, Immediate(element_size)); 3250 __ cmpl(temp1, temp3); 3251 __ j(kNotEqual, &loop); 3252 3253 __ Bind(read_barrier_slow_path->GetExitLabel()); 3254 __ Bind(&done); 3255 } else { 3256 // Non read barrier code. 3257 // Compute the base destination address in `temp2`. 3258 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2); 3259 // Compute the end source address in `temp3`. 3260 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3); 3261 // Iterate over the arrays and do a raw copy of the objects. We don't need to 3262 // poison/unpoison. 3263 NearLabel loop, done; 3264 __ cmpl(temp1, temp3); 3265 __ j(kEqual, &done); 3266 __ Bind(&loop); 3267 __ pushl(Address(temp1, 0)); 3268 __ cfi().AdjustCFAOffset(4); 3269 __ popl(Address(temp2, 0)); 3270 __ cfi().AdjustCFAOffset(-4); 3271 __ addl(temp1, Immediate(element_size)); 3272 __ addl(temp2, Immediate(element_size)); 3273 __ cmpl(temp1, temp3); 3274 __ j(kNotEqual, &loop); 3275 __ Bind(&done); 3276 } 3277 3278 // We only need one card marking on the destination array. 3279 codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false); 3280 3281 __ Bind(intrinsic_slow_path->GetExitLabel()); 3282 } 3283 3284 void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) { 3285 InvokeRuntimeCallingConvention calling_convention; 3286 IntrinsicVisitor::ComputeIntegerValueOfLocations( 3287 invoke, 3288 codegen_, 3289 Location::RegisterLocation(EAX), 3290 Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 3291 } 3292 3293 void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) { 3294 IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); 3295 LocationSummary* locations = invoke->GetLocations(); 3296 X86Assembler* assembler = GetAssembler(); 3297 3298 Register out = locations->Out().AsRegister<Register>(); 3299 InvokeRuntimeCallingConvention calling_convention; 3300 if (invoke->InputAt(0)->IsConstant()) { 3301 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); 3302 if (value >= info.low && value <= info.high) { 3303 // Just embed the j.l.Integer in the code. 3304 ScopedObjectAccess soa(Thread::Current()); 3305 mirror::Object* boxed = info.cache->Get(value + (-info.low)); 3306 DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); 3307 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); 3308 __ movl(out, Immediate(address)); 3309 } else { 3310 // Allocate and initialize a new j.l.Integer. 3311 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the 3312 // JIT object table. 3313 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); 3314 __ movl(calling_convention.GetRegisterAt(0), Immediate(address)); 3315 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); 3316 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); 3317 __ movl(Address(out, info.value_offset), Immediate(value)); 3318 } 3319 } else { 3320 Register in = locations->InAt(0).AsRegister<Register>(); 3321 // Check bounds of our cache. 3322 __ leal(out, Address(in, -info.low)); 3323 __ cmpl(out, Immediate(info.high - info.low + 1)); 3324 NearLabel allocate, done; 3325 __ j(kAboveEqual, &allocate); 3326 // If the value is within the bounds, load the j.l.Integer directly from the array. 3327 uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); 3328 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); 3329 __ movl(out, Address(out, TIMES_4, data_offset + address)); 3330 __ MaybeUnpoisonHeapReference(out); 3331 __ jmp(&done); 3332 __ Bind(&allocate); 3333 // Otherwise allocate and initialize a new j.l.Integer. 3334 address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); 3335 __ movl(calling_convention.GetRegisterAt(0), Immediate(address)); 3336 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); 3337 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); 3338 __ movl(Address(out, info.value_offset), in); 3339 __ Bind(&done); 3340 } 3341 } 3342 3343 void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) { 3344 LocationSummary* locations = 3345 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 3346 locations->SetOut(Location::RequiresRegister()); 3347 } 3348 3349 void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) { 3350 X86Assembler* assembler = GetAssembler(); 3351 Register out = invoke->GetLocations()->Out().AsRegister<Register>(); 3352 Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value()); 3353 NearLabel done; 3354 __ fs()->movl(out, address); 3355 __ testl(out, out); 3356 __ j(kEqual, &done); 3357 __ fs()->movl(address, Immediate(0)); 3358 codegen_->MemoryFence(); 3359 __ Bind(&done); 3360 } 3361 3362 void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) { 3363 LocationSummary* locations = 3364 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 3365 locations->SetInAt(0, Location::Any()); 3366 } 3367 3368 void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } 3369 3370 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) 3371 UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent) 3372 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite) 3373 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite) 3374 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit) 3375 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit) 3376 UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit) 3377 UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit) 3378 3379 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf); 3380 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter); 3381 UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend); 3382 UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength); 3383 UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString); 3384 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppend); 3385 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength); 3386 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString); 3387 3388 // 1.8. 3389 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt) 3390 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong) 3391 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt) 3392 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong) 3393 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject) 3394 3395 UNREACHABLE_INTRINSICS(X86) 3396 3397 #undef __ 3398 3399 } // namespace x86 3400 } // namespace art 3401