1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "intrinsics_arm_vixl.h" 18 19 #include "arch/arm/instruction_set_features_arm.h" 20 #include "art_method.h" 21 #include "code_generator_arm_vixl.h" 22 #include "common_arm.h" 23 #include "heap_poisoning.h" 24 #include "lock_word.h" 25 #include "mirror/array-inl.h" 26 #include "mirror/object_array-inl.h" 27 #include "mirror/reference.h" 28 #include "mirror/string.h" 29 #include "scoped_thread_state_change-inl.h" 30 #include "thread-current-inl.h" 31 32 #include "aarch32/constants-aarch32.h" 33 34 namespace art { 35 namespace arm { 36 37 #define __ assembler->GetVIXLAssembler()-> 38 39 using helpers::DRegisterFrom; 40 using helpers::HighRegisterFrom; 41 using helpers::InputDRegisterAt; 42 using helpers::InputRegisterAt; 43 using helpers::InputSRegisterAt; 44 using helpers::InputVRegisterAt; 45 using helpers::Int32ConstantFrom; 46 using helpers::LocationFrom; 47 using helpers::LowRegisterFrom; 48 using helpers::LowSRegisterFrom; 49 using helpers::HighSRegisterFrom; 50 using helpers::OutputDRegister; 51 using helpers::OutputSRegister; 52 using helpers::OutputRegister; 53 using helpers::OutputVRegister; 54 using helpers::RegisterFrom; 55 using helpers::SRegisterFrom; 56 using helpers::DRegisterFromS; 57 58 using namespace vixl::aarch32; // NOLINT(build/namespaces) 59 60 using vixl::ExactAssemblyScope; 61 using vixl::CodeBufferCheckScope; 62 63 ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() { 64 return codegen_->GetAssembler(); 65 } 66 67 ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() { 68 return codegen_->GetGraph()->GetAllocator(); 69 } 70 71 // Default slow-path for fallback (calling the managed code to handle the intrinsic) in an 72 // intrinsified call. This will copy the arguments into the positions for a regular call. 73 // 74 // Note: The actual parameters are required to be in the locations given by the invoke's location 75 // summary. If an intrinsic modifies those locations before a slowpath call, they must be 76 // restored! 77 // 78 // Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially 79 // sub-optimal (compared to a direct pointer call), but this is a slow-path. 80 81 class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL { 82 public: 83 explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke) 84 : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {} 85 86 Location MoveArguments(CodeGenerator* codegen) { 87 InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor; 88 IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor); 89 return calling_convention_visitor.GetMethodLocation(); 90 } 91 92 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 93 ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler()); 94 __ Bind(GetEntryLabel()); 95 96 SaveLiveRegisters(codegen, invoke_->GetLocations()); 97 98 Location method_loc = MoveArguments(codegen); 99 100 if (invoke_->IsInvokeStaticOrDirect()) { 101 codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this); 102 } else { 103 codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this); 104 } 105 106 // Copy the result back to the expected output. 107 Location out = invoke_->GetLocations()->Out(); 108 if (out.IsValid()) { 109 DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory. 110 DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); 111 codegen->MoveFromReturnRegister(out, invoke_->GetType()); 112 } 113 114 RestoreLiveRegisters(codegen, invoke_->GetLocations()); 115 __ B(GetExitLabel()); 116 } 117 118 const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; } 119 120 private: 121 // The instruction where this slow path is happening. 122 HInvoke* const invoke_; 123 124 DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL); 125 }; 126 127 // Compute base address for the System.arraycopy intrinsic in `base`. 128 static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler, 129 DataType::Type type, 130 const vixl32::Register& array, 131 const Location& pos, 132 const vixl32::Register& base) { 133 // This routine is only used by the SystemArrayCopy intrinsic at the 134 // moment. We can allow DataType::Type::kReference as `type` to implement 135 // the SystemArrayCopyChar intrinsic. 136 DCHECK_EQ(type, DataType::Type::kReference); 137 const int32_t element_size = DataType::Size(type); 138 const uint32_t element_size_shift = DataType::SizeShift(type); 139 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); 140 141 if (pos.IsConstant()) { 142 int32_t constant = Int32ConstantFrom(pos); 143 __ Add(base, array, element_size * constant + data_offset); 144 } else { 145 __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift)); 146 __ Add(base, base, data_offset); 147 } 148 } 149 150 // Compute end address for the System.arraycopy intrinsic in `end`. 151 static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler, 152 DataType::Type type, 153 const Location& copy_length, 154 const vixl32::Register& base, 155 const vixl32::Register& end) { 156 // This routine is only used by the SystemArrayCopy intrinsic at the 157 // moment. We can allow DataType::Type::kReference as `type` to implement 158 // the SystemArrayCopyChar intrinsic. 159 DCHECK_EQ(type, DataType::Type::kReference); 160 const int32_t element_size = DataType::Size(type); 161 const uint32_t element_size_shift = DataType::SizeShift(type); 162 163 if (copy_length.IsConstant()) { 164 int32_t constant = Int32ConstantFrom(copy_length); 165 __ Add(end, base, element_size * constant); 166 } else { 167 __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift)); 168 } 169 } 170 171 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. 172 class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { 173 public: 174 explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction) 175 : SlowPathCodeARMVIXL(instruction) { 176 DCHECK(kEmitCompilerReadBarrier); 177 DCHECK(kUseBakerReadBarrier); 178 } 179 180 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 181 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); 182 ArmVIXLAssembler* assembler = arm_codegen->GetAssembler(); 183 LocationSummary* locations = instruction_->GetLocations(); 184 DCHECK(locations->CanCall()); 185 DCHECK(instruction_->IsInvokeStaticOrDirect()) 186 << "Unexpected instruction in read barrier arraycopy slow path: " 187 << instruction_->DebugName(); 188 DCHECK(instruction_->GetLocations()->Intrinsified()); 189 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); 190 191 DataType::Type type = DataType::Type::kReference; 192 const int32_t element_size = DataType::Size(type); 193 194 vixl32::Register dest = InputRegisterAt(instruction_, 2); 195 Location dest_pos = locations->InAt(3); 196 vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0)); 197 vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1)); 198 vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2)); 199 vixl32::Register tmp = RegisterFrom(locations->GetTemp(3)); 200 201 __ Bind(GetEntryLabel()); 202 // Compute the base destination address in `dst_curr_addr`. 203 GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr); 204 205 vixl32::Label loop; 206 __ Bind(&loop); 207 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex)); 208 assembler->MaybeUnpoisonHeapReference(tmp); 209 // TODO: Inline the mark bit check before calling the runtime? 210 // tmp = ReadBarrier::Mark(tmp); 211 // No need to save live registers; it's taken care of by the 212 // entrypoint. Also, there is no need to update the stack mask, 213 // as this runtime call will not trigger a garbage collection. 214 // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more 215 // explanations.) 216 DCHECK(!tmp.IsSP()); 217 DCHECK(!tmp.IsLR()); 218 DCHECK(!tmp.IsPC()); 219 // IP is used internally by the ReadBarrierMarkRegX entry point 220 // as a temporary (and not preserved). It thus cannot be used by 221 // any live register in this slow path. 222 DCHECK(!src_curr_addr.Is(ip)); 223 DCHECK(!dst_curr_addr.Is(ip)); 224 DCHECK(!src_stop_addr.Is(ip)); 225 DCHECK(!tmp.Is(ip)); 226 DCHECK(tmp.IsRegister()) << tmp; 227 // TODO: Load the entrypoint once before the loop, instead of 228 // loading it at every iteration. 229 int32_t entry_point_offset = 230 Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode()); 231 // This runtime call does not require a stack map. 232 arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); 233 assembler->MaybePoisonHeapReference(tmp); 234 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); 235 __ Cmp(src_curr_addr, src_stop_addr); 236 __ B(ne, &loop, /* far_target */ false); 237 __ B(GetExitLabel()); 238 } 239 240 const char* GetDescription() const OVERRIDE { 241 return "ReadBarrierSystemArrayCopySlowPathARMVIXL"; 242 } 243 244 private: 245 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL); 246 }; 247 248 IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen) 249 : allocator_(codegen->GetGraph()->GetAllocator()), 250 codegen_(codegen), 251 assembler_(codegen->GetAssembler()), 252 features_(codegen->GetInstructionSetFeatures()) {} 253 254 bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) { 255 Dispatch(invoke); 256 LocationSummary* res = invoke->GetLocations(); 257 if (res == nullptr) { 258 return false; 259 } 260 return res->Intrinsified(); 261 } 262 263 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { 264 LocationSummary* locations = 265 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 266 locations->SetInAt(0, Location::RequiresFpuRegister()); 267 locations->SetOut(Location::RequiresRegister()); 268 } 269 270 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { 271 LocationSummary* locations = 272 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 273 locations->SetInAt(0, Location::RequiresRegister()); 274 locations->SetOut(Location::RequiresFpuRegister()); 275 } 276 277 static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) { 278 Location input = locations->InAt(0); 279 Location output = locations->Out(); 280 if (is64bit) { 281 __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input)); 282 } else { 283 __ Vmov(RegisterFrom(output), SRegisterFrom(input)); 284 } 285 } 286 287 static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) { 288 Location input = locations->InAt(0); 289 Location output = locations->Out(); 290 if (is64bit) { 291 __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input)); 292 } else { 293 __ Vmov(SRegisterFrom(output), RegisterFrom(input)); 294 } 295 } 296 297 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { 298 CreateFPToIntLocations(allocator_, invoke); 299 } 300 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) { 301 CreateIntToFPLocations(allocator_, invoke); 302 } 303 304 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { 305 MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); 306 } 307 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) { 308 MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); 309 } 310 311 void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) { 312 CreateFPToIntLocations(allocator_, invoke); 313 } 314 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) { 315 CreateIntToFPLocations(allocator_, invoke); 316 } 317 318 void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) { 319 MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); 320 } 321 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) { 322 MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); 323 } 324 325 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { 326 LocationSummary* locations = 327 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 328 locations->SetInAt(0, Location::RequiresRegister()); 329 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 330 } 331 332 static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* allocator, HInvoke* invoke) { 333 LocationSummary* locations = 334 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 335 locations->SetInAt(0, Location::RequiresRegister()); 336 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 337 } 338 339 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { 340 LocationSummary* locations = 341 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 342 locations->SetInAt(0, Location::RequiresFpuRegister()); 343 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 344 } 345 346 static void GenNumberOfLeadingZeros(HInvoke* invoke, 347 DataType::Type type, 348 CodeGeneratorARMVIXL* codegen) { 349 ArmVIXLAssembler* assembler = codegen->GetAssembler(); 350 LocationSummary* locations = invoke->GetLocations(); 351 Location in = locations->InAt(0); 352 vixl32::Register out = RegisterFrom(locations->Out()); 353 354 DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64)); 355 356 if (type == DataType::Type::kInt64) { 357 vixl32::Register in_reg_lo = LowRegisterFrom(in); 358 vixl32::Register in_reg_hi = HighRegisterFrom(in); 359 vixl32::Label end; 360 vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end); 361 __ Clz(out, in_reg_hi); 362 __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false); 363 __ Clz(out, in_reg_lo); 364 __ Add(out, out, 32); 365 if (end.IsReferenced()) { 366 __ Bind(&end); 367 } 368 } else { 369 __ Clz(out, RegisterFrom(in)); 370 } 371 } 372 373 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 374 CreateIntToIntLocations(allocator_, invoke); 375 } 376 377 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 378 GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_); 379 } 380 381 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 382 CreateLongToLongLocationsWithOverlap(allocator_, invoke); 383 } 384 385 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 386 GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_); 387 } 388 389 static void GenNumberOfTrailingZeros(HInvoke* invoke, 390 DataType::Type type, 391 CodeGeneratorARMVIXL* codegen) { 392 DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64)); 393 394 ArmVIXLAssembler* assembler = codegen->GetAssembler(); 395 LocationSummary* locations = invoke->GetLocations(); 396 vixl32::Register out = RegisterFrom(locations->Out()); 397 398 if (type == DataType::Type::kInt64) { 399 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); 400 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); 401 vixl32::Label end; 402 vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end); 403 __ Rbit(out, in_reg_lo); 404 __ Clz(out, out); 405 __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false); 406 __ Rbit(out, in_reg_hi); 407 __ Clz(out, out); 408 __ Add(out, out, 32); 409 if (end.IsReferenced()) { 410 __ Bind(&end); 411 } 412 } else { 413 vixl32::Register in = RegisterFrom(locations->InAt(0)); 414 __ Rbit(out, in); 415 __ Clz(out, out); 416 } 417 } 418 419 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { 420 CreateIntToIntLocations(allocator_, invoke); 421 } 422 423 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { 424 GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_); 425 } 426 427 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { 428 CreateLongToLongLocationsWithOverlap(allocator_, invoke); 429 } 430 431 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { 432 GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_); 433 } 434 435 static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) { 436 __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0)); 437 } 438 439 void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { 440 CreateFPToFPLocations(allocator_, invoke); 441 } 442 443 void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { 444 MathAbsFP(invoke, GetAssembler()); 445 } 446 447 void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { 448 CreateFPToFPLocations(allocator_, invoke); 449 } 450 451 void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { 452 MathAbsFP(invoke, GetAssembler()); 453 } 454 455 static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { 456 LocationSummary* locations = 457 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 458 locations->SetInAt(0, Location::RequiresRegister()); 459 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 460 461 locations->AddTemp(Location::RequiresRegister()); 462 } 463 464 static void GenAbsInteger(LocationSummary* locations, 465 bool is64bit, 466 ArmVIXLAssembler* assembler) { 467 Location in = locations->InAt(0); 468 Location output = locations->Out(); 469 470 vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); 471 472 if (is64bit) { 473 vixl32::Register in_reg_lo = LowRegisterFrom(in); 474 vixl32::Register in_reg_hi = HighRegisterFrom(in); 475 vixl32::Register out_reg_lo = LowRegisterFrom(output); 476 vixl32::Register out_reg_hi = HighRegisterFrom(output); 477 478 DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; 479 480 __ Asr(mask, in_reg_hi, 31); 481 __ Adds(out_reg_lo, in_reg_lo, mask); 482 __ Adc(out_reg_hi, in_reg_hi, mask); 483 __ Eor(out_reg_lo, mask, out_reg_lo); 484 __ Eor(out_reg_hi, mask, out_reg_hi); 485 } else { 486 vixl32::Register in_reg = RegisterFrom(in); 487 vixl32::Register out_reg = RegisterFrom(output); 488 489 __ Asr(mask, in_reg, 31); 490 __ Add(out_reg, in_reg, mask); 491 __ Eor(out_reg, mask, out_reg); 492 } 493 } 494 495 void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) { 496 CreateIntToIntPlusTemp(allocator_, invoke); 497 } 498 499 void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) { 500 GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); 501 } 502 503 504 void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) { 505 CreateIntToIntPlusTemp(allocator_, invoke); 506 } 507 508 void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) { 509 GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); 510 } 511 512 static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { 513 ArmVIXLAssembler* assembler = codegen->GetAssembler(); 514 Location op1_loc = invoke->GetLocations()->InAt(0); 515 Location op2_loc = invoke->GetLocations()->InAt(1); 516 Location out_loc = invoke->GetLocations()->Out(); 517 518 // Optimization: don't generate any code if inputs are the same. 519 if (op1_loc.Equals(op2_loc)) { 520 DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. 521 return; 522 } 523 524 vixl32::SRegister op1 = SRegisterFrom(op1_loc); 525 vixl32::SRegister op2 = SRegisterFrom(op2_loc); 526 vixl32::SRegister out = OutputSRegister(invoke); 527 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 528 const vixl32::Register temp1 = temps.Acquire(); 529 vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0)); 530 vixl32::Label nan, done; 531 vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); 532 533 DCHECK(op1.Is(out)); 534 535 __ Vcmp(op1, op2); 536 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); 537 __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling. 538 539 // op1 <> op2 540 vixl32::ConditionType cond = is_min ? gt : lt; 541 { 542 ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), 543 2 * kMaxInstructionSizeInBytes, 544 CodeBufferCheckScope::kMaximumSize); 545 __ it(cond); 546 __ vmov(cond, F32, out, op2); 547 } 548 // for <>(not equal), we've done min/max calculation. 549 __ B(ne, final_label, /* far_target */ false); 550 551 // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). 552 __ Vmov(temp1, op1); 553 __ Vmov(temp2, op2); 554 if (is_min) { 555 __ Orr(temp1, temp1, temp2); 556 } else { 557 __ And(temp1, temp1, temp2); 558 } 559 __ Vmov(out, temp1); 560 __ B(final_label); 561 562 // handle NaN input. 563 __ Bind(&nan); 564 __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. 565 __ Vmov(out, temp1); 566 567 if (done.IsReferenced()) { 568 __ Bind(&done); 569 } 570 } 571 572 static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { 573 LocationSummary* locations = 574 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 575 locations->SetInAt(0, Location::RequiresFpuRegister()); 576 locations->SetInAt(1, Location::RequiresFpuRegister()); 577 locations->SetOut(Location::SameAsFirstInput()); 578 } 579 580 void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { 581 CreateFPFPToFPLocations(allocator_, invoke); 582 invoke->GetLocations()->AddTemp(Location::RequiresRegister()); 583 } 584 585 void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { 586 GenMinMaxFloat(invoke, /* is_min */ true, codegen_); 587 } 588 589 void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { 590 CreateFPFPToFPLocations(allocator_, invoke); 591 invoke->GetLocations()->AddTemp(Location::RequiresRegister()); 592 } 593 594 void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { 595 GenMinMaxFloat(invoke, /* is_min */ false, codegen_); 596 } 597 598 static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { 599 ArmVIXLAssembler* assembler = codegen->GetAssembler(); 600 Location op1_loc = invoke->GetLocations()->InAt(0); 601 Location op2_loc = invoke->GetLocations()->InAt(1); 602 Location out_loc = invoke->GetLocations()->Out(); 603 604 // Optimization: don't generate any code if inputs are the same. 605 if (op1_loc.Equals(op2_loc)) { 606 DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in. 607 return; 608 } 609 610 vixl32::DRegister op1 = DRegisterFrom(op1_loc); 611 vixl32::DRegister op2 = DRegisterFrom(op2_loc); 612 vixl32::DRegister out = OutputDRegister(invoke); 613 vixl32::Label handle_nan_eq, done; 614 vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); 615 616 DCHECK(op1.Is(out)); 617 618 __ Vcmp(op1, op2); 619 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); 620 __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling. 621 622 // op1 <> op2 623 vixl32::ConditionType cond = is_min ? gt : lt; 624 { 625 ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), 626 2 * kMaxInstructionSizeInBytes, 627 CodeBufferCheckScope::kMaximumSize); 628 __ it(cond); 629 __ vmov(cond, F64, out, op2); 630 } 631 // for <>(not equal), we've done min/max calculation. 632 __ B(ne, final_label, /* far_target */ false); 633 634 // handle op1 == op2, max(+0.0,-0.0). 635 if (!is_min) { 636 __ Vand(F64, out, op1, op2); 637 __ B(final_label); 638 } 639 640 // handle op1 == op2, min(+0.0,-0.0), NaN input. 641 __ Bind(&handle_nan_eq); 642 __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. 643 644 if (done.IsReferenced()) { 645 __ Bind(&done); 646 } 647 } 648 649 void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { 650 CreateFPFPToFPLocations(allocator_, invoke); 651 } 652 653 void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { 654 GenMinMaxDouble(invoke, /* is_min */ true , codegen_); 655 } 656 657 void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { 658 CreateFPFPToFPLocations(allocator_, invoke); 659 } 660 661 void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { 662 GenMinMaxDouble(invoke, /* is_min */ false, codegen_); 663 } 664 665 static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { 666 Location op1_loc = invoke->GetLocations()->InAt(0); 667 Location op2_loc = invoke->GetLocations()->InAt(1); 668 Location out_loc = invoke->GetLocations()->Out(); 669 670 // Optimization: don't generate any code if inputs are the same. 671 if (op1_loc.Equals(op2_loc)) { 672 DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. 673 return; 674 } 675 676 vixl32::Register op1_lo = LowRegisterFrom(op1_loc); 677 vixl32::Register op1_hi = HighRegisterFrom(op1_loc); 678 vixl32::Register op2_lo = LowRegisterFrom(op2_loc); 679 vixl32::Register op2_hi = HighRegisterFrom(op2_loc); 680 vixl32::Register out_lo = LowRegisterFrom(out_loc); 681 vixl32::Register out_hi = HighRegisterFrom(out_loc); 682 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 683 const vixl32::Register temp = temps.Acquire(); 684 685 DCHECK(op1_lo.Is(out_lo)); 686 DCHECK(op1_hi.Is(out_hi)); 687 688 // Compare op1 >= op2, or op1 < op2. 689 __ Cmp(out_lo, op2_lo); 690 __ Sbcs(temp, out_hi, op2_hi); 691 692 // Now GE/LT condition code is correct for the long comparison. 693 { 694 vixl32::ConditionType cond = is_min ? ge : lt; 695 ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), 696 3 * kMaxInstructionSizeInBytes, 697 CodeBufferCheckScope::kMaximumSize); 698 __ itt(cond); 699 __ mov(cond, out_lo, op2_lo); 700 __ mov(cond, out_hi, op2_hi); 701 } 702 } 703 704 static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { 705 LocationSummary* locations = 706 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 707 locations->SetInAt(0, Location::RequiresRegister()); 708 locations->SetInAt(1, Location::RequiresRegister()); 709 locations->SetOut(Location::SameAsFirstInput()); 710 } 711 712 void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { 713 CreateLongLongToLongLocations(allocator_, invoke); 714 } 715 716 void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { 717 GenMinMaxLong(invoke, /* is_min */ true, GetAssembler()); 718 } 719 720 void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { 721 CreateLongLongToLongLocations(allocator_, invoke); 722 } 723 724 void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { 725 GenMinMaxLong(invoke, /* is_min */ false, GetAssembler()); 726 } 727 728 static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { 729 vixl32::Register op1 = InputRegisterAt(invoke, 0); 730 vixl32::Register op2 = InputRegisterAt(invoke, 1); 731 vixl32::Register out = OutputRegister(invoke); 732 733 __ Cmp(op1, op2); 734 735 { 736 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 737 3 * kMaxInstructionSizeInBytes, 738 CodeBufferCheckScope::kMaximumSize); 739 740 __ ite(is_min ? lt : gt); 741 __ mov(is_min ? lt : gt, out, op1); 742 __ mov(is_min ? ge : le, out, op2); 743 } 744 } 745 746 static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { 747 LocationSummary* locations = 748 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 749 locations->SetInAt(0, Location::RequiresRegister()); 750 locations->SetInAt(1, Location::RequiresRegister()); 751 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 752 } 753 754 void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { 755 CreateIntIntToIntLocations(allocator_, invoke); 756 } 757 758 void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { 759 GenMinMax(invoke, /* is_min */ true, GetAssembler()); 760 } 761 762 void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { 763 CreateIntIntToIntLocations(allocator_, invoke); 764 } 765 766 void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { 767 GenMinMax(invoke, /* is_min */ false, GetAssembler()); 768 } 769 770 void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) { 771 CreateFPToFPLocations(allocator_, invoke); 772 } 773 774 void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) { 775 ArmVIXLAssembler* assembler = GetAssembler(); 776 __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); 777 } 778 779 void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) { 780 if (features_.HasARMv8AInstructions()) { 781 CreateFPToFPLocations(allocator_, invoke); 782 } 783 } 784 785 void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) { 786 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions()); 787 ArmVIXLAssembler* assembler = GetAssembler(); 788 __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); 789 } 790 791 void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) { 792 if (features_.HasARMv8AInstructions()) { 793 LocationSummary* locations = 794 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 795 locations->SetInAt(0, Location::RequiresFpuRegister()); 796 locations->SetOut(Location::RequiresRegister()); 797 locations->AddTemp(Location::RequiresFpuRegister()); 798 } 799 } 800 801 void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) { 802 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions()); 803 804 ArmVIXLAssembler* assembler = GetAssembler(); 805 vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0); 806 vixl32::Register out_reg = OutputRegister(invoke); 807 vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0)); 808 vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0)); 809 vixl32::Label done; 810 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done); 811 812 // Round to nearest integer, ties away from zero. 813 __ Vcvta(S32, F32, temp1, in_reg); 814 __ Vmov(out_reg, temp1); 815 816 // For positive, zero or NaN inputs, rounding is done. 817 __ Cmp(out_reg, 0); 818 __ B(ge, final_label, /* far_target */ false); 819 820 // Handle input < 0 cases. 821 // If input is negative but not a tie, previous result (round to nearest) is valid. 822 // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1. 823 __ Vrinta(F32, F32, temp1, in_reg); 824 __ Vmov(temp2, 0.5); 825 __ Vsub(F32, temp1, in_reg, temp1); 826 __ Vcmp(F32, temp1, temp2); 827 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); 828 { 829 // Use ExactAsemblyScope here because we are using IT. 830 ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), 831 2 * kMaxInstructionSizeInBytes, 832 CodeBufferCheckScope::kMaximumSize); 833 __ it(eq); 834 __ add(eq, out_reg, out_reg, 1); 835 } 836 837 if (done.IsReferenced()) { 838 __ Bind(&done); 839 } 840 } 841 842 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) { 843 CreateIntToIntLocations(allocator_, invoke); 844 } 845 846 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) { 847 ArmVIXLAssembler* assembler = GetAssembler(); 848 // Ignore upper 4B of long address. 849 __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0)))); 850 } 851 852 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) { 853 CreateIntToIntLocations(allocator_, invoke); 854 } 855 856 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) { 857 ArmVIXLAssembler* assembler = GetAssembler(); 858 // Ignore upper 4B of long address. 859 __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0)))); 860 } 861 862 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) { 863 CreateIntToIntLocations(allocator_, invoke); 864 } 865 866 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) { 867 ArmVIXLAssembler* assembler = GetAssembler(); 868 // Ignore upper 4B of long address. 869 vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0)); 870 // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor 871 // exception. So we can't use ldrd as addr may be unaligned. 872 vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out()); 873 vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out()); 874 if (addr.Is(lo)) { 875 __ Ldr(hi, MemOperand(addr, 4)); 876 __ Ldr(lo, MemOperand(addr)); 877 } else { 878 __ Ldr(lo, MemOperand(addr)); 879 __ Ldr(hi, MemOperand(addr, 4)); 880 } 881 } 882 883 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) { 884 CreateIntToIntLocations(allocator_, invoke); 885 } 886 887 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) { 888 ArmVIXLAssembler* assembler = GetAssembler(); 889 // Ignore upper 4B of long address. 890 __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0)))); 891 } 892 893 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { 894 LocationSummary* locations = 895 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 896 locations->SetInAt(0, Location::RequiresRegister()); 897 locations->SetInAt(1, Location::RequiresRegister()); 898 } 899 900 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) { 901 CreateIntIntToVoidLocations(allocator_, invoke); 902 } 903 904 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) { 905 ArmVIXLAssembler* assembler = GetAssembler(); 906 __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0)))); 907 } 908 909 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) { 910 CreateIntIntToVoidLocations(allocator_, invoke); 911 } 912 913 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) { 914 ArmVIXLAssembler* assembler = GetAssembler(); 915 __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0)))); 916 } 917 918 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) { 919 CreateIntIntToVoidLocations(allocator_, invoke); 920 } 921 922 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) { 923 ArmVIXLAssembler* assembler = GetAssembler(); 924 // Ignore upper 4B of long address. 925 vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0)); 926 // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor 927 // exception. So we can't use ldrd as addr may be unaligned. 928 __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr)); 929 __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4)); 930 } 931 932 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) { 933 CreateIntIntToVoidLocations(allocator_, invoke); 934 } 935 936 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) { 937 ArmVIXLAssembler* assembler = GetAssembler(); 938 __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0)))); 939 } 940 941 void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) { 942 LocationSummary* locations = 943 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 944 locations->SetOut(Location::RequiresRegister()); 945 } 946 947 void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) { 948 ArmVIXLAssembler* assembler = GetAssembler(); 949 __ Ldr(OutputRegister(invoke), 950 MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value())); 951 } 952 953 static void GenUnsafeGet(HInvoke* invoke, 954 DataType::Type type, 955 bool is_volatile, 956 CodeGeneratorARMVIXL* codegen) { 957 LocationSummary* locations = invoke->GetLocations(); 958 ArmVIXLAssembler* assembler = codegen->GetAssembler(); 959 Location base_loc = locations->InAt(1); 960 vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer. 961 Location offset_loc = locations->InAt(2); 962 vixl32::Register offset = LowRegisterFrom(offset_loc); // Long offset, lo part only. 963 Location trg_loc = locations->Out(); 964 965 switch (type) { 966 case DataType::Type::kInt32: { 967 vixl32::Register trg = RegisterFrom(trg_loc); 968 __ Ldr(trg, MemOperand(base, offset)); 969 if (is_volatile) { 970 __ Dmb(vixl32::ISH); 971 } 972 break; 973 } 974 975 case DataType::Type::kReference: { 976 vixl32::Register trg = RegisterFrom(trg_loc); 977 if (kEmitCompilerReadBarrier) { 978 if (kUseBakerReadBarrier) { 979 Location temp = locations->GetTemp(0); 980 codegen->GenerateReferenceLoadWithBakerReadBarrier( 981 invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false); 982 if (is_volatile) { 983 __ Dmb(vixl32::ISH); 984 } 985 } else { 986 __ Ldr(trg, MemOperand(base, offset)); 987 if (is_volatile) { 988 __ Dmb(vixl32::ISH); 989 } 990 codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); 991 } 992 } else { 993 __ Ldr(trg, MemOperand(base, offset)); 994 if (is_volatile) { 995 __ Dmb(vixl32::ISH); 996 } 997 assembler->MaybeUnpoisonHeapReference(trg); 998 } 999 break; 1000 } 1001 1002 case DataType::Type::kInt64: { 1003 vixl32::Register trg_lo = LowRegisterFrom(trg_loc); 1004 vixl32::Register trg_hi = HighRegisterFrom(trg_loc); 1005 if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { 1006 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 1007 const vixl32::Register temp_reg = temps.Acquire(); 1008 __ Add(temp_reg, base, offset); 1009 __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg)); 1010 } else { 1011 __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset)); 1012 } 1013 if (is_volatile) { 1014 __ Dmb(vixl32::ISH); 1015 } 1016 break; 1017 } 1018 1019 default: 1020 LOG(FATAL) << "Unexpected type " << type; 1021 UNREACHABLE(); 1022 } 1023 } 1024 1025 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, 1026 HInvoke* invoke, 1027 DataType::Type type) { 1028 bool can_call = kEmitCompilerReadBarrier && 1029 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || 1030 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); 1031 LocationSummary* locations = 1032 new (allocator) LocationSummary(invoke, 1033 can_call 1034 ? LocationSummary::kCallOnSlowPath 1035 : LocationSummary::kNoCall, 1036 kIntrinsified); 1037 if (can_call && kUseBakerReadBarrier) { 1038 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 1039 } 1040 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 1041 locations->SetInAt(1, Location::RequiresRegister()); 1042 locations->SetInAt(2, Location::RequiresRegister()); 1043 locations->SetOut(Location::RequiresRegister(), 1044 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); 1045 if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 1046 // We need a temporary register for the read barrier marking slow 1047 // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier. 1048 locations->AddTemp(Location::RequiresRegister()); 1049 } 1050 } 1051 1052 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) { 1053 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32); 1054 } 1055 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) { 1056 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32); 1057 } 1058 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) { 1059 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64); 1060 } 1061 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 1062 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64); 1063 } 1064 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) { 1065 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference); 1066 } 1067 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 1068 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference); 1069 } 1070 1071 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) { 1072 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); 1073 } 1074 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) { 1075 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); 1076 } 1077 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) { 1078 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); 1079 } 1080 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 1081 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); 1082 } 1083 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) { 1084 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); 1085 } 1086 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 1087 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); 1088 } 1089 1090 static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, 1091 const ArmInstructionSetFeatures& features, 1092 DataType::Type type, 1093 bool is_volatile, 1094 HInvoke* invoke) { 1095 LocationSummary* locations = 1096 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1097 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 1098 locations->SetInAt(1, Location::RequiresRegister()); 1099 locations->SetInAt(2, Location::RequiresRegister()); 1100 locations->SetInAt(3, Location::RequiresRegister()); 1101 1102 if (type == DataType::Type::kInt64) { 1103 // Potentially need temps for ldrexd-strexd loop. 1104 if (is_volatile && !features.HasAtomicLdrdAndStrd()) { 1105 locations->AddTemp(Location::RequiresRegister()); // Temp_lo. 1106 locations->AddTemp(Location::RequiresRegister()); // Temp_hi. 1107 } 1108 } else if (type == DataType::Type::kReference) { 1109 // Temps for card-marking. 1110 locations->AddTemp(Location::RequiresRegister()); // Temp. 1111 locations->AddTemp(Location::RequiresRegister()); // Card. 1112 } 1113 } 1114 1115 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) { 1116 CreateIntIntIntIntToVoid( 1117 allocator_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke); 1118 } 1119 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) { 1120 CreateIntIntIntIntToVoid( 1121 allocator_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke); 1122 } 1123 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) { 1124 CreateIntIntIntIntToVoid( 1125 allocator_, features_, DataType::Type::kInt32, /* is_volatile */ true, invoke); 1126 } 1127 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) { 1128 CreateIntIntIntIntToVoid( 1129 allocator_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke); 1130 } 1131 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 1132 CreateIntIntIntIntToVoid( 1133 allocator_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke); 1134 } 1135 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 1136 CreateIntIntIntIntToVoid( 1137 allocator_, features_, DataType::Type::kReference, /* is_volatile */ true, invoke); 1138 } 1139 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) { 1140 CreateIntIntIntIntToVoid( 1141 allocator_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke); 1142 } 1143 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) { 1144 CreateIntIntIntIntToVoid( 1145 allocator_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke); 1146 } 1147 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) { 1148 CreateIntIntIntIntToVoid( 1149 allocator_, features_, DataType::Type::kInt64, /* is_volatile */ true, invoke); 1150 } 1151 1152 static void GenUnsafePut(LocationSummary* locations, 1153 DataType::Type type, 1154 bool is_volatile, 1155 bool is_ordered, 1156 CodeGeneratorARMVIXL* codegen) { 1157 ArmVIXLAssembler* assembler = codegen->GetAssembler(); 1158 1159 vixl32::Register base = RegisterFrom(locations->InAt(1)); // Object pointer. 1160 vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only. 1161 vixl32::Register value; 1162 1163 if (is_volatile || is_ordered) { 1164 __ Dmb(vixl32::ISH); 1165 } 1166 1167 if (type == DataType::Type::kInt64) { 1168 vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3)); 1169 vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3)); 1170 value = value_lo; 1171 if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { 1172 vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0)); 1173 vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1)); 1174 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 1175 const vixl32::Register temp_reg = temps.Acquire(); 1176 1177 __ Add(temp_reg, base, offset); 1178 vixl32::Label loop_head; 1179 __ Bind(&loop_head); 1180 __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg)); 1181 __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg)); 1182 __ Cmp(temp_lo, 0); 1183 __ B(ne, &loop_head, /* far_target */ false); 1184 } else { 1185 __ Strd(value_lo, value_hi, MemOperand(base, offset)); 1186 } 1187 } else { 1188 value = RegisterFrom(locations->InAt(3)); 1189 vixl32::Register source = value; 1190 if (kPoisonHeapReferences && type == DataType::Type::kReference) { 1191 vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); 1192 __ Mov(temp, value); 1193 assembler->PoisonHeapReference(temp); 1194 source = temp; 1195 } 1196 __ Str(source, MemOperand(base, offset)); 1197 } 1198 1199 if (is_volatile) { 1200 __ Dmb(vixl32::ISH); 1201 } 1202 1203 if (type == DataType::Type::kReference) { 1204 vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); 1205 vixl32::Register card = RegisterFrom(locations->GetTemp(1)); 1206 bool value_can_be_null = true; // TODO: Worth finding out this information? 1207 codegen->MarkGCCard(temp, card, base, value, value_can_be_null); 1208 } 1209 } 1210 1211 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) { 1212 GenUnsafePut(invoke->GetLocations(), 1213 DataType::Type::kInt32, 1214 /* is_volatile */ false, 1215 /* is_ordered */ false, 1216 codegen_); 1217 } 1218 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) { 1219 GenUnsafePut(invoke->GetLocations(), 1220 DataType::Type::kInt32, 1221 /* is_volatile */ false, 1222 /* is_ordered */ true, 1223 codegen_); 1224 } 1225 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) { 1226 GenUnsafePut(invoke->GetLocations(), 1227 DataType::Type::kInt32, 1228 /* is_volatile */ true, 1229 /* is_ordered */ false, 1230 codegen_); 1231 } 1232 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) { 1233 GenUnsafePut(invoke->GetLocations(), 1234 DataType::Type::kReference, 1235 /* is_volatile */ false, 1236 /* is_ordered */ false, 1237 codegen_); 1238 } 1239 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 1240 GenUnsafePut(invoke->GetLocations(), 1241 DataType::Type::kReference, 1242 /* is_volatile */ false, 1243 /* is_ordered */ true, 1244 codegen_); 1245 } 1246 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 1247 GenUnsafePut(invoke->GetLocations(), 1248 DataType::Type::kReference, 1249 /* is_volatile */ true, 1250 /* is_ordered */ false, 1251 codegen_); 1252 } 1253 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) { 1254 GenUnsafePut(invoke->GetLocations(), 1255 DataType::Type::kInt64, 1256 /* is_volatile */ false, 1257 /* is_ordered */ false, 1258 codegen_); 1259 } 1260 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) { 1261 GenUnsafePut(invoke->GetLocations(), 1262 DataType::Type::kInt64, 1263 /* is_volatile */ false, 1264 /* is_ordered */ true, 1265 codegen_); 1266 } 1267 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) { 1268 GenUnsafePut(invoke->GetLocations(), 1269 DataType::Type::kInt64, 1270 /* is_volatile */ true, 1271 /* is_ordered */ false, 1272 codegen_); 1273 } 1274 1275 static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, 1276 HInvoke* invoke, 1277 DataType::Type type) { 1278 bool can_call = kEmitCompilerReadBarrier && 1279 kUseBakerReadBarrier && 1280 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); 1281 LocationSummary* locations = 1282 new (allocator) LocationSummary(invoke, 1283 can_call 1284 ? LocationSummary::kCallOnSlowPath 1285 : LocationSummary::kNoCall, 1286 kIntrinsified); 1287 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 1288 locations->SetInAt(1, Location::RequiresRegister()); 1289 locations->SetInAt(2, Location::RequiresRegister()); 1290 locations->SetInAt(3, Location::RequiresRegister()); 1291 locations->SetInAt(4, Location::RequiresRegister()); 1292 1293 // If heap poisoning is enabled, we don't want the unpoisoning 1294 // operations to potentially clobber the output. Likewise when 1295 // emitting a (Baker) read barrier, which may call. 1296 Location::OutputOverlap overlaps = 1297 ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call) 1298 ? Location::kOutputOverlap 1299 : Location::kNoOutputOverlap; 1300 locations->SetOut(Location::RequiresRegister(), overlaps); 1301 1302 // Temporary registers used in CAS. In the object case 1303 // (UnsafeCASObject intrinsic), these are also used for 1304 // card-marking, and possibly for (Baker) read barrier. 1305 locations->AddTemp(Location::RequiresRegister()); // Pointer. 1306 locations->AddTemp(Location::RequiresRegister()); // Temp 1. 1307 } 1308 1309 static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) { 1310 DCHECK_NE(type, DataType::Type::kInt64); 1311 1312 ArmVIXLAssembler* assembler = codegen->GetAssembler(); 1313 LocationSummary* locations = invoke->GetLocations(); 1314 1315 Location out_loc = locations->Out(); 1316 vixl32::Register out = OutputRegister(invoke); // Boolean result. 1317 1318 vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer. 1319 Location offset_loc = locations->InAt(2); 1320 vixl32::Register offset = LowRegisterFrom(offset_loc); // Offset (discard high 4B). 1321 vixl32::Register expected = InputRegisterAt(invoke, 3); // Expected. 1322 vixl32::Register value = InputRegisterAt(invoke, 4); // Value. 1323 1324 Location tmp_ptr_loc = locations->GetTemp(0); 1325 vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc); // Pointer to actual memory. 1326 vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Value in memory. 1327 1328 if (type == DataType::Type::kReference) { 1329 // The only read barrier implementation supporting the 1330 // UnsafeCASObject intrinsic is the Baker-style read barriers. 1331 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 1332 1333 // Mark card for object assuming new value is stored. Worst case we will mark an unchanged 1334 // object and scan the receiver at the next GC for nothing. 1335 bool value_can_be_null = true; // TODO: Worth finding out this information? 1336 codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null); 1337 1338 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 1339 // Need to make sure the reference stored in the field is a to-space 1340 // one before attempting the CAS or the CAS could fail incorrectly. 1341 codegen->UpdateReferenceFieldWithBakerReadBarrier( 1342 invoke, 1343 out_loc, // Unused, used only as a "temporary" within the read barrier. 1344 base, 1345 /* field_offset */ offset_loc, 1346 tmp_ptr_loc, 1347 /* needs_null_check */ false, 1348 tmp); 1349 } 1350 } 1351 1352 // Prevent reordering with prior memory operations. 1353 // Emit a DMB ISH instruction instead of an DMB ISHST one, as the 1354 // latter allows a preceding load to be delayed past the STXR 1355 // instruction below. 1356 __ Dmb(vixl32::ISH); 1357 1358 __ Add(tmp_ptr, base, offset); 1359 1360 if (kPoisonHeapReferences && type == DataType::Type::kReference) { 1361 codegen->GetAssembler()->PoisonHeapReference(expected); 1362 if (value.Is(expected)) { 1363 // Do not poison `value`, as it is the same register as 1364 // `expected`, which has just been poisoned. 1365 } else { 1366 codegen->GetAssembler()->PoisonHeapReference(value); 1367 } 1368 } 1369 1370 // do { 1371 // tmp = [r_ptr] - expected; 1372 // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); 1373 // result = tmp != 0; 1374 1375 vixl32::Label loop_head; 1376 __ Bind(&loop_head); 1377 1378 __ Ldrex(tmp, MemOperand(tmp_ptr)); 1379 1380 __ Subs(tmp, tmp, expected); 1381 1382 { 1383 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 1384 3 * kMaxInstructionSizeInBytes, 1385 CodeBufferCheckScope::kMaximumSize); 1386 1387 __ itt(eq); 1388 __ strex(eq, tmp, value, MemOperand(tmp_ptr)); 1389 __ cmp(eq, tmp, 1); 1390 } 1391 1392 __ B(eq, &loop_head, /* far_target */ false); 1393 1394 __ Dmb(vixl32::ISH); 1395 1396 __ Rsbs(out, tmp, 1); 1397 1398 { 1399 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 1400 2 * kMaxInstructionSizeInBytes, 1401 CodeBufferCheckScope::kMaximumSize); 1402 1403 __ it(cc); 1404 __ mov(cc, out, 0); 1405 } 1406 1407 if (kPoisonHeapReferences && type == DataType::Type::kReference) { 1408 codegen->GetAssembler()->UnpoisonHeapReference(expected); 1409 if (value.Is(expected)) { 1410 // Do not unpoison `value`, as it is the same register as 1411 // `expected`, which has just been unpoisoned. 1412 } else { 1413 codegen->GetAssembler()->UnpoisonHeapReference(value); 1414 } 1415 } 1416 } 1417 1418 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) { 1419 CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kInt32); 1420 } 1421 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) { 1422 // The only read barrier implementation supporting the 1423 // UnsafeCASObject intrinsic is the Baker-style read barriers. 1424 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { 1425 return; 1426 } 1427 1428 CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kReference); 1429 } 1430 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) { 1431 GenCas(invoke, DataType::Type::kInt32, codegen_); 1432 } 1433 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) { 1434 // The only read barrier implementation supporting the 1435 // UnsafeCASObject intrinsic is the Baker-style read barriers. 1436 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 1437 1438 GenCas(invoke, DataType::Type::kReference, codegen_); 1439 } 1440 1441 void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) { 1442 // The inputs plus one temp. 1443 LocationSummary* locations = 1444 new (allocator_) LocationSummary(invoke, 1445 invoke->InputAt(1)->CanBeNull() 1446 ? LocationSummary::kCallOnSlowPath 1447 : LocationSummary::kNoCall, 1448 kIntrinsified); 1449 locations->SetInAt(0, Location::RequiresRegister()); 1450 locations->SetInAt(1, Location::RequiresRegister()); 1451 locations->AddTemp(Location::RequiresRegister()); 1452 locations->AddTemp(Location::RequiresRegister()); 1453 locations->AddTemp(Location::RequiresRegister()); 1454 // Need temporary registers for String compression's feature. 1455 if (mirror::kUseStringCompression) { 1456 locations->AddTemp(Location::RequiresRegister()); 1457 } 1458 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 1459 } 1460 1461 // Forward declaration. 1462 // 1463 // ART build system imposes a size limit (deviceFrameSizeLimit) on the stack frames generated 1464 // by the compiler for every C++ function, and if this function gets inlined in 1465 // IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo, the limit will be exceeded, resulting in a 1466 // build failure. That is the reason why NO_INLINE attribute is used. 1467 static void NO_INLINE GenerateStringCompareToLoop(ArmVIXLAssembler* assembler, 1468 HInvoke* invoke, 1469 vixl32::Label* end, 1470 vixl32::Label* different_compression); 1471 1472 void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { 1473 ArmVIXLAssembler* assembler = GetAssembler(); 1474 LocationSummary* locations = invoke->GetLocations(); 1475 1476 const vixl32::Register str = InputRegisterAt(invoke, 0); 1477 const vixl32::Register arg = InputRegisterAt(invoke, 1); 1478 const vixl32::Register out = OutputRegister(invoke); 1479 1480 const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0)); 1481 const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); 1482 const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2)); 1483 vixl32::Register temp3; 1484 if (mirror::kUseStringCompression) { 1485 temp3 = RegisterFrom(locations->GetTemp(3)); 1486 } 1487 1488 vixl32::Label end; 1489 vixl32::Label different_compression; 1490 1491 // Get offsets of count and value fields within a string object. 1492 const int32_t count_offset = mirror::String::CountOffset().Int32Value(); 1493 1494 // Note that the null check must have been done earlier. 1495 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1496 1497 // Take slow path and throw if input can be and is null. 1498 SlowPathCodeARMVIXL* slow_path = nullptr; 1499 const bool can_slow_path = invoke->InputAt(1)->CanBeNull(); 1500 if (can_slow_path) { 1501 slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke); 1502 codegen_->AddSlowPath(slow_path); 1503 __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel()); 1504 } 1505 1506 // Reference equality check, return 0 if same reference. 1507 __ Subs(out, str, arg); 1508 __ B(eq, &end); 1509 1510 if (mirror::kUseStringCompression) { 1511 // Load `count` fields of this and argument strings. 1512 __ Ldr(temp3, MemOperand(str, count_offset)); 1513 __ Ldr(temp2, MemOperand(arg, count_offset)); 1514 // Extract lengths from the `count` fields. 1515 __ Lsr(temp0, temp3, 1u); 1516 __ Lsr(temp1, temp2, 1u); 1517 } else { 1518 // Load lengths of this and argument strings. 1519 __ Ldr(temp0, MemOperand(str, count_offset)); 1520 __ Ldr(temp1, MemOperand(arg, count_offset)); 1521 } 1522 // out = length diff. 1523 __ Subs(out, temp0, temp1); 1524 // temp0 = min(len(str), len(arg)). 1525 1526 { 1527 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 1528 2 * kMaxInstructionSizeInBytes, 1529 CodeBufferCheckScope::kMaximumSize); 1530 1531 __ it(gt); 1532 __ mov(gt, temp0, temp1); 1533 } 1534 1535 // Shorter string is empty? 1536 // Note that mirror::kUseStringCompression==true introduces lots of instructions, 1537 // which makes &end label far away from this branch and makes it not 'CBZ-encodable'. 1538 __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression); 1539 1540 if (mirror::kUseStringCompression) { 1541 // Check if both strings using same compression style to use this comparison loop. 1542 __ Eors(temp2, temp2, temp3); 1543 __ Lsrs(temp2, temp2, 1u); 1544 __ B(cs, &different_compression); 1545 // For string compression, calculate the number of bytes to compare (not chars). 1546 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned. 1547 __ Lsls(temp3, temp3, 31u); // Extract purely the compression flag. 1548 1549 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 1550 2 * kMaxInstructionSizeInBytes, 1551 CodeBufferCheckScope::kMaximumSize); 1552 1553 __ it(ne); 1554 __ add(ne, temp0, temp0, temp0); 1555 } 1556 1557 1558 GenerateStringCompareToLoop(assembler, invoke, &end, &different_compression); 1559 1560 __ Bind(&end); 1561 1562 if (can_slow_path) { 1563 __ Bind(slow_path->GetExitLabel()); 1564 } 1565 } 1566 1567 static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler, 1568 HInvoke* invoke, 1569 vixl32::Label* end, 1570 vixl32::Label* different_compression) { 1571 LocationSummary* locations = invoke->GetLocations(); 1572 1573 const vixl32::Register str = InputRegisterAt(invoke, 0); 1574 const vixl32::Register arg = InputRegisterAt(invoke, 1); 1575 const vixl32::Register out = OutputRegister(invoke); 1576 1577 const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0)); 1578 const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); 1579 const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2)); 1580 vixl32::Register temp3; 1581 if (mirror::kUseStringCompression) { 1582 temp3 = RegisterFrom(locations->GetTemp(3)); 1583 } 1584 1585 vixl32::Label loop; 1586 vixl32::Label find_char_diff; 1587 1588 const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); 1589 // Store offset of string value in preparation for comparison loop. 1590 __ Mov(temp1, value_offset); 1591 1592 // Assertions that must hold in order to compare multiple characters at a time. 1593 CHECK_ALIGNED(value_offset, 8); 1594 static_assert(IsAligned<8>(kObjectAlignment), 1595 "String data must be 8-byte aligned for unrolled CompareTo loop."); 1596 1597 const unsigned char_size = DataType::Size(DataType::Type::kUint16); 1598 DCHECK_EQ(char_size, 2u); 1599 1600 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 1601 1602 vixl32::Label find_char_diff_2nd_cmp; 1603 // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment). 1604 __ Bind(&loop); 1605 vixl32::Register temp_reg = temps.Acquire(); 1606 __ Ldr(temp_reg, MemOperand(str, temp1)); 1607 __ Ldr(temp2, MemOperand(arg, temp1)); 1608 __ Cmp(temp_reg, temp2); 1609 __ B(ne, &find_char_diff, /* far_target */ false); 1610 __ Add(temp1, temp1, char_size * 2); 1611 1612 __ Ldr(temp_reg, MemOperand(str, temp1)); 1613 __ Ldr(temp2, MemOperand(arg, temp1)); 1614 __ Cmp(temp_reg, temp2); 1615 __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false); 1616 __ Add(temp1, temp1, char_size * 2); 1617 // With string compression, we have compared 8 bytes, otherwise 4 chars. 1618 __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4)); 1619 __ B(hi, &loop, /* far_target */ false); 1620 __ B(end); 1621 1622 __ Bind(&find_char_diff_2nd_cmp); 1623 if (mirror::kUseStringCompression) { 1624 __ Subs(temp0, temp0, 4); // 4 bytes previously compared. 1625 __ B(ls, end, /* far_target */ false); // Was the second comparison fully beyond the end? 1626 } else { 1627 // Without string compression, we can start treating temp0 as signed 1628 // and rely on the signed comparison below. 1629 __ Sub(temp0, temp0, 2); 1630 } 1631 1632 // Find the single character difference. 1633 __ Bind(&find_char_diff); 1634 // Get the bit position of the first character that differs. 1635 __ Eor(temp1, temp2, temp_reg); 1636 __ Rbit(temp1, temp1); 1637 __ Clz(temp1, temp1); 1638 1639 // temp0 = number of characters remaining to compare. 1640 // (Without string compression, it could be < 1 if a difference is found by the second CMP 1641 // in the comparison loop, and after the end of the shorter string data). 1642 1643 // Without string compression (temp1 >> 4) = character where difference occurs between the last 1644 // two words compared, in the interval [0,1]. 1645 // (0 for low half-word different, 1 for high half-word different). 1646 // With string compression, (temp1 << 3) = byte where the difference occurs, 1647 // in the interval [0,3]. 1648 1649 // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside 1650 // the remaining string data, so just return length diff (out). 1651 // The comparison is unsigned for string compression, otherwise signed. 1652 __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4))); 1653 __ B((mirror::kUseStringCompression ? ls : le), end, /* far_target */ false); 1654 1655 // Extract the characters and calculate the difference. 1656 if (mirror::kUseStringCompression) { 1657 // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear 1658 // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`. 1659 // The compression flag is now in the highest bit of temp3, so let's play some tricks. 1660 __ Orr(temp3, temp3, 0xffu << 23); // uncompressed ? 0xff800000u : 0x7ff80000u 1661 __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u) 1662 __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u. 1663 __ Lsr(temp2, temp2, temp1); // Extract second character. 1664 __ Lsr(temp3, temp3, 16u); // uncompressed ? 0xffffu : 0xffu 1665 __ Lsr(out, temp_reg, temp1); // Extract first character. 1666 __ And(temp2, temp2, temp3); 1667 __ And(out, out, temp3); 1668 } else { 1669 __ Bic(temp1, temp1, 0xf); 1670 __ Lsr(temp2, temp2, temp1); 1671 __ Lsr(out, temp_reg, temp1); 1672 __ Movt(temp2, 0); 1673 __ Movt(out, 0); 1674 } 1675 1676 __ Sub(out, out, temp2); 1677 temps.Release(temp_reg); 1678 1679 if (mirror::kUseStringCompression) { 1680 __ B(end); 1681 __ Bind(different_compression); 1682 1683 // Comparison for different compression style. 1684 const size_t c_char_size = DataType::Size(DataType::Type::kInt8); 1685 DCHECK_EQ(c_char_size, 1u); 1686 1687 // We want to free up the temp3, currently holding `str.count`, for comparison. 1688 // So, we move it to the bottom bit of the iteration count `temp0` which we tnen 1689 // need to treat as unsigned. Start by freeing the bit with an ADD and continue 1690 // further down by a LSRS+SBC which will flip the meaning of the flag but allow 1691 // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition. 1692 __ Add(temp0, temp0, temp0); // Unlike LSL, this ADD is always 16-bit. 1693 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer. 1694 __ Mov(temp1, str); 1695 __ Mov(temp2, arg); 1696 __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag. 1697 { 1698 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 1699 3 * kMaxInstructionSizeInBytes, 1700 CodeBufferCheckScope::kMaximumSize); 1701 __ itt(cs); // Interleave with selection of temp1 and temp2. 1702 __ mov(cs, temp1, arg); // Preserves flags. 1703 __ mov(cs, temp2, str); // Preserves flags. 1704 } 1705 __ Sbc(temp0, temp0, 0); // Complete the move of the compression flag. 1706 1707 // Adjust temp1 and temp2 from string pointers to data pointers. 1708 __ Add(temp1, temp1, value_offset); 1709 __ Add(temp2, temp2, value_offset); 1710 1711 vixl32::Label different_compression_loop; 1712 vixl32::Label different_compression_diff; 1713 1714 // Main loop for different compression. 1715 temp_reg = temps.Acquire(); 1716 __ Bind(&different_compression_loop); 1717 __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex)); 1718 __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex)); 1719 __ Cmp(temp_reg, temp3); 1720 __ B(ne, &different_compression_diff, /* far_target */ false); 1721 __ Subs(temp0, temp0, 2); 1722 __ B(hi, &different_compression_loop, /* far_target */ false); 1723 __ B(end); 1724 1725 // Calculate the difference. 1726 __ Bind(&different_compression_diff); 1727 __ Sub(out, temp_reg, temp3); 1728 temps.Release(temp_reg); 1729 // Flip the difference if the `arg` is compressed. 1730 // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag. 1731 __ Lsrs(temp0, temp0, 1u); 1732 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1733 "Expecting 0=compressed, 1=uncompressed"); 1734 1735 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 1736 2 * kMaxInstructionSizeInBytes, 1737 CodeBufferCheckScope::kMaximumSize); 1738 __ it(cc); 1739 __ rsb(cc, out, out, 0); 1740 } 1741 } 1742 1743 // The cut off for unrolling the loop in String.equals() intrinsic for const strings. 1744 // The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression 1745 // and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4 1746 // instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE). 1747 // Allow up to 12 instructions (32 bytes) for the unrolled loop. 1748 constexpr size_t kShortConstStringEqualsCutoffInBytes = 16; 1749 1750 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) { 1751 if (candidate->IsLoadString()) { 1752 HLoadString* load_string = candidate->AsLoadString(); 1753 const DexFile& dex_file = load_string->GetDexFile(); 1754 return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length); 1755 } 1756 return nullptr; 1757 } 1758 1759 void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) { 1760 if (kEmitCompilerReadBarrier && 1761 !StringEqualsOptimizations(invoke).GetArgumentIsString() && 1762 !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { 1763 // No support for this odd case (String class is moveable, not in the boot image). 1764 return; 1765 } 1766 1767 LocationSummary* locations = 1768 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1769 InvokeRuntimeCallingConventionARMVIXL calling_convention; 1770 locations->SetInAt(0, Location::RequiresRegister()); 1771 locations->SetInAt(1, Location::RequiresRegister()); 1772 1773 // Temporary registers to store lengths of strings and for calculations. 1774 // Using instruction cbz requires a low register, so explicitly set a temp to be R0. 1775 locations->AddTemp(LocationFrom(r0)); 1776 1777 // For the generic implementation and for long const strings we need an extra temporary. 1778 // We do not need it for short const strings, up to 4 bytes, see code generation below. 1779 uint32_t const_string_length = 0u; 1780 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length); 1781 if (const_string == nullptr) { 1782 const_string = GetConstString(invoke->InputAt(1), &const_string_length); 1783 } 1784 bool is_compressed = 1785 mirror::kUseStringCompression && 1786 const_string != nullptr && 1787 mirror::String::DexFileStringAllASCII(const_string, const_string_length); 1788 if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) { 1789 locations->AddTemp(Location::RequiresRegister()); 1790 } 1791 1792 // TODO: If the String.equals() is used only for an immediately following HIf, we can 1793 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks. 1794 // Then we shall need an extra temporary register instead of the output register. 1795 locations->SetOut(Location::RequiresRegister()); 1796 } 1797 1798 void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { 1799 ArmVIXLAssembler* assembler = GetAssembler(); 1800 LocationSummary* locations = invoke->GetLocations(); 1801 1802 vixl32::Register str = InputRegisterAt(invoke, 0); 1803 vixl32::Register arg = InputRegisterAt(invoke, 1); 1804 vixl32::Register out = OutputRegister(invoke); 1805 1806 vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); 1807 1808 vixl32::Label loop; 1809 vixl32::Label end; 1810 vixl32::Label return_true; 1811 vixl32::Label return_false; 1812 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end); 1813 1814 // Get offsets of count, value, and class fields within a string object. 1815 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 1816 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); 1817 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value(); 1818 1819 // Note that the null check must have been done earlier. 1820 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1821 1822 StringEqualsOptimizations optimizations(invoke); 1823 if (!optimizations.GetArgumentNotNull()) { 1824 // Check if input is null, return false if it is. 1825 __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false); 1826 } 1827 1828 // Reference equality check, return true if same reference. 1829 __ Cmp(str, arg); 1830 __ B(eq, &return_true, /* far_target */ false); 1831 1832 if (!optimizations.GetArgumentIsString()) { 1833 // Instanceof check for the argument by comparing class fields. 1834 // All string objects must have the same type since String cannot be subclassed. 1835 // Receiver must be a string object, so its class field is equal to all strings' class fields. 1836 // If the argument is a string object, its class field must be equal to receiver's class field. 1837 __ Ldr(temp, MemOperand(str, class_offset)); 1838 __ Ldr(out, MemOperand(arg, class_offset)); 1839 __ Cmp(temp, out); 1840 __ B(ne, &return_false, /* far_target */ false); 1841 } 1842 1843 // Check if one of the inputs is a const string. Do not special-case both strings 1844 // being const, such cases should be handled by constant folding if needed. 1845 uint32_t const_string_length = 0u; 1846 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length); 1847 if (const_string == nullptr) { 1848 const_string = GetConstString(invoke->InputAt(1), &const_string_length); 1849 if (const_string != nullptr) { 1850 std::swap(str, arg); // Make sure the const string is in `str`. 1851 } 1852 } 1853 bool is_compressed = 1854 mirror::kUseStringCompression && 1855 const_string != nullptr && 1856 mirror::String::DexFileStringAllASCII(const_string, const_string_length); 1857 1858 if (const_string != nullptr) { 1859 // Load `count` field of the argument string and check if it matches the const string. 1860 // Also compares the compression style, if differs return false. 1861 __ Ldr(temp, MemOperand(arg, count_offset)); 1862 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed))); 1863 __ B(ne, &return_false, /* far_target */ false); 1864 } else { 1865 // Load `count` fields of this and argument strings. 1866 __ Ldr(temp, MemOperand(str, count_offset)); 1867 __ Ldr(out, MemOperand(arg, count_offset)); 1868 // Check if `count` fields are equal, return false if they're not. 1869 // Also compares the compression style, if differs return false. 1870 __ Cmp(temp, out); 1871 __ B(ne, &return_false, /* far_target */ false); 1872 } 1873 1874 // Assertions that must hold in order to compare strings 4 bytes at a time. 1875 // Ok to do this because strings are zero-padded to kObjectAlignment. 1876 DCHECK_ALIGNED(value_offset, 4); 1877 static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare."); 1878 1879 if (const_string != nullptr && 1880 const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes 1881 : kShortConstStringEqualsCutoffInBytes / 2u)) { 1882 // Load and compare the contents. Though we know the contents of the short const string 1883 // at compile time, materializing constants may be more code than loading from memory. 1884 int32_t offset = value_offset; 1885 size_t remaining_bytes = 1886 RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u); 1887 while (remaining_bytes > sizeof(uint32_t)) { 1888 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); 1889 UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler()); 1890 vixl32::Register temp2 = scratch_scope.Acquire(); 1891 __ Ldrd(temp, temp1, MemOperand(str, offset)); 1892 __ Ldrd(temp2, out, MemOperand(arg, offset)); 1893 __ Cmp(temp, temp2); 1894 __ B(ne, &return_false, /* far_label */ false); 1895 __ Cmp(temp1, out); 1896 __ B(ne, &return_false, /* far_label */ false); 1897 offset += 2u * sizeof(uint32_t); 1898 remaining_bytes -= 2u * sizeof(uint32_t); 1899 } 1900 if (remaining_bytes != 0u) { 1901 __ Ldr(temp, MemOperand(str, offset)); 1902 __ Ldr(out, MemOperand(arg, offset)); 1903 __ Cmp(temp, out); 1904 __ B(ne, &return_false, /* far_label */ false); 1905 } 1906 } else { 1907 // Return true if both strings are empty. Even with string compression `count == 0` means empty. 1908 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1909 "Expecting 0=compressed, 1=uncompressed"); 1910 __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false); 1911 1912 if (mirror::kUseStringCompression) { 1913 // For string compression, calculate the number of bytes to compare (not chars). 1914 // This could in theory exceed INT32_MAX, so treat temp as unsigned. 1915 __ Lsrs(temp, temp, 1u); // Extract length and check compression flag. 1916 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 1917 2 * kMaxInstructionSizeInBytes, 1918 CodeBufferCheckScope::kMaximumSize); 1919 __ it(cs); // If uncompressed, 1920 __ add(cs, temp, temp, temp); // double the byte count. 1921 } 1922 1923 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); 1924 UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler()); 1925 vixl32::Register temp2 = scratch_scope.Acquire(); 1926 1927 // Store offset of string value in preparation for comparison loop. 1928 __ Mov(temp1, value_offset); 1929 1930 // Loop to compare strings 4 bytes at a time starting at the front of the string. 1931 __ Bind(&loop); 1932 __ Ldr(out, MemOperand(str, temp1)); 1933 __ Ldr(temp2, MemOperand(arg, temp1)); 1934 __ Add(temp1, temp1, Operand::From(sizeof(uint32_t))); 1935 __ Cmp(out, temp2); 1936 __ B(ne, &return_false, /* far_target */ false); 1937 // With string compression, we have compared 4 bytes, otherwise 2 chars. 1938 __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2); 1939 __ B(hi, &loop, /* far_target */ false); 1940 } 1941 1942 // Return true and exit the function. 1943 // If loop does not result in returning false, we return true. 1944 __ Bind(&return_true); 1945 __ Mov(out, 1); 1946 __ B(final_label); 1947 1948 // Return false and exit the function. 1949 __ Bind(&return_false); 1950 __ Mov(out, 0); 1951 1952 if (end.IsReferenced()) { 1953 __ Bind(&end); 1954 } 1955 } 1956 1957 static void GenerateVisitStringIndexOf(HInvoke* invoke, 1958 ArmVIXLAssembler* assembler, 1959 CodeGeneratorARMVIXL* codegen, 1960 bool start_at_zero) { 1961 LocationSummary* locations = invoke->GetLocations(); 1962 1963 // Note that the null check must have been done earlier. 1964 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1965 1966 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, 1967 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char. 1968 SlowPathCodeARMVIXL* slow_path = nullptr; 1969 HInstruction* code_point = invoke->InputAt(1); 1970 if (code_point->IsIntConstant()) { 1971 if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) > 1972 std::numeric_limits<uint16_t>::max()) { 1973 // Always needs the slow-path. We could directly dispatch to it, but this case should be 1974 // rare, so for simplicity just put the full slow-path down and branch unconditionally. 1975 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke); 1976 codegen->AddSlowPath(slow_path); 1977 __ B(slow_path->GetEntryLabel()); 1978 __ Bind(slow_path->GetExitLabel()); 1979 return; 1980 } 1981 } else if (code_point->GetType() != DataType::Type::kUint16) { 1982 vixl32::Register char_reg = InputRegisterAt(invoke, 1); 1983 // 0xffff is not modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`. 1984 __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1); 1985 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke); 1986 codegen->AddSlowPath(slow_path); 1987 __ B(hs, slow_path->GetEntryLabel()); 1988 } 1989 1990 if (start_at_zero) { 1991 vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0)); 1992 DCHECK(tmp_reg.Is(r2)); 1993 // Start-index = 0. 1994 __ Mov(tmp_reg, 0); 1995 } 1996 1997 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path); 1998 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>(); 1999 2000 if (slow_path != nullptr) { 2001 __ Bind(slow_path->GetExitLabel()); 2002 } 2003 } 2004 2005 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) { 2006 LocationSummary* locations = new (allocator_) LocationSummary( 2007 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 2008 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's 2009 // best to align the inputs accordingly. 2010 InvokeRuntimeCallingConventionARMVIXL calling_convention; 2011 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 2012 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 2013 locations->SetOut(LocationFrom(r0)); 2014 2015 // Need to send start-index=0. 2016 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); 2017 } 2018 2019 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) { 2020 GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); 2021 } 2022 2023 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) { 2024 LocationSummary* locations = new (allocator_) LocationSummary( 2025 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 2026 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's 2027 // best to align the inputs accordingly. 2028 InvokeRuntimeCallingConventionARMVIXL calling_convention; 2029 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 2030 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 2031 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 2032 locations->SetOut(LocationFrom(r0)); 2033 } 2034 2035 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) { 2036 GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); 2037 } 2038 2039 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) { 2040 LocationSummary* locations = new (allocator_) LocationSummary( 2041 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 2042 InvokeRuntimeCallingConventionARMVIXL calling_convention; 2043 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 2044 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 2045 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 2046 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3))); 2047 locations->SetOut(LocationFrom(r0)); 2048 } 2049 2050 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) { 2051 ArmVIXLAssembler* assembler = GetAssembler(); 2052 vixl32::Register byte_array = InputRegisterAt(invoke, 0); 2053 __ Cmp(byte_array, 0); 2054 SlowPathCodeARMVIXL* slow_path = 2055 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke); 2056 codegen_->AddSlowPath(slow_path); 2057 __ B(eq, slow_path->GetEntryLabel()); 2058 2059 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path); 2060 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); 2061 __ Bind(slow_path->GetExitLabel()); 2062 } 2063 2064 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) { 2065 LocationSummary* locations = 2066 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 2067 InvokeRuntimeCallingConventionARMVIXL calling_convention; 2068 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 2069 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 2070 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 2071 locations->SetOut(LocationFrom(r0)); 2072 } 2073 2074 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) { 2075 // No need to emit code checking whether `locations->InAt(2)` is a null 2076 // pointer, as callers of the native method 2077 // 2078 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) 2079 // 2080 // all include a null check on `data` before calling that method. 2081 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); 2082 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); 2083 } 2084 2085 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) { 2086 LocationSummary* locations = new (allocator_) LocationSummary( 2087 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 2088 InvokeRuntimeCallingConventionARMVIXL calling_convention; 2089 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 2090 locations->SetOut(LocationFrom(r0)); 2091 } 2092 2093 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) { 2094 ArmVIXLAssembler* assembler = GetAssembler(); 2095 vixl32::Register string_to_copy = InputRegisterAt(invoke, 0); 2096 __ Cmp(string_to_copy, 0); 2097 SlowPathCodeARMVIXL* slow_path = 2098 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke); 2099 codegen_->AddSlowPath(slow_path); 2100 __ B(eq, slow_path->GetEntryLabel()); 2101 2102 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path); 2103 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); 2104 2105 __ Bind(slow_path->GetExitLabel()); 2106 } 2107 2108 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { 2109 // The only read barrier implementation supporting the 2110 // SystemArrayCopy intrinsic is the Baker-style read barriers. 2111 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { 2112 return; 2113 } 2114 2115 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); 2116 LocationSummary* locations = invoke->GetLocations(); 2117 if (locations == nullptr) { 2118 return; 2119 } 2120 2121 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); 2122 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); 2123 HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); 2124 2125 if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) { 2126 locations->SetInAt(1, Location::RequiresRegister()); 2127 } 2128 if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) { 2129 locations->SetInAt(3, Location::RequiresRegister()); 2130 } 2131 if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) { 2132 locations->SetInAt(4, Location::RequiresRegister()); 2133 } 2134 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2135 // Temporary register IP cannot be used in 2136 // ReadBarrierSystemArrayCopySlowPathARM (because that register 2137 // is clobbered by ReadBarrierMarkRegX entry points). Get an extra 2138 // temporary register from the register allocator. 2139 locations->AddTemp(Location::RequiresRegister()); 2140 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_); 2141 arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations); 2142 } 2143 } 2144 2145 static void CheckPosition(ArmVIXLAssembler* assembler, 2146 Location pos, 2147 vixl32::Register input, 2148 Location length, 2149 SlowPathCodeARMVIXL* slow_path, 2150 vixl32::Register temp, 2151 bool length_is_input_length = false) { 2152 // Where is the length in the Array? 2153 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value(); 2154 2155 if (pos.IsConstant()) { 2156 int32_t pos_const = Int32ConstantFrom(pos); 2157 if (pos_const == 0) { 2158 if (!length_is_input_length) { 2159 // Check that length(input) >= length. 2160 __ Ldr(temp, MemOperand(input, length_offset)); 2161 if (length.IsConstant()) { 2162 __ Cmp(temp, Int32ConstantFrom(length)); 2163 } else { 2164 __ Cmp(temp, RegisterFrom(length)); 2165 } 2166 __ B(lt, slow_path->GetEntryLabel()); 2167 } 2168 } else { 2169 // Check that length(input) >= pos. 2170 __ Ldr(temp, MemOperand(input, length_offset)); 2171 __ Subs(temp, temp, pos_const); 2172 __ B(lt, slow_path->GetEntryLabel()); 2173 2174 // Check that (length(input) - pos) >= length. 2175 if (length.IsConstant()) { 2176 __ Cmp(temp, Int32ConstantFrom(length)); 2177 } else { 2178 __ Cmp(temp, RegisterFrom(length)); 2179 } 2180 __ B(lt, slow_path->GetEntryLabel()); 2181 } 2182 } else if (length_is_input_length) { 2183 // The only way the copy can succeed is if pos is zero. 2184 vixl32::Register pos_reg = RegisterFrom(pos); 2185 __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel()); 2186 } else { 2187 // Check that pos >= 0. 2188 vixl32::Register pos_reg = RegisterFrom(pos); 2189 __ Cmp(pos_reg, 0); 2190 __ B(lt, slow_path->GetEntryLabel()); 2191 2192 // Check that pos <= length(input). 2193 __ Ldr(temp, MemOperand(input, length_offset)); 2194 __ Subs(temp, temp, pos_reg); 2195 __ B(lt, slow_path->GetEntryLabel()); 2196 2197 // Check that (length(input) - pos) >= length. 2198 if (length.IsConstant()) { 2199 __ Cmp(temp, Int32ConstantFrom(length)); 2200 } else { 2201 __ Cmp(temp, RegisterFrom(length)); 2202 } 2203 __ B(lt, slow_path->GetEntryLabel()); 2204 } 2205 } 2206 2207 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { 2208 // The only read barrier implementation supporting the 2209 // SystemArrayCopy intrinsic is the Baker-style read barriers. 2210 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 2211 2212 ArmVIXLAssembler* assembler = GetAssembler(); 2213 LocationSummary* locations = invoke->GetLocations(); 2214 2215 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 2216 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 2217 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 2218 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 2219 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); 2220 2221 vixl32::Register src = InputRegisterAt(invoke, 0); 2222 Location src_pos = locations->InAt(1); 2223 vixl32::Register dest = InputRegisterAt(invoke, 2); 2224 Location dest_pos = locations->InAt(3); 2225 Location length = locations->InAt(4); 2226 Location temp1_loc = locations->GetTemp(0); 2227 vixl32::Register temp1 = RegisterFrom(temp1_loc); 2228 Location temp2_loc = locations->GetTemp(1); 2229 vixl32::Register temp2 = RegisterFrom(temp2_loc); 2230 Location temp3_loc = locations->GetTemp(2); 2231 vixl32::Register temp3 = RegisterFrom(temp3_loc); 2232 2233 SlowPathCodeARMVIXL* intrinsic_slow_path = 2234 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke); 2235 codegen_->AddSlowPath(intrinsic_slow_path); 2236 2237 vixl32::Label conditions_on_positions_validated; 2238 SystemArrayCopyOptimizations optimizations(invoke); 2239 2240 // If source and destination are the same, we go to slow path if we need to do 2241 // forward copying. 2242 if (src_pos.IsConstant()) { 2243 int32_t src_pos_constant = Int32ConstantFrom(src_pos); 2244 if (dest_pos.IsConstant()) { 2245 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos); 2246 if (optimizations.GetDestinationIsSource()) { 2247 // Checked when building locations. 2248 DCHECK_GE(src_pos_constant, dest_pos_constant); 2249 } else if (src_pos_constant < dest_pos_constant) { 2250 __ Cmp(src, dest); 2251 __ B(eq, intrinsic_slow_path->GetEntryLabel()); 2252 } 2253 2254 // Checked when building locations. 2255 DCHECK(!optimizations.GetDestinationIsSource() 2256 || (src_pos_constant >= Int32ConstantFrom(dest_pos))); 2257 } else { 2258 if (!optimizations.GetDestinationIsSource()) { 2259 __ Cmp(src, dest); 2260 __ B(ne, &conditions_on_positions_validated, /* far_target */ false); 2261 } 2262 __ Cmp(RegisterFrom(dest_pos), src_pos_constant); 2263 __ B(gt, intrinsic_slow_path->GetEntryLabel()); 2264 } 2265 } else { 2266 if (!optimizations.GetDestinationIsSource()) { 2267 __ Cmp(src, dest); 2268 __ B(ne, &conditions_on_positions_validated, /* far_target */ false); 2269 } 2270 if (dest_pos.IsConstant()) { 2271 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos); 2272 __ Cmp(RegisterFrom(src_pos), dest_pos_constant); 2273 } else { 2274 __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos)); 2275 } 2276 __ B(lt, intrinsic_slow_path->GetEntryLabel()); 2277 } 2278 2279 __ Bind(&conditions_on_positions_validated); 2280 2281 if (!optimizations.GetSourceIsNotNull()) { 2282 // Bail out if the source is null. 2283 __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel()); 2284 } 2285 2286 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { 2287 // Bail out if the destination is null. 2288 __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel()); 2289 } 2290 2291 // If the length is negative, bail out. 2292 // We have already checked in the LocationsBuilder for the constant case. 2293 if (!length.IsConstant() && 2294 !optimizations.GetCountIsSourceLength() && 2295 !optimizations.GetCountIsDestinationLength()) { 2296 __ Cmp(RegisterFrom(length), 0); 2297 __ B(lt, intrinsic_slow_path->GetEntryLabel()); 2298 } 2299 2300 // Validity checks: source. 2301 CheckPosition(assembler, 2302 src_pos, 2303 src, 2304 length, 2305 intrinsic_slow_path, 2306 temp1, 2307 optimizations.GetCountIsSourceLength()); 2308 2309 // Validity checks: dest. 2310 CheckPosition(assembler, 2311 dest_pos, 2312 dest, 2313 length, 2314 intrinsic_slow_path, 2315 temp1, 2316 optimizations.GetCountIsDestinationLength()); 2317 2318 if (!optimizations.GetDoesNotNeedTypeCheck()) { 2319 // Check whether all elements of the source array are assignable to the component 2320 // type of the destination array. We do two checks: the classes are the same, 2321 // or the destination is Object[]. If none of these checks succeed, we go to the 2322 // slow path. 2323 2324 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2325 if (!optimizations.GetSourceIsNonPrimitiveArray()) { 2326 // /* HeapReference<Class> */ temp1 = src->klass_ 2327 codegen_->GenerateFieldLoadWithBakerReadBarrier( 2328 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); 2329 // Bail out if the source is not a non primitive array. 2330 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 2331 codegen_->GenerateFieldLoadWithBakerReadBarrier( 2332 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); 2333 __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel()); 2334 // If heap poisoning is enabled, `temp1` has been unpoisoned 2335 // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. 2336 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_); 2337 __ Ldrh(temp1, MemOperand(temp1, primitive_offset)); 2338 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 2339 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel()); 2340 } 2341 2342 // /* HeapReference<Class> */ temp1 = dest->klass_ 2343 codegen_->GenerateFieldLoadWithBakerReadBarrier( 2344 invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false); 2345 2346 if (!optimizations.GetDestinationIsNonPrimitiveArray()) { 2347 // Bail out if the destination is not a non primitive array. 2348 // 2349 // Register `temp1` is not trashed by the read barrier emitted 2350 // by GenerateFieldLoadWithBakerReadBarrier below, as that 2351 // method produces a call to a ReadBarrierMarkRegX entry point, 2352 // which saves all potentially live registers, including 2353 // temporaries such a `temp1`. 2354 // /* HeapReference<Class> */ temp2 = temp1->component_type_ 2355 codegen_->GenerateFieldLoadWithBakerReadBarrier( 2356 invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false); 2357 __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel()); 2358 // If heap poisoning is enabled, `temp2` has been unpoisoned 2359 // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. 2360 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_); 2361 __ Ldrh(temp2, MemOperand(temp2, primitive_offset)); 2362 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 2363 __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel()); 2364 } 2365 2366 // For the same reason given earlier, `temp1` is not trashed by the 2367 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. 2368 // /* HeapReference<Class> */ temp2 = src->klass_ 2369 codegen_->GenerateFieldLoadWithBakerReadBarrier( 2370 invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false); 2371 // Note: if heap poisoning is on, we are comparing two unpoisoned references here. 2372 __ Cmp(temp1, temp2); 2373 2374 if (optimizations.GetDestinationIsTypedObjectArray()) { 2375 vixl32::Label do_copy; 2376 __ B(eq, &do_copy, /* far_target */ false); 2377 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 2378 codegen_->GenerateFieldLoadWithBakerReadBarrier( 2379 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); 2380 // /* HeapReference<Class> */ temp1 = temp1->super_class_ 2381 // We do not need to emit a read barrier for the following 2382 // heap reference load, as `temp1` is only used in a 2383 // comparison with null below, and this reference is not 2384 // kept afterwards. 2385 __ Ldr(temp1, MemOperand(temp1, super_offset)); 2386 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel()); 2387 __ Bind(&do_copy); 2388 } else { 2389 __ B(ne, intrinsic_slow_path->GetEntryLabel()); 2390 } 2391 } else { 2392 // Non read barrier code. 2393 2394 // /* HeapReference<Class> */ temp1 = dest->klass_ 2395 __ Ldr(temp1, MemOperand(dest, class_offset)); 2396 // /* HeapReference<Class> */ temp2 = src->klass_ 2397 __ Ldr(temp2, MemOperand(src, class_offset)); 2398 bool did_unpoison = false; 2399 if (!optimizations.GetDestinationIsNonPrimitiveArray() || 2400 !optimizations.GetSourceIsNonPrimitiveArray()) { 2401 // One or two of the references need to be unpoisoned. Unpoison them 2402 // both to make the identity check valid. 2403 assembler->MaybeUnpoisonHeapReference(temp1); 2404 assembler->MaybeUnpoisonHeapReference(temp2); 2405 did_unpoison = true; 2406 } 2407 2408 if (!optimizations.GetDestinationIsNonPrimitiveArray()) { 2409 // Bail out if the destination is not a non primitive array. 2410 // /* HeapReference<Class> */ temp3 = temp1->component_type_ 2411 __ Ldr(temp3, MemOperand(temp1, component_offset)); 2412 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); 2413 assembler->MaybeUnpoisonHeapReference(temp3); 2414 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); 2415 __ Ldrh(temp3, MemOperand(temp3, primitive_offset)); 2416 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 2417 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel()); 2418 } 2419 2420 if (!optimizations.GetSourceIsNonPrimitiveArray()) { 2421 // Bail out if the source is not a non primitive array. 2422 // /* HeapReference<Class> */ temp3 = temp2->component_type_ 2423 __ Ldr(temp3, MemOperand(temp2, component_offset)); 2424 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); 2425 assembler->MaybeUnpoisonHeapReference(temp3); 2426 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); 2427 __ Ldrh(temp3, MemOperand(temp3, primitive_offset)); 2428 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 2429 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel()); 2430 } 2431 2432 __ Cmp(temp1, temp2); 2433 2434 if (optimizations.GetDestinationIsTypedObjectArray()) { 2435 vixl32::Label do_copy; 2436 __ B(eq, &do_copy, /* far_target */ false); 2437 if (!did_unpoison) { 2438 assembler->MaybeUnpoisonHeapReference(temp1); 2439 } 2440 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 2441 __ Ldr(temp1, MemOperand(temp1, component_offset)); 2442 assembler->MaybeUnpoisonHeapReference(temp1); 2443 // /* HeapReference<Class> */ temp1 = temp1->super_class_ 2444 __ Ldr(temp1, MemOperand(temp1, super_offset)); 2445 // No need to unpoison the result, we're comparing against null. 2446 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel()); 2447 __ Bind(&do_copy); 2448 } else { 2449 __ B(ne, intrinsic_slow_path->GetEntryLabel()); 2450 } 2451 } 2452 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { 2453 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); 2454 // Bail out if the source is not a non primitive array. 2455 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2456 // /* HeapReference<Class> */ temp1 = src->klass_ 2457 codegen_->GenerateFieldLoadWithBakerReadBarrier( 2458 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); 2459 // /* HeapReference<Class> */ temp3 = temp1->component_type_ 2460 codegen_->GenerateFieldLoadWithBakerReadBarrier( 2461 invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); 2462 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); 2463 // If heap poisoning is enabled, `temp3` has been unpoisoned 2464 // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. 2465 } else { 2466 // /* HeapReference<Class> */ temp1 = src->klass_ 2467 __ Ldr(temp1, MemOperand(src, class_offset)); 2468 assembler->MaybeUnpoisonHeapReference(temp1); 2469 // /* HeapReference<Class> */ temp3 = temp1->component_type_ 2470 __ Ldr(temp3, MemOperand(temp1, component_offset)); 2471 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); 2472 assembler->MaybeUnpoisonHeapReference(temp3); 2473 } 2474 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); 2475 __ Ldrh(temp3, MemOperand(temp3, primitive_offset)); 2476 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 2477 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel()); 2478 } 2479 2480 if (length.IsConstant() && Int32ConstantFrom(length) == 0) { 2481 // Null constant length: not need to emit the loop code at all. 2482 } else { 2483 vixl32::Label done; 2484 const DataType::Type type = DataType::Type::kReference; 2485 const int32_t element_size = DataType::Size(type); 2486 2487 if (length.IsRegister()) { 2488 // Don't enter the copy loop if the length is null. 2489 __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target */ false); 2490 } 2491 2492 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2493 // TODO: Also convert this intrinsic to the IsGcMarking strategy? 2494 2495 // SystemArrayCopy implementation for Baker read barriers (see 2496 // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier): 2497 // 2498 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); 2499 // lfence; // Load fence or artificial data dependency to prevent load-load reordering 2500 // bool is_gray = (rb_state == ReadBarrier::GrayState()); 2501 // if (is_gray) { 2502 // // Slow-path copy. 2503 // do { 2504 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); 2505 // } while (src_ptr != end_ptr) 2506 // } else { 2507 // // Fast-path copy. 2508 // do { 2509 // *dest_ptr++ = *src_ptr++; 2510 // } while (src_ptr != end_ptr) 2511 // } 2512 2513 // /* int32_t */ monitor = src->monitor_ 2514 __ Ldr(temp2, MemOperand(src, monitor_offset)); 2515 // /* LockWord */ lock_word = LockWord(monitor) 2516 static_assert(sizeof(LockWord) == sizeof(int32_t), 2517 "art::LockWord and int32_t have different sizes."); 2518 2519 // Introduce a dependency on the lock_word including the rb_state, 2520 // which shall prevent load-load reordering without using 2521 // a memory barrier (which would be more expensive). 2522 // `src` is unchanged by this operation, but its value now depends 2523 // on `temp2`. 2524 __ Add(src, src, Operand(temp2, vixl32::LSR, 32)); 2525 2526 // Compute the base source address in `temp1`. 2527 // Note that `temp1` (the base source address) is computed from 2528 // `src` (and `src_pos`) here, and thus honors the artificial 2529 // dependency of `src` on `temp2`. 2530 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1); 2531 // Compute the end source address in `temp3`. 2532 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3); 2533 // The base destination address is computed later, as `temp2` is 2534 // used for intermediate computations. 2535 2536 // Slow path used to copy array when `src` is gray. 2537 // Note that the base destination address is computed in `temp2` 2538 // by the slow path code. 2539 SlowPathCodeARMVIXL* read_barrier_slow_path = 2540 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke); 2541 codegen_->AddSlowPath(read_barrier_slow_path); 2542 2543 // Given the numeric representation, it's enough to check the low bit of the 2544 // rb_state. We do that by shifting the bit out of the lock word with LSRS 2545 // which can be a 16-bit instruction unlike the TST immediate. 2546 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); 2547 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); 2548 __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); 2549 // Carry flag is the last bit shifted out by LSRS. 2550 __ B(cs, read_barrier_slow_path->GetEntryLabel()); 2551 2552 // Fast-path copy. 2553 // Compute the base destination address in `temp2`. 2554 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2); 2555 // Iterate over the arrays and do a raw copy of the objects. We don't need to 2556 // poison/unpoison. 2557 vixl32::Label loop; 2558 __ Bind(&loop); 2559 { 2560 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 2561 const vixl32::Register temp_reg = temps.Acquire(); 2562 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex)); 2563 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); 2564 } 2565 __ Cmp(temp1, temp3); 2566 __ B(ne, &loop, /* far_target */ false); 2567 2568 __ Bind(read_barrier_slow_path->GetExitLabel()); 2569 } else { 2570 // Non read barrier code. 2571 // Compute the base source address in `temp1`. 2572 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1); 2573 // Compute the base destination address in `temp2`. 2574 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2); 2575 // Compute the end source address in `temp3`. 2576 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3); 2577 // Iterate over the arrays and do a raw copy of the objects. We don't need to 2578 // poison/unpoison. 2579 vixl32::Label loop; 2580 __ Bind(&loop); 2581 { 2582 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 2583 const vixl32::Register temp_reg = temps.Acquire(); 2584 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex)); 2585 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); 2586 } 2587 __ Cmp(temp1, temp3); 2588 __ B(ne, &loop, /* far_target */ false); 2589 } 2590 __ Bind(&done); 2591 } 2592 2593 // We only need one card marking on the destination array. 2594 codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false); 2595 2596 __ Bind(intrinsic_slow_path->GetExitLabel()); 2597 } 2598 2599 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { 2600 // If the graph is debuggable, all callee-saved floating-point registers are blocked by 2601 // the code generator. Furthermore, the register allocator creates fixed live intervals 2602 // for all caller-saved registers because we are doing a function call. As a result, if 2603 // the input and output locations are unallocated, the register allocator runs out of 2604 // registers and fails; however, a debuggable graph is not the common case. 2605 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) { 2606 return; 2607 } 2608 2609 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); 2610 DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64); 2611 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64); 2612 2613 LocationSummary* const locations = 2614 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 2615 const InvokeRuntimeCallingConventionARMVIXL calling_convention; 2616 2617 locations->SetInAt(0, Location::RequiresFpuRegister()); 2618 locations->SetOut(Location::RequiresFpuRegister()); 2619 // Native code uses the soft float ABI. 2620 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); 2621 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); 2622 } 2623 2624 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { 2625 // If the graph is debuggable, all callee-saved floating-point registers are blocked by 2626 // the code generator. Furthermore, the register allocator creates fixed live intervals 2627 // for all caller-saved registers because we are doing a function call. As a result, if 2628 // the input and output locations are unallocated, the register allocator runs out of 2629 // registers and fails; however, a debuggable graph is not the common case. 2630 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) { 2631 return; 2632 } 2633 2634 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U); 2635 DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64); 2636 DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64); 2637 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64); 2638 2639 LocationSummary* const locations = 2640 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 2641 const InvokeRuntimeCallingConventionARMVIXL calling_convention; 2642 2643 locations->SetInAt(0, Location::RequiresFpuRegister()); 2644 locations->SetInAt(1, Location::RequiresFpuRegister()); 2645 locations->SetOut(Location::RequiresFpuRegister()); 2646 // Native code uses the soft float ABI. 2647 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); 2648 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); 2649 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); 2650 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3))); 2651 } 2652 2653 static void GenFPToFPCall(HInvoke* invoke, 2654 ArmVIXLAssembler* assembler, 2655 CodeGeneratorARMVIXL* codegen, 2656 QuickEntrypointEnum entry) { 2657 LocationSummary* const locations = invoke->GetLocations(); 2658 2659 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); 2660 DCHECK(locations->WillCall() && locations->Intrinsified()); 2661 2662 // Native code uses the soft float ABI. 2663 __ Vmov(RegisterFrom(locations->GetTemp(0)), 2664 RegisterFrom(locations->GetTemp(1)), 2665 InputDRegisterAt(invoke, 0)); 2666 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); 2667 __ Vmov(OutputDRegister(invoke), 2668 RegisterFrom(locations->GetTemp(0)), 2669 RegisterFrom(locations->GetTemp(1))); 2670 } 2671 2672 static void GenFPFPToFPCall(HInvoke* invoke, 2673 ArmVIXLAssembler* assembler, 2674 CodeGeneratorARMVIXL* codegen, 2675 QuickEntrypointEnum entry) { 2676 LocationSummary* const locations = invoke->GetLocations(); 2677 2678 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U); 2679 DCHECK(locations->WillCall() && locations->Intrinsified()); 2680 2681 // Native code uses the soft float ABI. 2682 __ Vmov(RegisterFrom(locations->GetTemp(0)), 2683 RegisterFrom(locations->GetTemp(1)), 2684 InputDRegisterAt(invoke, 0)); 2685 __ Vmov(RegisterFrom(locations->GetTemp(2)), 2686 RegisterFrom(locations->GetTemp(3)), 2687 InputDRegisterAt(invoke, 1)); 2688 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); 2689 __ Vmov(OutputDRegister(invoke), 2690 RegisterFrom(locations->GetTemp(0)), 2691 RegisterFrom(locations->GetTemp(1))); 2692 } 2693 2694 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) { 2695 CreateFPToFPCallLocations(allocator_, invoke); 2696 } 2697 2698 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) { 2699 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos); 2700 } 2701 2702 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) { 2703 CreateFPToFPCallLocations(allocator_, invoke); 2704 } 2705 2706 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) { 2707 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin); 2708 } 2709 2710 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) { 2711 CreateFPToFPCallLocations(allocator_, invoke); 2712 } 2713 2714 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) { 2715 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos); 2716 } 2717 2718 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) { 2719 CreateFPToFPCallLocations(allocator_, invoke); 2720 } 2721 2722 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) { 2723 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin); 2724 } 2725 2726 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) { 2727 CreateFPToFPCallLocations(allocator_, invoke); 2728 } 2729 2730 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) { 2731 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan); 2732 } 2733 2734 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) { 2735 CreateFPToFPCallLocations(allocator_, invoke); 2736 } 2737 2738 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) { 2739 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt); 2740 } 2741 2742 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) { 2743 CreateFPToFPCallLocations(allocator_, invoke); 2744 } 2745 2746 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) { 2747 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh); 2748 } 2749 2750 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) { 2751 CreateFPToFPCallLocations(allocator_, invoke); 2752 } 2753 2754 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) { 2755 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp); 2756 } 2757 2758 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) { 2759 CreateFPToFPCallLocations(allocator_, invoke); 2760 } 2761 2762 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) { 2763 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1); 2764 } 2765 2766 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) { 2767 CreateFPToFPCallLocations(allocator_, invoke); 2768 } 2769 2770 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) { 2771 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog); 2772 } 2773 2774 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) { 2775 CreateFPToFPCallLocations(allocator_, invoke); 2776 } 2777 2778 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) { 2779 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10); 2780 } 2781 2782 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) { 2783 CreateFPToFPCallLocations(allocator_, invoke); 2784 } 2785 2786 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) { 2787 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh); 2788 } 2789 2790 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) { 2791 CreateFPToFPCallLocations(allocator_, invoke); 2792 } 2793 2794 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) { 2795 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan); 2796 } 2797 2798 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) { 2799 CreateFPToFPCallLocations(allocator_, invoke); 2800 } 2801 2802 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) { 2803 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh); 2804 } 2805 2806 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) { 2807 CreateFPFPToFPCallLocations(allocator_, invoke); 2808 } 2809 2810 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) { 2811 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2); 2812 } 2813 2814 void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) { 2815 CreateFPFPToFPCallLocations(allocator_, invoke); 2816 } 2817 2818 void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) { 2819 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow); 2820 } 2821 2822 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) { 2823 CreateFPFPToFPCallLocations(allocator_, invoke); 2824 } 2825 2826 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) { 2827 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot); 2828 } 2829 2830 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) { 2831 CreateFPFPToFPCallLocations(allocator_, invoke); 2832 } 2833 2834 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) { 2835 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter); 2836 } 2837 2838 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) { 2839 CreateIntToIntLocations(allocator_, invoke); 2840 } 2841 2842 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) { 2843 ArmVIXLAssembler* assembler = GetAssembler(); 2844 __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0)); 2845 } 2846 2847 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) { 2848 CreateLongToLongLocationsWithOverlap(allocator_, invoke); 2849 } 2850 2851 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) { 2852 ArmVIXLAssembler* assembler = GetAssembler(); 2853 LocationSummary* locations = invoke->GetLocations(); 2854 2855 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); 2856 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); 2857 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out()); 2858 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out()); 2859 2860 __ Rbit(out_reg_lo, in_reg_hi); 2861 __ Rbit(out_reg_hi, in_reg_lo); 2862 } 2863 2864 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) { 2865 CreateIntToIntLocations(allocator_, invoke); 2866 } 2867 2868 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) { 2869 ArmVIXLAssembler* assembler = GetAssembler(); 2870 __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0)); 2871 } 2872 2873 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) { 2874 CreateLongToLongLocationsWithOverlap(allocator_, invoke); 2875 } 2876 2877 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) { 2878 ArmVIXLAssembler* assembler = GetAssembler(); 2879 LocationSummary* locations = invoke->GetLocations(); 2880 2881 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); 2882 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); 2883 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out()); 2884 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out()); 2885 2886 __ Rev(out_reg_lo, in_reg_hi); 2887 __ Rev(out_reg_hi, in_reg_lo); 2888 } 2889 2890 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) { 2891 CreateIntToIntLocations(allocator_, invoke); 2892 } 2893 2894 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) { 2895 ArmVIXLAssembler* assembler = GetAssembler(); 2896 __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0)); 2897 } 2898 2899 static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) { 2900 DCHECK(DataType::IsIntOrLongType(type)) << type; 2901 DCHECK_EQ(instr->GetType(), DataType::Type::kInt32); 2902 DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type); 2903 2904 bool is_long = type == DataType::Type::kInt64; 2905 LocationSummary* locations = instr->GetLocations(); 2906 Location in = locations->InAt(0); 2907 vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in); 2908 vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0; 2909 vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0)); 2910 vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0)); 2911 vixl32::Register out_r = OutputRegister(instr); 2912 2913 // Move data from core register(s) to temp D-reg for bit count calculation, then move back. 2914 // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg, 2915 // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency, 2916 // That's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'. 2917 __ Vmov(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0| 2918 __ Vcnt(Untyped8, tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c| 2919 __ Vpaddl(U8, tmp_d, tmp_d); // Temp DReg |--c|--c|--c|--c| 2920 __ Vpaddl(U16, tmp_d, tmp_d); // Temp DReg |------c|------c| 2921 if (is_long) { 2922 __ Vpaddl(U32, tmp_d, tmp_d); // Temp DReg |--------------c| 2923 } 2924 __ Vmov(out_r, tmp_s); 2925 } 2926 2927 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) { 2928 CreateIntToIntLocations(allocator_, invoke); 2929 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 2930 } 2931 2932 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) { 2933 GenBitCount(invoke, DataType::Type::kInt32, GetAssembler()); 2934 } 2935 2936 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) { 2937 VisitIntegerBitCount(invoke); 2938 } 2939 2940 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) { 2941 GenBitCount(invoke, DataType::Type::kInt64, GetAssembler()); 2942 } 2943 2944 static void GenHighestOneBit(HInvoke* invoke, 2945 DataType::Type type, 2946 CodeGeneratorARMVIXL* codegen) { 2947 DCHECK(DataType::IsIntOrLongType(type)); 2948 2949 ArmVIXLAssembler* assembler = codegen->GetAssembler(); 2950 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 2951 const vixl32::Register temp = temps.Acquire(); 2952 2953 if (type == DataType::Type::kInt64) { 2954 LocationSummary* locations = invoke->GetLocations(); 2955 Location in = locations->InAt(0); 2956 Location out = locations->Out(); 2957 2958 vixl32::Register in_reg_lo = LowRegisterFrom(in); 2959 vixl32::Register in_reg_hi = HighRegisterFrom(in); 2960 vixl32::Register out_reg_lo = LowRegisterFrom(out); 2961 vixl32::Register out_reg_hi = HighRegisterFrom(out); 2962 2963 __ Mov(temp, 0x80000000); // Modified immediate. 2964 __ Clz(out_reg_lo, in_reg_lo); 2965 __ Clz(out_reg_hi, in_reg_hi); 2966 __ Lsr(out_reg_lo, temp, out_reg_lo); 2967 __ Lsrs(out_reg_hi, temp, out_reg_hi); 2968 2969 // Discard result for lowest 32 bits if highest 32 bits are not zero. 2970 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, 2971 // we check that the output is in a low register, so that a 16-bit MOV 2972 // encoding can be used. If output is in a high register, then we generate 2973 // 4 more bytes of code to avoid a branch. 2974 Operand mov_src(0); 2975 if (!out_reg_lo.IsLow()) { 2976 __ Mov(LeaveFlags, temp, 0); 2977 mov_src = Operand(temp); 2978 } 2979 ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(), 2980 2 * vixl32::k16BitT32InstructionSizeInBytes, 2981 CodeBufferCheckScope::kExactSize); 2982 __ it(ne); 2983 __ mov(ne, out_reg_lo, mov_src); 2984 } else { 2985 vixl32::Register out = OutputRegister(invoke); 2986 vixl32::Register in = InputRegisterAt(invoke, 0); 2987 2988 __ Mov(temp, 0x80000000); // Modified immediate. 2989 __ Clz(out, in); 2990 __ Lsr(out, temp, out); 2991 } 2992 } 2993 2994 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) { 2995 CreateIntToIntLocations(allocator_, invoke); 2996 } 2997 2998 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) { 2999 GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_); 3000 } 3001 3002 void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) { 3003 CreateLongToLongLocationsWithOverlap(allocator_, invoke); 3004 } 3005 3006 void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) { 3007 GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_); 3008 } 3009 3010 static void GenLowestOneBit(HInvoke* invoke, 3011 DataType::Type type, 3012 CodeGeneratorARMVIXL* codegen) { 3013 DCHECK(DataType::IsIntOrLongType(type)); 3014 3015 ArmVIXLAssembler* assembler = codegen->GetAssembler(); 3016 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 3017 const vixl32::Register temp = temps.Acquire(); 3018 3019 if (type == DataType::Type::kInt64) { 3020 LocationSummary* locations = invoke->GetLocations(); 3021 Location in = locations->InAt(0); 3022 Location out = locations->Out(); 3023 3024 vixl32::Register in_reg_lo = LowRegisterFrom(in); 3025 vixl32::Register in_reg_hi = HighRegisterFrom(in); 3026 vixl32::Register out_reg_lo = LowRegisterFrom(out); 3027 vixl32::Register out_reg_hi = HighRegisterFrom(out); 3028 3029 __ Rsb(out_reg_hi, in_reg_hi, 0); 3030 __ Rsb(out_reg_lo, in_reg_lo, 0); 3031 __ And(out_reg_hi, out_reg_hi, in_reg_hi); 3032 // The result of this operation is 0 iff in_reg_lo is 0 3033 __ Ands(out_reg_lo, out_reg_lo, in_reg_lo); 3034 3035 // Discard result for highest 32 bits if lowest 32 bits are not zero. 3036 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, 3037 // we check that the output is in a low register, so that a 16-bit MOV 3038 // encoding can be used. If output is in a high register, then we generate 3039 // 4 more bytes of code to avoid a branch. 3040 Operand mov_src(0); 3041 if (!out_reg_lo.IsLow()) { 3042 __ Mov(LeaveFlags, temp, 0); 3043 mov_src = Operand(temp); 3044 } 3045 ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(), 3046 2 * vixl32::k16BitT32InstructionSizeInBytes, 3047 CodeBufferCheckScope::kExactSize); 3048 __ it(ne); 3049 __ mov(ne, out_reg_hi, mov_src); 3050 } else { 3051 vixl32::Register out = OutputRegister(invoke); 3052 vixl32::Register in = InputRegisterAt(invoke, 0); 3053 3054 __ Rsb(temp, in, 0); 3055 __ And(out, temp, in); 3056 } 3057 } 3058 3059 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) { 3060 CreateIntToIntLocations(allocator_, invoke); 3061 } 3062 3063 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) { 3064 GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_); 3065 } 3066 3067 void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) { 3068 CreateLongToLongLocationsWithOverlap(allocator_, invoke); 3069 } 3070 3071 void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) { 3072 GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_); 3073 } 3074 3075 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) { 3076 LocationSummary* locations = 3077 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 3078 locations->SetInAt(0, Location::RequiresRegister()); 3079 locations->SetInAt(1, Location::RequiresRegister()); 3080 locations->SetInAt(2, Location::RequiresRegister()); 3081 locations->SetInAt(3, Location::RequiresRegister()); 3082 locations->SetInAt(4, Location::RequiresRegister()); 3083 3084 // Temporary registers to store lengths of strings and for calculations. 3085 locations->AddTemp(Location::RequiresRegister()); 3086 locations->AddTemp(Location::RequiresRegister()); 3087 locations->AddTemp(Location::RequiresRegister()); 3088 } 3089 3090 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) { 3091 ArmVIXLAssembler* assembler = GetAssembler(); 3092 LocationSummary* locations = invoke->GetLocations(); 3093 3094 // Check assumption that sizeof(Char) is 2 (used in scaling below). 3095 const size_t char_size = DataType::Size(DataType::Type::kUint16); 3096 DCHECK_EQ(char_size, 2u); 3097 3098 // Location of data in char array buffer. 3099 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); 3100 3101 // Location of char array data in string. 3102 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); 3103 3104 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin); 3105 // Since getChars() calls getCharsNoCheck() - we use registers rather than constants. 3106 vixl32::Register srcObj = InputRegisterAt(invoke, 0); 3107 vixl32::Register srcBegin = InputRegisterAt(invoke, 1); 3108 vixl32::Register srcEnd = InputRegisterAt(invoke, 2); 3109 vixl32::Register dstObj = InputRegisterAt(invoke, 3); 3110 vixl32::Register dstBegin = InputRegisterAt(invoke, 4); 3111 3112 vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0)); 3113 vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1)); 3114 vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2)); 3115 3116 vixl32::Label done, compressed_string_loop; 3117 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done); 3118 // dst to be copied. 3119 __ Add(dst_ptr, dstObj, data_offset); 3120 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1)); 3121 3122 __ Subs(num_chr, srcEnd, srcBegin); 3123 // Early out for valid zero-length retrievals. 3124 __ B(eq, final_label, /* far_target */ false); 3125 3126 // src range to copy. 3127 __ Add(src_ptr, srcObj, value_offset); 3128 3129 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 3130 vixl32::Register temp; 3131 vixl32::Label compressed_string_preloop; 3132 if (mirror::kUseStringCompression) { 3133 // Location of count in string. 3134 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 3135 temp = temps.Acquire(); 3136 // String's length. 3137 __ Ldr(temp, MemOperand(srcObj, count_offset)); 3138 __ Tst(temp, 1); 3139 temps.Release(temp); 3140 __ B(eq, &compressed_string_preloop, /* far_target */ false); 3141 } 3142 __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1)); 3143 3144 // Do the copy. 3145 vixl32::Label loop, remainder; 3146 3147 temp = temps.Acquire(); 3148 // Save repairing the value of num_chr on the < 4 character path. 3149 __ Subs(temp, num_chr, 4); 3150 __ B(lt, &remainder, /* far_target */ false); 3151 3152 // Keep the result of the earlier subs, we are going to fetch at least 4 characters. 3153 __ Mov(num_chr, temp); 3154 3155 // Main loop used for longer fetches loads and stores 4x16-bit characters at a time. 3156 // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code 3157 // to rectify these everywhere this intrinsic applies.) 3158 __ Bind(&loop); 3159 __ Ldr(temp, MemOperand(src_ptr, char_size * 2)); 3160 __ Subs(num_chr, num_chr, 4); 3161 __ Str(temp, MemOperand(dst_ptr, char_size * 2)); 3162 __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex)); 3163 __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex)); 3164 temps.Release(temp); 3165 __ B(ge, &loop, /* far_target */ false); 3166 3167 __ Adds(num_chr, num_chr, 4); 3168 __ B(eq, final_label, /* far_target */ false); 3169 3170 // Main loop for < 4 character case and remainder handling. Loads and stores one 3171 // 16-bit Java character at a time. 3172 __ Bind(&remainder); 3173 temp = temps.Acquire(); 3174 __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex)); 3175 __ Subs(num_chr, num_chr, 1); 3176 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); 3177 temps.Release(temp); 3178 __ B(gt, &remainder, /* far_target */ false); 3179 3180 if (mirror::kUseStringCompression) { 3181 __ B(final_label); 3182 3183 const size_t c_char_size = DataType::Size(DataType::Type::kInt8); 3184 DCHECK_EQ(c_char_size, 1u); 3185 // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. 3186 __ Bind(&compressed_string_preloop); 3187 __ Add(src_ptr, src_ptr, srcBegin); 3188 __ Bind(&compressed_string_loop); 3189 temp = temps.Acquire(); 3190 __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex)); 3191 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); 3192 temps.Release(temp); 3193 __ Subs(num_chr, num_chr, 1); 3194 __ B(gt, &compressed_string_loop, /* far_target */ false); 3195 } 3196 3197 if (done.IsReferenced()) { 3198 __ Bind(&done); 3199 } 3200 } 3201 3202 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { 3203 CreateFPToIntLocations(allocator_, invoke); 3204 } 3205 3206 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { 3207 ArmVIXLAssembler* const assembler = GetAssembler(); 3208 const vixl32::Register out = OutputRegister(invoke); 3209 // Shifting left by 1 bit makes the value encodable as an immediate operand; 3210 // we don't care about the sign bit anyway. 3211 constexpr uint32_t infinity = kPositiveInfinityFloat << 1U; 3212 3213 __ Vmov(out, InputSRegisterAt(invoke, 0)); 3214 // We don't care about the sign bit, so shift left. 3215 __ Lsl(out, out, 1); 3216 __ Eor(out, out, infinity); 3217 codegen_->GenerateConditionWithZero(kCondEQ, out, out); 3218 } 3219 3220 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { 3221 CreateFPToIntLocations(allocator_, invoke); 3222 } 3223 3224 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { 3225 ArmVIXLAssembler* const assembler = GetAssembler(); 3226 const vixl32::Register out = OutputRegister(invoke); 3227 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 3228 const vixl32::Register temp = temps.Acquire(); 3229 // The highest 32 bits of double precision positive infinity separated into 3230 // two constants encodable as immediate operands. 3231 constexpr uint32_t infinity_high = 0x7f000000U; 3232 constexpr uint32_t infinity_high2 = 0x00f00000U; 3233 3234 static_assert((infinity_high | infinity_high2) == 3235 static_cast<uint32_t>(kPositiveInfinityDouble >> 32U), 3236 "The constants do not add up to the high 32 bits of double " 3237 "precision positive infinity."); 3238 __ Vmov(temp, out, InputDRegisterAt(invoke, 0)); 3239 __ Eor(out, out, infinity_high); 3240 __ Eor(out, out, infinity_high2); 3241 // We don't care about the sign bit, so shift left. 3242 __ Orr(out, temp, Operand(out, vixl32::LSL, 1)); 3243 codegen_->GenerateConditionWithZero(kCondEQ, out, out); 3244 } 3245 3246 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) { 3247 if (features_.HasARMv8AInstructions()) { 3248 CreateFPToFPLocations(allocator_, invoke); 3249 } 3250 } 3251 3252 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) { 3253 ArmVIXLAssembler* assembler = GetAssembler(); 3254 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions()); 3255 __ Vrintp(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); 3256 } 3257 3258 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) { 3259 if (features_.HasARMv8AInstructions()) { 3260 CreateFPToFPLocations(allocator_, invoke); 3261 } 3262 } 3263 3264 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) { 3265 ArmVIXLAssembler* assembler = GetAssembler(); 3266 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions()); 3267 __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); 3268 } 3269 3270 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { 3271 InvokeRuntimeCallingConventionARMVIXL calling_convention; 3272 IntrinsicVisitor::ComputeIntegerValueOfLocations( 3273 invoke, 3274 codegen_, 3275 LocationFrom(r0), 3276 LocationFrom(calling_convention.GetRegisterAt(0))); 3277 } 3278 3279 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { 3280 IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); 3281 LocationSummary* locations = invoke->GetLocations(); 3282 ArmVIXLAssembler* const assembler = GetAssembler(); 3283 3284 vixl32::Register out = RegisterFrom(locations->Out()); 3285 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 3286 vixl32::Register temp = temps.Acquire(); 3287 InvokeRuntimeCallingConventionARMVIXL calling_convention; 3288 vixl32::Register argument = calling_convention.GetRegisterAt(0); 3289 if (invoke->InputAt(0)->IsConstant()) { 3290 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); 3291 if (value >= info.low && value <= info.high) { 3292 // Just embed the j.l.Integer in the code. 3293 ScopedObjectAccess soa(Thread::Current()); 3294 mirror::Object* boxed = info.cache->Get(value + (-info.low)); 3295 DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); 3296 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); 3297 __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); 3298 } else { 3299 // Allocate and initialize a new j.l.Integer. 3300 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the 3301 // JIT object table. 3302 uint32_t address = 3303 dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); 3304 __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address)); 3305 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); 3306 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); 3307 __ Mov(temp, value); 3308 assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset); 3309 // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation 3310 // one. 3311 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); 3312 } 3313 } else { 3314 vixl32::Register in = RegisterFrom(locations->InAt(0)); 3315 // Check bounds of our cache. 3316 __ Add(out, in, -info.low); 3317 __ Cmp(out, info.high - info.low + 1); 3318 vixl32::Label allocate, done; 3319 __ B(hs, &allocate, /* is_far_target */ false); 3320 // If the value is within the bounds, load the j.l.Integer directly from the array. 3321 uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); 3322 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); 3323 __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address)); 3324 codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out); 3325 assembler->MaybeUnpoisonHeapReference(out); 3326 __ B(&done); 3327 __ Bind(&allocate); 3328 // Otherwise allocate and initialize a new j.l.Integer. 3329 address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); 3330 __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address)); 3331 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); 3332 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); 3333 assembler->StoreToOffset(kStoreWord, in, out, info.value_offset); 3334 // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation 3335 // one. 3336 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); 3337 __ Bind(&done); 3338 } 3339 } 3340 3341 void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) { 3342 LocationSummary* locations = 3343 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 3344 locations->SetOut(Location::RequiresRegister()); 3345 } 3346 3347 void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) { 3348 ArmVIXLAssembler* assembler = GetAssembler(); 3349 vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out()); 3350 int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value(); 3351 __ Ldr(out, MemOperand(tr, offset)); 3352 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 3353 vixl32::Register temp = temps.Acquire(); 3354 vixl32::Label done; 3355 vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done); 3356 __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); 3357 __ Dmb(vixl32::ISH); 3358 __ Mov(temp, 0); 3359 assembler->StoreToOffset(kStoreWord, temp, tr, offset); 3360 __ Dmb(vixl32::ISH); 3361 if (done.IsReferenced()) { 3362 __ Bind(&done); 3363 } 3364 } 3365 3366 void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) { 3367 LocationSummary* locations = 3368 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 3369 locations->SetInAt(0, Location::Any()); 3370 } 3371 3372 void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } 3373 3374 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? 3375 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. 3376 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) 3377 UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent) 3378 3379 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf); 3380 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter); 3381 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend); 3382 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength); 3383 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString); 3384 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppend); 3385 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength); 3386 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString); 3387 3388 // 1.8. 3389 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt) 3390 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong) 3391 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt) 3392 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong) 3393 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject) 3394 3395 UNREACHABLE_INTRINSICS(ARMVIXL) 3396 3397 #undef __ 3398 3399 } // namespace arm 3400 } // namespace art 3401