/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm_vixl.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"

#include "aarch32/constants-aarch32.h"

namespace art {
namespace arm {

#define __ assembler->GetVIXLAssembler()->

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::HighSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputSRegister;
using helpers::OutputRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::DRegisterFromS;

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
// intrinsified call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
//
// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
//       sub-optimal (compared to a direct pointer call), but this is a slow-path.

class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
      : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}

  Location MoveArguments(CodeGenerator* codegen) {
    InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
    return calling_convention_visitor.GetMethodLocation();
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    Location method_loc = MoveArguments(codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this);
    } else {
      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this);
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      codegen->MoveFromReturnRegister(out, invoke_->GetType());
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
};

// Compute base address for the System.arraycopy intrinsic in `base`.
static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler,
                                          Primitive::Type type,
                                          const vixl32::Register& array,
                                          const Location& pos,
                                          const vixl32::Register& base) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. We can allow Primitive::kPrimChar as `type` to implement
  // the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, Primitive::kPrimNot);
  const int32_t element_size = Primitive::ComponentSize(type);
  const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

  if (pos.IsConstant()) {
    int32_t constant = Int32ConstantFrom(pos);
    __ Add(base, array, element_size * constant + data_offset);
  } else {
    __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift));
    __ Add(base, base, data_offset);
  }
}

// Compute end address for the System.arraycopy intrinsic in `end`.
static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler,
                                         Primitive::Type type,
                                         const Location& copy_length,
                                         const vixl32::Register& base,
                                         const vixl32::Register& end) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. We can allow Primitive::kPrimChar as `type` to implement
  // the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, Primitive::kPrimNot);
  const int32_t element_size = Primitive::ComponentSize(type);
  const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);

  if (copy_length.IsConstant()) {
    int32_t constant = Int32ConstantFrom(copy_length);
    __ Add(end, base, element_size * constant);
  } else {
    __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift));
  }
}
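
// For a reference array (element_size == 4), the two helpers above compute
// `base = array + data_offset + pos * 4` and `end = base + copy_length * 4`;
// when `pos` (or `copy_length`) is a constant, the computation folds into a
// single ADD with an immediate operand.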

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    Primitive::Type type = Primitive::kPrimNot;
    const int32_t element_size = Primitive::ComponentSize(type);

    vixl32::Register dest = InputRegisterAt(instruction_, 2);
    Location dest_pos = locations->InAt(3);
    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));

    __ Bind(GetEntryLabel());
    // Compute the base destination address in `dst_curr_addr`.
    GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);

    vixl32::Label loop;
    __ Bind(&loop);
    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    assembler->MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK(!tmp.IsSP());
    DCHECK(!tmp.IsLR());
    DCHECK(!tmp.IsPC());
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK(!src_curr_addr.Is(ip));
    DCHECK(!dst_curr_addr.Is(ip));
    DCHECK(!src_stop_addr.Is(ip));
    DCHECK(!tmp.Is(ip));
    DCHECK(tmp.IsRegister()) << tmp;
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    assembler->MaybePoisonHeapReference(tmp);
    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(ne, &loop, /* far_target */ false);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
};

IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    : arena_(codegen->GetGraph()->GetArena()),
      codegen_(codegen),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
  } else {
    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
  } else {
    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
  }
}
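
// MoveFPToInt and MoveIntToFP implement the Float/Double raw-bits conversions
// as plain register moves, e.g. Float.floatToRawIntBits(1.0f) is a single
// `vmov r, s` producing 0x3F800000. Being the "raw" variants, no NaN
// canonicalization is required.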

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(HInvoke* invoke,
                                    Primitive::Type type,
                                    CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location in = locations->InAt(0);
  vixl32::Register out = RegisterFrom(locations->Out());

  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));

  if (type == Primitive::kPrimLong) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ Clz(out, in_reg_hi);
    __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false);
    __ Clz(out, in_reg_lo);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    __ Clz(out, RegisterFrom(in));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_);
}
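
// ARM has CLZ but no trailing-zero-count instruction, so the generator below
// bit-reverses the input with RBIT and then counts leading zeros: e.g. for
// input 0x00000008, RBIT gives 0x10000000 and CLZ returns 3, which is indeed
// Integer.numberOfTrailingZeros(8).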

static void GenNumberOfTrailingZeros(HInvoke* invoke,
                                     Primitive::Type type,
                                     CodeGeneratorARMVIXL* codegen) {
  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  vixl32::Register out = RegisterFrom(locations->Out());

  if (type == Primitive::kPrimLong) {
    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ Rbit(out, in_reg_lo);
    __ Clz(out, out);
    __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false);
    __ Rbit(out, in_reg_hi);
    __ Clz(out, out);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    vixl32::Register in = RegisterFrom(locations->InAt(0));
    __ Rbit(out, in);
    __ Clz(out, out);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_);
}

static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
  __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);

  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations,
                          bool is64bit,
                          ArmVIXLAssembler* assembler) {
  Location in = locations->InAt(0);
  Location output = locations->Out();

  vixl32::Register mask = RegisterFrom(locations->GetTemp(0));

  if (is64bit) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Register out_reg_lo = LowRegisterFrom(output);
    vixl32::Register out_reg_hi = HighRegisterFrom(output);

    DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";

    __ Asr(mask, in_reg_hi, 31);
    __ Adds(out_reg_lo, in_reg_lo, mask);
    __ Adc(out_reg_hi, in_reg_hi, mask);
    __ Eor(out_reg_lo, mask, out_reg_lo);
    __ Eor(out_reg_hi, mask, out_reg_hi);
  } else {
    vixl32::Register in_reg = RegisterFrom(in);
    vixl32::Register out_reg = RegisterFrom(output);

    __ Asr(mask, in_reg, 31);
    __ Add(out_reg, in_reg, mask);
    __ Eor(out_reg, mask, out_reg);
  }
}
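
// GenAbsInteger above uses the classic branchless abs: mask = in >> 31
// (arithmetic) is 0 for non-negative inputs and -1 for negative ones, and
// (in + mask) ^ mask negates exactly the negative case. E.g. for in = -5:
// mask = 0xFFFFFFFF, in + mask = -6, and -6 ^ 0xFFFFFFFF = 5. As with
// Math.abs, Integer.MIN_VALUE maps to itself.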

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    return;
  }

  vixl32::SRegister op1 = SRegisterFrom(op1_loc);
  vixl32::SRegister op2 = SRegisterFrom(op2_loc);
  vixl32::SRegister out = OutputSRegister(invoke);
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  const vixl32::Register temp1 = temps.Acquire();
  vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::Label nan, done;
  vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);

  DCHECK(op1.Is(out));

  __ Vcmp(op1, op2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  __ B(vs, &nan, /* far_target */ false);  // if un-ordered, go to NaN handling.

  // op1 <> op2
  vixl32::ConditionType cond = is_min ? gt : lt;
  {
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(cond);
    __ vmov(cond, F32, out, op2);
  }
  // for <>(not equal), we've done min/max calculation.
  __ B(ne, final_label, /* far_target */ false);

  // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
  __ Vmov(temp1, op1);
  __ Vmov(temp2, op2);
  if (is_min) {
    __ Orr(temp1, temp1, temp2);
  } else {
    __ And(temp1, temp1, temp2);
  }
  __ Vmov(out, temp1);
  __ B(final_label);

  // handle NaN input.
  __ Bind(&nan);
  __ Movt(temp1, High16Bits(kNanFloat));  // 0x7FC0xxxx is a NaN.
  __ Vmov(out, temp1);

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}
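
// The equal-inputs path of GenMinMaxFloat above exploits the IEEE-754 bit
// patterns of signed zeros: +0.0f is 0x00000000 and -0.0f is 0x80000000, so
// when the operands compare equal a bitwise OR yields -0.0 (the correct min)
// and a bitwise AND yields +0.0 (the correct max).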

static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFloat(invoke, /* is_min */ true, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFloat(invoke, /* is_min */ false, codegen_);
}

static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    return;
  }

  vixl32::DRegister op1 = DRegisterFrom(op1_loc);
  vixl32::DRegister op2 = DRegisterFrom(op2_loc);
  vixl32::DRegister out = OutputDRegister(invoke);
  vixl32::Label handle_nan_eq, done;
  vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);

  DCHECK(op1.Is(out));

  __ Vcmp(op1, op2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  __ B(vs, &handle_nan_eq, /* far_target */ false);  // if un-ordered, go to NaN handling.

  // op1 <> op2
  vixl32::ConditionType cond = is_min ? gt : lt;
  {
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(cond);
    __ vmov(cond, F64, out, op2);
  }
  // for <>(not equal), we've done min/max calculation.
  __ B(ne, final_label, /* far_target */ false);

  // handle op1 == op2, max(+0.0,-0.0).
  if (!is_min) {
    __ Vand(F64, out, op1, op2);
    __ B(final_label);
  }

  // handle op1 == op2, min(+0.0,-0.0), NaN input.
  __ Bind(&handle_nan_eq);
  __ Vorr(F64, out, op1, op2);  // assemble op1/-0.0/NaN.

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxDouble(invoke, /* is_min */ true, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxDouble(invoke, /* is_min */ false, codegen_);
}

static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    return;
  }

  vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
  vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
  vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
  vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
  vixl32::Register out_lo = LowRegisterFrom(out_loc);
  vixl32::Register out_hi = HighRegisterFrom(out_loc);
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  const vixl32::Register temp = temps.Acquire();

  DCHECK(op1_lo.Is(out_lo));
  DCHECK(op1_hi.Is(out_hi));

  // Compare op1 >= op2, or op1 < op2.
  __ Cmp(out_lo, op2_lo);
  __ Sbcs(temp, out_hi, op2_hi);

  // Now GE/LT condition code is correct for the long comparison.
  {
    vixl32::ConditionType cond = is_min ? ge : lt;
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                3 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ itt(cond);
    __ mov(cond, out_lo, op2_lo);
    __ mov(cond, out_hi, op2_hi);
  }
}
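
// The CMP/SBCS pair in GenMinMaxLong above performs a full 64-bit signed
// comparison without a branch: CMP subtracts the low words and SBCS subtracts
// the high words with borrow into a discarded scratch register, leaving the
// N and V flags (and thus the GE/LT conditions) set as if op1 - op2 had been
// computed in 64 bits.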

static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMaxLong(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMaxLong(invoke, /* is_min */ false, GetAssembler());
}

static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  vixl32::Register op1 = InputRegisterAt(invoke, 0);
  vixl32::Register op2 = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  __ Cmp(op1, op2);

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           3 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ ite(is_min ? lt : gt);
    __ mov(is_min ? lt : gt, out, op1);
    __ mov(is_min ? ge : le, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ false, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    CreateFPToFPLocations(arena_, invoke);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}
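
// Math.rint rounds to the nearest integral value with ties going to the even
// neighbor (rint(2.5) == 2.0, rint(3.5) == 4.0), which is exactly the VRINTN
// (round-to-nearest-even) rounding mode, so a single ARMv8-A instruction
// suffices above.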

void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                              LocationSummary::kNoCall,
                                                              kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());

  ArmVIXLAssembler* assembler = GetAssembler();
  vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
  vixl32::Register out_reg = OutputRegister(invoke);
  vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::Label done;
  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);

  // Round to nearest integer, ties away from zero.
  __ Vcvta(S32, F32, temp1, in_reg);
  __ Vmov(out_reg, temp1);

  // For positive, zero or NaN inputs, rounding is done.
  __ Cmp(out_reg, 0);
  __ B(ge, final_label, /* far_target */ false);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
  __ Vrinta(F32, F32, temp1, in_reg);
  __ Vmov(temp2, 0.5);
  __ Vsub(F32, temp1, in_reg, temp1);
  __ Vcmp(F32, temp1, temp2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  {
    // Use ExactAssemblyScope here because we are using IT.
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(eq);
    __ add(eq, out_reg, out_reg, 1);
  }

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 1. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
  if (addr.Is(lo)) {
    // Load the high word first so the address is not clobbered before the second load.
    __ Ldr(hi, MemOperand(addr, 4));
    __ Ldr(lo, MemOperand(addr));
  } else {
    __ Ldr(lo, MemOperand(addr));
    __ Ldr(hi, MemOperand(addr, 4));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strb(InputRegisterAt(invoke, 1),
          MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Str(InputRegisterAt(invoke, 1),
         MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 1. Then unaligned accesses throw a processor
  // exception. So we can't use strd as addr may be unaligned.
  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strh(InputRegisterAt(invoke, 1),
          MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Ldr(OutputRegister(invoke),
         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         Primitive::Type type,
                         bool is_volatile,
                         CodeGeneratorARMVIXL* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location base_loc = locations->InAt(1);
  vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
  Location trg_loc = locations->Out();

  switch (type) {
    case Primitive::kPrimInt: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      __ Ldr(trg, MemOperand(base, offset));
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    case Primitive::kPrimNot: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
          Location temp = locations->GetTemp(0);
          codegen->GenerateReferenceLoadWithBakerReadBarrier(
              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
        } else {
          __ Ldr(trg, MemOperand(base, offset));
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
        }
      } else {
        __ Ldr(trg, MemOperand(base, offset));
        if (is_volatile) {
          __ Dmb(vixl32::ISH);
        }
        assembler->MaybeUnpoisonHeapReference(trg);
      }
      break;
    }

    case Primitive::kPrimLong: {
      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
        const vixl32::Register temp_reg = temps.Acquire();
        __ Add(temp_reg, base, offset);
        __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
      } else {
        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
      }
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected type " << type;
      UNREACHABLE();
  }
}
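
// Note on the volatile long case in GenUnsafeGet above: on cores without
// atomic LDRD/STRD, a 64-bit volatile read uses LDREXD, which is single-copy
// atomic for an aligned doubleword; a plain LDRD there could observe a torn
// value from a concurrent 64-bit store.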

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
                                          HInvoke* invoke,
                                          Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need a temporary register for the read barrier marking slow
    // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
                                     const ArmInstructionSetFeatures& features,
                                     Primitive::Type type,
                                     bool is_volatile,
                                     HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());

  if (type == Primitive::kPrimLong) {
    // Potentially need temps for ldrexd-strexd loop.
    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
    }
  } else if (type == Primitive::kPrimNot) {
    // Temps for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Temp.
    locations->AddTemp(Location::RequiresRegister());  // Card.
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
}
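
// Barrier scheme used by GenUnsafePut below: both ordered (putOrdered*) and
// volatile stores are preceded by a DMB ISH, giving the store release
// semantics; volatile stores additionally emit a second DMB ISH after the
// store so that it cannot be reordered with subsequent accesses.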

static void GenUnsafePut(LocationSummary* locations,
                         Primitive::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();

  vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
  vixl32::Register value;

  if (is_volatile || is_ordered) {
    __ Dmb(vixl32::ISH);
  }

  if (type == Primitive::kPrimLong) {
    vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
    vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
    value = value_lo;
    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
      vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
      vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
      const vixl32::Register temp_reg = temps.Acquire();

      __ Add(temp_reg, base, offset);
      vixl32::Label loop_head;
      __ Bind(&loop_head);
      __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
      __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
      __ Cmp(temp_lo, 0);
      __ B(ne, &loop_head, /* far_target */ false);
    } else {
      __ Strd(value_lo, value_hi, MemOperand(base, offset));
    }
  } else {
    value = RegisterFrom(locations->InAt(3));
    vixl32::Register source = value;
    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
      vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
      __ Mov(temp, value);
      assembler->PoisonHeapReference(temp);
      source = temp;
    }
    __ Str(source, MemOperand(base, offset));
  }

  if (is_volatile) {
    __ Dmb(vixl32::ISH);
  }

  if (type == Primitive::kPrimNot) {
    vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
    vixl32::Register card = RegisterFrom(locations->GetTemp(1));
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}

static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
                                                HInvoke* invoke,
                                                Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  // If heap poisoning is enabled, we don't want the unpoisoning
  // operations to potentially clobber the output. Likewise when
  // emitting a (Baker) read barrier, which may call.
  Location::OutputOverlap overlaps =
      ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
          ? Location::kOutputOverlap
          : Location::kNoOutputOverlap;
  locations->SetOut(Location::RequiresRegister(), overlaps);

  // Temporary registers used in CAS. In the object case
  // (UnsafeCASObject intrinsic), these are also used for
  // card-marking, and possibly for (Baker) read barrier.
  locations->AddTemp(Location::RequiresRegister());  // Pointer.
  locations->AddTemp(Location::RequiresRegister());  // Temp 1.
}
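
// GenCas below emits the usual LDREX/STREX retry loop and then computes the
// boolean result branchlessly: after the loop `tmp` is zero iff the stored
// value matched `expected`, so `rsbs out, tmp, #1` yields 1 on success, and
// the conditional `mov cc, out, #0` clears `out` whenever the subtraction
// borrowed (tmp > 1; the tmp == 1 case already produced 0).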

static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) {
  DCHECK_NE(type, Primitive::kPrimLong);

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Location out_loc = locations->Out();
  vixl32::Register out = OutputRegister(invoke);             // Boolean result.

  vixl32::Register base = InputRegisterAt(invoke, 1);        // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);     // Offset (discard high 4B).
  vixl32::Register expected = InputRegisterAt(invoke, 3);    // Expected.
  vixl32::Register value = InputRegisterAt(invoke, 4);       // Value.

  Location tmp_ptr_loc = locations->GetTemp(0);
  vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc);      // Pointer to actual memory.
  vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));  // Value in memory.

  if (type == Primitive::kPrimNot) {
    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
    // object and scan the receiver at the next GC for nothing.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // Need to make sure the reference stored in the field is a to-space
      // one before attempting the CAS or the CAS could fail incorrectly.
      codegen->UpdateReferenceFieldWithBakerReadBarrier(
          invoke,
          out_loc,  // Unused, used only as a "temporary" within the read barrier.
          base,
          /* field_offset */ offset_loc,
          tmp_ptr_loc,
          /* needs_null_check */ false,
          tmp);
    }
  }

  // Prevent reordering with prior memory operations.
  // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
  // latter allows a preceding load to be delayed past the STXR
  // instruction below.
  __ Dmb(vixl32::ISH);

  __ Add(tmp_ptr, base, offset);

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    codegen->GetAssembler()->PoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not poison `value`, as it is the same register as
      // `expected`, which has just been poisoned.
    } else {
      codegen->GetAssembler()->PoisonHeapReference(value);
    }
  }

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = tmp != 0;

  vixl32::Label loop_head;
  __ Bind(&loop_head);

  __ Ldrex(tmp, MemOperand(tmp_ptr));

  __ Subs(tmp, tmp, expected);

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           3 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ itt(eq);
    __ strex(eq, tmp, value, MemOperand(tmp_ptr));
    __ cmp(eq, tmp, 1);
  }

  __ B(eq, &loop_head, /* far_target */ false);

  __ Dmb(vixl32::ISH);

  __ Rsbs(out, tmp, 1);

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           2 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ it(cc);
    __ mov(cc, out, 0);
  }

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    codegen->GetAssembler()->UnpoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not unpoison `value`, as it is the same register as
      // `expected`, which has just been unpoisoned.
    } else {
      codegen->GetAssembler()->UnpoisonHeapReference(value);
    }
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCas(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCas(invoke, Primitive::kPrimNot, codegen_);
}
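
// With string compression enabled, String.count does not hold the length
// directly: the length lives in the upper 31 bits and the LSB is a
// compression flag (1 for uncompressed UTF-16 data, 0 for compressed 8-bit
// data). This is why the code below extracts lengths with LSR #1 and tests
// the flag bit separately.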

void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            invoke->InputAt(1)->CanBeNull()
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  // Need an extra temporary register for the String compression feature.
  if (mirror::kUseStringCompression) {
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  vixl32::Register str = InputRegisterAt(invoke, 0);
  vixl32::Register arg = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
  vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
  vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
  vixl32::Register temp3;
  if (mirror::kUseStringCompression) {
    temp3 = RegisterFrom(locations->GetTemp(3));
  }

  vixl32::Label loop;
  vixl32::Label find_char_diff;
  vixl32::Label end;
  vixl32::Label different_compression;

  // Get offsets of count and value fields within a string object.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Take slow path and throw if input can be and is null.
  SlowPathCodeARMVIXL* slow_path = nullptr;
  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
  if (can_slow_path) {
    slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
    codegen_->AddSlowPath(slow_path);
    __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
  }

  // Reference equality check, return 0 if same reference.
  __ Subs(out, str, arg);
  __ B(eq, &end);

  if (mirror::kUseStringCompression) {
    // Load `count` fields of this and argument strings.
    __ Ldr(temp3, MemOperand(str, count_offset));
    __ Ldr(temp2, MemOperand(arg, count_offset));
    // Extract lengths from the `count` fields.
    __ Lsr(temp0, temp3, 1u);
    __ Lsr(temp1, temp2, 1u);
  } else {
    // Load lengths of this and argument strings.
    __ Ldr(temp0, MemOperand(str, count_offset));
    __ Ldr(temp1, MemOperand(arg, count_offset));
  }
  // out = length diff.
  __ Subs(out, temp0, temp1);
  // temp0 = min(len(str), len(arg)).

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           2 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ it(gt);
    __ mov(gt, temp0, temp1);
  }
1535   // Note that mirror::kUseStringCompression==true introduces lots of instructions,
1536   // which moves the &end label far away from this branch and makes it not 'CBZ-encodable'.
1537   __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
1538
1539   if (mirror::kUseStringCompression) {
1540     // Check that both strings use the same compression style; only then can this loop be used.
1541     __ Eors(temp2, temp2, temp3);
1542     __ Lsrs(temp2, temp2, 1u);
1543     __ B(cs, &different_compression);
1544     // For string compression, calculate the number of bytes to compare (not chars).
1545     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1546     __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.
1547
1548     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1549                            2 * kMaxInstructionSizeInBytes,
1550                            CodeBufferCheckScope::kMaximumSize);
1551
1552     __ it(ne);
1553     __ add(ne, temp0, temp0, temp0);
1554   }
1555
1556   // Store offset of string value in preparation for comparison loop.
1557   __ Mov(temp1, value_offset);
1558
1559   // Assertions that must hold in order to compare multiple characters at a time.
1560   CHECK_ALIGNED(value_offset, 8);
1561   static_assert(IsAligned<8>(kObjectAlignment),
1562                 "String data must be 8-byte aligned for unrolled CompareTo loop.");
1563
1564   const unsigned char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1565   DCHECK_EQ(char_size, 2u);
1566
1567   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1568
1569   vixl32::Label find_char_diff_2nd_cmp;
1570   // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
1571   __ Bind(&loop);
1572   vixl32::Register temp_reg = temps.Acquire();
1573   __ Ldr(temp_reg, MemOperand(str, temp1));
1574   __ Ldr(temp2, MemOperand(arg, temp1));
1575   __ Cmp(temp_reg, temp2);
1576   __ B(ne, &find_char_diff, /* far_target */ false);
1577   __ Add(temp1, temp1, char_size * 2);
1578
1579   __ Ldr(temp_reg, MemOperand(str, temp1));
1580   __ Ldr(temp2, MemOperand(arg, temp1));
1581   __ Cmp(temp_reg, temp2);
1582   __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false);
1583   __ Add(temp1, temp1, char_size * 2);
1584   // With string compression, we have compared 8 bytes, otherwise 4 chars.
1585   __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
1586   __ B(hi, &loop, /* far_target */ false);
1587   __ B(&end);
1588
1589   __ Bind(&find_char_diff_2nd_cmp);
1590   if (mirror::kUseStringCompression) {
1591     __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
1592     __ B(ls, &end, /* far_target */ false);  // Was the second comparison fully beyond the end?
1593   } else {
1594     // Without string compression, we can start treating temp0 as signed
1595     // and rely on the signed comparison below.
1596     __ Sub(temp0, temp0, 2);
1597   }
1598
1599   // Find the single character difference.
1600   __ Bind(&find_char_diff);
1601   // Get the bit position of the first character that differs.
1602   __ Eor(temp1, temp2, temp_reg);
1603   __ Rbit(temp1, temp1);
1604   __ Clz(temp1, temp1);
1605
1606   // temp0 = number of characters remaining to compare.
1607   // (Without string compression, it could be < 1 if a difference is found by the second CMP
1608   // in the comparison loop, and after the end of the shorter string data).
1609
1610   // Without string compression, (temp1 >> 4) = character where difference occurs between the last
1611   // two words compared, in the interval [0,1].
1612   // (0 for low half-word different, 1 for high half-word different).
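  // (Worked example: Rbit reverses the bits, so the Clz above yields the index
  //  of the lowest set bit of the XOR, i.e. the first differing bit. If the
  //  words compared last first differ in bit 19, then temp1 == 19 and
  //  (temp1 >> 4) == 1, i.e. the difference is in the high half-word.)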
1613   // With string compression, (temp1 >> 3) = byte where the difference occurs,
1614   // in the interval [0,3].
1615
1616   // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
1617   // the remaining string data, so just return length diff (out).
1618   // The comparison is unsigned for string compression, otherwise signed.
1619   __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
1620   __ B((mirror::kUseStringCompression ? ls : le), &end, /* far_target */ false);
1621
1622   // Extract the characters and calculate the difference.
1623   if (mirror::kUseStringCompression) {
1624     // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
1625     // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
1626     // The compression flag is now in the highest bit of temp3, so let's play some tricks.
1627     __ Orr(temp3, temp3, 0xffu << 23);                          // uncompressed ? 0xff800000u : 0x7ff80000u
1628     __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
1629     __ Asr(temp3, temp3, 7u);                                   // uncompressed ? 0xffff0000u : 0xff0000u.
1630     __ Lsr(temp2, temp2, temp1);                                // Extract second character.
1631     __ Lsr(temp3, temp3, 16u);                                  // uncompressed ? 0xffffu : 0xffu
1632     __ Lsr(out, temp_reg, temp1);                               // Extract first character.
1633     __ And(temp2, temp2, temp3);
1634     __ And(out, out, temp3);
1635   } else {
1636     __ Bic(temp1, temp1, 0xf);
1637     __ Lsr(temp2, temp2, temp1);
1638     __ Lsr(out, temp_reg, temp1);
1639     __ Movt(temp2, 0);
1640     __ Movt(out, 0);
1641   }
1642
1643   __ Sub(out, out, temp2);
1644   temps.Release(temp_reg);
1645
1646   if (mirror::kUseStringCompression) {
1647     __ B(&end);
1648     __ Bind(&different_compression);
1649
1650     // Comparison for different compression style.
1651     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1652     DCHECK_EQ(c_char_size, 1u);
1653
1654     // We want to free up the temp3, currently holding `str.count`, for comparison.
1655     // So, we move it to the bottom bit of the iteration count `temp0` which we then
1656     // need to treat as unsigned. Start by freeing the bit with an ADD and continue
1657     // further down by a LSRS+SBC which will flip the meaning of the flag but allow
1658     // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1659     __ Add(temp0, temp0, temp0);  // Unlike LSL, this ADD is always 16-bit.
1660     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1661     __ Mov(temp1, str);
1662     __ Mov(temp2, arg);
1663     __ Lsrs(temp3, temp3, 1u);  // Continue the move of the compression flag.
1664     {
1665       ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1666                              3 * kMaxInstructionSizeInBytes,
1667                              CodeBufferCheckScope::kMaximumSize);
1668       __ itt(cs);              // Interleave with selection of temp1 and temp2.
1669       __ mov(cs, temp1, arg);  // Preserves flags.
1670       __ mov(cs, temp2, str);  // Preserves flags.
1671     }
1672     __ Sbc(temp0, temp0, 0);  // Complete the move of the compression flag.
1673
1674     // Adjust temp1 and temp2 from string pointers to data pointers.
1675     __ Add(temp1, temp1, value_offset);
1676     __ Add(temp2, temp2, value_offset);
1677
1678     vixl32::Label different_compression_loop;
1679     vixl32::Label different_compression_diff;
1680
1681     // Main loop for different compression.
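    // A sketch of the loop below in pseudo-code (compressed_ptr and
    // uncompressed_ptr are stand-ins for temp1 and temp2; temp0 holds twice the
    // number of characters left to compare, as set up above):
    //   do {
    //     if (*compressed_ptr++ != *uncompressed_ptr++) goto different_compression_diff;
    //     temp0 -= 2;
    //   } while (temp0 > 0);  // Unsigned comparison, via SUBS + BHI.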
1682     temp_reg = temps.Acquire();
1683     __ Bind(&different_compression_loop);
1684     __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
1685     __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
1686     __ Cmp(temp_reg, temp3);
1687     __ B(ne, &different_compression_diff, /* far_target */ false);
1688     __ Subs(temp0, temp0, 2);
1689     __ B(hi, &different_compression_loop, /* far_target */ false);
1690     __ B(&end);
1691
1692     // Calculate the difference.
1693     __ Bind(&different_compression_diff);
1694     __ Sub(out, temp_reg, temp3);
1695     temps.Release(temp_reg);
1696     // Flip the difference if the `arg` is compressed.
1697     // `temp0` contains the inverted `str` compression flag, i.e. the same as the `arg` compression flag.
1698     __ Lsrs(temp0, temp0, 1u);
1699     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1700                   "Expecting 0=compressed, 1=uncompressed");
1701
1702     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1703                            2 * kMaxInstructionSizeInBytes,
1704                            CodeBufferCheckScope::kMaximumSize);
1705     __ it(cc);
1706     __ rsb(cc, out, out, 0);
1707   }
1708
1709   __ Bind(&end);
1710
1711   if (can_slow_path) {
1712     __ Bind(slow_path->GetExitLabel());
1713   }
1714 }
1715
1716 void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
1717   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1718                                                             LocationSummary::kNoCall,
1719                                                             kIntrinsified);
1720   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1721   locations->SetInAt(0, Location::RequiresRegister());
1722   locations->SetInAt(1, Location::RequiresRegister());
1723   // Temporary registers to store lengths of strings and for calculations.
1724   // Using the cbz instruction requires a low register, so explicitly set a temp to be R0.
1725   locations->AddTemp(LocationFrom(r0));
1726   locations->AddTemp(Location::RequiresRegister());
1727   locations->AddTemp(Location::RequiresRegister());
1728
1729   locations->SetOut(Location::RequiresRegister());
1730 }
1731
1732 void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
1733   ArmVIXLAssembler* assembler = GetAssembler();
1734   LocationSummary* locations = invoke->GetLocations();
1735
1736   vixl32::Register str = InputRegisterAt(invoke, 0);
1737   vixl32::Register arg = InputRegisterAt(invoke, 1);
1738   vixl32::Register out = OutputRegister(invoke);
1739
1740   vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1741   vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1742   vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1743
1744   vixl32::Label loop;
1745   vixl32::Label end;
1746   vixl32::Label return_true;
1747   vixl32::Label return_false;
1748   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
1749
1750   // Get offsets of count, value, and class fields within a string object.
1751   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1752   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1753   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1754
1755   // Note that the null check must have been done earlier.
1756   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1757
1758   StringEqualsOptimizations optimizations(invoke);
1759   if (!optimizations.GetArgumentNotNull()) {
1760     // Check if input is null, return false if it is.
1761     __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false);
1762   }
1763
1764   // Reference equality check, return true if same reference.
1765   __ Cmp(str, arg);
1766   __ B(eq, &return_true, /* far_target */ false);
1767
1768   if (!optimizations.GetArgumentIsString()) {
1769     // Instanceof check for the argument by comparing class fields.
1770     // All string objects must have the same type since String cannot be subclassed.
1771     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1772     // If the argument is a string object, its class field must be equal to receiver's class field.
1773     __ Ldr(temp, MemOperand(str, class_offset));
1774     __ Ldr(temp1, MemOperand(arg, class_offset));
1775     __ Cmp(temp, temp1);
1776     __ B(ne, &return_false, /* far_target */ false);
1777   }
1778
1779   // Load `count` fields of this and argument strings.
1780   __ Ldr(temp, MemOperand(str, count_offset));
1781   __ Ldr(temp1, MemOperand(arg, count_offset));
1782   // Check if `count` fields are equal, return false if they're not.
1783   // This also compares the compression style; if it differs, return false.
1784   __ Cmp(temp, temp1);
1785   __ B(ne, &return_false, /* far_target */ false);
1786   // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1787   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1788                 "Expecting 0=compressed, 1=uncompressed");
1789   __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
1790
1791   // Assertions that must hold in order to compare strings 4 bytes at a time.
1792   DCHECK_ALIGNED(value_offset, 4);
1793   static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1794
1795   if (mirror::kUseStringCompression) {
1796     // For string compression, calculate the number of bytes to compare (not chars).
1797     // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1798     __ Lsrs(temp, temp, 1u);  // Extract length and check compression flag.
1799     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1800                            2 * kMaxInstructionSizeInBytes,
1801                            CodeBufferCheckScope::kMaximumSize);
1802     __ it(cs);                     // If uncompressed,
1803     __ add(cs, temp, temp, temp);  // double the byte count.
1804   }
1805
1806   // Store offset of string value in preparation for comparison loop.
1807   __ Mov(temp1, value_offset);
1808
1809   // Loop to compare strings 4 bytes at a time starting at the front of the string.
1810   // Ok to do this because strings are zero-padded to kObjectAlignment.
1811   __ Bind(&loop);
1812   __ Ldr(out, MemOperand(str, temp1));
1813   __ Ldr(temp2, MemOperand(arg, temp1));
1814   __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
1815   __ Cmp(out, temp2);
1816   __ B(ne, &return_false, /* far_target */ false);
1817   // With string compression, we have compared 4 bytes, otherwise 2 chars.
1818   __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1819   __ B(hi, &loop, /* far_target */ false);
1820
1821   // Return true and exit the function.
1822   // If the loop does not result in returning false, we return true.
1823   __ Bind(&return_true);
1824   __ Mov(out, 1);
1825   __ B(final_label);
1826
1827   // Return false and exit the function.
1828   __ Bind(&return_false);
1829   __ Mov(out, 0);
1830
1831   if (end.IsReferenced()) {
1832     __ Bind(&end);
1833   }
1834 }
1835
1836 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1837                                        ArmVIXLAssembler* assembler,
1838                                        CodeGeneratorARMVIXL* codegen,
1839                                        ArenaAllocator* allocator,
1840                                        bool start_at_zero) {
1841   LocationSummary* locations = invoke->GetLocations();
1842
1843   // Note that the null check must have been done earlier.
1844   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1845
1846   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1847   // or directly dispatch for a large constant, or omit the slow-path for a small constant or a char.
1848   SlowPathCodeARMVIXL* slow_path = nullptr;
1849   HInstruction* code_point = invoke->InputAt(1);
1850   if (code_point->IsIntConstant()) {
1851     if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
1852         std::numeric_limits<uint16_t>::max()) {
1853       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1854       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1855       slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1856       codegen->AddSlowPath(slow_path);
1857       __ B(slow_path->GetEntryLabel());
1858       __ Bind(slow_path->GetExitLabel());
1859       return;
1860     }
1861   } else if (code_point->GetType() != Primitive::kPrimChar) {
1862     vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1863     // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1864     __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1865     slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1866     codegen->AddSlowPath(slow_path);
1867     __ B(hs, slow_path->GetEntryLabel());
1868   }
1869
1870   if (start_at_zero) {
1871     vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1872     DCHECK(tmp_reg.Is(r2));
1873     // Start-index = 0.
1874     __ Mov(tmp_reg, 0);
1875   }
1876
1877   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1878   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1879
1880   if (slow_path != nullptr) {
1881     __ Bind(slow_path->GetExitLabel());
1882   }
1883 }
1884
1885 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1886   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1887                                                             LocationSummary::kCallOnMainAndSlowPath,
1888                                                             kIntrinsified);
1889   // We have a hand-crafted assembly stub that follows the runtime calling convention, so it's
1890   // best to align the inputs accordingly.
1891   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1892   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1893   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1894   locations->SetOut(LocationFrom(r0));
1895
1896   // Need to send start-index=0.
1897   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1898 }
1899
1900 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1901   GenerateVisitStringIndexOf(
1902       invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1903 }
1904
1905 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1906   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1907                                                             LocationSummary::kCallOnMainAndSlowPath,
1908                                                             kIntrinsified);
1909   // We have a hand-crafted assembly stub that follows the runtime calling convention, so it's
1910   // best to align the inputs accordingly.
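  // (On ARM the runtime calling convention passes core arguments in r0-r3, so
  // the string, the code point and the start index below land in r0, r1 and r2,
  // and the result comes back in r0.)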
1911 InvokeRuntimeCallingConventionARMVIXL calling_convention; 1912 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1913 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1914 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 1915 locations->SetOut(LocationFrom(r0)); 1916 } 1917 1918 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) { 1919 GenerateVisitStringIndexOf( 1920 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false); 1921 } 1922 1923 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) { 1924 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1925 LocationSummary::kCallOnMainAndSlowPath, 1926 kIntrinsified); 1927 InvokeRuntimeCallingConventionARMVIXL calling_convention; 1928 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1929 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1930 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 1931 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3))); 1932 locations->SetOut(LocationFrom(r0)); 1933 } 1934 1935 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) { 1936 ArmVIXLAssembler* assembler = GetAssembler(); 1937 vixl32::Register byte_array = InputRegisterAt(invoke, 0); 1938 __ Cmp(byte_array, 0); 1939 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); 1940 codegen_->AddSlowPath(slow_path); 1941 __ B(eq, slow_path->GetEntryLabel()); 1942 1943 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path); 1944 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); 1945 __ Bind(slow_path->GetExitLabel()); 1946 } 1947 1948 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) { 1949 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1950 LocationSummary::kCallOnMainOnly, 1951 kIntrinsified); 1952 InvokeRuntimeCallingConventionARMVIXL calling_convention; 1953 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1954 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1955 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 1956 locations->SetOut(LocationFrom(r0)); 1957 } 1958 1959 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) { 1960 // No need to emit code checking whether `locations->InAt(2)` is a null 1961 // pointer, as callers of the native method 1962 // 1963 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) 1964 // 1965 // all include a null check on `data` before calling that method. 
1966   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1967   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1968 }
1969
1970 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1971   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1972                                                             LocationSummary::kCallOnMainAndSlowPath,
1973                                                             kIntrinsified);
1974   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1975   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1976   locations->SetOut(LocationFrom(r0));
1977 }
1978
1979 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1980   ArmVIXLAssembler* assembler = GetAssembler();
1981   vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1982   __ Cmp(string_to_copy, 0);
1983   SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1984   codegen_->AddSlowPath(slow_path);
1985   __ B(eq, slow_path->GetEntryLabel());
1986
1987   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1988   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1989
1990   __ Bind(slow_path->GetExitLabel());
1991 }
1992
1993 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1994   // The only read barrier implementation supporting the
1995   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1996   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1997     return;
1998   }
1999
2000   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2001   LocationSummary* locations = invoke->GetLocations();
2002   if (locations == nullptr) {
2003     return;
2004   }
2005
2006   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2007   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2008   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2009
2010   if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
2011     locations->SetInAt(1, Location::RequiresRegister());
2012   }
2013   if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
2014     locations->SetInAt(3, Location::RequiresRegister());
2015   }
2016   if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
2017     locations->SetInAt(4, Location::RequiresRegister());
2018   }
2019   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2020     // Temporary register IP cannot be used in
2021     // ReadBarrierSystemArrayCopySlowPathARMVIXL (because that register
2022     // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
2023     // temporary register from the register allocator.
2024     locations->AddTemp(Location::RequiresRegister());
2025     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_);
2026     arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
2027   }
2028 }
2029
2030 static void CheckPosition(ArmVIXLAssembler* assembler,
2031                           Location pos,
2032                           vixl32::Register input,
2033                           Location length,
2034                           SlowPathCodeARMVIXL* slow_path,
2035                           vixl32::Register temp,
2036                           bool length_is_input_length = false) {
2037   // Where is the length in the Array?
2038   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
2039
2040   if (pos.IsConstant()) {
2041     int32_t pos_const = Int32ConstantFrom(pos);
2042     if (pos_const == 0) {
2043       if (!length_is_input_length) {
2044         // Check that length(input) >= length.
2045         __ Ldr(temp, MemOperand(input, length_offset));
2046         if (length.IsConstant()) {
2047           __ Cmp(temp, Int32ConstantFrom(length));
2048         } else {
2049           __ Cmp(temp, RegisterFrom(length));
2050         }
2051         __ B(lt, slow_path->GetEntryLabel());
2052       }
2053     } else {
2054       // Check that length(input) >= pos.
2055       __ Ldr(temp, MemOperand(input, length_offset));
2056       __ Subs(temp, temp, pos_const);
2057       __ B(lt, slow_path->GetEntryLabel());
2058
2059       // Check that (length(input) - pos) >= length.
2060       if (length.IsConstant()) {
2061         __ Cmp(temp, Int32ConstantFrom(length));
2062       } else {
2063         __ Cmp(temp, RegisterFrom(length));
2064       }
2065       __ B(lt, slow_path->GetEntryLabel());
2066     }
2067   } else if (length_is_input_length) {
2068     // The only way the copy can succeed is if pos is zero.
2069     vixl32::Register pos_reg = RegisterFrom(pos);
2070     __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
2071   } else {
2072     // Check that pos >= 0.
2073     vixl32::Register pos_reg = RegisterFrom(pos);
2074     __ Cmp(pos_reg, 0);
2075     __ B(lt, slow_path->GetEntryLabel());
2076
2077     // Check that pos <= length(input).
2078     __ Ldr(temp, MemOperand(input, length_offset));
2079     __ Subs(temp, temp, pos_reg);
2080     __ B(lt, slow_path->GetEntryLabel());
2081
2082     // Check that (length(input) - pos) >= length.
2083     if (length.IsConstant()) {
2084       __ Cmp(temp, Int32ConstantFrom(length));
2085     } else {
2086       __ Cmp(temp, RegisterFrom(length));
2087     }
2088     __ B(lt, slow_path->GetEntryLabel());
2089   }
2090 }
2091
2092 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
2093   // The only read barrier implementation supporting the
2094   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2095   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2096
2097   ArmVIXLAssembler* assembler = GetAssembler();
2098   LocationSummary* locations = invoke->GetLocations();
2099
2100   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2101   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2102   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2103   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2104   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2105
2106   vixl32::Register src = InputRegisterAt(invoke, 0);
2107   Location src_pos = locations->InAt(1);
2108   vixl32::Register dest = InputRegisterAt(invoke, 2);
2109   Location dest_pos = locations->InAt(3);
2110   Location length = locations->InAt(4);
2111   Location temp1_loc = locations->GetTemp(0);
2112   vixl32::Register temp1 = RegisterFrom(temp1_loc);
2113   Location temp2_loc = locations->GetTemp(1);
2114   vixl32::Register temp2 = RegisterFrom(temp2_loc);
2115   Location temp3_loc = locations->GetTemp(2);
2116   vixl32::Register temp3 = RegisterFrom(temp3_loc);
2117
2118   SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2119   codegen_->AddSlowPath(intrinsic_slow_path);
2120
2121   vixl32::Label conditions_on_positions_validated;
2122   SystemArrayCopyOptimizations optimizations(invoke);
2123
2124   // If source and destination are the same, we go to the slow path when dest_pos > src_pos:
2125   // the intrinsic copies forward, and that overlapping case would require a backward copy.
2126   if (src_pos.IsConstant()) {
2127     int32_t src_pos_constant = Int32ConstantFrom(src_pos);
2128     if (dest_pos.IsConstant()) {
2129       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
2130       if (optimizations.GetDestinationIsSource()) {
2131         // Checked when building locations.
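        // (CreateSystemArrayCopyLocationSummary bails out without creating
        // locations when the destination is the source and the constant
        // positions would require a backward copy, so the DCHECKs below
        // should not fire.)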
2132         DCHECK_GE(src_pos_constant, dest_pos_constant);
2133       } else if (src_pos_constant < dest_pos_constant) {
2134         __ Cmp(src, dest);
2135         __ B(eq, intrinsic_slow_path->GetEntryLabel());
2136       }
2137
2138       // Checked when building locations.
2139       DCHECK(!optimizations.GetDestinationIsSource()
2140              || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
2141     } else {
2142       if (!optimizations.GetDestinationIsSource()) {
2143         __ Cmp(src, dest);
2144         __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
2145       }
2146       __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
2147       __ B(gt, intrinsic_slow_path->GetEntryLabel());
2148     }
2149   } else {
2150     if (!optimizations.GetDestinationIsSource()) {
2151       __ Cmp(src, dest);
2152       __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
2153     }
2154     if (dest_pos.IsConstant()) {
2155       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
2156       __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
2157     } else {
2158       __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
2159     }
2160     __ B(lt, intrinsic_slow_path->GetEntryLabel());
2161   }
2162
2163   __ Bind(&conditions_on_positions_validated);
2164
2165   if (!optimizations.GetSourceIsNotNull()) {
2166     // Bail out if the source is null.
2167     __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
2168   }
2169
2170   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2171     // Bail out if the destination is null.
2172     __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
2173   }
2174
2175   // If the length is negative, bail out.
2176   // We have already checked in the LocationsBuilder for the constant case.
2177   if (!length.IsConstant() &&
2178       !optimizations.GetCountIsSourceLength() &&
2179       !optimizations.GetCountIsDestinationLength()) {
2180     __ Cmp(RegisterFrom(length), 0);
2181     __ B(lt, intrinsic_slow_path->GetEntryLabel());
2182   }
2183
2184   // Validity checks: source.
2185   CheckPosition(assembler,
2186                 src_pos,
2187                 src,
2188                 length,
2189                 intrinsic_slow_path,
2190                 temp1,
2191                 optimizations.GetCountIsSourceLength());
2192
2193   // Validity checks: dest.
2194   CheckPosition(assembler,
2195                 dest_pos,
2196                 dest,
2197                 length,
2198                 intrinsic_slow_path,
2199                 temp1,
2200                 optimizations.GetCountIsDestinationLength());
2201
2202   if (!optimizations.GetDoesNotNeedTypeCheck()) {
2203     // Check whether all elements of the source array are assignable to the component
2204     // type of the destination array. We do two checks: the classes are the same,
2205     // or the destination is Object[]. If none of these checks succeed, we go to the
2206     // slow path.
2207
2208     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2209       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2210         // /* HeapReference<Class> */ temp1 = src->klass_
2211         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2212             invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
2213         // Bail out if the source is not a non-primitive array.
2214         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2215         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2216             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2217         __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
2218         // If heap poisoning is enabled, `temp1` has been unpoisoned
2219         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2220         // /* uint16_t */ temp1 = static_cast<uint16_t>(temp1->primitive_type_);
2221         __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
2222         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2223         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2224       }
2225
2226       // /* HeapReference<Class> */ temp1 = dest->klass_
2227       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2228           invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
2229
2230       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2231         // Bail out if the destination is not a non-primitive array.
2232         //
2233         // Register `temp1` is not trashed by the read barrier emitted
2234         // by GenerateFieldLoadWithBakerReadBarrier below, as that
2235         // method produces a call to a ReadBarrierMarkRegX entry point,
2236         // which saves all potentially live registers, including
2237         // temporaries such as `temp1`.
2238         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2239         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2240             invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
2241         __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
2242         // If heap poisoning is enabled, `temp2` has been unpoisoned
2243         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2244         // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
2245         __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
2246         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2247         __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
2248       }
2249
2250       // For the same reason given earlier, `temp1` is not trashed by the
2251       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2252       // /* HeapReference<Class> */ temp2 = src->klass_
2253       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2254           invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
2255       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2256       __ Cmp(temp1, temp2);
2257
2258       if (optimizations.GetDestinationIsTypedObjectArray()) {
2259         vixl32::Label do_copy;
2260         __ B(eq, &do_copy, /* far_target */ false);
2261         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2262         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2263             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2264         // /* HeapReference<Class> */ temp1 = temp1->super_class_
2265         // We do not need to emit a read barrier for the following
2266         // heap reference load, as `temp1` is only used in a
2267         // comparison with null below, and this reference is not
2268         // kept afterwards.
2269         __ Ldr(temp1, MemOperand(temp1, super_offset));
2270         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2271         __ Bind(&do_copy);
2272       } else {
2273         __ B(ne, intrinsic_slow_path->GetEntryLabel());
2274       }
2275     } else {
2276       // Non read barrier code.
2277
2278       // /* HeapReference<Class> */ temp1 = dest->klass_
2279       __ Ldr(temp1, MemOperand(dest, class_offset));
2280       // /* HeapReference<Class> */ temp2 = src->klass_
2281       __ Ldr(temp2, MemOperand(src, class_offset));
2282       bool did_unpoison = false;
2283       if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2284           !optimizations.GetSourceIsNonPrimitiveArray()) {
2285         // One or two of the references need to be unpoisoned. Unpoison them
2286         // both to make the identity check valid.
2287         assembler->MaybeUnpoisonHeapReference(temp1);
2288         assembler->MaybeUnpoisonHeapReference(temp2);
2289         did_unpoison = true;
2290       }
2291
2292       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2293         // Bail out if the destination is not a non-primitive array.
2294         // /* HeapReference<Class> */ temp3 = temp1->component_type_
2295         __ Ldr(temp3, MemOperand(temp1, component_offset));
2296         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2297         assembler->MaybeUnpoisonHeapReference(temp3);
2298         // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
2299         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2300         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2301         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2302       }
2303
2304       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2305         // Bail out if the source is not a non-primitive array.
2306         // /* HeapReference<Class> */ temp3 = temp2->component_type_
2307         __ Ldr(temp3, MemOperand(temp2, component_offset));
2308         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2309         assembler->MaybeUnpoisonHeapReference(temp3);
2310         // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
2311         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2312         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2313         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2314       }
2315
2316       __ Cmp(temp1, temp2);
2317
2318       if (optimizations.GetDestinationIsTypedObjectArray()) {
2319         vixl32::Label do_copy;
2320         __ B(eq, &do_copy, /* far_target */ false);
2321         if (!did_unpoison) {
2322           assembler->MaybeUnpoisonHeapReference(temp1);
2323         }
2324         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2325         __ Ldr(temp1, MemOperand(temp1, component_offset));
2326         assembler->MaybeUnpoisonHeapReference(temp1);
2327         // /* HeapReference<Class> */ temp1 = temp1->super_class_
2328         __ Ldr(temp1, MemOperand(temp1, super_offset));
2329         // No need to unpoison the result, we're comparing against null.
2330         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2331         __ Bind(&do_copy);
2332       } else {
2333         __ B(ne, intrinsic_slow_path->GetEntryLabel());
2334       }
2335     }
2336   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2337     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2338     // Bail out if the source is not a non-primitive array.
2339     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2340       // /* HeapReference<Class> */ temp1 = src->klass_
2341       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2342           invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
2343       // /* HeapReference<Class> */ temp3 = temp1->component_type_
2344       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2345           invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2346       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2347       // If heap poisoning is enabled, `temp3` has been unpoisoned
2348       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2349     } else {
2350       // /* HeapReference<Class> */ temp1 = src->klass_
2351       __ Ldr(temp1, MemOperand(src, class_offset));
2352       assembler->MaybeUnpoisonHeapReference(temp1);
2353       // /* HeapReference<Class> */ temp3 = temp1->component_type_
2354       __ Ldr(temp3, MemOperand(temp1, component_offset));
2355       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2356       assembler->MaybeUnpoisonHeapReference(temp3);
2357     }
2358     // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
2359     __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2360     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2361     __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2362   }
2363
2364   if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
2365     // Constant zero length: no need to emit the loop code at all.
2366   } else {
2367     vixl32::Label done;
2368     const Primitive::Type type = Primitive::kPrimNot;
2369     const int32_t element_size = Primitive::ComponentSize(type);
2370
2371     if (length.IsRegister()) {
2372       // Don't enter the copy loop if the length is zero.
2373       __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target */ false);
2374     }
2375
2376     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2377       // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2378
2379       // SystemArrayCopy implementation for Baker read barriers (see
2380       // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
2381       //
2382       //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2383       //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
2384       //   bool is_gray = (rb_state == ReadBarrier::GrayState());
2385       //   if (is_gray) {
2386       //     // Slow-path copy.
2387       //     do {
2388       //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2389       //     } while (src_ptr != end_ptr)
2390       //   } else {
2391       //     // Fast-path copy.
2392       //     do {
2393       //       *dest_ptr++ = *src_ptr++;
2394       //     } while (src_ptr != end_ptr)
2395       //   }
2396
2397       // /* int32_t */ monitor = src->monitor_
2398       __ Ldr(temp2, MemOperand(src, monitor_offset));
2399       // /* LockWord */ lock_word = LockWord(monitor)
2400       static_assert(sizeof(LockWord) == sizeof(int32_t),
2401                     "art::LockWord and int32_t have different sizes.");
2402
2403       // Introduce a dependency on the lock_word including the rb_state,
2404       // which shall prevent load-load reordering without using
2405       // a memory barrier (which would be more expensive).
2406       // `src` is unchanged by this operation, but its value now depends
2407       // on `temp2`.
2408       __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
2409
2410       // Compute the base source address in `temp1`.
2411       // Note that `temp1` (the base source address) is computed from
2412       // `src` (and `src_pos`) here, and thus honors the artificial
2413       // dependency of `src` on `temp2`.
2414       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2415       // Compute the end source address in `temp3`.
2416       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2417       // The base destination address is computed later, as `temp2` is
2418       // used for intermediate computations.
2419
2420       // Slow path used to copy array when `src` is gray.
2421       // Note that the base destination address is computed in `temp2`
2422       // by the slow path code.
2423 SlowPathCodeARMVIXL* read_barrier_slow_path = 2424 new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke); 2425 codegen_->AddSlowPath(read_barrier_slow_path); 2426 2427 // Given the numeric representation, it's enough to check the low bit of the 2428 // rb_state. We do that by shifting the bit out of the lock word with LSRS 2429 // which can be a 16-bit instruction unlike the TST immediate. 2430 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); 2431 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); 2432 __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); 2433 // Carry flag is the last bit shifted out by LSRS. 2434 __ B(cs, read_barrier_slow_path->GetEntryLabel()); 2435 2436 // Fast-path copy. 2437 // Compute the base destination address in `temp2`. 2438 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2); 2439 // Iterate over the arrays and do a raw copy of the objects. We don't need to 2440 // poison/unpoison. 2441 vixl32::Label loop; 2442 __ Bind(&loop); 2443 { 2444 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 2445 const vixl32::Register temp_reg = temps.Acquire(); 2446 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex)); 2447 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); 2448 } 2449 __ Cmp(temp1, temp3); 2450 __ B(ne, &loop, /* far_target */ false); 2451 2452 __ Bind(read_barrier_slow_path->GetExitLabel()); 2453 } else { 2454 // Non read barrier code. 2455 // Compute the base source address in `temp1`. 2456 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1); 2457 // Compute the base destination address in `temp2`. 2458 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2); 2459 // Compute the end source address in `temp3`. 2460 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3); 2461 // Iterate over the arrays and do a raw copy of the objects. We don't need to 2462 // poison/unpoison. 2463 vixl32::Label loop; 2464 __ Bind(&loop); 2465 { 2466 UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); 2467 const vixl32::Register temp_reg = temps.Acquire(); 2468 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex)); 2469 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); 2470 } 2471 __ Cmp(temp1, temp3); 2472 __ B(ne, &loop, /* far_target */ false); 2473 } 2474 __ Bind(&done); 2475 } 2476 2477 // We only need one card marking on the destination array. 2478 codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false); 2479 2480 __ Bind(intrinsic_slow_path->GetExitLabel()); 2481 } 2482 2483 static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { 2484 // If the graph is debuggable, all callee-saved floating-point registers are blocked by 2485 // the code generator. Furthermore, the register allocator creates fixed live intervals 2486 // for all caller-saved registers because we are doing a function call. As a result, if 2487 // the input and output locations are unallocated, the register allocator runs out of 2488 // registers and fails; however, a debuggable graph is not the common case. 
2489 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) { 2490 return; 2491 } 2492 2493 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); 2494 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble); 2495 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble); 2496 2497 LocationSummary* const locations = new (arena) LocationSummary(invoke, 2498 LocationSummary::kCallOnMainOnly, 2499 kIntrinsified); 2500 const InvokeRuntimeCallingConventionARMVIXL calling_convention; 2501 2502 locations->SetInAt(0, Location::RequiresFpuRegister()); 2503 locations->SetOut(Location::RequiresFpuRegister()); 2504 // Native code uses the soft float ABI. 2505 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); 2506 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); 2507 } 2508 2509 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { 2510 // If the graph is debuggable, all callee-saved floating-point registers are blocked by 2511 // the code generator. Furthermore, the register allocator creates fixed live intervals 2512 // for all caller-saved registers because we are doing a function call. As a result, if 2513 // the input and output locations are unallocated, the register allocator runs out of 2514 // registers and fails; however, a debuggable graph is not the common case. 2515 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) { 2516 return; 2517 } 2518 2519 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U); 2520 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble); 2521 DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble); 2522 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble); 2523 2524 LocationSummary* const locations = new (arena) LocationSummary(invoke, 2525 LocationSummary::kCallOnMainOnly, 2526 kIntrinsified); 2527 const InvokeRuntimeCallingConventionARMVIXL calling_convention; 2528 2529 locations->SetInAt(0, Location::RequiresFpuRegister()); 2530 locations->SetInAt(1, Location::RequiresFpuRegister()); 2531 locations->SetOut(Location::RequiresFpuRegister()); 2532 // Native code uses the soft float ABI. 2533 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); 2534 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); 2535 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); 2536 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3))); 2537 } 2538 2539 static void GenFPToFPCall(HInvoke* invoke, 2540 ArmVIXLAssembler* assembler, 2541 CodeGeneratorARMVIXL* codegen, 2542 QuickEntrypointEnum entry) { 2543 LocationSummary* const locations = invoke->GetLocations(); 2544 2545 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); 2546 DCHECK(locations->WillCall() && locations->Intrinsified()); 2547 2548 // Native code uses the soft float ABI. 2549 __ Vmov(RegisterFrom(locations->GetTemp(0)), 2550 RegisterFrom(locations->GetTemp(1)), 2551 InputDRegisterAt(invoke, 0)); 2552 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); 2553 __ Vmov(OutputDRegister(invoke), 2554 RegisterFrom(locations->GetTemp(0)), 2555 RegisterFrom(locations->GetTemp(1))); 2556 } 2557 2558 static void GenFPFPToFPCall(HInvoke* invoke, 2559 ArmVIXLAssembler* assembler, 2560 CodeGeneratorARMVIXL* codegen, 2561 QuickEntrypointEnum entry) { 2562 LocationSummary* const locations = invoke->GetLocations(); 2563 2564 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U); 2565 DCHECK(locations->WillCall() && locations->Intrinsified()); 2566 2567 // Native code uses the soft float ABI. 
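  // The two Vmov pairs below split each double argument across a pair of core
  // registers for the call; the result register pair is then reassembled into
  // the output D register afterwards.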
2568 __ Vmov(RegisterFrom(locations->GetTemp(0)), 2569 RegisterFrom(locations->GetTemp(1)), 2570 InputDRegisterAt(invoke, 0)); 2571 __ Vmov(RegisterFrom(locations->GetTemp(2)), 2572 RegisterFrom(locations->GetTemp(3)), 2573 InputDRegisterAt(invoke, 1)); 2574 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); 2575 __ Vmov(OutputDRegister(invoke), 2576 RegisterFrom(locations->GetTemp(0)), 2577 RegisterFrom(locations->GetTemp(1))); 2578 } 2579 2580 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) { 2581 CreateFPToFPCallLocations(arena_, invoke); 2582 } 2583 2584 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) { 2585 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos); 2586 } 2587 2588 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) { 2589 CreateFPToFPCallLocations(arena_, invoke); 2590 } 2591 2592 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) { 2593 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin); 2594 } 2595 2596 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) { 2597 CreateFPToFPCallLocations(arena_, invoke); 2598 } 2599 2600 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) { 2601 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos); 2602 } 2603 2604 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) { 2605 CreateFPToFPCallLocations(arena_, invoke); 2606 } 2607 2608 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) { 2609 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin); 2610 } 2611 2612 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) { 2613 CreateFPToFPCallLocations(arena_, invoke); 2614 } 2615 2616 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) { 2617 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan); 2618 } 2619 2620 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) { 2621 CreateFPToFPCallLocations(arena_, invoke); 2622 } 2623 2624 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) { 2625 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt); 2626 } 2627 2628 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) { 2629 CreateFPToFPCallLocations(arena_, invoke); 2630 } 2631 2632 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) { 2633 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh); 2634 } 2635 2636 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) { 2637 CreateFPToFPCallLocations(arena_, invoke); 2638 } 2639 2640 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) { 2641 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp); 2642 } 2643 2644 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) { 2645 CreateFPToFPCallLocations(arena_, invoke); 2646 } 2647 2648 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) { 2649 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1); 2650 } 2651 2652 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) { 2653 CreateFPToFPCallLocations(arena_, invoke); 2654 } 2655 2656 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) { 2657 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog); 2658 } 2659 2660 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) { 2661 CreateFPToFPCallLocations(arena_, invoke); 2662 } 2663 2664 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) { 2665 
GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10); 2666 } 2667 2668 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) { 2669 CreateFPToFPCallLocations(arena_, invoke); 2670 } 2671 2672 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) { 2673 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh); 2674 } 2675 2676 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) { 2677 CreateFPToFPCallLocations(arena_, invoke); 2678 } 2679 2680 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) { 2681 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan); 2682 } 2683 2684 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) { 2685 CreateFPToFPCallLocations(arena_, invoke); 2686 } 2687 2688 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) { 2689 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh); 2690 } 2691 2692 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) { 2693 CreateFPFPToFPCallLocations(arena_, invoke); 2694 } 2695 2696 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) { 2697 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2); 2698 } 2699 2700 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) { 2701 CreateFPFPToFPCallLocations(arena_, invoke); 2702 } 2703 2704 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) { 2705 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot); 2706 } 2707 2708 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) { 2709 CreateFPFPToFPCallLocations(arena_, invoke); 2710 } 2711 2712 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) { 2713 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter); 2714 } 2715 2716 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) { 2717 CreateIntToIntLocations(arena_, invoke); 2718 } 2719 2720 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) { 2721 ArmVIXLAssembler* assembler = GetAssembler(); 2722 __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0)); 2723 } 2724 2725 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) { 2726 LocationSummary* locations = new (arena_) LocationSummary(invoke, 2727 LocationSummary::kNoCall, 2728 kIntrinsified); 2729 locations->SetInAt(0, Location::RequiresRegister()); 2730 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 2731 } 2732 2733 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) { 2734 ArmVIXLAssembler* assembler = GetAssembler(); 2735 LocationSummary* locations = invoke->GetLocations(); 2736 2737 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); 2738 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); 2739 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out()); 2740 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out()); 2741 2742 __ Rbit(out_reg_lo, in_reg_hi); 2743 __ Rbit(out_reg_hi, in_reg_lo); 2744 } 2745 2746 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) { 2747 CreateIntToIntLocations(arena_, invoke); 2748 } 2749 2750 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) { 2751 ArmVIXLAssembler* assembler = GetAssembler(); 2752 __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0)); 2753 } 2754 2755 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) { 
2756   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2757                                                             LocationSummary::kNoCall,
2758                                                             kIntrinsified);
2759   locations->SetInAt(0, Location::RequiresRegister());
2760   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2761 }
2762
2763 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2764   ArmVIXLAssembler* assembler = GetAssembler();
2765   LocationSummary* locations = invoke->GetLocations();
2766
2767   vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2768   vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2769   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2770   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2771
2772   __ Rev(out_reg_lo, in_reg_hi);
2773   __ Rev(out_reg_hi, in_reg_lo);
2774 }
2775
2776 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2777   CreateIntToIntLocations(arena_, invoke);
2778 }
2779
2780 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2781   ArmVIXLAssembler* assembler = GetAssembler();
2782   __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2783 }
2784
2785 static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) {
2786   DCHECK(Primitive::IsIntOrLongType(type)) << type;
2787   DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
2788   DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
2789
2790   bool is_long = type == Primitive::kPrimLong;
2791   LocationSummary* locations = instr->GetLocations();
2792   Location in = locations->InAt(0);
2793   vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2794   vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2795   vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2796   vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2797   vixl32::Register out_r = OutputRegister(instr);
2798
2799   // Move data from the core register(s) to a temp D-reg for the bit count calculation, then move back.
2800   // According to the Cortex-A57 and Cortex-A72 optimization guides, transferring data from a core
2801   // register to the upper or lower half of a VFP D-reg incurs extra latency compared to a transfer
2802   // to the full D-reg. That's why, for the integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
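  // Worked example for the int case (where src_1 == src_0): with src_0 == 0x80000001,
  // Vcnt leaves the per-byte counts {1, 0, 0, 1} in each half of the D register, the
  // two Vpaddl steps reduce each 32-bit half to 2, and the final Vmov reads that
  // popcount back from the low S register.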
2803   __ Vmov(tmp_d, src_1, src_0);     // Temp DReg |--src_1|--src_0|
2804   __ Vcnt(Untyped8, tmp_d, tmp_d);  // Temp DReg |c|c|c|c|c|c|c|c|
2805   __ Vpaddl(U8, tmp_d, tmp_d);      // Temp DReg |--c|--c|--c|--c|
2806   __ Vpaddl(U16, tmp_d, tmp_d);     // Temp DReg |------c|------c|
2807   if (is_long) {
2808     __ Vpaddl(U32, tmp_d, tmp_d);   // Temp DReg |--------------c|
2809   }
2810   __ Vmov(out_r, tmp_s);
2811 }
2812
2813 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2814   CreateIntToIntLocations(arena_, invoke);
2815   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2816 }
2817
2818 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2819   GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
2820 }
2821
2822 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2823   VisitIntegerBitCount(invoke);
2824 }
2825
2826 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2827   GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
2828 }
2829
2830 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2831   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2832                                                             LocationSummary::kNoCall,
2833                                                             kIntrinsified);
2834   locations->SetInAt(0, Location::RequiresRegister());
2835   locations->SetInAt(1, Location::RequiresRegister());
2836   locations->SetInAt(2, Location::RequiresRegister());
2837   locations->SetInAt(3, Location::RequiresRegister());
2838   locations->SetInAt(4, Location::RequiresRegister());
2839
2840   // Temporary registers to store lengths of strings and for calculations.
2841   locations->AddTemp(Location::RequiresRegister());
2842   locations->AddTemp(Location::RequiresRegister());
2843   locations->AddTemp(Location::RequiresRegister());
2844 }
2845
2846 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2847   ArmVIXLAssembler* assembler = GetAssembler();
2848   LocationSummary* locations = invoke->GetLocations();
2849
2850   // Check the assumption that sizeof(Char) is 2 (used in scaling below).
2851   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
2852   DCHECK_EQ(char_size, 2u);
2853
2854   // Location of data in the char array buffer.
2855   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2856
2857   // Location of char array data in the string.
2858   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2859
2860   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2861   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2862   vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2863   vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2864   vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2865   vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2866   vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2867
2868   vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2869   vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2870   vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2871
2872   vixl32::Label done, compressed_string_loop;
2873   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
2874   // Compute the address of the dst region to be copied.
2875   __ Add(dst_ptr, dstObj, data_offset);
2876   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2877
2878   __ Subs(num_chr, srcEnd, srcBegin);
2879   // Early out for valid zero-length retrievals.
  __ B(eq, final_label, /* far_target */ false);

  // src range to copy.
  __ Add(src_ptr, srcObj, value_offset);

  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  vixl32::Register temp;
  vixl32::Label compressed_string_preloop;
  if (mirror::kUseStringCompression) {
    // Location of count in string.
    const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
    temp = temps.Acquire();
    // String's length.
    __ Ldr(temp, MemOperand(srcObj, count_offset));
    // The compression flag is the lowest bit of the count field: 0 means the string is
    // compressed (8-bit characters), 1 means it is uncompressed (16-bit characters).
    __ Tst(temp, 1);
    temps.Release(temp);
    __ B(eq, &compressed_string_preloop, /* far_target */ false);
  }
  __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));

  // Do the copy.
  vixl32::Label loop, remainder;

  temp = temps.Acquire();
  // Subtract into a temp first, so that num_chr does not need repairing on the < 4 character
  // path.
  __ Subs(temp, num_chr, 4);
  __ B(lt, &remainder, /* far_target */ false);

  // Keep the result of the earlier subs; we are going to fetch at least 4 characters.
  __ Mov(num_chr, temp);

  // The main loop, used for longer fetches, loads and stores 4x16-bit characters at a time.
  // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
  // to rectify these everywhere this intrinsic applies.)
  __ Bind(&loop);
  __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
  __ Subs(num_chr, num_chr, 4);
  __ Str(temp, MemOperand(dst_ptr, char_size * 2));
  __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
  __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
  temps.Release(temp);
  __ B(ge, &loop, /* far_target */ false);

  __ Adds(num_chr, num_chr, 4);
  __ B(eq, final_label, /* far_target */ false);

  // Loop for the < 4 character case and remainder handling: loads and stores one
  // 16-bit Java character at a time.
  __ Bind(&remainder);
  temp = temps.Acquire();
  __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
  __ Subs(num_chr, num_chr, 1);
  __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
  temps.Release(temp);
  __ B(gt, &remainder, /* far_target */ false);

  if (mirror::kUseStringCompression) {
    // The uncompressed-string copy is done; skip over the compressed-string loop.
    __ B(final_label);

    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
    DCHECK_EQ(c_char_size, 1u);
    // Copy loop for compressed src, copying one character (8-bit) to (16-bit) at a time.
    __ Bind(&compressed_string_preloop);
    __ Add(src_ptr, src_ptr, srcBegin);
    __ Bind(&compressed_string_loop);
    temp = temps.Acquire();
    __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
    __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
    temps.Release(temp);
    __ Subs(num_chr, num_chr, 1);
    __ B(gt, &compressed_string_loop, /* far_target */ false);
  }

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
  ArmVIXLAssembler* const assembler = GetAssembler();
  const vixl32::Register out = OutputRegister(invoke);
  // Shifting left by 1 bit makes the value encodable as an immediate operand;
  // we don't care about the sign bit anyway.
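  // (kPositiveInfinityFloat is 0x7f800000; shifted left by one it becomes 0xff000000, which fits
  // an A32 modified-immediate encoding, so the Eor below needs no literal pool load.)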
  constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;

  __ Vmov(out, InputSRegisterAt(invoke, 0));
  // We don't care about the sign bit, so shift left.
  __ Lsl(out, out, 1);
  __ Eor(out, out, infinity);
  codegen_->GenerateConditionWithZero(kCondEQ, out, out);
}

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
  ArmVIXLAssembler* const assembler = GetAssembler();
  const vixl32::Register out = OutputRegister(invoke);
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  const vixl32::Register temp = temps.Acquire();
  // The highest 32 bits of double precision positive infinity separated into
  // two constants encodable as immediate operands.
  constexpr uint32_t infinity_high = 0x7f000000U;
  constexpr uint32_t infinity_high2 = 0x00f00000U;

  static_assert((infinity_high | infinity_high2) ==
                    static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
                "The constants do not add up to the high 32 bits of double "
                "precision positive infinity.");
  __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
  __ Eor(out, out, infinity_high);
  __ Eor(out, out, infinity_high2);
  // We don't care about the sign bit, so shift left.
  __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
  codegen_->GenerateConditionWithZero(kCondEQ, out, out);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    CreateFPToFPLocations(arena_, invoke);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
  __ Vrintp(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    CreateFPToFPLocations(arena_, invoke);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
  __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
  InvokeRuntimeCallingConventionARMVIXL calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      LocationFrom(r0),
      LocationFrom(calling_convention.GetRegisterAt(0)));
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
  LocationSummary* locations = invoke->GetLocations();
  ArmVIXLAssembler* const assembler = GetAssembler();

  vixl32::Register out = RegisterFrom(locations->Out());
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  vixl32::Register temp = temps.Acquire();
  InvokeRuntimeCallingConventionARMVIXL calling_convention;
  vixl32::Register argument = calling_convention.GetRegisterAt(0);
  if (invoke->InputAt(0)->IsConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
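    // The input is a compile-time constant, so we can statically choose between loading the
    // cached j.l.Integer from the boot image and allocating a new one. The cache covers
    // [info.low, info.high], at least [-128, 127] as the Java language spec requires.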
    if (value >= info.low && value <= info.high) {
      // Just embed the j.l.Integer in the code.
      ScopedObjectAccess soa(Thread::Current());
      mirror::Object* boxed = info.cache->Get(value - info.low);
      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
      __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
    } else {
      // Allocate and initialize a new j.l.Integer.
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
      uint32_t address =
          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
      __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
      __ Mov(temp, value);
      assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
      // one.
      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
    }
  } else {
    vixl32::Register in = RegisterFrom(locations->InAt(0));
    // Check bounds of our cache.
    __ Add(out, in, -info.low);
    __ Cmp(out, info.high - info.low + 1);
    vixl32::Label allocate, done;
    __ B(hs, &allocate, /* far_target */ false);
    // If the value is within the bounds, load the j.l.Integer directly from the array.
    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
    __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
    codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), temp, out);
    assembler->MaybeUnpoisonHeapReference(out);
    __ B(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
    __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
    assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
    // one.
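    // (The StoreStore barrier orders the store to the final `value` field before any subsequent
    // store that publishes the reference to other threads.)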
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
  int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
  __ Ldr(out, MemOperand(tr, offset));
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  vixl32::Register temp = temps.Acquire();
  vixl32::Label done;
  vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
  __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
  __ Dmb(vixl32::ISH);
  // The interrupted flag is set; clear it. The barriers around the store keep it ordered with
  // respect to the preceding load and to subsequent memory operations.
  __ Mov(temp, 0);
  assembler->StoreToOffset(kStoreWord, temp, tr, offset);
  __ Dmb(vixl32::ISH);
  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)  // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)    // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit)

UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppend)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString)

// 1.8.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(ARMVIXL)

#undef __

}  // namespace arm
}  // namespace art