/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <iostream>
#include <type_traits>

#include "assembler_arm_vixl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "thread.h"

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

namespace art {
namespace arm {

#ifdef ___
#error "ARM Assembler macro already defined."
#else
#define ___ vixl_masm_.
#endif

extern const vixl32::Register tr(TR);

void ArmVIXLAssembler::FinalizeCode() {
  vixl_masm_.FinalizeCode();
}

size_t ArmVIXLAssembler::CodeSize() const {
  return vixl_masm_.GetSizeOfCodeGenerated();
}

const uint8_t* ArmVIXLAssembler::CodeBufferBaseAddress() const {
  return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>();
}

void ArmVIXLAssembler::FinalizeInstructions(const MemoryRegion& region) {
  // Copy the instructions from the buffer.
  MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize());
  region.CopyFrom(0, from);
}

void ArmVIXLAssembler::PoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::UnpoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::MaybePoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    PoisonHeapReference(reg);
  }
}

void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    UnpoisonHeapReference(reg);
  }
}

void ArmVIXLAssembler::LoadImmediate(vixl32::Register rd, int32_t value) {
  // TODO(VIXL): Implement this optimization in VIXL.
  if (!ShifterOperandCanAlwaysHold(value) && ShifterOperandCanAlwaysHold(~value)) {
    ___ Mvn(rd, ~value);
  } else {
    ___ Mov(rd, value);
  }
}

bool ArmVIXLAssembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
  return vixl_masm_.IsModifiedImmediate(immediate);
}

bool ArmVIXLAssembler::ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc) {
  switch (opcode) {
    case ADD:
    case SUB:
      // An immediate of up to 12 bits can be encoded if we don't need to set condition codes.
      if (IsUint<12>(immediate) && set_cc != kCcSet) {
        return true;
      }
      return ShifterOperandCanAlwaysHold(immediate);

    case MOV:
      // TODO: Support less than or equal to 12 bits.
      return ShifterOperandCanAlwaysHold(immediate);

    case MVN:
    default:
      return ShifterOperandCanAlwaysHold(immediate);
  }
}

bool ArmVIXLAssembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
                                               int32_t offset,
                                               /*out*/ int32_t* add_to_base,
                                               /*out*/ int32_t* offset_for_load_store) {
  int32_t other_bits = offset & ~allowed_offset_bits;
  if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) {
    *add_to_base = offset & ~allowed_offset_bits;
    *offset_for_load_store = offset & allowed_offset_bits;
    return true;
  }
  return false;
}

int32_t ArmVIXLAssembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits,
                                                vixl32::Register temp,
                                                vixl32::Register base,
                                                int32_t offset) {
  DCHECK_NE(offset & ~allowed_offset_bits, 0);
  int32_t add_to_base, offset_for_load;
  if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
    ___ Add(temp, base, add_to_base);
    return offset_for_load;
  } else {
    ___ Mov(temp, offset);
    ___ Add(temp, temp, base);
    return 0;
  }
}

// TODO(VIXL): Implement this in VIXL.
int32_t ArmVIXLAssembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      // We can encode imm12 offset.
      return 0xfff;
    case kLoadSWord:
    case kLoadDWord:
    case kLoadWordPair:
      // We can encode imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
int32_t ArmVIXLAssembler::GetAllowedStoreOffsetBits(StoreOperandType type) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      // We can encode imm12 offset.
      return 0xfff;
    case kStoreSWord:
    case kStoreDWord:
    case kStoreWordPair:
      // We can encode imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldLoadOffsetThumb(LoadOperandType type, int offset) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      return IsAbsoluteUint<12>(offset);
    case kLoadSWord:
    case kLoadDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kLoadWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldStoreOffsetThumb(StoreOperandType type, int offset) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      return IsAbsoluteUint<12>(offset);
    case kStoreSWord:
    case kStoreDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kStoreWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// Implementation note: this method must emit at most one instruction when
// Address::CanHoldStoreOffsetThumb.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
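// Illustrative sketch of the offset-splitting strategy below (values are an assumed example):
// a kStoreWord at offset 0x2008 does not fit the imm12 addressing mode, so the high part
// (0x2000, a valid modified immediate) is added into a scratch register (typically ip) and the
// small remainder is encoded in the store itself:
//   add ip, base, #0x2000
//   str reg, [ip, #0x8]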
void ArmVIXLAssembler::StoreToOffset(StoreOperandType type,
                                     vixl32::Register reg,
                                     vixl32::Register base,
                                     int32_t offset) {
  vixl32::Register tmp_reg;
  UseScratchRegisterScope temps(&vixl_masm_);

  if (!CanHoldStoreOffsetThumb(type, offset)) {
    CHECK_NE(base.GetCode(), kIpCode);
    if ((reg.GetCode() != kIpCode) &&
        (!vixl_masm_.GetScratchRegisterList()->IsEmpty()) &&
        ((type != kStoreWordPair) || (reg.GetCode() + 1 != kIpCode))) {
      tmp_reg = temps.Acquire();
    } else {
      // Be careful not to use ip twice (for `reg` (or `reg` + 1 in
      // the case of a word-pair store) and `base`) to build the
      // Address object used by the store instruction(s) below.
      // Instead, save R5 on the stack (or R6 if R5 is already used by
      // `base`), use it as secondary temporary register, and restore
      // it after the store instruction has been emitted.
      tmp_reg = (base.GetCode() != 5) ? r5 : r6;
      ___ Push(tmp_reg);
      if (base.GetCode() == kSpCode) {
        offset += kRegisterSize;
      }
    }
    // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset()
    // and in the "unsplittable" path get rid of the "add" by using the store indexed instead.
    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset);
    base = tmp_reg;
  }
  DCHECK(CanHoldStoreOffsetThumb(type, offset));
  switch (type) {
    case kStoreByte:
      ___ Strb(reg, MemOperand(base, offset));
      break;
    case kStoreHalfword:
      ___ Strh(reg, MemOperand(base, offset));
      break;
    case kStoreWord:
      ___ Str(reg, MemOperand(base, offset));
      break;
    case kStoreWordPair:
      ___ Strd(reg, vixl32::Register(reg.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
  if ((tmp_reg.IsValid()) && (tmp_reg.GetCode() != kIpCode)) {
    CHECK(tmp_reg.Is(r5) || tmp_reg.Is(r6)) << tmp_reg;
    ___ Pop(tmp_reg);
  }
}

// Implementation note: this method must emit at most one instruction when
// Address::CanHoldLoadOffsetThumb.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
void ArmVIXLAssembler::LoadFromOffset(LoadOperandType type,
                                      vixl32::Register dest,
                                      vixl32::Register base,
                                      int32_t offset) {
  if (!CanHoldLoadOffsetThumb(type, offset)) {
    CHECK(!base.Is(ip));
    // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks.
    int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type);
    DCHECK_NE(offset & ~allowed_offset_bits, 0);
    int32_t add_to_base, offset_for_load;
    if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
      // Use dest for the adjusted base. If it's a low reg, we may end up using a 16-bit load.
      AddConstant(dest, base, add_to_base);
      base = dest;
      offset = offset_for_load;
    } else {
      UseScratchRegisterScope temps(&vixl_masm_);
      vixl32::Register temp = (dest.Is(base)) ? temps.Acquire() : dest;
      LoadImmediate(temp, offset);
      // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD.
      // Use dest for the adjusted base. If it's a low reg, we may end up using a 16-bit load.
      ___ Add(dest, dest, (dest.Is(base)) ? temp : base);
      base = dest;
      offset = 0;
    }
  }

  DCHECK(CanHoldLoadOffsetThumb(type, offset));
  switch (type) {
    case kLoadSignedByte:
      ___ Ldrsb(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedByte:
      ___ Ldrb(dest, MemOperand(base, offset));
      break;
    case kLoadSignedHalfword:
      ___ Ldrsh(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedHalfword:
      ___ Ldrh(dest, MemOperand(base, offset));
      break;
    case kLoadWord:
      CHECK(!dest.IsSP());
      ___ Ldr(dest, MemOperand(base, offset));
      break;
    case kLoadWordPair:
      ___ Ldrd(dest, vixl32::Register(dest.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

void ArmVIXLAssembler::StoreSToOffset(vixl32::SRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::StoreDToOffset(vixl32::DRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadSFromOffset(vixl32::SRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadDFromOffset(vixl32::DRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

// Prefer Str to Add/Stm in ArmVIXLAssembler::StoreRegisterList and
// ArmVIXLAssembler::LoadRegisterList where this generates less code (size).
static constexpr int kRegListThreshold = 4;

void ArmVIXLAssembler::StoreRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        DCHECK_EQ(regs & (1u << base.GetCode()), 0u);
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Stm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Str(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::LoadRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Ldm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Ldr(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::AddConstant(vixl32::Register rd, int32_t value) {
  AddConstant(rd, rd, value);
}

// TODO(VIXL): think about using adds which update flags where possible.
void ArmVIXLAssembler::AddConstant(vixl32::Register rd,
                                   vixl32::Register rn,
                                   int32_t value) {
  DCHECK(vixl_masm_.OutsideITBlock());
  // TODO(VIXL): implement this optimization in VIXL.
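  // A zero addend needs no ADD: emit only a register move, and only when rd and rn differ.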
  if (value == 0) {
    if (!rd.Is(rn)) {
      ___ Mov(rd, rn);
    }
    return;
  }
  ___ Add(rd, rn, value);
}

// Inside an IT block we must use the assembler; macro-assembler instructions are not permitted.
void ArmVIXLAssembler::AddConstantInIt(vixl32::Register rd,
                                       vixl32::Register rn,
                                       int32_t value,
                                       vixl32::Condition cond) {
  DCHECK(vixl_masm_.InITBlock());
  if (value == 0) {
    ___ mov(cond, rd, rn);
  } else {
    ___ add(cond, rd, rn, value);
  }
}

void ArmVIXLMacroAssembler::CompareAndBranchIfZero(vixl32::Register rn,
                                                   vixl32::Label* label,
                                                   bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    // In T32, Cbz/Cbnz instructions have the following limitations:
    // - There are only 7 bits (i:imm5:0) to encode the branch target address (cannot be a far
    //   target).
    // - Only low registers (i.e. R0 .. R7) can be encoded.
    // - Only forward branches (unbound labels) are supported.
    Cbz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(eq, label, is_far_target);
}

void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn,
                                                      vixl32::Label* label,
                                                      bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    Cbnz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(ne, label, is_far_target);
}

void ArmVIXLMacroAssembler::B(vixl32::Label* label) {
  if (!label->IsBound()) {
    // Try to use the 16-bit T2 encoding of the B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(al, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  MacroAssembler::B(label);
}

void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target) {
  if (!label->IsBound() && !is_far_target) {
    // Try to use the 16-bit T2 encoding of the B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(cond, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  // To further reduce the Bcc encoding size and use the 16-bit T1 encoding,
  // callers can provide a hint to this function, i.e. is_far_target=false.
  // By default this function uses 'EncodingSizeType::Best', which generates a 32-bit T3 encoding.
  MacroAssembler::B(cond, label);
}

}  // namespace arm
}  // namespace art