/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <iostream>
#include <type_traits>

#include "assembler_arm_vixl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "thread.h"

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

namespace art {
namespace arm {

#ifdef ___
#error "ARM Assembler macro already defined."
#else
#define ___ vixl_masm_.
#endif

// Thread register definition.
extern const vixl32::Register tr(TR);
// Marking register definition.
extern const vixl32::Register mr(MR);

void ArmVIXLAssembler::FinalizeCode() {
  vixl_masm_.FinalizeCode();
}

size_t ArmVIXLAssembler::CodeSize() const {
  return vixl_masm_.GetSizeOfCodeGenerated();
}

const uint8_t* ArmVIXLAssembler::CodeBufferBaseAddress() const {
  return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>();
}

void ArmVIXLAssembler::FinalizeInstructions(const MemoryRegion& region) {
  // Copy the instructions from the buffer.
  MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize());
  region.CopyFrom(0, from);
}

void ArmVIXLAssembler::PoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::UnpoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::MaybePoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    PoisonHeapReference(reg);
  }
}

void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    UnpoisonHeapReference(reg);
  }
}

void ArmVIXLAssembler::LoadImmediate(vixl32::Register rd, int32_t value) {
  // TODO(VIXL): Implement this optimization in VIXL.
  if (!ShifterOperandCanAlwaysHold(value) && ShifterOperandCanAlwaysHold(~value)) {
    ___ Mvn(rd, ~value);
  } else {
    ___ Mov(rd, value);
  }
}

bool ArmVIXLAssembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
  return vixl_masm_.IsModifiedImmediate(immediate);
}

bool ArmVIXLAssembler::ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc) {
  switch (opcode) {
    case ADD:
    case SUB:
      // Immediates that fit in 12 bits can be encoded if we do not need to set condition codes.
      if (IsUint<12>(immediate) && set_cc != kCcSet) {
        return true;
      }
      return ShifterOperandCanAlwaysHold(immediate);

    case MOV:
      // TODO: Support immediates of up to 12 bits.
      return ShifterOperandCanAlwaysHold(immediate);

    case MVN:
    default:
      return ShifterOperandCanAlwaysHold(immediate);
  }
}
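
// Illustrative uses of the predicate above (immediate values assumed for
// exposition, not from the original source):
//   ShifterOperandCanHold(ADD, 0xabcu, kCcDontCare);  // True: fits in imm12.
//   ShifterOperandCanHold(ADD, 0xabcu, kCcSet);       // False: 0xabc is not a
//                                                     // T32 modified immediate.
//   ShifterOperandCanHold(ADD, 0xff00u, kCcSet);      // True: 0xff shifted
//                                                     // left by 8 bits.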

bool ArmVIXLAssembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
                                               int32_t offset,
                                               /*out*/ int32_t* add_to_base,
                                               /*out*/ int32_t* offset_for_load_store) {
  int32_t other_bits = offset & ~allowed_offset_bits;
  if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) {
    *add_to_base = offset & ~allowed_offset_bits;
    *offset_for_load_store = offset & allowed_offset_bits;
    return true;
  }
  return false;
}

int32_t ArmVIXLAssembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits,
                                                vixl32::Register temp,
                                                vixl32::Register base,
                                                int32_t offset) {
  DCHECK_NE(offset & ~allowed_offset_bits, 0);
  int32_t add_to_base, offset_for_load;
  if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
    ___ Add(temp, base, add_to_base);
    return offset_for_load;
  } else {
    ___ Mov(temp, offset);
    ___ Add(temp, temp, base);
    return 0;
  }
}

// TODO(VIXL): Implement this in VIXL.
int32_t ArmVIXLAssembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      // We can encode an imm12 offset.
      return 0xfff;
    case kLoadSWord:
    case kLoadDWord:
    case kLoadWordPair:
      // We can encode an imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
int32_t ArmVIXLAssembler::GetAllowedStoreOffsetBits(StoreOperandType type) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      // We can encode an imm12 offset.
      return 0xfff;
    case kStoreSWord:
    case kStoreDWord:
    case kStoreWordPair:
      // We can encode an imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldLoadOffsetThumb(LoadOperandType type, int offset) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      return IsAbsoluteUint<12>(offset);
    case kLoadSWord:
    case kLoadDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kLoadWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldStoreOffsetThumb(StoreOperandType type, int offset) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      return IsAbsoluteUint<12>(offset);
    case kStoreSWord:
    case kStoreDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kStoreWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}
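
// Worked example for the offset splitting above (values assumed for
// exposition, not from the original source): a word-pair store can only
// encode an imm8:'00' offset, i.e. allowed_offset_bits = 0x3fc. For
// offset = 0x4004, the bits outside the mask are 0x4000, which is a valid
// modified immediate, so CanSplitLoadStoreOffset() yields
// add_to_base = 0x4000 and offset_for_load_store = 0x4: one ADD plus one
// STRD instead of a full immediate materialization.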

// Implementation note: this method must emit at most one instruction when
// CanHoldStoreOffsetThumb() returns true.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
void ArmVIXLAssembler::StoreToOffset(StoreOperandType type,
                                     vixl32::Register reg,
                                     vixl32::Register base,
                                     int32_t offset) {
  vixl32::Register tmp_reg;
  UseScratchRegisterScope temps(&vixl_masm_);

  if (!CanHoldStoreOffsetThumb(type, offset)) {
    CHECK_NE(base.GetCode(), kIpCode);
    if ((reg.GetCode() != kIpCode) &&
        (!vixl_masm_.GetScratchRegisterList()->IsEmpty()) &&
        ((type != kStoreWordPair) || (reg.GetCode() + 1 != kIpCode))) {
      tmp_reg = temps.Acquire();
    } else {
      // Be careful not to use ip twice (for `reg` (or `reg` + 1 in
      // the case of a word-pair store) and `base`) to build the
      // Address object used by the store instruction(s) below.
      // Instead, save R5 on the stack (or R6 if R5 is already used by
      // `base`), use it as a secondary temporary register, and restore
      // it after the store instruction has been emitted.
      tmp_reg = (base.GetCode() != 5) ? r5 : r6;
      ___ Push(tmp_reg);
      if (base.GetCode() == kSpCode) {
        offset += kRegisterSize;
      }
    }
    // TODO: Implement an indexed store (not available for STRD), inline AdjustLoadStoreOffset()
    // and in the "unsplittable" path get rid of the "add" by using the indexed store instead.
    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset);
    base = tmp_reg;
  }
  DCHECK(CanHoldStoreOffsetThumb(type, offset));
  switch (type) {
    case kStoreByte:
      ___ Strb(reg, MemOperand(base, offset));
      break;
    case kStoreHalfword:
      ___ Strh(reg, MemOperand(base, offset));
      break;
    case kStoreWord:
      ___ Str(reg, MemOperand(base, offset));
      break;
    case kStoreWordPair:
      ___ Strd(reg, vixl32::Register(reg.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
  if ((tmp_reg.IsValid()) && (tmp_reg.GetCode() != kIpCode)) {
    CHECK(tmp_reg.Is(r5) || tmp_reg.Is(r6)) << tmp_reg;
    ___ Pop(tmp_reg);
  }
}

// Implementation note: this method must emit at most one instruction when
// CanHoldLoadOffsetThumb() returns true.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
void ArmVIXLAssembler::LoadFromOffset(LoadOperandType type,
                                      vixl32::Register dest,
                                      vixl32::Register base,
                                      int32_t offset) {
  if (!CanHoldLoadOffsetThumb(type, offset)) {
    CHECK(!base.Is(ip));
    // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks.
    int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type);
    DCHECK_NE(offset & ~allowed_offset_bits, 0);
    int32_t add_to_base, offset_for_load;
    if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
      // Use the destination as the adjusted base. If it is a low register,
      // we may end up using a 16-bit load.
      AddConstant(dest, base, add_to_base);
      base = dest;
      offset = offset_for_load;
    } else {
      UseScratchRegisterScope temps(&vixl_masm_);
      vixl32::Register temp = (dest.Is(base)) ? temps.Acquire() : dest;
      LoadImmediate(temp, offset);
      // TODO: Implement an indexed load (not available for LDRD) and use it here to
      // avoid the ADD.
      // Use the destination as the adjusted base. If it is a low register,
      // we may end up using a 16-bit load.
      ___ Add(dest, dest, (dest.Is(base)) ? temp : base);
      base = dest;
      offset = 0;
    }
  }

  DCHECK(CanHoldLoadOffsetThumb(type, offset));
  switch (type) {
    case kLoadSignedByte:
      ___ Ldrsb(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedByte:
      ___ Ldrb(dest, MemOperand(base, offset));
      break;
    case kLoadSignedHalfword:
      ___ Ldrsh(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedHalfword:
      ___ Ldrh(dest, MemOperand(base, offset));
      break;
    case kLoadWord:
      CHECK(!dest.IsSP());
      ___ Ldr(dest, MemOperand(base, offset));
      break;
    case kLoadWordPair:
      ___ Ldrd(dest, vixl32::Register(dest.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}
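
// Worked example (values assumed for exposition, not from the original
// source): LoadFromOffset(kLoadWord, r0, r1, 0x1004) cannot use the imm12
// field directly (0x1004 > 0xfff), but 0x1000 is a valid modified immediate,
// so the split path emits
//   add r0, r1, #0x1000
//   ldr r0, [r0, #0x4]
// instead of materializing the full offset in a temporary register first.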

void ArmVIXLAssembler::StoreSToOffset(vixl32::SRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::StoreDToOffset(vixl32::DRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadSFromOffset(vixl32::SRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadDFromOffset(vixl32::DRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

// Prefer Str to Add/Stm in ArmVIXLAssembler::StoreRegisterList and
// ArmVIXLAssembler::LoadRegisterList where this generates smaller code.
static constexpr int kRegListThreshold = 4;

void ArmVIXLAssembler::StoreRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        DCHECK_EQ(regs & (1u << base.GetCode()), 0u);
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Stm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Str(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::LoadRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Ldm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Ldr(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::AddConstant(vixl32::Register rd, int32_t value) {
  AddConstant(rd, rd, value);
}

// TODO(VIXL): Think about using ADDS, which updates flags, where possible.
void ArmVIXLAssembler::AddConstant(vixl32::Register rd,
                                   vixl32::Register rn,
                                   int32_t value) {
  DCHECK(vixl_masm_.OutsideITBlock());
  // TODO(VIXL): Implement this optimization in VIXL.
  if (value == 0) {
    if (!rd.Is(rn)) {
      ___ Mov(rd, rn);
    }
    return;
  }
  ___ Add(rd, rn, value);
}
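
// Illustrative example (register list and offset assumed for exposition, not
// from the original source): StoreRegisterList((1u << 0) | (1u << 1), 8)
// stores two registers, which is below kRegListThreshold, so it emits
//   str r0, [sp, #8]
//   str r1, [sp, #12]
// A list of five or more registers would instead emit a single STM, preceded
// by an ADD of the offset into a scratch base register when the offset is
// non-zero.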

// Inside an IT block we must use the assembler (lower-case mnemonics);
// macroassembler instructions are not permitted.
void ArmVIXLAssembler::AddConstantInIt(vixl32::Register rd,
                                       vixl32::Register rn,
                                       int32_t value,
                                       vixl32::Condition cond) {
  DCHECK(vixl_masm_.InITBlock());
  if (value == 0) {
    ___ mov(cond, rd, rn);
  } else {
    ___ add(cond, rd, rn, value);
  }
}

void ArmVIXLMacroAssembler::CompareAndBranchIfZero(vixl32::Register rn,
                                                   vixl32::Label* label,
                                                   bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    // In T32, the Cbz/Cbnz instructions have the following limitations:
    // - There are only 7 bits (i:imm5:0) to encode the branch target address, so the
    //   target cannot be far away.
    // - Only low registers (i.e., R0-R7) can be encoded.
    // - Only forward branches (unbound labels) are supported.
    Cbz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(eq, label, is_far_target);
}

void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn,
                                                      vixl32::Label* label,
                                                      bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    Cbnz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(ne, label, is_far_target);
}

void ArmVIXLMacroAssembler::B(vixl32::Label* label) {
  if (!label->IsBound()) {
    // Try to use the 16-bit T2 encoding of the B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(al, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  MacroAssembler::B(label);
}

void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target) {
  if (!label->IsBound() && !is_far_target) {
    // Try to use the 16-bit T1 encoding of the conditional B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(cond, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  // To further reduce the Bcc encoding size and use the 16-bit T1 encoding,
  // callers can provide a hint to this function, i.e. is_far_target = false.
  // By default this function uses 'EncodingSizeType::Best', which generates the
  // 32-bit T3 encoding.
  MacroAssembler::B(cond, label);
}

}  // namespace arm
}  // namespace art