/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <iostream>
#include <type_traits>

#include "assembler_arm_vixl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "thread.h"

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

namespace art {
namespace arm {

#ifdef ___
#error "ARM Assembler macro already defined."
#else
#define ___   vixl_masm_.
#endif

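// `tr` aliases ART's reserved thread register (TR), which holds the current Thread*.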
extern const vixl32::Register tr(TR);

void ArmVIXLAssembler::FinalizeCode() {
  vixl_masm_.FinalizeCode();
}

size_t ArmVIXLAssembler::CodeSize() const {
  return vixl_masm_.GetSizeOfCodeGenerated();
}

const uint8_t* ArmVIXLAssembler::CodeBufferBaseAddress() const {
  return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>();
}

void ArmVIXLAssembler::FinalizeInstructions(const MemoryRegion& region) {
  // Copy the instructions from the buffer.
  MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize());
  region.CopyFrom(0, from);
}

void ArmVIXLAssembler::PoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::UnpoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::MaybePoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    PoisonHeapReference(reg);
  }
}

void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    UnpoisonHeapReference(reg);
  }
}

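// Loads a 32-bit constant into `rd`. If `value` cannot be encoded as a modified
// immediate but its bitwise complement can, an MVN of the complement is emitted
// instead of a MOV, so `rd` still ends up holding `value`.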
void ArmVIXLAssembler::LoadImmediate(vixl32::Register rd, int32_t value) {
  // TODO(VIXL): Implement this optimization in VIXL.
  if (!ShifterOperandCanAlwaysHold(value) && ShifterOperandCanAlwaysHold(~value)) {
    ___ Mvn(rd, ~value);
  } else {
    ___ Mov(rd, value);
  }
}

bool ArmVIXLAssembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
  return vixl_masm_.IsModifiedImmediate(immediate);
}

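// Returns true if `immediate` can be encoded directly in the immediate field of
// the given `opcode`, taking into account whether the condition flags must be set.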
bool ArmVIXLAssembler::ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc) {
  switch (opcode) {
    case ADD:
    case SUB:
      // Less than (or equal to) 12 bits can be done if we don't need to set condition codes.
      if (IsUint<12>(immediate) && set_cc != kCcSet) {
        return true;
      }
      return ShifterOperandCanAlwaysHold(immediate);

    case MOV:
      // TODO: Support less than or equal to 12 bits.
      return ShifterOperandCanAlwaysHold(immediate);

    case MVN:
    default:
      return ShifterOperandCanAlwaysHold(immediate);
  }
}

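// Splits an offset that does not fit a load/store addressing mode into a part that
// can be folded into the base register with a single ADD (`add_to_base`) and a
// residual part that fits the instruction's offset field (`offset_for_load_store`).
// Returns false if no such split is possible.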
bool ArmVIXLAssembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
                                               int32_t offset,
                                               /*out*/ int32_t* add_to_base,
                                               /*out*/ int32_t* offset_for_load_store) {
  int32_t other_bits = offset & ~allowed_offset_bits;
  if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) {
    *add_to_base = offset & ~allowed_offset_bits;
    *offset_for_load_store = offset & allowed_offset_bits;
    return true;
  }
  return false;
}

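// Rewrites an out-of-range load/store offset: the part that cannot be encoded is
// added into `temp` (which the caller then uses as the new base register) and the
// remaining, encodable offset is returned. If the offset cannot be split, the full
// offset is materialized in `temp` and 0 is returned.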
int32_t ArmVIXLAssembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits,
                                                vixl32::Register temp,
                                                vixl32::Register base,
                                                int32_t offset) {
  DCHECK_NE(offset & ~allowed_offset_bits, 0);
  int32_t add_to_base, offset_for_load;
  if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
    ___ Add(temp, base, add_to_base);
    return offset_for_load;
  } else {
    ___ Mov(temp, offset);
    ___ Add(temp, temp, base);
    return 0;
  }
}

// TODO(VIXL): Implement this in VIXL.
int32_t ArmVIXLAssembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      // We can encode imm12 offset.
      return 0xfff;
    case kLoadSWord:
    case kLoadDWord:
    case kLoadWordPair:
      // We can encode imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
int32_t ArmVIXLAssembler::GetAllowedStoreOffsetBits(StoreOperandType type) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      // We can encode imm12 offset.
      return 0xfff;
    case kStoreSWord:
    case kStoreDWord:
    case kStoreWordPair:
      // We can encode imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldLoadOffsetThumb(LoadOperandType type, int offset) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      return IsAbsoluteUint<12>(offset);
    case kLoadSWord:
    case kLoadDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kLoadWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldStoreOffsetThumb(StoreOperandType type, int offset) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      return IsAbsoluteUint<12>(offset);
    case kStoreSWord:
    case kStoreDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kStoreWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// Implementation note: this method must emit at most one instruction when
// CanHoldStoreOffsetThumb(type, offset) is true.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
void ArmVIXLAssembler::StoreToOffset(StoreOperandType type,
                                     vixl32::Register reg,
                                     vixl32::Register base,
                                     int32_t offset) {
  vixl32::Register tmp_reg;
  UseScratchRegisterScope temps(&vixl_masm_);

  if (!CanHoldStoreOffsetThumb(type, offset)) {
    CHECK_NE(base.GetCode(), kIpCode);
    if ((reg.GetCode() != kIpCode) &&
        (!vixl_masm_.GetScratchRegisterList()->IsEmpty()) &&
        ((type != kStoreWordPair) || (reg.GetCode() + 1 != kIpCode))) {
      tmp_reg = temps.Acquire();
    } else {
      // Be careful not to use ip twice (for `reg` (or `reg` + 1 in
      // the case of a word-pair store) and `base`) to build the
      // Address object used by the store instruction(s) below.
      // Instead, save R5 on the stack (or R6 if R5 is already used by
      // `base`), use it as secondary temporary register, and restore
      // it after the store instruction has been emitted.
      tmp_reg = (base.GetCode() != 5) ? r5 : r6;
      ___ Push(tmp_reg);
      if (base.GetCode() == kSpCode) {
        offset += kRegisterSize;
      }
    }
    // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset()
    // and in the "unsplittable" path get rid of the "add" by using the store indexed instead.
    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset);
    base = tmp_reg;
  }
  DCHECK(CanHoldStoreOffsetThumb(type, offset));
  switch (type) {
    case kStoreByte:
      ___ Strb(reg, MemOperand(base, offset));
      break;
    case kStoreHalfword:
      ___ Strh(reg, MemOperand(base, offset));
      break;
    case kStoreWord:
      ___ Str(reg, MemOperand(base, offset));
      break;
    case kStoreWordPair:
      ___ Strd(reg, vixl32::Register(reg.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
  if ((tmp_reg.IsValid()) && (tmp_reg.GetCode() != kIpCode)) {
    CHECK(tmp_reg.Is(r5) || tmp_reg.Is(r6)) << tmp_reg;
    ___ Pop(tmp_reg);
  }
}

// Implementation note: this method must emit at most one instruction when
// CanHoldLoadOffsetThumb(type, offset) is true.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
void ArmVIXLAssembler::LoadFromOffset(LoadOperandType type,
                                      vixl32::Register dest,
                                      vixl32::Register base,
                                      int32_t offset) {
  if (!CanHoldLoadOffsetThumb(type, offset)) {
    CHECK(!base.Is(ip));
    // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks.
    int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type);
    DCHECK_NE(offset & ~allowed_offset_bits, 0);
    int32_t add_to_base, offset_for_load;
    if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
      // Use reg for the adjusted base. If it's low reg, we may end up using 16-bit load.
      AddConstant(dest, base, add_to_base);
      base = dest;
      offset = offset_for_load;
    } else {
      UseScratchRegisterScope temps(&vixl_masm_);
      vixl32::Register temp = (dest.Is(base)) ? temps.Acquire() : dest;
      LoadImmediate(temp, offset);
      // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD.
      // Use reg for the adjusted base. If it's low reg, we may end up using 16-bit load.
      ___ Add(dest, dest, (dest.Is(base)) ? temp : base);
      base = dest;
      offset = 0;
    }
  }

  DCHECK(CanHoldLoadOffsetThumb(type, offset));
  switch (type) {
    case kLoadSignedByte:
      ___ Ldrsb(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedByte:
      ___ Ldrb(dest, MemOperand(base, offset));
      break;
    case kLoadSignedHalfword:
      ___ Ldrsh(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedHalfword:
      ___ Ldrh(dest, MemOperand(base, offset));
      break;
    case kLoadWord:
      CHECK(!dest.IsSP());
      ___ Ldr(dest, MemOperand(base, offset));
      break;
    case kLoadWordPair:
      ___ Ldrd(dest, vixl32::Register(dest.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

void ArmVIXLAssembler::StoreSToOffset(vixl32::SRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::StoreDToOffset(vixl32::DRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadSFromOffset(vixl32::SRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadDFromOffset(vixl32::DRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

// Prefer a sequence of Str/Ldr instructions over Add + Stm/Ldm in
// ArmVIXLAssembler::StoreRegisterList and ArmVIXLAssembler::LoadRegisterList
// when it generates less code (in bytes).
static constexpr int kRegListThreshold = 4;

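// Stores each register in `regs` to consecutive word-sized stack slots starting at
// sp + stack_offset, using a single STM (with an adjusted base if needed) when the
// list is large enough for that to be worthwhile.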
void ArmVIXLAssembler::StoreRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        DCHECK_EQ(regs & (1u << base.GetCode()), 0u);
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Stm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Str(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

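// Counterpart of StoreRegisterList: reloads each register in `regs` from consecutive
// stack slots starting at sp + stack_offset, using a single LDM for larger lists.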
void ArmVIXLAssembler::LoadRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Ldm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Ldr(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::AddConstant(vixl32::Register rd, int32_t value) {
  AddConstant(rd, rd, value);
}

// TODO(VIXL): think about using adds which updates flags where possible.
void ArmVIXLAssembler::AddConstant(vixl32::Register rd,
                                   vixl32::Register rn,
                                   int32_t value) {
  DCHECK(vixl_masm_.OutsideITBlock());
  // TODO(VIXL): implement this optimization in VIXL.
  if (value == 0) {
    if (!rd.Is(rn)) {
      ___ Mov(rd, rn);
    }
    return;
  }
  ___ Add(rd, rn, value);
}

// Inside an IT block we must use the assembler directly; macro-assembler instructions
// are not permitted.
void ArmVIXLAssembler::AddConstantInIt(vixl32::Register rd,
                                       vixl32::Register rn,
                                       int32_t value,
                                       vixl32::Condition cond) {
  DCHECK(vixl_masm_.InITBlock());
  if (value == 0) {
    ___ mov(cond, rd, rn);
  } else {
    ___ add(cond, rd, rn, value);
  }
}

void ArmVIXLMacroAssembler::CompareAndBranchIfZero(vixl32::Register rn,
                                                   vixl32::Label* label,
                                                   bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    // In T32, Cbz/Cbnz instructions have the following limitations:
    // - There are only 7 bits (i:imm5:0) to encode the branch target address (no far targets).
    // - Only low registers (i.e. R0 .. R7) can be encoded.
    // - Only forward branches (unbound labels) are supported.
    Cbz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(eq, label, is_far_target);
}

void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn,
                                                      vixl32::Label* label,
                                                      bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    Cbnz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(ne, label, is_far_target);
}

void ArmVIXLMacroAssembler::B(vixl32::Label* label) {
  if (!label->IsBound()) {
    // Try to use 16-bit T2 encoding of B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(al, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  MacroAssembler::B(label);
}

void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target) {
  if (!label->IsBound() && !is_far_target) {
    // Try to use 16-bit T2 encoding of B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(cond, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  // To further reduce the Bcc encoding size and use the 16-bit T1 encoding,
  // callers can provide a hint to this function, i.e. is_far_target=false.
  // By default this function uses 'EncodingSizeType::Best', which generates the 32-bit T3 encoding.
  MacroAssembler::B(cond, label);
}

}  // namespace arm
}  // namespace art