/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <iostream>
#include <type_traits>

#include "assembler_arm_vixl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "thread.h"

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

namespace art {
namespace arm {

#ifdef ___
#error "ARM Assembler macro already defined."
#else
#define ___   vixl_masm_.
#endif
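
// Note: with this macro, `___ Add(r0, r1, r2)` expands to `vixl_masm_.Add(r0, r1, r2)`,
// i.e. a call on the underlying VIXL macro assembler (illustrative expansion).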

// Thread register definition.
extern const vixl32::Register tr(TR);
// Marking register definition.
extern const vixl32::Register mr(MR);

void ArmVIXLAssembler::FinalizeCode() {
  vixl_masm_.FinalizeCode();
}

size_t ArmVIXLAssembler::CodeSize() const {
  return vixl_masm_.GetSizeOfCodeGenerated();
}

const uint8_t* ArmVIXLAssembler::CodeBufferBaseAddress() const {
  return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>();
}

void ArmVIXLAssembler::FinalizeInstructions(const MemoryRegion& region) {
  // Copy the instructions from the buffer.
  MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize());
  region.CopyFrom(0, from);
}

void ArmVIXLAssembler::PoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::UnpoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::MaybePoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    PoisonHeapReference(reg);
  }
}

void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    UnpoisonHeapReference(reg);
  }
}

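// Illustrative example (assumed value): 0xFFFFFF0F is not a valid modified immediate, but its
// complement 0x000000F0 is, so LoadImmediate() emits a single "mvn rd, #0xF0" instead of a
// plain "mov".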
void ArmVIXLAssembler::LoadImmediate(vixl32::Register rd, int32_t value) {
  // TODO(VIXL): Implement this optimization in VIXL.
  if (!ShifterOperandCanAlwaysHold(value) && ShifterOperandCanAlwaysHold(~value)) {
    ___ Mvn(rd, ~value);
  } else {
    ___ Mov(rd, value);
  }
}

bool ArmVIXLAssembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
  return vixl_masm_.IsModifiedImmediate(immediate);
}

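// Illustrative examples (assumed values): for ADD/SUB without flag setting, 0xABC fits the
// 12-bit immediate encoding; 0xAB000000 is accepted for any opcode because it is a valid
// modified immediate (an 8-bit value rotated).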
bool ArmVIXLAssembler::ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc) {
  switch (opcode) {
    case ADD:
    case SUB:
      // Immediates of up to 12 bits can be encoded directly if we do not need to set the
      // condition codes.
      if (IsUint<12>(immediate) && set_cc != kCcSet) {
        return true;
      }
      return ShifterOperandCanAlwaysHold(immediate);

    case MOV:
      // TODO: Support less than or equal to 12 bits.
      return ShifterOperandCanAlwaysHold(immediate);

    case MVN:
    default:
      return ShifterOperandCanAlwaysHold(immediate);
  }
}

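// Illustrative example (assumed values): with allowed_offset_bits = 0xFFF and offset = 0x17ABC,
// the high part 0x17000 is a valid modified immediate, so the offset splits into
// add_to_base = 0x17000 and offset_for_load_store = 0xABC.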
bool ArmVIXLAssembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
                                               int32_t offset,
                                               /*out*/ int32_t* add_to_base,
                                               /*out*/ int32_t* offset_for_load_store) {
  int32_t other_bits = offset & ~allowed_offset_bits;
  if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) {
    *add_to_base = other_bits;
    *offset_for_load_store = offset & allowed_offset_bits;
    return true;
  }
  return false;
}

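// Adds the encodable high part of `offset` to `base` (into `temp`) and returns the remaining
// low offset; if the offset cannot be split, materializes the full offset in `temp`, adds
// `base`, and returns 0.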
int32_t ArmVIXLAssembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits,
                                                vixl32::Register temp,
                                                vixl32::Register base,
                                                int32_t offset) {
  DCHECK_NE(offset & ~allowed_offset_bits, 0);
  int32_t add_to_base, offset_for_load;
  if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
    ___ Add(temp, base, add_to_base);
    return offset_for_load;
  } else {
    ___ Mov(temp, offset);
    ___ Add(temp, temp, base);
    return 0;
  }
}

// TODO(VIXL): Implement this in VIXL.
int32_t ArmVIXLAssembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      // We can encode imm12 offset.
      return 0xfff;
    case kLoadSWord:
    case kLoadDWord:
    case kLoadWordPair:
      // We can encode imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
int32_t ArmVIXLAssembler::GetAllowedStoreOffsetBits(StoreOperandType type) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      // We can encode imm12 offset.
      return 0xfff;
    case kStoreSWord:
    case kStoreDWord:
    case kStoreWordPair:
      // We can encode imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldLoadOffsetThumb(LoadOperandType type, int offset) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      return IsAbsoluteUint<12>(offset);
    case kLoadSWord:
    case kLoadDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kLoadWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldStoreOffsetThumb(StoreOperandType type, int offset) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      return IsAbsoluteUint<12>(offset);
    case kStoreSWord:
    case kStoreDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kStoreWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// Implementation note: this method must emit at most one instruction when
// CanHoldStoreOffsetThumb() is true for the given operand type and offset.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
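// Illustrative example (assumed operands): storing a word at [sp, #4096] cannot be encoded
// directly (imm12 covers 0-4095), so the offset is split into "add tmp, sp, #4096" followed by
// "str reg, [tmp]", where tmp is typically the ip scratch register.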
void ArmVIXLAssembler::StoreToOffset(StoreOperandType type,
                                     vixl32::Register reg,
                                     vixl32::Register base,
                                     int32_t offset) {
  vixl32::Register tmp_reg;
  UseScratchRegisterScope temps(&vixl_masm_);

  if (!CanHoldStoreOffsetThumb(type, offset)) {
    CHECK_NE(base.GetCode(), kIpCode);
    if ((reg.GetCode() != kIpCode) &&
        (!vixl_masm_.GetScratchRegisterList()->IsEmpty()) &&
        ((type != kStoreWordPair) || (reg.GetCode() + 1 != kIpCode))) {
      tmp_reg = temps.Acquire();
    } else {
      // Be careful not to use ip twice (for `reg` (or `reg` + 1 in
      // the case of a word-pair store) and `base`) to build the
      // Address object used by the store instruction(s) below.
      // Instead, save R5 on the stack (or R6 if R5 is already used by
      // `base`), use it as a secondary temporary register, and restore
      // it after the store instruction has been emitted.
      tmp_reg = (base.GetCode() != 5) ? r5 : r6;
      ___ Push(tmp_reg);
      if (base.GetCode() == kSpCode) {
        offset += kRegisterSize;
      }
    }
    // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset()
    // and, in the "unsplittable" path, get rid of the "add" by using the indexed store instead.
    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset);
    base = tmp_reg;
  }
  DCHECK(CanHoldStoreOffsetThumb(type, offset));
  switch (type) {
    case kStoreByte:
      ___ Strb(reg, MemOperand(base, offset));
      break;
    case kStoreHalfword:
      ___ Strh(reg, MemOperand(base, offset));
      break;
    case kStoreWord:
      ___ Str(reg, MemOperand(base, offset));
      break;
    case kStoreWordPair:
      ___ Strd(reg, vixl32::Register(reg.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
  if ((tmp_reg.IsValid()) && (tmp_reg.GetCode() != kIpCode)) {
    CHECK(tmp_reg.Is(r5) || tmp_reg.Is(r6)) << tmp_reg;
    ___ Pop(tmp_reg);
  }
}

// Implementation note: this method must emit at most one instruction when
// CanHoldLoadOffsetThumb() is true for the given operand type and offset.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
void ArmVIXLAssembler::LoadFromOffset(LoadOperandType type,
                                      vixl32::Register dest,
                                      vixl32::Register base,
                                      int32_t offset) {
  if (!CanHoldLoadOffsetThumb(type, offset)) {
    CHECK(!base.Is(ip));
    // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks.
    int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type);
    DCHECK_NE(offset & ~allowed_offset_bits, 0);
    int32_t add_to_base, offset_for_load;
    if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
      // Use `dest` for the adjusted base. If it is a low register, we may end up using a
      // 16-bit load.
      AddConstant(dest, base, add_to_base);
      base = dest;
      offset = offset_for_load;
    } else {
      UseScratchRegisterScope temps(&vixl_masm_);
      vixl32::Register temp = (dest.Is(base)) ? temps.Acquire() : dest;
      LoadImmediate(temp, offset);
      // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD.
      // Use `dest` for the adjusted base. If it is a low register, we may end up using a
      // 16-bit load.
      ___ Add(dest, dest, (dest.Is(base)) ? temp : base);
      base = dest;
      offset = 0;
    }
  }

  DCHECK(CanHoldLoadOffsetThumb(type, offset));
  switch (type) {
    case kLoadSignedByte:
      ___ Ldrsb(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedByte:
      ___ Ldrb(dest, MemOperand(base, offset));
      break;
    case kLoadSignedHalfword:
      ___ Ldrsh(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedHalfword:
      ___ Ldrh(dest, MemOperand(base, offset));
      break;
    case kLoadWord:
      CHECK(!dest.IsSP());
      ___ Ldr(dest, MemOperand(base, offset));
      break;
    case kLoadWordPair:
      ___ Ldrd(dest, vixl32::Register(dest.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

void ArmVIXLAssembler::StoreSToOffset(vixl32::SRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::StoreDToOffset(vixl32::DRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadSFromOffset(vixl32::SRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadDFromOffset(vixl32::DRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

// Prefer Str/Ldr to Add + Stm/Ldm in ArmVIXLAssembler::StoreRegisterList and
// ArmVIXLAssembler::LoadRegisterList, where this generates less code (in bytes).
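// Illustrative example (assumed register sets): storing {r1, r2} at stack_offset 8 emits two
// STR instructions, whereas storing {r1, r2, r3, r4, r5} (above the threshold) emits
// "add <scratch>, sp, #8" followed by a single STM of the whole list.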
static constexpr int kRegListThreshold = 4;

void ArmVIXLAssembler::StoreRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        DCHECK_EQ(regs & (1u << base.GetCode()), 0u);
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Stm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Str(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::LoadRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Ldm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Ldr(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::AddConstant(vixl32::Register rd, int32_t value) {
  AddConstant(rd, rd, value);
}

// TODO(VIXL): Consider using ADDS, which updates the flags, where possible.
void ArmVIXLAssembler::AddConstant(vixl32::Register rd,
                                   vixl32::Register rn,
                                   int32_t value) {
  DCHECK(vixl_masm_.OutsideITBlock());
  // TODO(VIXL): implement this optimization in VIXL.
  if (value == 0) {
    if (!rd.Is(rn)) {
      ___ Mov(rd, rn);
    }
    return;
  }
  ___ Add(rd, rn, value);
}

// Inside an IT block we must use the assembler directly; MacroAssembler instructions are not
// permitted.
void ArmVIXLAssembler::AddConstantInIt(vixl32::Register rd,
                                       vixl32::Register rn,
                                       int32_t value,
                                       vixl32::Condition cond) {
  DCHECK(vixl_masm_.InITBlock());
  if (value == 0) {
    ___ mov(cond, rd, rn);
  } else {
    ___ add(cond, rd, rn, value);
  }
}

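// Sketch of the expected expansion (assuming a low register and a near, unbound label):
//   cbz rn, label
// and otherwise:
//   cmp rn, #0
//   beq label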
void ArmVIXLMacroAssembler::CompareAndBranchIfZero(vixl32::Register rn,
                                                   vixl32::Label* label,
                                                   bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    // In T32, the Cbz/Cbnz instructions have the following limitations:
    // - There are only 7 bits (i:imm5:'0') to encode the branch target offset, so the target
    //   cannot be far away.
    // - Only low registers (i.e. R0 .. R7) can be encoded.
    // - Only forward branches (unbound labels) are supported.
    Cbz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(eq, label, is_far_target);
}

void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn,
                                                      vixl32::Label* label,
                                                      bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    Cbnz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(ne, label, is_far_target);
}

void ArmVIXLMacroAssembler::B(vixl32::Label* label) {
  if (!label->IsBound()) {
    // Try to use 16-bit T2 encoding of B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(al, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  MacroAssembler::B(label);
}

void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target) {
  if (!label->IsBound() && !is_far_target) {
    // Try to use 16-bit T2 encoding of B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(cond, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  // To further reduce the Bcc encoding size and use the 16-bit T1 encoding,
  // callers can pass is_far_target=false as a hint to this function.
  // By default this function uses 'EncodingSizeType::Best', which generates 32-bit T3 encodings.
  MacroAssembler::B(cond, label);
}

}  // namespace arm
}  // namespace art