// (removed stray code-search navigation text: "Home | History | Annotate | Download | only in arm")
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
     18 #define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
     19 
     20 #include <deque>
     21 #include <utility>
     22 #include <vector>
     23 
     24 #include "base/arena_containers.h"
     25 #include "base/array_ref.h"
     26 #include "base/logging.h"
     27 #include "constants_arm.h"
     28 #include "utils/arm/managed_register_arm.h"
     29 #include "utils/arm/assembler_arm.h"
     30 #include "offsets.h"
     31 
     32 namespace art {
     33 namespace arm {
     34 
     35 class Thumb2Assembler FINAL : public ArmAssembler {
     36  public:
  // Creates a Thumb2 assembler whose containers are backed by `arena`.
  // `can_relocate_branches` controls whether branch fixups may later be
  // relocated/expanded (queried via CanRelocateBranches()).
  // Starts in mixed 16/32-bit encoding mode (force_32bit_ == false) and
  // with no IT block in progress (it_cond_index_ == kNoItCondition).
  explicit Thumb2Assembler(ArenaAllocator* arena, bool can_relocate_branches = true)
      : ArmAssembler(arena),
        can_relocate_branches_(can_relocate_branches),
        force_32bit_(false),
        it_cond_index_(kNoItCondition),
        next_condition_(AL),
        fixups_(arena->Adapter(kArenaAllocAssembler)),
        fixup_dependents_(arena->Adapter(kArenaAllocAssembler)),
        literals_(arena->Adapter(kArenaAllocAssembler)),
        literal64_dedupe_map_(std::less<uint64_t>(), arena->Adapter(kArenaAllocAssembler)),
        jump_tables_(arena->Adapter(kArenaAllocAssembler)),
        last_position_adjustment_(0u),
        last_old_position_(0u),
        last_fixup_id_(0u) {
    // Delay CFI advance-PC emission: instruction sizes are not final until
    // FinalizeCode() resolves the fixups (see the Fixup comment below).
    cfi().DelayEmittingAdvancePCs();
  }
     53 
  // Trivial destructor: member containers use the arena allocator passed at
  // construction, so there is nothing to release here.
  virtual ~Thumb2Assembler() {
  }
     56 
  // This assembler always emits Thumb (never ARM) encodings.
  bool IsThumb() const OVERRIDE {
    return true;
  }

  // True after Force32Bit() has been called, i.e. only 32-bit encodings
  // are generated from then on.
  bool IsForced32Bit() const {
    return force_32bit_;
  }

  // Whether branch fixups may be relocated/expanded (fixed at construction).
  bool CanRelocateBranches() const {
    return can_relocate_branches_;
  }
     68 
     69   void FinalizeCode() OVERRIDE;
     70 
     71   // Data-processing instructions.
     72   virtual void and_(Register rd, Register rn, const ShifterOperand& so,
     73                     Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
     74 
     75   virtual void eor(Register rd, Register rn, const ShifterOperand& so,
     76                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
     77 
     78   virtual void sub(Register rd, Register rn, const ShifterOperand& so,
     79                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
     80 
     81   virtual void rsb(Register rd, Register rn, const ShifterOperand& so,
     82                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
     83 
     84   virtual void add(Register rd, Register rn, const ShifterOperand& so,
     85                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
     86 
     87   virtual void adc(Register rd, Register rn, const ShifterOperand& so,
     88                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
     89 
     90   virtual void sbc(Register rd, Register rn, const ShifterOperand& so,
     91                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
     92 
     93   virtual void rsc(Register rd, Register rn, const ShifterOperand& so,
     94                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
     95 
     96   void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
     97 
     98   void teq(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
     99 
    100   void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
    101 
    102   void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
    103 
    104   virtual void orr(Register rd, Register rn, const ShifterOperand& so,
    105                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    106 
    107   virtual void orn(Register rd, Register rn, const ShifterOperand& so,
    108                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    109 
    110   virtual void mov(Register rd, const ShifterOperand& so,
    111                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    112 
    113   virtual void bic(Register rd, Register rn, const ShifterOperand& so,
    114                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    115 
    116   virtual void mvn(Register rd, const ShifterOperand& so,
    117                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    118 
    119   // Miscellaneous data-processing instructions.
    120   void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
    121   void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
    122   void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
    123   void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE;
    124   void rev(Register rd, Register rm, Condition cond = AL) OVERRIDE;
    125   void rev16(Register rd, Register rm, Condition cond = AL) OVERRIDE;
    126   void revsh(Register rd, Register rm, Condition cond = AL) OVERRIDE;
    127 
    128   // Multiply instructions.
    129   void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
    130   void mla(Register rd, Register rn, Register rm, Register ra,
    131            Condition cond = AL) OVERRIDE;
    132   void mls(Register rd, Register rn, Register rm, Register ra,
    133            Condition cond = AL) OVERRIDE;
    134   void smull(Register rd_lo, Register rd_hi, Register rn, Register rm,
    135              Condition cond = AL) OVERRIDE;
    136   void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
    137              Condition cond = AL) OVERRIDE;
    138 
    139   void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
    140   void udiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
    141 
    142   // Bit field extract instructions.
    143   void sbfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;
    144   void ubfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;
    145 
    146   // Load/store instructions.
    147   void ldr(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
    148   void str(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
    149 
    150   void ldrb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
    151   void strb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
    152 
    153   void ldrh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
    154   void strh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
    155 
    156   void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
    157   void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
    158 
    159   // Load/store register dual instructions using registers `rd` and `rd` + 1.
    160   void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
    161   void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
    162 
    163   // Load/store register dual instructions using registers `rd` and `rd2`.
    164   // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding
    165   // does not require `rd` to be even, nor `rd2' to be equal to `rd` + 1.
    166   void ldrd(Register rd, Register rd2, const Address& ad, Condition cond);
    167   void strd(Register rd, Register rd2, const Address& ad, Condition cond);
    168 
    169 
    170   void ldm(BlockAddressMode am, Register base,
    171            RegList regs, Condition cond = AL) OVERRIDE;
    172   void stm(BlockAddressMode am, Register base,
    173            RegList regs, Condition cond = AL) OVERRIDE;
    174 
    175   void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE;
    176   void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE;
    177 
    178   void ldrex(Register rd, Register rn, uint16_t imm, Condition cond = AL);
    179   void strex(Register rd, Register rt, Register rn, uint16_t imm, Condition cond = AL);
    180 
    181   void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE;
    182   void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE;
    183 
    184   // Miscellaneous instructions.
    185   void clrex(Condition cond = AL) OVERRIDE;
    186   void nop(Condition cond = AL) OVERRIDE;
    187 
    188   void bkpt(uint16_t imm16) OVERRIDE;
    189   void svc(uint32_t imm24) OVERRIDE;
    190 
    191   // If-then
    192   void it(Condition firstcond, ItState i1 = kItOmitted,
    193         ItState i2 = kItOmitted, ItState i3 = kItOmitted) OVERRIDE;
    194 
    195   void cbz(Register rn, Label* target) OVERRIDE;
    196   void cbnz(Register rn, Label* target) OVERRIDE;
    197 
    198   // Floating point instructions (VFPv3-D16 and VFPv3-D32 profiles).
    199   void vmovsr(SRegister sn, Register rt, Condition cond = AL) OVERRIDE;
    200   void vmovrs(Register rt, SRegister sn, Condition cond = AL) OVERRIDE;
    201   void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
    202   void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL) OVERRIDE;
    203   void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
    204   void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL) OVERRIDE;
    205   void vmovs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
    206   void vmovd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
    207 
    208   // Returns false if the immediate cannot be encoded.
    209   bool vmovs(SRegister sd, float s_imm, Condition cond = AL) OVERRIDE;
    210   bool vmovd(DRegister dd, double d_imm, Condition cond = AL) OVERRIDE;
    211 
    212   void vldrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
    213   void vstrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
    214   void vldrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
    215   void vstrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
    216 
    217   void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
    218   void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
    219   void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
    220   void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
    221   void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
    222   void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
    223   void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
    224   void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
    225   void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
    226   void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
    227   void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
    228   void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
    229 
    230   void vabss(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
    231   void vabsd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
    232   void vnegs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
    233   void vnegd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
    234   void vsqrts(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
    235   void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
    236 
    237   void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
    238   void vcvtds(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
    239   void vcvtis(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
    240   void vcvtid(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
    241   void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
    242   void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
    243   void vcvtus(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
    244   void vcvtud(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
    245   void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
    246   void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
    247 
    248   void vcmps(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
    249   void vcmpd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
    250   void vcmpsz(SRegister sd, Condition cond = AL) OVERRIDE;
    251   void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
    252   void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
    253 
    254   void vcntd(DRegister dd, DRegister dm) OVERRIDE;
    255   void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE;
    256 
    257   void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
    258   void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
    259   void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
    260   void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
    261   void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
    262   void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
    263 
    264   // Branch instructions.
    265   void b(Label* label, Condition cond = AL);
    266   void bl(Label* label, Condition cond = AL);
    267   void blx(Label* label);
    268   void blx(Register rm, Condition cond = AL) OVERRIDE;
    269   void bx(Register rm, Condition cond = AL) OVERRIDE;
    270 
    271   virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
    272                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    273   virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
    274                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    275   virtual void Asr(Register rd, Register rm, uint32_t shift_imm,
    276                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    277   virtual void Ror(Register rd, Register rm, uint32_t shift_imm,
    278                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    279   virtual void Rrx(Register rd, Register rm,
    280                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    281 
    282   virtual void Lsl(Register rd, Register rm, Register rn,
    283                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    284   virtual void Lsr(Register rd, Register rm, Register rn,
    285                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    286   virtual void Asr(Register rd, Register rm, Register rn,
    287                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    288   virtual void Ror(Register rd, Register rm, Register rn,
    289                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    290 
    291   void Push(Register rd, Condition cond = AL) OVERRIDE;
    292   void Pop(Register rd, Condition cond = AL) OVERRIDE;
    293 
    294   void PushList(RegList regs, Condition cond = AL) OVERRIDE;
    295   void PopList(RegList regs, Condition cond = AL) OVERRIDE;
    296   void StoreList(RegList regs, size_t stack_offset) OVERRIDE;
    297   void LoadList(RegList regs, size_t stack_offset) OVERRIDE;
    298 
    299   void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE;
    300 
    301   void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE;
    302   void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE;
    303 
    304   // Memory barriers.
    305   void dmb(DmbOptions flavor) OVERRIDE;
    306 
    307   // Get the final position of a label after local fixup based on the old position
    308   // recorded before FinalizeCode().
    309   uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE;
    310 
    311   using ArmAssembler::NewLiteral;  // Make the helper template visible.
    312 
    313   Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE;
    314   void LoadLiteral(Register rt, Literal* literal) OVERRIDE;
    315   void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE;
    316   void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE;
    317   void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE;
    318 
    319   // Add signed constant value to rd. May clobber IP.
    320   void AddConstant(Register rd, Register rn, int32_t value,
    321                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
    322 
    323   void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE;
    324 
    325   // Load and Store. May clobber IP.
    326   void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
    327   void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE;
    328   void MarkExceptionHandler(Label* label) OVERRIDE;
    329   void LoadFromOffset(LoadOperandType type,
    330                       Register reg,
    331                       Register base,
    332                       int32_t offset,
    333                       Condition cond = AL) OVERRIDE;
    334   void StoreToOffset(StoreOperandType type,
    335                      Register reg,
    336                      Register base,
    337                      int32_t offset,
    338                      Condition cond = AL) OVERRIDE;
    339   void LoadSFromOffset(SRegister reg,
    340                        Register base,
    341                        int32_t offset,
    342                        Condition cond = AL) OVERRIDE;
    343   void StoreSToOffset(SRegister reg,
    344                       Register base,
    345                       int32_t offset,
    346                       Condition cond = AL) OVERRIDE;
    347   void LoadDFromOffset(DRegister reg,
    348                        Register base,
    349                        int32_t offset,
    350                        Condition cond = AL) OVERRIDE;
    351   void StoreDToOffset(DRegister reg,
    352                       Register base,
    353                       int32_t offset,
    354                       Condition cond = AL) OVERRIDE;
    355 
    356   bool ShifterOperandCanHold(Register rd,
    357                              Register rn,
    358                              Opcode opcode,
    359                              uint32_t immediate,
    360                              SetCc set_cc,
    361                              ShifterOperand* shifter_op) OVERRIDE;
    362   using ArmAssembler::ShifterOperandCanHold;  // Don't hide the non-virtual override.
    363 
    364   bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
    365 
    366 
    367   static bool IsInstructionForExceptionHandling(uintptr_t pc);
    368 
    369   // Emit data (e.g. encoded instruction or immediate) to the.
    370   // instruction stream.
    371   void Emit32(int32_t value);     // Emit a 32 bit instruction in thumb format.
    372   void Emit16(int16_t value);     // Emit a 16 bit instruction in little endian format.
    373   void Bind(Label* label) OVERRIDE;
    374 
  // Force the assembler to generate 32 bit instructions.
  // One-way switch: there is no way to return to mixed 16/32-bit mode.
  void Force32Bit() {
    force_32bit_ = true;
  }
    379 
    380   // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This
    381   // will generate a fixup.
    382   JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
    383   // Emit an ADD PC, X to dispatch a jump-table jump. This will generate a fixup.
    384   void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;
    385 
    386  private:
    387   typedef uint16_t FixupId;
    388 
    389   // Fixup: branches and literal pool references.
    390   //
    391   // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This
    392   // depends on both the type of branch and the offset to which it is branching. The 16-bit
    393   // cbz and cbnz instructions may also need to be replaced with a separate 16-bit compare
    394   // instruction and a 16- or 32-bit branch instruction. Load from a literal pool can also be
    395   // 16-bit or 32-bit instruction and, if the method is large, we may need to use a sequence
    396   // of instructions to make up for the limited range of load literal instructions (up to
    397   // 4KiB for the 32-bit variant). When generating code for these insns we don't know the
    398   // size before hand, so we assume it is the smallest available size and determine the final
    399   // code offsets and sizes and emit code in FinalizeCode().
    400   //
    401   // To handle this, we keep a record of every branch and literal pool load in the program.
    402   // The actual instruction encoding for these is delayed until we know the final size of
    403   // every instruction. When we bind a label to a branch we don't know the final location yet
    404   // as some preceding instructions may need to be expanded, so we record a non-final offset.
    405   // In FinalizeCode(), we expand the sizes of branches and literal loads that are out of
     406   // range. With each expansion, we need to update dependent Fixups, i.e. instructions with
    407   // target on the other side of the expanded insn, as their offsets change and this may
    408   // trigger further expansion.
    409   //
    410   // All Fixups have a 'fixup id' which is a 16 bit unsigned number used to identify the
    411   // Fixup. For each unresolved label we keep a singly-linked list of all Fixups pointing
    412   // to it, using the fixup ids as links. The first link is stored in the label's position
    413   // (the label is linked but not bound), the following links are stored in the code buffer,
    414   // in the placeholder where we will eventually emit the actual code.
    415 
    416   class Fixup {
    417    public:
    418     // Branch type.
    419     enum Type : uint8_t {
    420       kConditional,               // B<cond>.
    421       kUnconditional,             // B.
    422       kUnconditionalLink,         // BL.
    423       kUnconditionalLinkX,        // BLX.
    424       kCompareAndBranchXZero,     // cbz/cbnz.
     425       kLoadLiteralNarrow,         // Load narrow integer literal.
    426       kLoadLiteralWide,           // Load wide integer literal.
    427       kLoadLiteralAddr,           // Load address of literal (used for jump table).
    428       kLoadFPLiteralSingle,       // Load FP literal single.
    429       kLoadFPLiteralDouble,       // Load FP literal double.
    430     };
    431 
    432     // Calculated size of branch instruction based on type and offset.
    433     enum Size : uint8_t {
    434       // Branch variants.
    435       kBranch16Bit,
    436       kBranch32Bit,
    437       // NOTE: We don't support branches which would require multiple instructions, i.e.
     438       // conditional branches beyond +-1MiB and unconditional branches beyond +-16MiB.
    439 
    440       // CBZ/CBNZ variants.
    441       kCbxz16Bit,   // CBZ/CBNZ rX, label; X < 8; 7-bit positive offset.
    442       kCbxz32Bit,   // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset.
    443       kCbxz48Bit,   // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset.
    444 
    445       // Load integer literal variants.
    446       // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes.
    447       kLiteral1KiB,
    448       // LDR rX, label; 32-bit variant up to 4KiB offset; 4 bytes.
    449       kLiteral4KiB,
    450       // MOV rX, imm16 + ADD rX, pc + LDR rX, [rX]; X < 8; up to 64KiB offset; 8 bytes.
    451       kLiteral64KiB,
    452       // MOV rX, modimm + ADD rX, pc + LDR rX, [rX, #imm12]; up to 1MiB offset; 10 bytes.
    453       kLiteral1MiB,
    454       // NOTE: We don't provide the 12-byte version of kLiteralFar below where the LDR is 16-bit.
    455       // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes.
    456       kLiteralFar,
    457 
    458       // Load literal base addr.
    459       // ADR rX, label; X < 8; 8 bit immediate, shifted to 10 bit. 2 bytes.
    460       kLiteralAddr1KiB,
    461       // ADR rX, label; 4KiB offset. 4 bytes.
    462       kLiteralAddr4KiB,
    463       // MOV rX, imm16 + ADD rX, pc; 64KiB offset. 6 bytes.
    464       kLiteralAddr64KiB,
    465       // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc; any offset; 10 bytes.
    466       kLiteralAddrFar,
    467 
    468       // Load long or FP literal variants.
    469       // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
    470       kLongOrFPLiteral1KiB,
    471       // MOV ip, imm16 + ADD ip, pc + VLDR s/dX, [IP, #0]; up to 64KiB offset; 10 bytes.
    472       kLongOrFPLiteral64KiB,
    473       // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes.
    474       kLongOrFPLiteralFar,
    475     };
    476 
    // Unresolved branch possibly with a condition.
    // Only plain B/B<cond> may start as 16-bit; BL/BLX must be kBranch32Bit
    // (enforced by the third DCHECK below).
    static Fixup Branch(uint32_t location, Type type, Size size = kBranch16Bit,
                        Condition cond = AL) {
      DCHECK(type == kConditional || type == kUnconditional ||
             type == kUnconditionalLink || type == kUnconditionalLinkX);
      DCHECK(size == kBranch16Bit || size == kBranch32Bit);
      DCHECK(size == kBranch32Bit || (type == kConditional || type == kUnconditional));
      return Fixup(kNoRegister, kNoRegister, kNoSRegister, kNoDRegister,
                   cond, type, size, location);
    }
    487 
    // Unresolved compare-and-branch instruction with a register and condition (EQ or NE).
    // Starts as the 16-bit CBZ/CBNZ (kCbxz16Bit) and may expand to the
    // CMP+Bcc forms (kCbxz32Bit/kCbxz48Bit) during finalization.
    static Fixup CompareAndBranch(uint32_t location, Register rn, Condition cond) {
      DCHECK(cond == EQ || cond == NE);
      return Fixup(rn, kNoRegister, kNoSRegister, kNoDRegister,
                   cond, kCompareAndBranchXZero, kCbxz16Bit, location);
    }
    494 
    // Load narrow integer literal into `rt`.
    static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) {
      DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
             size == kLiteral1MiB || size == kLiteralFar);
      // The kLiteral1KiB and kLiteral64KiB encodings require a low register
      // (see the "X < 8" notes on the Size enumerators above).
      DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
                   AL, kLoadLiteralNarrow, size, location);
    }
    503 
    504     // Load wide literal.
    505     static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2,
    506                                  Size size = kLongOrFPLiteral1KiB) {
    507       DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
    508              size == kLongOrFPLiteralFar);
    509       DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
    510       return Fixup(rt, rt2, kNoSRegister, kNoDRegister,
    511                    AL, kLoadLiteralWide, size, location);
    512     }
    513 
    // Load FP single literal into `sd`.
    // Emitted via the kLongOrFPLiteral* VLDR-based sequences (see Size).
    static Fixup LoadSingleLiteral(uint32_t location, SRegister sd,
                                   Size size = kLongOrFPLiteral1KiB) {
      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
             size == kLongOrFPLiteralFar);
      return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister,
                   AL, kLoadFPLiteralSingle, size, location);
    }
    522 
    // Load FP double literal into `dd`.
    // Emitted via the kLongOrFPLiteral* VLDR-based sequences (see Size).
    static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd,
                                   Size size = kLongOrFPLiteral1KiB) {
      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
             size == kLongOrFPLiteralFar);
      return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd,
                   AL, kLoadFPLiteralDouble, size, location);
    }
    531 
    // Load the address of a literal (used for jump tables) into `rt`.
    static Fixup LoadLiteralAddress(uint32_t location, Register rt, Size size) {
      DCHECK(size == kLiteralAddr1KiB || size == kLiteralAddr4KiB || size == kLiteralAddr64KiB ||
             size == kLiteralAddrFar);
      // The 2-byte ADR encoding (kLiteralAddr1KiB) requires a low register.
      DCHECK(!IsHighRegister(rt) || size != kLiteralAddr1KiB);
      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
                   AL, kLoadLiteralAddr, size, location);
    }
    539 
    Type GetType() const {
      return type_;
    }

    // True for every literal-load fixup. Relies on the Type enum ordering:
    // all enumerators from kLoadLiteralNarrow onwards are literal loads.
    bool IsLoadLiteral() const {
      return GetType() >= kLoadLiteralNarrow;
    }
    547 
    548     // Returns whether the Fixup can expand from the original size.
    549     bool CanExpand() const {
    550       switch (GetOriginalSize()) {
    551         case kBranch32Bit:
    552         case kCbxz48Bit:
    553         case kLiteralFar:
    554         case kLiteralAddrFar:
    555         case kLongOrFPLiteralFar:
    556           return false;
    557         default:
    558           return true;
    559       }
    560     }
    561 
    // Size recorded when the Fixup was created (before any expansion).
    Size GetOriginalSize() const {
      return original_size_;
    }

    // Current (possibly expanded) size.
    Size GetSize() const {
      return size_;
    }

    uint32_t GetOriginalSizeInBytes() const;

    uint32_t GetSizeInBytes() const;

    // Code offset of the fixup site.
    uint32_t GetLocation() const {
      return location_;
    }

    // Target offset; kUnresolved until Resolve() has been called.
    uint32_t GetTarget() const {
      return target_;
    }

    // Accumulated offset adjustment applied via IncreaseAdjustment().
    uint32_t GetAdjustment() const {
      return adjustment_;
    }
    585 
    586     // Prepare the assembler->fixup_dependents_ and each Fixup's dependents_start_/count_.
    587     static void PrepareDependents(Thumb2Assembler* assembler);
    588 
    // Returns this Fixup's slice of assembler->fixup_dependents_: the ids of
    // Fixups affected when this one expands (populated by PrepareDependents()).
    ArrayRef<const FixupId> Dependents(const Thumb2Assembler& assembler) const {
      return ArrayRef<const FixupId>(assembler.fixup_dependents_).SubArray(dependents_start_,
                                                                           dependents_count_);
    }
    593 
    594     // Resolve a branch when the target is known.
    595     void Resolve(uint32_t target) {
    596       DCHECK_EQ(target_, kUnresolved);
    597       DCHECK_NE(target, kUnresolved);
    598       target_ = target;
    599     }
    600 
    601     // Branches with bound targets that are in range can be emitted early.
    602     // However, the caller still needs to check if the branch doesn't go over
    603     // another Fixup that's not ready to be emitted.
    604     bool IsCandidateForEmitEarly() const;
    605 
    606     // Check if the current size is OK for current location_, target_ and adjustment_.
    607     // If not, increase the size. Return the size increase, 0 if unchanged.
    608     // If the target if after this Fixup, also add the difference to adjustment_,
    609     // so that we don't need to consider forward Fixups as their own dependencies.
    610     uint32_t AdjustSizeIfNeeded(uint32_t current_code_size);
    611 
    612     // Increase adjustments. This is called for dependents of a Fixup when its size changes.
    613     void IncreaseAdjustment(uint32_t increase) {
    614       adjustment_ += increase;
    615     }
    616 
    617     // Finalize the branch with an adjustment to the location. Both location and target are updated.
    618     void Finalize(uint32_t location_adjustment) {
    619       DCHECK_NE(target_, kUnresolved);
    620       location_ += location_adjustment;
    621       target_ += location_adjustment;
    622     }
    623 
    624     // Emit the branch instruction into the assembler buffer.  This does the
    625     // encoding into the thumb instruction.
    626     void Emit(AssemblerBuffer* buffer, uint32_t code_size) const;
    627 
    628    private:
    // Private constructor; Fixups are created through the static factory
    // methods above, which pick the appropriate type/size and pass kNoRegister
    // or kNoSRegister/kNoDRegister for unused operands. The target starts out
    // unresolved and all layout bookkeeping is zero-initialized.
    Fixup(Register rn, Register rt2, SRegister sd, DRegister dd,
          Condition cond, Type type, Size size, uint32_t location)
        : rn_(rn),
          rt2_(rt2),
          sd_(sd),
          dd_(dd),
          cond_(cond),
          type_(type),
          original_size_(size), size_(size),  // Current size starts at the original size.
          location_(location),
          target_(kUnresolved),   // Set later via Resolve().
          adjustment_(0u),
          dependents_count_(0u),  // Filled in by PrepareDependents().
          dependents_start_(0u) {
    }
    644 
    645     static size_t SizeInBytes(Size size);
    646 
    647     // The size of padding added before the literal pool.
    648     static size_t LiteralPoolPaddingSize(uint32_t current_code_size);
    649 
    650     // Returns the offset from the PC-using insn to the target.
    651     int32_t GetOffset(uint32_t current_code_size) const;
    652 
    653     size_t IncreaseSize(Size new_size);
    654 
    655     int32_t LoadWideOrFpEncoding(Register rbase, int32_t offset) const;
    656 
    657     template <typename Function>
    658     static void ForExpandableDependencies(Thumb2Assembler* assembler, Function fn);
    659 
    static constexpr uint32_t kUnresolved = 0xffffffff;     // Value for target_ for unresolved.

    const Register rn_;   // Rn for cbnz/cbz, Rt for literal loads.
    Register rt2_;        // For kLoadLiteralWide.
    SRegister sd_;        // For kLoadFPLiteralSingle.
    DRegister dd_;        // For kLoadFPLiteralDouble.
    const Condition cond_;  // Condition code; factories pass AL for non-branch fixups.
    const Type type_;       // Kind of instruction this Fixup encodes.
    Size original_size_;    // Size at creation time (see the constructor).
    Size size_;             // Current encoding size; adjusted by AdjustSizeIfNeeded().
    uint32_t location_;     // Offset into assembler buffer in bytes.
    uint32_t target_;       // Offset into assembler buffer in bytes.
    uint32_t adjustment_;   // The number of extra bytes inserted between location_ and target_.
    // Fixups that require adjustment when current size changes are stored in a single
    // array in the assembler and we store only the start index and count here.
    uint32_t dependents_count_;
    uint32_t dependents_start_;
    677   };
    678 
    679   // Emit a single 32 or 16 bit data processing instruction.
    680   void EmitDataProcessing(Condition cond,
    681                           Opcode opcode,
    682                           SetCc set_cc,
    683                           Register rn,
    684                           Register rd,
    685                           const ShifterOperand& so);
    686 
    687   // Emit a single 32 bit miscellaneous instruction.
    688   void Emit32Miscellaneous(uint8_t op1,
    689                            uint8_t op2,
    690                            uint32_t rest_encoding);
    691 
    692   // Emit reverse byte instructions: rev, rev16, revsh.
    693   void EmitReverseBytes(Register rd, Register rm, uint32_t op);
    694 
    695   // Emit a single 16 bit miscellaneous instruction.
    696   void Emit16Miscellaneous(uint32_t rest_encoding);
    697 
    698   // Must the instruction be 32 bits or can it possibly be encoded
    699   // in 16 bits?
    700   bool Is32BitDataProcessing(Condition cond,
    701                              Opcode opcode,
    702                              SetCc set_cc,
    703                              Register rn,
    704                              Register rd,
    705                              const ShifterOperand& so);
    706 
    707   // Emit a 32 bit data processing instruction.
    708   void Emit32BitDataProcessing(Condition cond,
    709                                Opcode opcode,
    710                                SetCc set_cc,
    711                                Register rn,
    712                                Register rd,
    713                                const ShifterOperand& so);
    714 
    715   // Emit a 16 bit data processing instruction.
    716   void Emit16BitDataProcessing(Condition cond,
    717                                Opcode opcode,
    718                                SetCc set_cc,
    719                                Register rn,
    720                                Register rd,
    721                                const ShifterOperand& so);
    722 
    723   void Emit16BitAddSub(Condition cond,
    724                        Opcode opcode,
    725                        SetCc set_cc,
    726                        Register rn,
    727                        Register rd,
    728                        const ShifterOperand& so);
    729 
    730   uint16_t EmitCompareAndBranch(Register rn, uint16_t prev, bool n);
    731 
    732   void EmitLoadStore(Condition cond,
    733                      bool load,
    734                      bool byte,
    735                      bool half,
    736                      bool is_signed,
    737                      Register rd,
    738                      const Address& ad);
    739 
    740   void EmitMemOpAddressMode3(Condition cond,
    741                              int32_t mode,
    742                              Register rd,
    743                              const Address& ad);
    744 
    745   void EmitMultiMemOp(Condition cond,
    746                       BlockAddressMode am,
    747                       bool load,
    748                       Register base,
    749                       RegList regs);
    750 
    751   void EmitMulOp(Condition cond,
    752                  int32_t opcode,
    753                  Register rd,
    754                  Register rn,
    755                  Register rm,
    756                  Register rs);
    757 
    758   void EmitVFPsss(Condition cond,
    759                   int32_t opcode,
    760                   SRegister sd,
    761                   SRegister sn,
    762                   SRegister sm);
    763 
    764   void EmitVLdmOrStm(int32_t rest,
    765                      uint32_t reg,
    766                      int nregs,
    767                      Register rn,
    768                      bool is_load,
    769                      bool dbl,
    770                      Condition cond);
    771 
    772   void EmitVFPddd(Condition cond,
    773                   int32_t opcode,
    774                   DRegister dd,
    775                   DRegister dn,
    776                   DRegister dm);
    777 
    778   void EmitVFPsd(Condition cond,
    779                  int32_t opcode,
    780                  SRegister sd,
    781                  DRegister dm);
    782 
    783   void EmitVFPds(Condition cond,
    784                  int32_t opcode,
    785                  DRegister dd,
    786                  SRegister sm);
    787 
    788   void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond);
    789 
    790   void EmitBranch(Condition cond, Label* label, bool link, bool x);
    791   static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
    792   static int DecodeBranchOffset(int32_t inst);
    793   void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount,
    794                  Condition cond = AL, SetCc set_cc = kCcDontCare);
    795   void EmitShift(Register rd, Register rn, Shift shift, Register rm,
    796                  Condition cond = AL, SetCc set_cc = kCcDontCare);
    797 
    798   static int32_t GetAllowedLoadOffsetBits(LoadOperandType type);
    799   static int32_t GetAllowedStoreOffsetBits(StoreOperandType type);
    800   bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
    801                                int32_t offset,
    802                                /*out*/ int32_t* add_to_base,
    803                                /*out*/ int32_t* offset_for_load_store);
    804   int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits,
    805                                 Register temp,
    806                                 Register base,
    807                                 int32_t offset,
    808                                 Condition cond);
    809 
    810   // Whether the assembler can relocate branches. If false, unresolved branches will be
    811   // emitted on 32bits.
    812   bool can_relocate_branches_;
    813 
    814   // Force the assembler to use 32 bit thumb2 instructions.
    815   bool force_32bit_;
    816 
    817   // IfThen conditions.  Used to check that conditional instructions match the preceding IT.
    818   Condition it_conditions_[4];
    819   uint8_t it_cond_index_;
    820   Condition next_condition_;
    821 
    822   void SetItCondition(ItState s, Condition cond, uint8_t index);
    823 
    824   void CheckCondition(Condition cond) {
    825     CHECK_EQ(cond, next_condition_);
    826 
    827     // Move to the next condition if there is one.
    828     if (it_cond_index_ < 3) {
    829       ++it_cond_index_;
    830       next_condition_ = it_conditions_[it_cond_index_];
    831     } else {
    832       next_condition_ = AL;
    833     }
    834   }
    835 
    836   void CheckConditionLastIt(Condition cond) {
    837     if (it_cond_index_ < 3) {
    838       // Check that the next condition is AL.  This means that the
    839       // current condition is the last in the IT block.
    840       CHECK_EQ(it_conditions_[it_cond_index_ + 1], AL);
    841     }
    842     CheckCondition(cond);
    843   }
    844 
    845   FixupId AddFixup(Fixup fixup) {
    846     FixupId fixup_id = static_cast<FixupId>(fixups_.size());
    847     fixups_.push_back(fixup);
    848     // For iterating using FixupId, we need the next id to be representable.
    849     DCHECK_EQ(static_cast<size_t>(static_cast<FixupId>(fixups_.size())), fixups_.size());
    850     return fixup_id;
    851   }
    852 
    853   Fixup* GetFixup(FixupId fixup_id) {
    854     DCHECK_LT(fixup_id, fixups_.size());
    855     return &fixups_[fixup_id];
    856   }
    857 
    858   void BindLabel(Label* label, uint32_t bound_pc);
    859   uint32_t BindLiterals();
    860   void BindJumpTables(uint32_t code_size);
    861   void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
    862                            std::deque<FixupId>* fixups_to_recalculate);
    863   uint32_t AdjustFixups();
    864   void EmitFixups(uint32_t adjusted_code_size);
    865   void EmitLiterals();
    866   void EmitJumpTables();
    867   void PatchCFI();
    868 
    869   static int16_t BEncoding16(int32_t offset, Condition cond);
    870   static int32_t BEncoding32(int32_t offset, Condition cond);
    871   static int16_t CbxzEncoding16(Register rn, int32_t offset, Condition cond);
    872   static int16_t CmpRnImm8Encoding16(Register rn, int32_t value);
    873   static int16_t AddRdnRmEncoding16(Register rdn, Register rm);
    874   static int32_t MovwEncoding32(Register rd, int32_t value);
    875   static int32_t MovtEncoding32(Register rd, int32_t value);
    876   static int32_t MovModImmEncoding32(Register rd, int32_t value);
    877   static int16_t LdrLitEncoding16(Register rt, int32_t offset);
    878   static int32_t LdrLitEncoding32(Register rt, int32_t offset);
    879   static int32_t LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset);
    880   static int32_t VldrsEncoding32(SRegister sd, Register rn, int32_t offset);
    881   static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset);
    882   static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset);
    883   static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset);
    884   static int16_t AdrEncoding16(Register rd, int32_t offset);
    885   static int32_t AdrEncoding32(Register rd, int32_t offset);
    886 
  // All Fixups in creation order, indexed by FixupId.
  ArenaVector<Fixup> fixups_;
  // Flattened per-Fixup dependent lists; each Fixup references a sub-range
  // via its dependents_start_/dependents_count_ (see Fixup::Dependents()).
  ArenaVector<FixupId> fixup_dependents_;

  // Use a deque-based container for literals to allow insertions at the end
  // without invalidating pointers and references to existing elements.
  ArenaDeque<Literal> literals_;

  // Deduplication map for 64-bit literals, used for LoadDImmediate().
  ArenaSafeMap<uint64_t, Literal*> literal64_dedupe_map_;

  // Jump table list.
  ArenaDeque<JumpTable> jump_tables_;

  // Data for AdjustedPosition(), see the description there.
  uint32_t last_position_adjustment_;
  uint32_t last_old_position_;
  FixupId last_fixup_id_;
    904 };
    905 
    906 }  // namespace arm
    907 }  // namespace art
    908 
    909 #endif  // ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
    910