      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "arm64_lir.h"
     18 #include "codegen_arm64.h"
     19 #include "dex/quick/mir_to_lir-inl.h"
     20 #include "dex/reg_storage_eq.h"
     21 
     22 namespace art {
     23 
     24 /* This file contains codegen for the A64 ISA. */
     25 
     26 int32_t Arm64Mir2Lir::EncodeImmSingle(uint32_t bits) {
     27   /*
     28    * Valid values will have the form:
     29    *
     30    *   aBbb.bbbc.defg.h000.0000.0000.0000.0000
     31    *
      32  * where B = not(b). In other words, if b == 1, then B == 0 and vice versa.
     33    */
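           // Worked example (illustration): 1.0f has bit pattern 0x3f800000; bits[18..0]
           // are clear, bits[29..25] are all set and bit[30] != bit[29], so the value is
           // encodable and the function returns (0 << 7) | (1 << 6) | 0x30 = 0x70.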
     34 
      35   // bits[18..0] are cleared.
     36   if ((bits & 0x0007ffff) != 0)
     37     return -1;
     38 
     39   // bits[29..25] are all set or all cleared.
     40   uint32_t b_pattern = (bits >> 16) & 0x3e00;
     41   if (b_pattern != 0 && b_pattern != 0x3e00)
     42     return -1;
     43 
     44   // bit[30] and bit[29] are opposite.
     45   if (((bits ^ (bits << 1)) & 0x40000000) == 0)
     46     return -1;
     47 
     48   // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
     49   // bit7: a000.0000
     50   uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
     51   // bit6: 0b00.0000
     52   uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
     53   // bit5_to_0: 00cd.efgh
     54   uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
     55   return (bit7 | bit6 | bit5_to_0);
     56 }
     57 
     58 int32_t Arm64Mir2Lir::EncodeImmDouble(uint64_t bits) {
     59   /*
     60    * Valid values will have the form:
     61    *
     62    *   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
     63    *   0000.0000.0000.0000.0000.0000.0000.0000
     64    *
     65    * where B = not(b).
     66    */
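           // Worked example (illustration): 2.0 has bit pattern 0x4000000000000000;
           // bits[47..0] are clear, bits[61..54] are all clear and bit[62] != bit[61],
           // so the value is encodable and the function returns 0x00.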
     67 
     68   // bits[47..0] are cleared.
     69   if ((bits & UINT64_C(0xffffffffffff)) != 0)
     70     return -1;
     71 
     72   // bits[61..54] are all set or all cleared.
     73   uint32_t b_pattern = (bits >> 48) & 0x3fc0;
     74   if (b_pattern != 0 && b_pattern != 0x3fc0)
     75     return -1;
     76 
     77   // bit[62] and bit[61] are opposite.
     78   if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0)
     79     return -1;
     80 
     81   // bit7: a000.0000
     82   uint32_t bit7 = ((bits >> 63) & 0x1) << 7;
     83   // bit6: 0b00.0000
     84   uint32_t bit6 = ((bits >> 61) & 0x1) << 6;
     85   // bit5_to_0: 00cd.efgh
     86   uint32_t bit5_to_0 = (bits >> 48) & 0x3f;
     87   return (bit7 | bit6 | bit5_to_0);
     88 }
     89 
     90 size_t Arm64Mir2Lir::GetLoadStoreSize(LIR* lir) {
     91   bool opcode_is_wide = IS_WIDE(lir->opcode);
     92   ArmOpcode opcode = UNWIDE(lir->opcode);
     93   DCHECK(!IsPseudoLirOp(opcode));
     94   const ArmEncodingMap *encoder = &EncodingMap[opcode];
     95   uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
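           // For the load/store encodings used here, bits [31:30] hold the size field,
           // i.e. the log2 of the access size in bytes; GetInstructionOffset() below
           // scales register offsets by 1 << this value.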
     96   return (bits >> 30);
     97 }
     98 
     99 size_t Arm64Mir2Lir::GetInstructionOffset(LIR* lir) {
    100   size_t offset = lir->operands[2];
    101   uint64_t check_flags = GetTargetInstFlags(lir->opcode);
    102   DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
    103   if (check_flags & SCALED_OFFSET_X0) {
    104     DCHECK(check_flags & IS_TERTIARY_OP);
    105     offset = offset * (1 << GetLoadStoreSize(lir));
    106   }
    107   return offset;
    108 }
    109 
    110 LIR* Arm64Mir2Lir::LoadFPConstantValue(RegStorage r_dest, int32_t value) {
    111   DCHECK(r_dest.IsSingle());
    112   if (value == 0) {
    113     return NewLIR2(kA64Fmov2sw, r_dest.GetReg(), rwzr);
    114   } else {
    115     int32_t encoded_imm = EncodeImmSingle((uint32_t)value);
    116     if (encoded_imm >= 0) {
    117       return NewLIR2(kA64Fmov2fI, r_dest.GetReg(), encoded_imm);
    118     }
    119   }
    120 
    121   LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
    122   if (data_target == NULL) {
    123     // Wide, as we need 8B alignment.
    124     data_target = AddWideData(&literal_list_, value, 0);
    125   }
    126 
    127   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
    128   LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
    129                             r_dest.GetReg(), 0, 0, 0, 0, data_target);
    130   AppendLIR(load_pc_rel);
    131   return load_pc_rel;
    132 }
    133 
    134 LIR* Arm64Mir2Lir::LoadFPConstantValueWide(RegStorage r_dest, int64_t value) {
    135   DCHECK(r_dest.IsDouble());
    136   if (value == 0) {
    137     return NewLIR2(kA64Fmov2Sx, r_dest.GetReg(), rxzr);
    138   } else {
    139     int32_t encoded_imm = EncodeImmDouble(value);
    140     if (encoded_imm >= 0) {
    141       return NewLIR2(FWIDE(kA64Fmov2fI), r_dest.GetReg(), encoded_imm);
    142     }
    143   }
    144 
    145   // No short form - load from the literal pool.
    146   int32_t val_lo = Low32Bits(value);
    147   int32_t val_hi = High32Bits(value);
    148   LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
    149   if (data_target == NULL) {
    150     data_target = AddWideData(&literal_list_, val_lo, val_hi);
    151   }
    152 
    153   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
    154   LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp),
    155                             r_dest.GetReg(), 0, 0, 0, 0, data_target);
    156   AppendLIR(load_pc_rel);
    157   return load_pc_rel;
    158 }
    159 
    160 static int CountLeadingZeros(bool is_wide, uint64_t value) {
    161   return (is_wide) ? __builtin_clzll(value) : __builtin_clz((uint32_t)value);
    162 }
    163 
    164 static int CountTrailingZeros(bool is_wide, uint64_t value) {
    165   return (is_wide) ? __builtin_ctzll(value) : __builtin_ctz((uint32_t)value);
    166 }
    167 
    168 static int CountSetBits(bool is_wide, uint64_t value) {
    169   return ((is_wide) ?
    170           __builtin_popcountll(value) : __builtin_popcount((uint32_t)value));
    171 }
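         // Note: __builtin_clz()/__builtin_ctz() are undefined for a zero argument;
         // EncodeLogicalImmediate() below rejects all-zero and all-one values before
         // calling these helpers.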
    172 
    173 /**
    174  * @brief Try encoding an immediate in the form required by logical instructions.
    175  *
    176  * @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value.
    177  * @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and as
    178  *   32-bit if @p is_wide is false.
    179  * @return A non-negative integer containing the encoded immediate or -1 if the encoding failed.
    180  * @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate().
    181  */
    182 int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
    183   unsigned n, imm_s, imm_r;
    184 
    185   // Logical immediates are encoded using parameters n, imm_s and imm_r using
    186   // the following table:
    187   //
    188   //  N   imms    immr    size        S             R
    189   //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
    190   //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
    191   //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
    192   //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
    193   //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
    194   //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
    195   // (s bits must not be all set)
    196   //
    197   // A pattern is constructed of size bits, where the least significant S+1
    198   // bits are set. The pattern is rotated right by R, and repeated across a
    199   // 32 or 64-bit value, depending on destination register width.
    200   //
     201   // To test if an arbitrary immediate can be encoded using this scheme, an
    202   // iterative algorithm is used.
    203   //
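           // Worked example (illustration): EncodeLogicalImmediate(false, 0xffff) returns
           // 0xf, i.e. N = 0, immr = 0, imms = 0b001111: a 32-bit pattern of sixteen
           // consecutive ones with no rotation.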
    204 
    205   // 1. If the value has all set or all clear bits, it can't be encoded.
    206   if (value == 0 || value == ~UINT64_C(0) ||
    207       (!is_wide && (uint32_t)value == ~UINT32_C(0))) {
    208     return -1;
    209   }
    210 
    211   unsigned lead_zero  = CountLeadingZeros(is_wide, value);
    212   unsigned lead_one   = CountLeadingZeros(is_wide, ~value);
    213   unsigned trail_zero = CountTrailingZeros(is_wide, value);
    214   unsigned trail_one  = CountTrailingZeros(is_wide, ~value);
    215   unsigned set_bits   = CountSetBits(is_wide, value);
    216 
    217   // The fixed bits in the immediate s field.
    218   // If width == 64 (X reg), start at 0xFFFFFF80.
    219   // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
    220   // widths won't be executed.
    221   unsigned width = (is_wide) ? 64 : 32;
    222   int imm_s_fixed = (is_wide) ? -128 : -64;
    223   int imm_s_mask = 0x3f;
    224 
    225   for (;;) {
    226     // 2. If the value is two bits wide, it can be encoded.
    227     if (width == 2) {
    228       n = 0;
    229       imm_s = 0x3C;
    230       imm_r = (value & 3) - 1;
    231       break;
    232     }
    233 
    234     n = (width == 64) ? 1 : 0;
    235     imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
    236     if ((lead_zero + set_bits) == width) {
    237       imm_r = 0;
    238     } else {
    239       imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
    240     }
    241 
    242     // 3. If the sum of leading zeros, trailing zeros and set bits is
    243     //    equal to the bit width of the value, it can be encoded.
    244     if (lead_zero + trail_zero + set_bits == width) {
    245       break;
    246     }
    247 
    248     // 4. If the sum of leading ones, trailing ones and unset bits in the
    249     //    value is equal to the bit width of the value, it can be encoded.
    250     if (lead_one + trail_one + (width - set_bits) == width) {
    251       break;
    252     }
    253 
    254     // 5. If the most-significant half of the bitwise value is equal to
    255     //    the least-significant half, return to step 2 using the
    256     //    least-significant half of the value.
    257     uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1;
    258     if ((value & mask) == ((value >> (width >> 1)) & mask)) {
    259       width >>= 1;
    260       set_bits >>= 1;
    261       imm_s_fixed >>= 1;
    262       continue;
    263     }
    264 
    265     // 6. Otherwise, the value can't be encoded.
    266     return -1;
    267   }
    268 
    269   return (n << 12 | imm_r << 6 | imm_s);
    270 }
    271 
    272 // Maximum number of instructions to use for encoding the immediate.
    273 static const int max_num_ops_per_const_load = 2;
    274 
    275 /**
    276  * @brief Return the number of fast halfwords in the given uint64_t integer.
    277  * @details The input integer is split into 4 halfwords (bits 0-15, 16-31, 32-47, 48-63). The
    278  *   number of fast halfwords (halfwords that are either 0 or 0xffff) is returned. See below for
    279  *   a more accurate description.
    280  * @param value The input 64-bit integer.
     281  * @return A value @c retval such that (retval & 0x7) is the maximum of n and m, where n is
    282  *   the number of halfwords with all bits unset (0) and m is the number of halfwords with all bits
    283  *   set (0xffff). Additionally (retval & 0x8) is set when m > n.
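          *   For example (illustration): for value == 0xffffffff00001234 the halfwords are
          *   0x1234, 0x0000, 0xffff and 0xffff, so n == 1, m == 2 and the returned value is
          *   0xa (i.e. 2 | 0x8).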
    284  */
    285 static int GetNumFastHalfWords(uint64_t value) {
    286   unsigned int num_0000_halfwords = 0;
    287   unsigned int num_ffff_halfwords = 0;
    288   for (int shift = 0; shift < 64; shift += 16) {
    289     uint16_t halfword = static_cast<uint16_t>(value >> shift);
    290     if (halfword == 0)
    291       num_0000_halfwords++;
    292     else if (halfword == UINT16_C(0xffff))
    293       num_ffff_halfwords++;
    294   }
    295   if (num_0000_halfwords >= num_ffff_halfwords) {
    296     DCHECK_LE(num_0000_halfwords, 4U);
    297     return num_0000_halfwords;
    298   } else {
    299     DCHECK_LE(num_ffff_halfwords, 4U);
    300     return num_ffff_halfwords | 0x8;
    301   }
    302 }
    303 
    304 // The InexpensiveConstantXXX variants below are used in the promotion algorithm to determine how a
    305 // constant is considered for promotion. If the constant is "inexpensive" then the promotion
    306 // algorithm will give it a low priority for promotion, even when it is referenced many times in
    307 // the code.
    308 
    309 bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) {
    310   // A 32-bit int can always be loaded with 2 instructions (and without using the literal pool).
    311   // We therefore return true and give it a low priority for promotion.
    312   return true;
    313 }
    314 
    315 bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
    316   return EncodeImmSingle(value) >= 0;
    317 }
    318 
    319 bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) {
    320   int num_slow_halfwords = 4 - (GetNumFastHalfWords(value) & 0x7);
    321   if (num_slow_halfwords <= max_num_ops_per_const_load) {
    322     return true;
    323   }
    324   return (EncodeLogicalImmediate(/*is_wide=*/true, value) >= 0);
    325 }
    326 
    327 bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) {
    328   return EncodeImmDouble(value) >= 0;
    329 }
    330 
    331 // The InexpensiveConstantXXX variants below are used to determine which A64 instructions to use
    332 // when one of the operands is an immediate (e.g. register version or immediate version of add).
    333 
    334 bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
    335   switch (opcode) {
    336   case Instruction::IF_EQ:
    337   case Instruction::IF_NE:
    338   case Instruction::IF_LT:
    339   case Instruction::IF_GE:
    340   case Instruction::IF_GT:
    341   case Instruction::IF_LE:
    342   case Instruction::ADD_INT:
    343   case Instruction::ADD_INT_2ADDR:
    344   case Instruction::SUB_INT:
    345   case Instruction::SUB_INT_2ADDR:
    346     // The code below is consistent with the implementation of OpRegRegImm().
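             // A64 ADD/SUB immediates are 12 bits wide, optionally shifted left by 12;
             // e.g. 0xabc and 0xabc000 are encodable while 0xabc123 is not.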
    347     {
    348       uint32_t abs_value = (value == INT_MIN) ? value : std::abs(value);
    349       if (abs_value < 0x1000) {
    350         return true;
    351       } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
    352         return true;
    353       }
    354       return false;
    355     }
    356   case Instruction::SHL_INT:
    357   case Instruction::SHL_INT_2ADDR:
    358   case Instruction::SHR_INT:
    359   case Instruction::SHR_INT_2ADDR:
    360   case Instruction::USHR_INT:
    361   case Instruction::USHR_INT_2ADDR:
    362     return true;
    363   case Instruction::AND_INT:
    364   case Instruction::AND_INT_2ADDR:
    365   case Instruction::AND_INT_LIT16:
    366   case Instruction::AND_INT_LIT8:
    367   case Instruction::OR_INT:
    368   case Instruction::OR_INT_2ADDR:
    369   case Instruction::OR_INT_LIT16:
    370   case Instruction::OR_INT_LIT8:
    371   case Instruction::XOR_INT:
    372   case Instruction::XOR_INT_2ADDR:
    373   case Instruction::XOR_INT_LIT16:
    374   case Instruction::XOR_INT_LIT8:
    375     if (value == 0 || value == INT32_C(-1)) {
    376       return true;
    377     }
    378     return (EncodeLogicalImmediate(/*is_wide=*/false, value) >= 0);
    379   default:
    380     return false;
    381   }
    382 }
    383 
    384 /*
     385  * Load an immediate using a single instruction when possible; otherwise
     386  * use a pair of movz and movk instructions.
     387  *
     388  * No additional register-clobbering operation is performed. Use this version when
     389  * 1) r_dest is freshly returned from AllocTemp, or
     390  * 2) the codegen is under fixed register usage.
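          * For example (illustration, using w0): 0x12340000 loads with a single
          * `movz w0, #0x1234, lsl #16', 0xffff1234 with a single `movn w0, #0xedcb',
          * and 0x00123456 takes a movz/movk pair.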
    391  */
    392 LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
    393   LIR* res;
    394 
    395   if (r_dest.IsFloat()) {
    396     return LoadFPConstantValue(r_dest, value);
    397   }
    398 
    399   if (r_dest.Is64Bit()) {
    400     return LoadConstantWide(r_dest, value);
    401   }
    402 
    403   // Loading SP/ZR with an immediate is not supported.
    404   DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
    405   DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));
    406 
    407   // Compute how many movk, movz instructions are needed to load the value.
    408   uint16_t high_bits = High16Bits(value);
    409   uint16_t low_bits = Low16Bits(value);
    410 
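           // A halfword is "fast" when it is 0x0000 or 0xffff: adding one wraps exactly
           // those two values (and only those) into the range [0, 1].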
    411   bool low_fast = ((uint16_t)(low_bits + 1) <= 1);
    412   bool high_fast = ((uint16_t)(high_bits + 1) <= 1);
    413 
    414   if (LIKELY(low_fast || high_fast)) {
    415     // 1 instruction is enough to load the immediate.
    416     if (LIKELY(low_bits == high_bits)) {
    417       // Value is either 0 or -1: we can just use wzr.
    418       ArmOpcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr;
    419       res = NewLIR2(opcode, r_dest.GetReg(), rwzr);
    420     } else {
    421       uint16_t uniform_bits, useful_bits;
    422       int shift;
    423 
    424       if (LIKELY(high_fast)) {
    425         shift = 0;
    426         uniform_bits = high_bits;
    427         useful_bits = low_bits;
    428       } else {
    429         shift = 1;
    430         uniform_bits = low_bits;
    431         useful_bits = high_bits;
    432       }
    433 
    434       if (UNLIKELY(uniform_bits != 0)) {
    435         res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift);
    436       } else {
    437         res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift);
    438       }
    439     }
    440   } else {
    441     // A movz/movk pair would take 2 instructions. Try a logical immediate first.
    442     int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value);
    443     if (log_imm >= 0) {
    444       res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm);
    445     } else {
    446       // Use 2 instructions.
    447       res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0);
    448       NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1);
    449     }
    450   }
    451 
    452   return res;
    453 }
    454 
    455 // TODO: clean up the names. LoadConstantWide() should really be LoadConstantNoClobberWide().
    456 LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
    457   if (r_dest.IsFloat()) {
    458     return LoadFPConstantValueWide(r_dest, value);
    459   }
    460 
    461   DCHECK(r_dest.Is64Bit());
    462 
    463   // Loading SP/ZR with an immediate is not supported.
    464   DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
    465   DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));
    466 
    467   if (LIKELY(value == INT64_C(0) || value == INT64_C(-1))) {
    468     // value is either 0 or -1: we can just use xzr.
    469     ArmOpcode opcode = LIKELY(value == 0) ? WIDE(kA64Mov2rr) : WIDE(kA64Mvn2rr);
    470     return NewLIR2(opcode, r_dest.GetReg(), rxzr);
    471   }
    472 
    473   // At least one of value's halfwords is neither 0x0000 nor 0xffff: find out how many.
    474   uint64_t uvalue = static_cast<uint64_t>(value);
    475   int num_fast_halfwords = GetNumFastHalfWords(uvalue);
    476   int num_slow_halfwords = 4 - (num_fast_halfwords & 0x7);
    477   bool more_ffff_halfwords = (num_fast_halfwords & 0x8) != 0;
    478 
    479   if (num_slow_halfwords > 1) {
    480     // A single movz/movn is not enough. Try the logical immediate route.
    481     int log_imm = EncodeLogicalImmediate(/*is_wide=*/true, value);
    482     if (log_imm >= 0) {
    483       return NewLIR3(WIDE(kA64Orr3Rrl), r_dest.GetReg(), rxzr, log_imm);
    484     }
    485   }
    486 
    487   if (num_slow_halfwords <= max_num_ops_per_const_load) {
    488     // We can encode the number using a movz/movn followed by one or more movk.
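             // For example (illustration, using x0), 0x0000ffff00001234 is emitted as:
             //   movz x0, #0x1234
             //   movk x0, #0xffff, lsl #32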
    489     ArmOpcode op;
    490     uint16_t background;
    491     LIR* res = nullptr;
    492 
    493     // Decide whether to use a movz or a movn.
    494     if (more_ffff_halfwords) {
    495       op = WIDE(kA64Movn3rdM);
    496       background = 0xffff;
    497     } else {
    498       op = WIDE(kA64Movz3rdM);
    499       background = 0;
    500     }
    501 
    502     // Emit the first instruction (movz, movn).
    503     int shift;
    504     for (shift = 0; shift < 4; shift++) {
    505       uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
    506       if (halfword != background) {
    507         res = NewLIR3(op, r_dest.GetReg(), halfword ^ background, shift);
    508         break;
    509       }
    510     }
    511 
    512     // Emit the movk instructions.
    513     for (shift++; shift < 4; shift++) {
    514       uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
    515       if (halfword != background) {
    516         NewLIR3(WIDE(kA64Movk3rdM), r_dest.GetReg(), halfword, shift);
    517       }
    518     }
    519     return res;
    520   }
    521 
    522   // Use the literal pool.
    523   int32_t val_lo = Low32Bits(value);
    524   int32_t val_hi = High32Bits(value);
    525   LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
    526   if (data_target == NULL) {
    527     data_target = AddWideData(&literal_list_, val_lo, val_hi);
    528   }
    529 
    530   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
    531   LIR *res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
    532                     r_dest.GetReg(), 0, 0, 0, 0, data_target);
    533   AppendLIR(res);
    534   return res;
    535 }
    536 
    537 LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
    538   LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched  during assembly */);
    539   res->target = target;
    540   return res;
    541 }
    542 
    543 LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
    544   LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc),
    545                         0 /* offset to be patched */);
    546   branch->target = target;
    547   return branch;
    548 }
    549 
    550 LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
    551   ArmOpcode opcode = kA64Brk1d;
    552   switch (op) {
    553     case kOpBlx:
    554       opcode = kA64Blr1x;
    555       break;
    556     // TODO(Arm64): port kThumbBx.
    557     // case kOpBx:
    558     //   opcode = kThumbBx;
    559     //   break;
    560     default:
    561       LOG(FATAL) << "Bad opcode " << op;
    562   }
    563   return NewLIR1(opcode, r_dest_src.GetReg());
    564 }
    565 
    566 LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) {
    567   ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
    568   CHECK_EQ(r_dest_src1.Is64Bit(), r_src2.Is64Bit());
    569   ArmOpcode opcode = kA64Brk1d;
    570 
    571   switch (op) {
    572     case kOpCmn:
    573       opcode = kA64Cmn3rro;
    574       break;
    575     case kOpCmp:
    576       opcode = kA64Cmp3rro;
    577       break;
    578     case kOpMov:
    579       opcode = kA64Mov2rr;
    580       break;
    581     case kOpMvn:
    582       opcode = kA64Mvn2rr;
    583       break;
    584     case kOpNeg:
    585       opcode = kA64Neg3rro;
    586       break;
    587     case kOpTst:
    588       opcode = kA64Tst3rro;
    589       break;
    590     case kOpRev:
    591       DCHECK_EQ(shift, 0);
    592       // Binary, but rm is encoded twice.
    593       return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
    595     case kOpRevsh:
    596       // Binary, but rm is encoded twice.
    597       NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
    598       // "sxth r1, r2" is "sbfm r1, r2, #0, #15"
    599       return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), 0, 15);
    601     case kOp2Byte:
    602       DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    603       // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
    604       // For now we use sbfm directly.
    605       return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 7);
    606     case kOp2Short:
    607       DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    608       // For now we use sbfm rather than its alias, sbfx.
    609       return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
    610     case kOp2Char:
    611       // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
    612       // For now we use ubfm directly.
    613       DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    614       return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
    615     default:
    616       return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
    617   }
    618 
    619   DCHECK(!IsPseudoLirOp(opcode));
    620   if (EncodingMap[opcode].flags & IS_BINARY_OP) {
    621     DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    622     return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg());
    623   } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    624     ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
    625     if (kind == kFmtShift) {
    626       return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
    627     }
    628   }
    629 
    630   LOG(FATAL) << "Unexpected encoding operand count";
    631   return NULL;
    632 }
    633 
    634 LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
    635                                   A64RegExtEncodings ext, uint8_t amount) {
    636   ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
    637   ArmOpcode opcode = kA64Brk1d;
    638 
    639   switch (op) {
    640     case kOpCmn:
    641       opcode = kA64Cmn3Rre;
    642       break;
    643     case kOpCmp:
    644       opcode = kA64Cmp3Rre;
    645       break;
    646     case kOpAdd:
    647       // Note: intentional fallthrough
    648     case kOpSub:
    649       return OpRegRegRegExtend(op, r_dest_src1, r_dest_src1, r_src2, ext, amount);
    651     default:
    652       LOG(FATAL) << "Bad opcode: " << op;
    653       break;
    654   }
    655 
    656   DCHECK(!IsPseudoLirOp(opcode));
    657   if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    658     ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
    659     if (kind == kFmtExtend) {
    660       return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(),
    661                      EncodeExtend(ext, amount));
    662     }
    663   }
    664 
    665   LOG(FATAL) << "Unexpected encoding operand count";
    666   return NULL;
    667 }
    668 
    669 LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
    670   /* RegReg operations with SP in first parameter need extended register instruction form.
    671    * Only CMN, CMP, ADD & SUB instructions are implemented.
    672    */
    673   if (r_dest_src1 == rs_sp) {
    674     return OpRegRegExtend(op, r_dest_src1, r_src2, kA64Uxtx, 0);
    675   } else {
    676     return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
    677   }
    678 }
    679 
    680 LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
    681   UNIMPLEMENTED(FATAL);
    682   return nullptr;
    683 }
    684 
    685 LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
    686   UNIMPLEMENTED(FATAL);
    687   return nullptr;
    688 }
    689 
    690 LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
    691   LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
    692   return NULL;
    693 }
    694 
    695 LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
    696                                     RegStorage r_src2, int shift) {
    697   ArmOpcode opcode = kA64Brk1d;
    698 
    699   switch (op) {
    700     case kOpAdd:
    701       opcode = kA64Add4rrro;
    702       break;
    703     case kOpSub:
    704       opcode = kA64Sub4rrro;
    705       break;
    706     // case kOpRsub:
    707     //   opcode = kA64RsubWWW;
    708     //   break;
    709     case kOpAdc:
    710       opcode = kA64Adc3rrr;
    711       break;
    712     case kOpAnd:
    713       opcode = kA64And4rrro;
    714       break;
    715     case kOpXor:
    716       opcode = kA64Eor4rrro;
    717       break;
    718     case kOpMul:
    719       opcode = kA64Mul3rrr;
    720       break;
    721     case kOpDiv:
    722       opcode = kA64Sdiv3rrr;
    723       break;
    724     case kOpOr:
    725       opcode = kA64Orr4rrro;
    726       break;
    727     case kOpSbc:
    728       opcode = kA64Sbc3rrr;
    729       break;
    730     case kOpLsl:
    731       opcode = kA64Lsl3rrr;
    732       break;
    733     case kOpLsr:
    734       opcode = kA64Lsr3rrr;
    735       break;
    736     case kOpAsr:
    737       opcode = kA64Asr3rrr;
    738       break;
    739     case kOpRor:
    740       opcode = kA64Ror3rrr;
    741       break;
    742     default:
    743       LOG(FATAL) << "Bad opcode: " << op;
    744       break;
    745   }
    746 
    747   // The instructions above belong to two kinds:
    748   // - 4-operands instructions, where the last operand is a shift/extend immediate,
    749   // - 3-operands instructions with no shift/extend.
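           // For example, kA64Add4rrro carries a shift operand, while kA64Mul3rrr and
           // kA64Sdiv3rrr do not.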
    750   ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
    751   CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
    752   CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit());
    753   if (EncodingMap[opcode].flags & IS_QUAD_OP) {
    754     DCHECK(!IsExtendEncoding(shift));
    755     return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
    756   } else {
    757     DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
    758     DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    759     return NewLIR3(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
    760   }
    761 }
    762 
    763 LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1,
    764                                      RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) {
    765   ArmOpcode opcode = kA64Brk1d;
    766 
    767   switch (op) {
    768     case kOpAdd:
    769       opcode = kA64Add4RRre;
    770       break;
    771     case kOpSub:
    772       opcode = kA64Sub4RRre;
    773       break;
    774     default:
    775       LOG(FATAL) << "Unimplemented opcode: " << op;
    776       break;
    777   }
    778   ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
    779 
    780   if (r_dest.Is64Bit()) {
    781     CHECK(r_src1.Is64Bit());
    782 
    783     // dest determines whether the op is wide or not. Up-convert src2 when necessary.
    784     // Note: this does not follow the AArch64 specification; it matches our own encoding.
    785     if (!r_src2.Is64Bit()) {
    786       r_src2 = As64BitReg(r_src2);
    787     }
    788   } else {
    789     CHECK(!r_src1.Is64Bit());
    790     CHECK(!r_src2.Is64Bit());
    791   }
    792 
    793   // Sanity checks.
    794   //    1) Amount is in the range 0..4
    795   CHECK_LE(amount, 4);
    796 
    797   return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(),
    798                  EncodeExtend(ext, amount));
    799 }
    800 
    801 LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
    802   return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
    803 }
    804 
    805 LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
    806   return OpRegRegImm64(op, r_dest, r_src1, static_cast<int64_t>(value));
    807 }
    808 
    809 LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value) {
    810   LIR* res;
    811   bool neg = (value < 0);
    812   uint64_t abs_value = (neg & !(value == LLONG_MIN)) ? -value : value;
    813   ArmOpcode opcode = kA64Brk1d;
    814   ArmOpcode alt_opcode = kA64Brk1d;
    815   bool is_logical = false;
    816   bool is_wide = r_dest.Is64Bit();
    817   ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
    818   int info = 0;
    819 
    820   switch (op) {
    821     case kOpLsl: {
    822       // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
    823       // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 64), #(63-imm)".
    824       // For now, we just use ubfm directly.
    825       int max_value = (is_wide) ? 63 : 31;
    826       return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
    827                      (-value) & max_value, max_value - value);
    828     }
    829     case kOpLsr:
    830       return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
    831     case kOpAsr:
    832       return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
    833     case kOpRor:
    834       // "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm".
    835       // For now, we just use extr directly.
    836       return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(),
    837                      value);
    838     case kOpAdd:
    839       neg = !neg;
    840       // Note: intentional fallthrough
    841     case kOpSub:
    842       // Add and sub below read/write sp rather than xzr.
    843       if (abs_value < 0x1000) {
    844         opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
    845         return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0);
    846       } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
    847         opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
    848         return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
    849       } else {
    850         alt_opcode = (op == kOpAdd) ? kA64Add4RRre : kA64Sub4RRre;
    851         info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
    852       }
    853       break;
    854     case kOpAdc:
    855       alt_opcode = kA64Adc3rrr;
    856       break;
    857     case kOpSbc:
    858       alt_opcode = kA64Sbc3rrr;
    859       break;
    860     case kOpOr:
    861       is_logical = true;
    862       opcode = kA64Orr3Rrl;
    863       alt_opcode = kA64Orr4rrro;
    864       break;
    865     case kOpAnd:
    866       is_logical = true;
    867       opcode = kA64And3Rrl;
    868       alt_opcode = kA64And4rrro;
    869       break;
    870     case kOpXor:
    871       is_logical = true;
    872       opcode = kA64Eor3Rrl;
    873       alt_opcode = kA64Eor4rrro;
    874       break;
    875     case kOpMul:
    876       // TUNING: power of 2, shift & add
    877       alt_opcode = kA64Mul3rrr;
    878       break;
    879     default:
    880       LOG(FATAL) << "Bad opcode: " << op;
    881   }
    882 
    883   if (is_logical) {
    884     int log_imm = EncodeLogicalImmediate(is_wide, value);
    885     if (log_imm >= 0) {
    886       return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
    887     } else {
    888       // When the immediate is either 0 or ~0, the logical operation can be trivially reduced
    889       // to a - possibly negated - assignment.
    890       if (value == 0) {
    891         switch (op) {
    892           case kOpOr:
    893           case kOpXor:
    894             // Or/Xor by zero reduces to an assignment.
    895             return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
    896           default:
    897             // And by zero reduces to a `mov rdest, xzr'.
    898             DCHECK(op == kOpAnd);
    899             return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
    900         }
    901       } else if (value == INT64_C(-1)
    902                  || (!is_wide && static_cast<uint32_t>(value) == ~UINT32_C(0))) {
    903         switch (op) {
    904           case kOpAnd:
    905             // And by -1 reduces to an assignment.
    906             return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
    907           case kOpXor:
    908             // Xor by -1 reduces to an `mvn rdest, rsrc'.
    909             return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), r_src1.GetReg());
    910           default:
    911             // Or by -1 reduces to a `mvn rdest, xzr'.
    912             DCHECK(op == kOpOr);
    913             return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
    914         }
    915       }
    916     }
    917   }
    918 
    919   RegStorage r_scratch;
    920   if (is_wide) {
    921     r_scratch = AllocTempWide();
    922     LoadConstantWide(r_scratch, value);
    923   } else {
    924     r_scratch = AllocTemp();
    925     LoadConstant(r_scratch, value);
    926   }
    927   if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
    928     res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
    929   else
    930     res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
    931   FreeTemp(r_scratch);
    932   return res;
    933 }
    934 
    935 LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
    936   return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value));
    937 }
    938 
    939 LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) {
    940   ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
    941   ArmOpcode opcode = kA64Brk1d;
    942   ArmOpcode neg_opcode = kA64Brk1d;
    943   bool shift;
    944   bool neg = (value < 0);
    945   uint64_t abs_value = (neg & !(value == LLONG_MIN)) ? -value : value;
    946 
    947   if (LIKELY(abs_value < 0x1000)) {
    948     // abs_value is a 12-bit immediate.
    949     shift = false;
    950   } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
    951     // abs_value is a shifted 12-bit immediate.
    952     shift = true;
    953     abs_value >>= 12;
    954   } else if (LIKELY(abs_value < 0x1000000 && (op == kOpAdd || op == kOpSub))) {
    955     // Note: It is better to use two ADD/SUB instructions than to load the value into a temp register.
    956     // This works for both normal registers and SP.
    957     // For a frame size == 0x2468, it will be encoded as:
    958     //   sub sp, #0x2000
    959     //   sub sp, #0x468
    960     if (neg) {
    961       op = (op == kOpAdd) ? kOpSub : kOpAdd;
    962     }
    963     OpRegImm64(op, r_dest_src1, abs_value & (~INT64_C(0xfff)));
    964     return OpRegImm64(op, r_dest_src1, abs_value & 0xfff);
    965   } else {
    966     RegStorage r_tmp;
    967     LIR* res;
    968     if (IS_WIDE(wide)) {
    969       r_tmp = AllocTempWide();
    970       res = LoadConstantWide(r_tmp, value);
    971     } else {
    972       r_tmp = AllocTemp();
    973       res = LoadConstant(r_tmp, value);
    974     }
    975     OpRegReg(op, r_dest_src1, r_tmp);
    976     FreeTemp(r_tmp);
    977     return res;
    978   }
    979 
    980   switch (op) {
    981     case kOpAdd:
    982       neg_opcode = kA64Sub4RRdT;
    983       opcode = kA64Add4RRdT;
    984       break;
    985     case kOpSub:
    986       neg_opcode = kA64Add4RRdT;
    987       opcode = kA64Sub4RRdT;
    988       break;
    989     case kOpCmp:
    990       neg_opcode = kA64Cmn3RdT;
    991       opcode = kA64Cmp3RdT;
    992       break;
    993     default:
    994       LOG(FATAL) << "Bad op-kind in OpRegImm: " << op;
    995       break;
    996   }
    997 
    998   if (UNLIKELY(neg))
    999     opcode = neg_opcode;
   1000 
   1001   if (EncodingMap[opcode].flags & IS_QUAD_OP)
   1002     return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value,
   1003                    (shift) ? 1 : 0);
   1004   else
   1005     return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0);
   1006 }
   1007 
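         // EncodeShift() and EncodeExtend() below pack a shift/extend operand into a small
         // integer. Bit 6 is the discriminator: it is clear for shifts (type in bits [8:7],
         // amount in bits [5:0]) and set for extends (type in bits [5:3], amount in
         // bits [2:0]); IsExtendEncoding() tests that bit.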
   1008 int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
   1009   DCHECK_EQ(shift_type & 0x3, shift_type);
   1010   DCHECK_EQ(amount & 0x3f, amount);
   1011   return ((shift_type & 0x3) << 7) | (amount & 0x3f);
   1012 }
   1013 
   1014 int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
   1015   DCHECK_EQ(extend_type & 0x7, extend_type);
   1016   DCHECK_EQ(amount & 0x7, amount);
   1017   return  (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7);
   1018 }
   1019 
   1020 bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) {
   1021   return ((1 << 6) & encoded_value) != 0;
   1022 }
   1023 
   1024 LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
   1025                                    int scale, OpSize size) {
   1026   LIR* load;
   1027   int expected_scale = 0;
   1028   ArmOpcode opcode = kA64Brk1d;
   1029   r_base = Check64BitReg(r_base);
   1030 
   1031   // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
   1032   //   register offset load (rather than doing the sign extension in a separate instruction).
   1033   if (r_index.Is32Bit()) {
   1034     // Assemble: ``sxtw xN, wN''.
   1035     r_index = As64BitReg(r_index);
   1036     NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
   1037   }
   1038 
   1039   if (r_dest.IsFloat()) {
   1040     if (r_dest.IsDouble()) {
   1041       DCHECK(size == k64 || size == kDouble);
   1042       expected_scale = 3;
   1043       opcode = FWIDE(kA64Ldr4fXxG);
   1044     } else {
   1045       DCHECK(r_dest.IsSingle());
   1046       DCHECK(size == k32 || size == kSingle);
   1047       expected_scale = 2;
   1048       opcode = kA64Ldr4fXxG;
   1049     }
   1050 
   1051     DCHECK(scale == 0 || scale == expected_scale);
   1052     return NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
   1053                    (scale != 0) ? 1 : 0);
   1054   }
   1055 
   1056   switch (size) {
   1057     case kDouble:
   1058     case kWord:
   1059     case k64:
   1060       r_dest = Check64BitReg(r_dest);
   1061       opcode = WIDE(kA64Ldr4rXxG);
   1062       expected_scale = 3;
   1063       break;
   1064     case kSingle:     // Intentional fall-through.
   1065     case k32:         // Intentional fall-through.
   1066     case kReference:
   1067       r_dest = Check32BitReg(r_dest);
   1068       opcode = kA64Ldr4rXxG;
   1069       expected_scale = 2;
   1070       break;
   1071     case kUnsignedHalf:
   1072       r_dest = Check32BitReg(r_dest);
   1073       opcode = kA64Ldrh4wXxd;
   1074       expected_scale = 1;
   1075       break;
   1076     case kSignedHalf:
   1077       r_dest = Check32BitReg(r_dest);
   1078       opcode = kA64Ldrsh4rXxd;
   1079       expected_scale = 1;
   1080       break;
   1081     case kUnsignedByte:
   1082       r_dest = Check32BitReg(r_dest);
   1083       opcode = kA64Ldrb3wXx;
   1084       break;
   1085     case kSignedByte:
   1086       r_dest = Check32BitReg(r_dest);
   1087       opcode = kA64Ldrsb3rXx;
   1088       break;
   1089     default:
   1090       LOG(FATAL) << "Bad size: " << size;
   1091   }
   1092 
   1093   if (UNLIKELY(expected_scale == 0)) {
    1094     // This is a tertiary op (e.g. ldrb, ldrsb); it does not support scale.
   1095     DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
   1096     DCHECK_EQ(scale, 0);
   1097     load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
   1098   } else {
   1099     DCHECK(scale == 0 || scale == expected_scale);
   1100     load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
   1101                    (scale != 0) ? 1 : 0);
   1102   }
   1103 
   1104   return load;
   1105 }
   1106 
   1107 LIR* Arm64Mir2Lir::LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
   1108                                   int scale) {
   1109   return LoadBaseIndexed(r_base, r_index, As32BitReg(r_dest), scale, kReference);
   1110 }
   1111 
   1112 LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
   1113                                     int scale, OpSize size) {
   1114   LIR* store;
   1115   int expected_scale = 0;
   1116   ArmOpcode opcode = kA64Brk1d;
   1117   r_base = Check64BitReg(r_base);
   1118 
   1119   // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
   1120   //   register offset store (rather than doing the sign extension in a separate instruction).
   1121   if (r_index.Is32Bit()) {
   1122     // Assemble: ``sxtw xN, wN''.
   1123     r_index = As64BitReg(r_index);
   1124     NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
   1125   }
   1126 
   1127   if (r_src.IsFloat()) {
   1128     if (r_src.IsDouble()) {
   1129       DCHECK(size == k64 || size == kDouble);
   1130       expected_scale = 3;
   1131       opcode = FWIDE(kA64Str4fXxG);
   1132     } else {
   1133       DCHECK(r_src.IsSingle());
   1134       DCHECK(size == k32 || size == kSingle);
   1135       expected_scale = 2;
   1136       opcode = kA64Str4fXxG;
   1137     }
   1138 
   1139     DCHECK(scale == 0 || scale == expected_scale);
   1140     return NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
   1141                    (scale != 0) ? 1 : 0);
   1142   }
   1143 
   1144   switch (size) {
    1145     case kDouble:     // Intentional fall-through.
    1146     case kWord:       // Intentional fall-through.
   1147     case k64:
   1148       r_src = Check64BitReg(r_src);
   1149       opcode = WIDE(kA64Str4rXxG);
   1150       expected_scale = 3;
   1151       break;
    1152     case kSingle:     // Intentional fall-through.
    1153     case k32:         // Intentional fall-through.
   1154     case kReference:
   1155       r_src = Check32BitReg(r_src);
   1156       opcode = kA64Str4rXxG;
   1157       expected_scale = 2;
   1158       break;
   1159     case kUnsignedHalf:
   1160     case kSignedHalf:
   1161       r_src = Check32BitReg(r_src);
   1162       opcode = kA64Strh4wXxd;
   1163       expected_scale = 1;
   1164       break;
   1165     case kUnsignedByte:
   1166     case kSignedByte:
   1167       r_src = Check32BitReg(r_src);
   1168       opcode = kA64Strb3wXx;
   1169       break;
   1170     default:
   1171       LOG(FATAL) << "Bad size: " << size;
   1172   }
   1173 
   1174   if (UNLIKELY(expected_scale == 0)) {
    1175     // This is a tertiary op (e.g. strb); it does not support scale.
   1176     DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
   1177     DCHECK_EQ(scale, 0);
   1178     store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
   1179   } else {
   1180     store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
   1181                     (scale != 0) ? 1 : 0);
   1182   }
   1183 
   1184   return store;
   1185 }
   1186 
   1187 LIR* Arm64Mir2Lir::StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
   1188                                    int scale) {
   1189   return StoreBaseIndexed(r_base, r_index, As32BitReg(r_src), scale, kReference);
   1190 }
   1191 
   1192 /*
   1193  * Load value from base + displacement.  Optionally perform null check
   1194  * on base (which must have an associated s_reg and MIR).  If not
   1195  * performing null check, incoming MIR can be null.
   1196  */
   1197 LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
   1198                                     OpSize size) {
   1199   LIR* load = NULL;
   1200   ArmOpcode opcode = kA64Brk1d;
   1201   ArmOpcode alt_opcode = kA64Brk1d;
   1202   int scale = 0;
   1203 
   1204   switch (size) {
   1205     case kDouble:     // Intentional fall-through.
   1206     case kWord:       // Intentional fall-through.
   1207     case k64:
   1208       r_dest = Check64BitReg(r_dest);
   1209       scale = 3;
   1210       if (r_dest.IsFloat()) {
   1211         DCHECK(r_dest.IsDouble());
   1212         opcode = FWIDE(kA64Ldr3fXD);
   1213         alt_opcode = FWIDE(kA64Ldur3fXd);
   1214       } else {
   1215         opcode = WIDE(kA64Ldr3rXD);
   1216         alt_opcode = WIDE(kA64Ldur3rXd);
   1217       }
   1218       break;
   1219     case kSingle:     // Intentional fall-through.
    1220     case k32:         // Intentional fall-through.
   1221     case kReference:
   1222       r_dest = Check32BitReg(r_dest);
   1223       scale = 2;
   1224       if (r_dest.IsFloat()) {
   1225         DCHECK(r_dest.IsSingle());
   1226         opcode = kA64Ldr3fXD;
   1227       } else {
   1228         opcode = kA64Ldr3rXD;
   1229       }
   1230       break;
   1231     case kUnsignedHalf:
   1232       scale = 1;
   1233       opcode = kA64Ldrh3wXF;
   1234       break;
   1235     case kSignedHalf:
   1236       scale = 1;
   1237       opcode = kA64Ldrsh3rXF;
   1238       break;
   1239     case kUnsignedByte:
   1240       opcode = kA64Ldrb3wXd;
   1241       break;
   1242     case kSignedByte:
   1243       opcode = kA64Ldrsb3rXd;
   1244       break;
   1245     default:
   1246       LOG(FATAL) << "Bad size: " << size;
   1247   }
   1248 
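           // Choosing the addressing form (illustration): with a 64-bit load (scale 3), a
           // displacement of 0x7ff8 is aligned and its scaled value 0xfff fits in 12 bits,
           // so the scaled LDR form is used; a displacement of -8 fits the signed 9-bit
           // range checked by IS_SIGNED_IMM9, so the unscaled LDUR form is used; anything
           // else falls back to the register-offset sequence below.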
   1249   bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
   1250   int scaled_disp = displacement >> scale;
   1251   if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
   1252     // Can use scaled load.
   1253     load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), scaled_disp);
   1254   } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
   1255     // Can use unscaled load.
   1256     load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement);
   1257   } else {
   1258     // Use long sequence.
   1259     // TODO: cleaner support for index/displacement registers?  Not a reference, but must match width.
   1260     RegStorage r_scratch = AllocTempWide();
   1261     LoadConstantWide(r_scratch, displacement);
   1262     load = LoadBaseIndexed(r_base, r_scratch, r_dest, 0, size);
   1263     FreeTemp(r_scratch);
   1264   }
   1265 
    1266   // TODO: in the future we may need to differentiate Dalvik accesses from spills.
   1267   if (mem_ref_type_ == ResourceMask::kDalvikReg) {
   1268     DCHECK(r_base == rs_sp);
   1269     AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
   1270   }
   1271   return load;
   1272 }
   1273 
   1274 LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
   1275                                 OpSize size, VolatileKind is_volatile) {
    1276   // LoadBaseDisp() will emit the correct instruction for an atomic load on arm64,
    1277   // assuming r_dest is correctly prepared using RegClassForFieldLoadStore().
   1278 
   1279   LIR* load = LoadBaseDispBody(r_base, displacement, r_dest, size);
   1280 
   1281   if (UNLIKELY(is_volatile == kVolatile)) {
   1282     // TODO: This should generate an acquire load instead of the barrier.
   1283     GenMemBarrier(kLoadAny);
   1284   }
   1285 
   1286   return load;
   1287 }
   1288 
   1289 LIR* Arm64Mir2Lir::LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest,
   1290                                VolatileKind is_volatile) {
   1291   return LoadBaseDisp(r_base, displacement, As32BitReg(r_dest), kReference, is_volatile);
   1292 }
   1293 
   1294 LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
   1295                                      OpSize size) {
   1296   LIR* store = NULL;
   1297   ArmOpcode opcode = kA64Brk1d;
   1298   ArmOpcode alt_opcode = kA64Brk1d;
   1299   int scale = 0;
   1300 
   1301   switch (size) {
   1302     case kDouble:     // Intentional fall-through.
   1303     case kWord:       // Intentional fall-through.
   1304     case k64:
   1305       r_src = Check64BitReg(r_src);
   1306       scale = 3;
   1307       if (r_src.IsFloat()) {
   1308         DCHECK(r_src.IsDouble());
   1309         opcode = FWIDE(kA64Str3fXD);
   1310         alt_opcode = FWIDE(kA64Stur3fXd);
   1311       } else {
   1312         opcode = FWIDE(kA64Str3rXD);
   1313         alt_opcode = FWIDE(kA64Stur3rXd);
   1314       }
   1315       break;
   1316     case kSingle:     // Intentional fall-through.
    1317     case k32:         // Intentional fall-through.
   1318     case kReference:
   1319       r_src = Check32BitReg(r_src);
   1320       scale = 2;
   1321       if (r_src.IsFloat()) {
   1322         DCHECK(r_src.IsSingle());
   1323         opcode = kA64Str3fXD;
   1324       } else {
   1325         opcode = kA64Str3rXD;
   1326       }
   1327       break;
   1328     case kUnsignedHalf:
   1329     case kSignedHalf:
   1330       scale = 1;
   1331       opcode = kA64Strh3wXF;
   1332       break;
   1333     case kUnsignedByte:
   1334     case kSignedByte:
   1335       opcode = kA64Strb3wXd;
   1336       break;
   1337     default:
   1338       LOG(FATAL) << "Bad size: " << size;
   1339   }
   1340 
   1341   bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
   1342   int scaled_disp = displacement >> scale;
   1343   if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
   1344     // Can use scaled store.
   1345     store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), scaled_disp);
   1346   } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
   1347     // Can use unscaled store.
   1348     store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement);
   1349   } else {
   1350     // Use long sequence.
   1351     RegStorage r_scratch = AllocTempWide();
   1352     LoadConstantWide(r_scratch, displacement);
   1353     store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
   1354     FreeTemp(r_scratch);
   1355   }
   1356 
   1357   // TODO: In future, may need to differentiate Dalvik & spill accesses.
   1358   if (mem_ref_type_ == ResourceMask::kDalvikReg) {
   1359     DCHECK(r_base == rs_sp);
   1360     AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
   1361   }
   1362   return store;
   1363 }
   1364 
   1365 LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
   1366                                  OpSize size, VolatileKind is_volatile) {
   1367   // TODO: This should generate a release store and no barriers.
   1368   if (UNLIKELY(is_volatile == kVolatile)) {
   1369     // Ensure that prior accesses become visible to other threads first.
   1370     GenMemBarrier(kAnyStore);
   1371   }
   1372 
    1373   // StoreBaseDisp() will emit the correct instruction for an atomic store on arm64,
    1374   // assuming r_src is correctly prepared using RegClassForFieldLoadStore().
   1375 
   1376   LIR* store = StoreBaseDispBody(r_base, displacement, r_src, size);
   1377 
   1378   if (UNLIKELY(is_volatile == kVolatile)) {
   1379     // Preserve order with respect to any subsequent volatile loads.
   1380     // We need StoreLoad, but that generally requires the most expensive barrier.
   1381     GenMemBarrier(kAnyAny);
   1382   }
   1383 
   1384   return store;
   1385 }
   1386 
   1387 LIR* Arm64Mir2Lir::StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src,
   1388                                 VolatileKind is_volatile) {
   1389   return StoreBaseDisp(r_base, displacement, As32BitReg(r_src), kReference, is_volatile);
   1390 }
   1391 
   1392 LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
   1393   LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
   1394   return NULL;
   1395 }
   1396 
   1397 LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
   1398   LOG(FATAL) << "Unexpected use of OpMem for Arm64";
   1399   return NULL;
   1400 }
   1401 
   1402 LIR* Arm64Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) {
   1403   return OpReg(op, r_tgt);
   1404 }
   1405 
   1406 }  // namespace art
   1407