      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
      17 /* This file contains codegen for the Arm64 ISA. */
     18 
     19 #include "arm64_lir.h"
     20 #include "codegen_arm64.h"
     21 #include "dex/quick/mir_to_lir-inl.h"
     22 #include "dex/reg_storage_eq.h"
     23 #include "entrypoints/quick/quick_entrypoints.h"
     24 #include "mirror/array.h"
     25 #include "utils.h"
     26 
     27 namespace art {
     28 
     29 LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
     30   OpRegReg(kOpCmp, src1, src2);
     31   return OpCondBranch(cond, target);
     32 }
     33 
     34 LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) {
     35   LOG(FATAL) << "Unexpected use of OpIT for Arm64";
     36   return NULL;
     37 }
     38 
     39 void Arm64Mir2Lir::OpEndIT(LIR* it) {
     40   LOG(FATAL) << "Unexpected use of OpEndIT for Arm64";
     41 }
     42 
     43 /*
      44  * 64-bit 3-way compare function.
     45  *     cmp   xA, xB
     46  *     csinc wC, wzr, wzr, eq  // wC = (xA == xB) ? 0 : 1
     47  *     csneg wC, wC, wC, ge    // wC = (xA >= xB) ? wC : -wC
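         *
         * E.g. for xA < xB the csinc leaves wC = 1 and the failed "ge" condition
         * makes csneg flip it to -1; xA == xB yields 0 and xA > xB yields +1.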
     48  */
     49 void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
     50                               RegLocation rl_src2) {
     51   RegLocation rl_result;
     52   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
     53   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
     54   rl_result = EvalLoc(rl_dest, kCoreReg, true);
     55 
     56   OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
     57   NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq);
     58   NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
     59           rl_result.reg.GetReg(), kArmCondGe);
     60   StoreValue(rl_dest, rl_result);
     61 }
     62 
     63 void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
     64                              RegLocation rl_src1, RegLocation rl_shift) {
     65   OpKind op = kOpBkpt;
     66   switch (opcode) {
     67   case Instruction::SHL_LONG:
     68   case Instruction::SHL_LONG_2ADDR:
     69     op = kOpLsl;
     70     break;
     71   case Instruction::SHR_LONG:
     72   case Instruction::SHR_LONG_2ADDR:
     73     op = kOpAsr;
     74     break;
     75   case Instruction::USHR_LONG:
     76   case Instruction::USHR_LONG_2ADDR:
     77     op = kOpLsr;
     78     break;
     79   default:
     80     LOG(FATAL) << "Unexpected case: " << opcode;
     81   }
     82   rl_shift = LoadValue(rl_shift, kCoreReg);
     83   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
     84   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
     85   OpRegRegReg(op, rl_result.reg, rl_src1.reg, As64BitReg(rl_shift.reg));
     86   StoreValueWide(rl_dest, rl_result);
     87 }
     88 
     89 static constexpr bool kUseDeltaEncodingInGenSelect = false;
     90 
     91 void Arm64Mir2Lir::GenSelect(int32_t true_val, int32_t false_val, ConditionCode ccode,
     92                              RegStorage rs_dest, int result_reg_class) {
     93   if (false_val == 0 ||               // 0 is better as first operand.
     94       true_val == 1 ||                // Potentially Csinc.
     95       true_val == -1 ||               // Potentially Csinv.
     96       true_val == false_val + 1) {    // Potentially Csinc.
     97     ccode = NegateComparison(ccode);
     98     std::swap(true_val, false_val);
     99   }
    100 
    101   ArmConditionCode code = ArmConditionEncoding(ccode);
    102 
    103   int opcode;                                      // The opcode.
    104   RegStorage left_op = RegStorage::InvalidReg();   // The operands.
    105   RegStorage right_op = RegStorage::InvalidReg();  // The operands.
    106 
    107   bool is_wide = rs_dest.Is64Bit();
    108 
    109   RegStorage zero_reg = is_wide ? rs_xzr : rs_wzr;
    110 
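          // Csel writes left_op when the condition holds, otherwise right_op; the
          // csinc/csinv/csneg variants instead write right_op incremented, inverted
          // or negated, which is why false_val == true_val + 1, ~true_val or
          // -true_val below can reuse left_op as the second source register.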
    111   if (true_val == 0) {
    112     left_op = zero_reg;
    113   } else {
    114     left_op = rs_dest;
    115     LoadConstantNoClobber(rs_dest, true_val);
    116   }
    117   if (false_val == 1) {
    118     right_op = zero_reg;
    119     opcode = kA64Csinc4rrrc;
    120   } else if (false_val == -1) {
    121     right_op = zero_reg;
    122     opcode = kA64Csinv4rrrc;
    123   } else if (false_val == true_val + 1) {
    124     right_op = left_op;
    125     opcode = kA64Csinc4rrrc;
    126   } else if (false_val == -true_val) {
    127     right_op = left_op;
    128     opcode = kA64Csneg4rrrc;
    129   } else if (false_val == ~true_val) {
    130     right_op = left_op;
    131     opcode = kA64Csinv4rrrc;
    132   } else if (true_val == 0) {
    133     // left_op is zero_reg.
    134     right_op = rs_dest;
    135     LoadConstantNoClobber(rs_dest, false_val);
    136     opcode = kA64Csel4rrrc;
    137   } else {
    138     // Generic case.
    139     RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
    140     if (is_wide) {
    141       if (t_reg2.Is32Bit()) {
    142         t_reg2 = As64BitReg(t_reg2);
    143       }
    144     } else {
    145       if (t_reg2.Is64Bit()) {
    146         t_reg2 = As32BitReg(t_reg2);
    147       }
    148     }
    149 
    150     if (kUseDeltaEncodingInGenSelect) {
    151       int32_t delta = false_val - true_val;
    152       uint32_t abs_val = delta < 0 ? -delta : delta;
    153 
    154       if (abs_val < 0x1000) {  // TODO: Replace with InexpensiveConstant with opcode.
    155         // Can encode as immediate to an add.
    156         right_op = t_reg2;
    157         OpRegRegImm(kOpAdd, t_reg2, left_op, delta);
    158       }
    159     }
    160 
    161     // Load as constant.
    162     if (!right_op.Valid()) {
    163       LoadConstantNoClobber(t_reg2, false_val);
    164       right_op = t_reg2;
    165     }
    166 
    167     opcode = kA64Csel4rrrc;
    168   }
    169 
    170   DCHECK(left_op.Valid() && right_op.Valid());
    171   NewLIR4(is_wide ? WIDE(opcode) : opcode, rs_dest.GetReg(), left_op.GetReg(), right_op.GetReg(),
    172       code);
    173 }
    174 
    175 void Arm64Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
    176                                     int32_t true_val, int32_t false_val, RegStorage rs_dest,
    177                                     int dest_reg_class) {
    178   DCHECK(rs_dest.Valid());
    179   OpRegReg(kOpCmp, left_op, right_op);
    180   GenSelect(true_val, false_val, code, rs_dest, dest_reg_class);
    181 }
    182 
    183 void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
    184   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
    185   rl_src = LoadValue(rl_src, rl_src.ref ? kRefReg : kCoreReg);
    186   // rl_src may be aliased with rl_result/rl_dest, so do compare early.
    187   OpRegImm(kOpCmp, rl_src.reg, 0);
    188 
    189   RegLocation rl_dest = mir_graph_->GetDest(mir);
    190 
    191   // The kMirOpSelect has two variants, one for constants and one for moves.
    192   if (mir->ssa_rep->num_uses == 1) {
    193     RegLocation rl_result = EvalLoc(rl_dest, rl_dest.ref ? kRefReg : kCoreReg, true);
    194     GenSelect(mir->dalvikInsn.vB, mir->dalvikInsn.vC, mir->meta.ccode, rl_result.reg,
    195               rl_dest.ref ? kRefReg : kCoreReg);
    196     StoreValue(rl_dest, rl_result);
    197   } else {
    198     RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
    199     RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
    200 
    201     RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
    202     rl_true = LoadValue(rl_true, result_reg_class);
    203     rl_false = LoadValue(rl_false, result_reg_class);
    204     RegLocation rl_result = EvalLoc(rl_dest, result_reg_class, true);
    205 
    206     bool is_wide = rl_dest.ref || rl_dest.wide;
    207     int opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
    208     NewLIR4(opcode, rl_result.reg.GetReg(),
    209             rl_true.reg.GetReg(), rl_false.reg.GetReg(), ArmConditionEncoding(mir->meta.ccode));
    210     StoreValue(rl_dest, rl_result);
    211   }
    212 }
    213 
    214 void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
    215   RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
    216   RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
    217   LIR* taken = &block_label_list_[bb->taken];
    218   LIR* not_taken = &block_label_list_[bb->fall_through];
    219   // Normalize such that if either operand is constant, src2 will be constant.
    220   ConditionCode ccode = mir->meta.ccode;
    221   if (rl_src1.is_const) {
    222     std::swap(rl_src1, rl_src2);
    223     ccode = FlipComparisonOrder(ccode);
    224   }
    225 
    226   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    227 
    228   if (rl_src2.is_const) {
    229     // TODO: Optimize for rl_src1.is_const? (Does happen in the boot image at the moment.)
    230 
    231     int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    232     // Special handling using cbz & cbnz.
    233     if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
    234       OpCmpImmBranch(ccode, rl_src1.reg, 0, taken);
    235       OpCmpImmBranch(NegateComparison(ccode), rl_src1.reg, 0, not_taken);
    236       return;
    237     }
    238 
    239     // Only handle Imm if src2 is not already in a register.
    240     rl_src2 = UpdateLocWide(rl_src2);
    241     if (rl_src2.location != kLocPhysReg) {
    242       OpRegImm64(kOpCmp, rl_src1.reg, val);
    243       OpCondBranch(ccode, taken);
    244       OpCondBranch(NegateComparison(ccode), not_taken);
    245       return;
    246     }
    247   }
    248 
    249   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
    250   OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
    251   OpCondBranch(ccode, taken);
    252   OpCondBranch(NegateComparison(ccode), not_taken);
    253 }
    254 
    255 /*
    256  * Generate a register comparison to an immediate and branch.  Caller
    257  * is responsible for setting branch target field.
    258  */
    259 LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value,
    260                                   LIR* target) {
    261   LIR* branch = nullptr;
    262   ArmConditionCode arm_cond = ArmConditionEncoding(cond);
    263   if (check_value == 0) {
    264     if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) {
    265       ArmOpcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
    266       ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
    267       branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
    268     } else if (arm_cond == kArmCondLs) {
    269       // kArmCondLs is an unsigned less or equal. A comparison r <= 0 is then the same as cbz.
    270       // This case happens for a bounds check of array[0].
    271       ArmOpcode opcode = kA64Cbz2rt;
    272       ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
    273       branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
    274     }
    275     // TODO: Use tbz/tbnz for < 0 or >= 0.
    276   }
    277 
    278   if (branch == nullptr) {
    279     OpRegImm(kOpCmp, reg, check_value);
    280     branch = NewLIR2(kA64B2ct, arm_cond, 0);
    281   }
    282 
    283   branch->target = target;
    284   return branch;
    285 }
    286 
    287 LIR* Arm64Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg,
    288                                      RegStorage base_reg, int offset, int check_value,
    289                                      LIR* target, LIR** compare) {
    290   DCHECK(compare == nullptr);
    291   // It is possible that the temp register is 64-bit (ArgReg or RefReg).
    292   // Always compare a 32-bit value, no matter what temp_reg is.
    293   if (temp_reg.Is64Bit()) {
    294     temp_reg = As32BitReg(temp_reg);
    295   }
    296   Load32Disp(base_reg, offset, temp_reg);
    297   LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
    298   return branch;
    299 }
    300 
    301 LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
    302   bool dest_is_fp = r_dest.IsFloat();
    303   bool src_is_fp = r_src.IsFloat();
    304   ArmOpcode opcode = kA64Brk1d;
    305   LIR* res;
    306 
    307   if (LIKELY(dest_is_fp == src_is_fp)) {
    308     if (LIKELY(!dest_is_fp)) {
    309       DCHECK_EQ(r_dest.Is64Bit(), r_src.Is64Bit());
    310 
    311       // Core/core copy.
    312       // Copies involving the sp register require a different instruction.
    313       opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr;
    314 
    315       // TODO(Arm64): kA64Add4RRdT formally has 4 args, but is used as a 2 args instruction.
    316       //   This currently works because the other arguments are set to 0 by default. We should
    317       //   rather introduce an alias kA64Mov2RR.
    318 
    319       // Core/core copy: do an x/x copy only if both registers are x.
    320       if (r_dest.Is64Bit() && r_src.Is64Bit()) {
    321         opcode = WIDE(opcode);
    322       }
    323     } else {
    324       // Float/float copy.
    325       bool dest_is_double = r_dest.IsDouble();
    326       bool src_is_double = r_src.IsDouble();
    327 
    328       // We do not do float/double or double/float casts here.
    329       DCHECK_EQ(dest_is_double, src_is_double);
    330 
    331       // Homogeneous float/float copy.
    332       opcode = (dest_is_double) ? FWIDE(kA64Fmov2ff) : kA64Fmov2ff;
    333     }
    334   } else {
    335     // Inhomogeneous register copy.
    336     if (dest_is_fp) {
    337       if (r_dest.IsDouble()) {
    338         opcode = kA64Fmov2Sx;
    339       } else {
    340         r_src = Check32BitReg(r_src);
    341         opcode = kA64Fmov2sw;
    342       }
    343     } else {
    344       if (r_src.IsDouble()) {
    345         opcode = kA64Fmov2xS;
    346       } else {
    347         r_dest = Check32BitReg(r_dest);
    348         opcode = kA64Fmov2ws;
    349       }
    350     }
    351   }
    352 
    353   res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
    354 
    355   if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    356     res->flags.is_nop = true;
    357   }
    358 
    359   return res;
    360 }
    361 
    362 void Arm64Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
    363   if (r_dest != r_src) {
    364     LIR* res = OpRegCopyNoInsert(r_dest, r_src);
    365     AppendLIR(res);
    366   }
    367 }
    368 
    369 void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
    370   OpRegCopy(r_dest, r_src);
    371 }
    372 
    373 // Table of magic divisors
    374 struct MagicTable {
    375   int magic64_base;
    376   int magic64_eor;
    377   uint64_t magic64;
    378   uint32_t magic32;
    379   uint32_t shift;
    380   DividePattern pattern;
    381 };
    382 
    383 static const MagicTable magic_table[] = {
    384   {   0,      0,                  0,          0, 0, DivideNone},  // 0
    385   {   0,      0,                  0,          0, 0, DivideNone},  // 1
    386   {   0,      0,                  0,          0, 0, DivideNone},  // 2
    387   {0x3c,     -1, 0x5555555555555556, 0x55555556, 0, Divide3},     // 3
    388   {   0,      0,                  0,          0, 0, DivideNone},  // 4
    389   {0xf9,     -1, 0x6666666666666667, 0x66666667, 1, Divide5},     // 5
    390   {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 0, Divide3},     // 6
    391   {  -1,     -1, 0x924924924924924A, 0x92492493, 2, Divide7},     // 7
    392   {   0,      0,                  0,          0, 0, DivideNone},  // 8
    393   {  -1,     -1, 0x38E38E38E38E38E4, 0x38E38E39, 1, Divide5},     // 9
    394   {0xf9,     -1, 0x6666666666666667, 0x66666667, 2, Divide5},     // 10
    395   {  -1,     -1, 0x2E8BA2E8BA2E8BA3, 0x2E8BA2E9, 1, Divide5},     // 11
    396   {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 1, Divide5},     // 12
    397   {  -1,     -1, 0x4EC4EC4EC4EC4EC5, 0x4EC4EC4F, 2, Divide5},     // 13
    398   {  -1,     -1, 0x924924924924924A, 0x92492493, 3, Divide7},     // 14
    399   {0x78,     -1, 0x8888888888888889, 0x88888889, 3, Divide7},     // 15
    400 };
    401 
    402 // Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
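        // E.g. for x / 3 the table supplies 0x55555556 == ceil(2^32 / 3): smaddl forms
        // the 64-bit product x * magic, its high 32 bits approximate x / 3, and
        // subtracting (x >> 31) rounds the result toward zero for negative x.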
    403 bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
    404                                       RegLocation rl_src, RegLocation rl_dest, int lit) {
    405   if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
    406     return false;
    407   }
    408   DividePattern pattern = magic_table[lit].pattern;
    409   if (pattern == DivideNone) {
    410     return false;
    411   }
    412   // Tuning: add rem patterns
    413   if (!is_div) {
    414     return false;
    415   }
    416 
    417   RegStorage r_magic = AllocTemp();
    418   LoadConstant(r_magic, magic_table[lit].magic32);
    419   rl_src = LoadValue(rl_src, kCoreReg);
    420   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    421   RegStorage r_long_mul = AllocTemp();
    422   NewLIR4(kA64Smaddl4xwwx, As64BitReg(r_long_mul).GetReg(),
    423           r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
    424   switch (pattern) {
    425     case Divide3:
    426       OpRegRegImm(kOpLsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul), 32);
    427       OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
    428       break;
    429     case Divide5:
    430       OpRegRegImm(kOpAsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul),
    431                   32 + magic_table[lit].shift);
    432       OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
    433       break;
    434     case Divide7:
    435       OpRegRegRegShift(kOpAdd, As64BitReg(r_long_mul), As64BitReg(rl_src.reg),
    436                        As64BitReg(r_long_mul), EncodeShift(kA64Lsr, 32));
    437       OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
    438       OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
    439       break;
    440     default:
    441       LOG(FATAL) << "Unexpected pattern: " << pattern;
    442   }
    443   StoreValue(rl_dest, rl_result);
    444   return true;
    445 }
    446 
    447 bool Arm64Mir2Lir::SmallLiteralDivRem64(Instruction::Code dalvik_opcode, bool is_div,
    448                                         RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
    449   if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
    450     return false;
    451   }
    452   DividePattern pattern = magic_table[lit].pattern;
    453   if (pattern == DivideNone) {
    454     return false;
    455   }
    456   // Tuning: add rem patterns
    457   if (!is_div) {
    458     return false;
    459   }
    460 
    461   RegStorage r_magic = AllocTempWide();
    462   rl_src = LoadValueWide(rl_src, kCoreReg);
    463   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    464   RegStorage r_long_mul = AllocTempWide();
    465 
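          // magic64_base / magic64_eor hold pre-encoded A64 logical-immediate fields:
          // when base is present, the 64-bit magic constant is rebuilt as base ^ eor
          // (orr + eor) or base + 1 (orr + add) in two instructions instead of a
          // full LoadConstantWide.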
    466   if (magic_table[lit].magic64_base >= 0) {
    467     // Check that the entry in the table is correct.
    468     if (kIsDebugBuild) {
    469       uint64_t reconstructed_imm;
    470       uint64_t base = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_base);
    471       if (magic_table[lit].magic64_eor >= 0) {
    472         uint64_t eor = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_eor);
    473         reconstructed_imm = base ^ eor;
    474       } else {
    475         reconstructed_imm = base + 1;
    476       }
    477       }
              DCHECK_EQ(reconstructed_imm, magic_table[lit].magic64) << lit;
    478 
    479     // Load the magic constant in two instructions.
    480     NewLIR3(WIDE(kA64Orr3Rrl), r_magic.GetReg(), rxzr, magic_table[lit].magic64_base);
    481     if (magic_table[lit].magic64_eor >= 0) {
    482       NewLIR3(WIDE(kA64Eor3Rrl), r_magic.GetReg(), r_magic.GetReg(),
    483               magic_table[lit].magic64_eor);
    484     } else {
    485       NewLIR4(WIDE(kA64Add4RRdT), r_magic.GetReg(), r_magic.GetReg(), 1, 0);
    486     }
    487   } else {
    488     LoadConstantWide(r_magic, magic_table[lit].magic64);
    489   }
    490 
    491   NewLIR3(kA64Smulh3xxx, r_long_mul.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
    492   switch (pattern) {
    493     case Divide3:
    494       OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
    495       break;
    496     case Divide5:
    497       OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
    498       OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
    499       break;
    500     case Divide7:
    501       OpRegRegReg(kOpAdd, r_long_mul, rl_src.reg, r_long_mul);
    502       OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
    503       OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
    504       break;
    505     default:
    506       LOG(FATAL) << "Unexpected pattern: " << pattern;
    507   }
    508   StoreValueWide(rl_dest, rl_result);
    509   return true;
    510 }
    511 
    512 // Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
    513 // and store the result in 'rl_dest'.
    514 bool Arm64Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
    515                                     RegLocation rl_src, RegLocation rl_dest, int lit) {
    516   return HandleEasyDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int>(lit));
    517 }
    518 
    519 // Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
    520 // and store the result in 'rl_dest'.
    521 bool Arm64Mir2Lir::HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_div,
    522                                       RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
    523   const bool is_64bit = rl_dest.wide;
    524   const int nbits = (is_64bit) ? 64 : 32;
    525 
    526   if (lit < 2) {
    527     return false;
    528   }
    529   if (!IsPowerOfTwo(lit)) {
    530     if (is_64bit) {
    531       return SmallLiteralDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, lit);
    532     } else {
    533       return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int32_t>(lit));
    534     }
    535   }
    536   int k = LowestSetBit(lit);
    537   if (k >= nbits - 2) {
    538     // Avoid special cases.
    539     return false;
    540   }
    541 
    542   RegLocation rl_result;
    543   RegStorage t_reg;
    544   if (is_64bit) {
    545     rl_src = LoadValueWide(rl_src, kCoreReg);
    546     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    547     t_reg = AllocTempWide();
    548   } else {
    549     rl_src = LoadValue(rl_src, kCoreReg);
    550     rl_result = EvalLoc(rl_dest, kCoreReg, true);
    551     t_reg = AllocTemp();
    552   }
    553 
    554   int shift = EncodeShift(kA64Lsr, nbits - k);
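          // Signed division by 2^k must round toward zero, so a bias of (2^k - 1) is
          // added to negative dividends before the shift (div) or mask (rem); the bias
          // is obtained by logically shifting the sign bits right by (nbits - k).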
    555   if (is_div) {
    556     if (lit == 2) {
    557       // Division by 2 is by far the most common division by constant.
    558       OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
    559       OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
    560     } else {
    561       OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
    562       OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, t_reg, shift);
    563       OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
    564     }
    565   } else {
    566     if (lit == 2) {
    567       OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
    568       OpRegRegImm64(kOpAnd, t_reg, t_reg, lit - 1);
    569       OpRegRegRegShift(kOpSub, rl_result.reg, t_reg, rl_src.reg, shift);
    570     } else {
    571       RegStorage t_reg2 = (is_64bit) ? AllocTempWide() : AllocTemp();
    572       OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
    573       OpRegRegRegShift(kOpAdd, t_reg2, rl_src.reg, t_reg, shift);
    574       OpRegRegImm64(kOpAnd, t_reg2, t_reg2, lit - 1);
    575       OpRegRegRegShift(kOpSub, rl_result.reg, t_reg2, t_reg, shift);
    576     }
    577   }
    578 
    579   if (is_64bit) {
    580     StoreValueWide(rl_dest, rl_result);
    581   } else {
    582     StoreValue(rl_dest, rl_result);
    583   }
    584   return true;
    585 }
    586 
    587 bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
    588   LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
    589   return false;
    590 }
    591 
    592 RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
    593   LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64";
    594   return rl_dest;
    595 }
    596 
    597 RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
    598   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    599 
    600   // Put the literal in a temp.
    601   RegStorage lit_temp = AllocTemp();
    602   LoadConstant(lit_temp, lit);
    603   // Use the generic case for div/rem with arg2 in a register.
    604   // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
    605   rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
    606   FreeTemp(lit_temp);
    607 
    608   return rl_result;
    609 }
    610 
    611 RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
    612                                     RegLocation rl_src2, bool is_div, bool check_zero) {
    613   LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
    614   return rl_dest;
    615 }
    616 
    617 RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegStorage r_src2,
    618                                     bool is_div) {
    619   CHECK_EQ(r_src1.Is64Bit(), r_src2.Is64Bit());
    620 
    621   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    622   if (is_div) {
    623     OpRegRegReg(kOpDiv, rl_result.reg, r_src1, r_src2);
    624   } else {
    625     // temp = r_src1 / r_src2
    626     // dest = r_src1 - temp * r_src2
    627     RegStorage temp;
    628     ArmOpcode wide;
    629     if (rl_result.reg.Is64Bit()) {
    630       temp = AllocTempWide();
    631       wide = WIDE(0);
    632     } else {
    633       temp = AllocTemp();
    634       wide = UNWIDE(0);
    635     }
    636     OpRegRegReg(kOpDiv, temp, r_src1, r_src2);
    637     NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(),
    638             r_src1.GetReg(), r_src2.GetReg());
    639     FreeTemp(temp);
    640   }
    641   return rl_result;
    642 }
    643 
    644 bool Arm64Mir2Lir::GenInlinedAbsLong(CallInfo* info) {
    645   RegLocation rl_src = info->args[0];
    646   rl_src = LoadValueWide(rl_src, kCoreReg);
    647   RegLocation rl_dest = InlineTargetWide(info);
    648   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    649   RegStorage sign_reg = AllocTempWide();
    650   // abs(x) = (x + y) ^ y, where y = x >> 63 (all ones if x < 0, else zero).
    651   OpRegRegImm(kOpAsr, sign_reg, rl_src.reg, 63);
    652   OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, sign_reg);
    653   OpRegReg(kOpXor, rl_result.reg, sign_reg);
    654   StoreValueWide(rl_dest, rl_result);
    655   return true;
    656 }
    657 
    658 bool Arm64Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
    659   DCHECK_EQ(cu_->instruction_set, kArm64);
    660   RegLocation rl_src1 = info->args[0];
    661   RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
    662   rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
    663   rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
    664   RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
    665   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    666   OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
    667   NewLIR4((is_long) ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc, rl_result.reg.GetReg(),
    668           rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), (is_min) ? kArmCondLt : kArmCondGt);
    669   (is_long) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
    670   return true;
    671 }
    672 
    673 bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
    674   RegLocation rl_src_address = info->args[0];  // long address
    675   RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
    676   RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
    677   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    678 
    679   LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
    680   if (size == k64) {
    681     StoreValueWide(rl_dest, rl_result);
    682   } else {
    683     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    684     StoreValue(rl_dest, rl_result);
    685   }
    686   return true;
    687 }
    688 
    689 bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
    690   RegLocation rl_src_address = info->args[0];  // long address
    691   RegLocation rl_src_value = info->args[2];  // [size] value
    692   RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
    693 
    694   RegLocation rl_value;
    695   if (size == k64) {
    696     rl_value = LoadValueWide(rl_src_value, kCoreReg);
    697   } else {
    698     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    699     rl_value = LoadValue(rl_src_value, kCoreReg);
    700   }
    701   StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
    702   return true;
    703 }
    704 
    705 bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
    706   DCHECK_EQ(cu_->instruction_set, kArm64);
    707   // Unused - RegLocation rl_src_unsafe = info->args[0];
    708   RegLocation rl_src_obj = info->args[1];  // Object - known non-null
    709   RegLocation rl_src_offset = info->args[2];  // long low
    710   RegLocation rl_src_expected = info->args[4];  // int, long or Object
    711   // If is_long, high half is in info->args[5]
    712   RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
    713   // If is_long, high half is in info->args[7]
    714   RegLocation rl_dest = InlineTarget(info);  // boolean place for result
    715 
    716   // Load Object and offset
    717   RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
    718   RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
    719 
    720   RegLocation rl_new_value;
    721   RegLocation rl_expected;
    722   if (is_long) {
    723     rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
    724     rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
    725   } else {
    726     rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
    727     rl_expected = LoadValue(rl_src_expected, is_object ? kRefReg : kCoreReg);
    728   }
    729 
    730   if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
    731     // Mark card for object assuming new value is stored.
    732     MarkGCCard(rl_new_value.reg, rl_object.reg);
    733   }
    734 
    735   RegStorage r_ptr = AllocTempRef();
    736   OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);
    737 
    738   // Free now unneeded rl_object and rl_offset to give more temps.
    739   ClobberSReg(rl_object.s_reg_low);
    740   FreeTemp(rl_object.reg);
    741   ClobberSReg(rl_offset.s_reg_low);
    742   FreeTemp(rl_offset.reg);
    743 
    744   // do {
    745   //   tmp = [r_ptr] - expected;
    746   // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
    747   // result = tmp != 0;
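          //
          // kA64Ldaxr2rX / kA64Stlxr3wrX form a load-acquire / store-release exclusive
          // pair: the store's status register (r_tmp32) is 0 on success, so the final
          // kCondNe branch retries the loop on a failed store, while a value mismatch
          // takes the early exit before the store.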
    748 
    749   RegStorage r_tmp;
    750   RegStorage r_tmp_stored;
    751   RegStorage rl_new_value_stored = rl_new_value.reg;
    752   ArmOpcode wide = UNWIDE(0);
    753   if (is_long) {
    754     r_tmp_stored = r_tmp = AllocTempWide();
    755     wide = WIDE(0);
    756   } else if (is_object) {
    757     // References use 64-bit registers, but are stored as compressed 32-bit values.
    758     // This means r_tmp_stored != r_tmp.
    759     r_tmp = AllocTempRef();
    760     r_tmp_stored = As32BitReg(r_tmp);
    761     rl_new_value_stored = As32BitReg(rl_new_value_stored);
    762   } else {
    763     r_tmp_stored = r_tmp = AllocTemp();
    764   }
    765 
    766   RegStorage r_tmp32 = (r_tmp.Is32Bit()) ? r_tmp : As32BitReg(r_tmp);
    767   LIR* loop = NewLIR0(kPseudoTargetLabel);
    768   NewLIR2(kA64Ldaxr2rX | wide, r_tmp_stored.GetReg(), r_ptr.GetReg());
    769   OpRegReg(kOpCmp, r_tmp, rl_expected.reg);
    770   DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
    771   LIR* early_exit = OpCondBranch(kCondNe, NULL);
    772   NewLIR3(kA64Stlxr3wrX | wide, r_tmp32.GetReg(), rl_new_value_stored.GetReg(), r_ptr.GetReg());
    773   NewLIR3(kA64Cmp3RdT, r_tmp32.GetReg(), 0, ENCODE_NO_SHIFT);
    774   DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
    775   OpCondBranch(kCondNe, loop);
    776 
    777   LIR* exit_loop = NewLIR0(kPseudoTargetLabel);
    778   early_exit->target = exit_loop;
    779 
    780   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    781   NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe);
    782 
    783   FreeTemp(r_tmp);  // Now unneeded.
    784   FreeTemp(r_ptr);  // Now unneeded.
    785 
    786   StoreValue(rl_dest, rl_result);
    787 
    788   return true;
    789 }
    790 
    791 bool Arm64Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
    792   constexpr int kLargeArrayThreshold = 512;
    793 
    794   RegLocation rl_src = info->args[0];
    795   RegLocation rl_src_pos = info->args[1];
    796   RegLocation rl_dst = info->args[2];
    797   RegLocation rl_dst_pos = info->args[3];
    798   RegLocation rl_length = info->args[4];
    799   // Compile-time check: bail out and let the non-inlined method handle the exception,
    800   //     to reduce the related metadata.
    800   if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) ||
    801       (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) ||
    802       (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) {
    803     return false;
    804   }
    805 
    806   ClobberCallerSave();
    807   LockCallTemps();  // Prepare for explicit register usage.
    808   RegStorage rs_src = rs_x0;
    809   RegStorage rs_dst = rs_x1;
    810   LoadValueDirectFixed(rl_src, rs_src);
    811   LoadValueDirectFixed(rl_dst, rs_dst);
    812 
    813   // Handle null pointer exception in slow-path.
    814   LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr);
    815   LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr);
    816   // Handle potential overlap in the slow-path.
    817   // TUNING: Support overlapping cases.
    818   LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr);
    819   // Handle exception or big length in slow-path.
    820   RegStorage rs_length = rs_w2;
    821   LoadValueDirectFixed(rl_length, rs_length);
    822   LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr);
    823   // Src bounds check.
    824   RegStorage rs_src_pos = rs_w3;
    825   RegStorage rs_arr_length = rs_w4;
    826   LoadValueDirectFixed(rl_src_pos, rs_src_pos);
    827   LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_src_pos, 0, nullptr);
    828   Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
    829   OpRegReg(kOpSub, rs_arr_length, rs_src_pos);
    830   LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
    831   // Dst bounds check.
    832   RegStorage rs_dst_pos = rs_w5;
    833   LoadValueDirectFixed(rl_dst_pos, rs_dst_pos);
    834   LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_dst_pos, 0, nullptr);
    835   Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
    836   OpRegReg(kOpSub, rs_arr_length, rs_dst_pos);
    837   LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
    838 
    839   // Everything is checked now.
    840   // Set rs_src to the address of the first element to be copied.
    841   rs_src_pos = As64BitReg(rs_src_pos);
    842   OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value());
    843   OpRegRegImm(kOpLsl, rs_src_pos, rs_src_pos, 1);
    844   OpRegReg(kOpAdd, rs_src, rs_src_pos);
    845   // Set rs_dst to the address of the first element to be copied.
    846   rs_dst_pos = As64BitReg(rs_dst_pos);
    847   OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value());
    848   OpRegRegImm(kOpLsl, rs_dst_pos, rs_dst_pos, 1);
    849   OpRegReg(kOpAdd, rs_dst, rs_dst_pos);
    850 
    851   // rs_arr_length won't be used anymore.
    852   RegStorage rs_tmp = rs_arr_length;
    853   // Use 64-bit view since rs_length will be used as index.
    854   rs_length = As64BitReg(rs_length);
    855   OpRegRegImm(kOpLsl, rs_length, rs_length, 1);
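          // From here rs_length holds the byte count and is used as a descending index:
          // copy 2 bytes if bit 1 of the count is set, then 4 bytes if bit 2 is set,
          // then loop copying 8 bytes at a time until the count reaches zero.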
    856 
    857   // Copy one element.
    858   OpRegRegImm(kOpAnd, rs_tmp, As32BitReg(rs_length), 2);
    859   LIR* jmp_to_copy_two = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
    860   OpRegImm(kOpSub, rs_length, 2);
    861   LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
    862   StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);
    863 
    864   // Copy two elements.
    865   LIR *copy_two = NewLIR0(kPseudoTargetLabel);
    866   OpRegRegImm(kOpAnd, rs_tmp, As32BitReg(rs_length), 4);
    867   LIR* jmp_to_copy_four = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
    868   OpRegImm(kOpSub, rs_length, 4);
    869   LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
    870   StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);
    871 
    872   // Copy four elements.
    873   LIR *copy_four = NewLIR0(kPseudoTargetLabel);
    874   LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr);
    875   LIR *begin_loop = NewLIR0(kPseudoTargetLabel);
    876   OpRegImm(kOpSub, rs_length, 8);
    877   rs_tmp = As64BitReg(rs_tmp);
    878   LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k64);
    879   StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k64);
    880   LIR* jmp_to_loop = OpCmpImmBranch(kCondNe, rs_length, 0, nullptr);
    881   LIR* loop_finished = OpUnconditionalBranch(nullptr);
    882 
    883   LIR *check_failed = NewLIR0(kPseudoTargetLabel);
    884   LIR* launchpad_branch = OpUnconditionalBranch(nullptr);
    885   LIR* return_point = NewLIR0(kPseudoTargetLabel);
    886 
    887   src_check_branch->target = check_failed;
    888   dst_check_branch->target = check_failed;
    889   src_dst_same->target = check_failed;
    890   len_neg_or_too_big->target = check_failed;
    891   src_pos_negative->target = check_failed;
    892   src_bad_len->target = check_failed;
    893   dst_pos_negative->target = check_failed;
    894   dst_bad_len->target = check_failed;
    895   jmp_to_copy_two->target = copy_two;
    896   jmp_to_copy_four->target = copy_four;
    897   jmp_to_ret->target = return_point;
    898   jmp_to_loop->target = begin_loop;
    899   loop_finished->target = return_point;
    900 
    901   AddIntrinsicSlowPath(info, launchpad_branch, return_point);
    902   ClobberCallerSave();  // We must clobber everything because the slow path will return here.
    903 
    904   return true;
    905 }
    906 
    907 LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
    908   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
    909   return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target);
    910 }
    911 
    912 LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
    913   LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
    914   return NULL;
    915 }
    916 
    917 LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) {
    918   LOG(FATAL) << "Unexpected use of OpVstm for Arm64";
    919   return NULL;
    920 }
    921 
    922 void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
    923                                                RegLocation rl_result, int lit,
    924                                                int first_bit, int second_bit) {
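          // lit is expected to have exactly two bits set (first_bit and second_bit), so
          // src * lit == (src + (src << (second_bit - first_bit))) << first_bit.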
    925   OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg, EncodeShift(kA64Lsl, second_bit - first_bit));
    926   if (first_bit != 0) {
    927     OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
    928   }
    929 }
    930 
    931 void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
    932   LOG(FATAL) << "Unexpected use of GenDivZero for Arm64";
    933 }
    934 
    935 // Test suspend flag, return target of taken suspend branch
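        // rwSUSPEND is decremented with flags set: with no explicit target the branch
        // is taken when the counter reaches zero (kCondEq), otherwise on kCondNe.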
    936 LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
    937   NewLIR3(kA64Subs3rRd, rwSUSPEND, rwSUSPEND, 1);
    938   return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
    939 }
    940 
    941 // Decrement register and branch on condition
    942 LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
    943   // Combine sub & test using sub setflags encoding here.  We need to make sure a
    944   // subtract form that sets carry is used, so generate explicitly.
    945   // TODO: might be best to add a new op, kOpSubs, and handle it generically.
    946   ArmOpcode opcode = reg.Is64Bit() ? WIDE(kA64Subs3rRd) : UNWIDE(kA64Subs3rRd);
    947   NewLIR3(opcode, reg.GetReg(), reg.GetReg(), 1);  // For value == 1, this should set flags.
    948   DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
    949   return OpCondBranch(c_code, target);
    950 }
    951 
    952 bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
    953 #if ANDROID_SMP != 0
    954   // Start with the last LIR as the barrier candidate. If it is not a matching barrier, generate one below.
    955   LIR* barrier = last_lir_insn_;
    956 
    957   int dmb_flavor;
    958   // TODO: revisit Arm barrier kinds
    959   switch (barrier_kind) {
    960     case kAnyStore: dmb_flavor = kISH; break;
    961     case kLoadAny: dmb_flavor = kISH; break;
    962         // We conjecture that kISHLD is insufficient.  It is documented
    963         // to provide LoadLoad | StoreStore ordering.  But if this were used
    964         // to implement volatile loads, we suspect that the lack of store
    965         // atomicity on ARM would cause us to allow incorrect results for
    966         // the canonical IRIW example.  But we're not sure.
    967         // We should be using acquire loads instead.
    968     case kStoreStore: dmb_flavor = kISHST; break;
    969     case kAnyAny: dmb_flavor = kISH; break;
    970     default:
    971       LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
    972       dmb_flavor = kSY;  // quiet gcc.
    973       break;
    974   }
    975 
    976   bool ret = false;
    977 
    978   // If the same barrier already exists, don't generate another.
    979   if (barrier == nullptr
    980       || (barrier->opcode != kA64Dmb1B || barrier->operands[0] != dmb_flavor)) {
    981     barrier = NewLIR1(kA64Dmb1B, dmb_flavor);
    982     ret = true;
    983   }
    984 
    985   // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
    986   DCHECK(!barrier->flags.use_def_invalid);
    987   barrier->u.m.def_mask = &kEncodeAll;
    988   return ret;
    989 #else
    990   return false;
    991 #endif
    992 }
    993 
    994 void Arm64Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
    995   RegLocation rl_result;
    996 
    997   rl_src = LoadValue(rl_src, kCoreReg);
    998   rl_result = EvalLocWide(rl_dest, kCoreReg, true);
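          // Sbfm with immr = 0, imms = 31 is the sxtw alias: sign-extend the low 32 bits
          // of the source into the 64-bit destination.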
    999   NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), As64BitReg(rl_src.reg).GetReg(), 0, 31);
   1000   StoreValueWide(rl_dest, rl_result);
   1001 }
   1002 
   1003 void Arm64Mir2Lir::GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest,
   1004                                  RegLocation rl_src1, RegLocation rl_src2, bool is_div) {
   1005   if (rl_src2.is_const) {
   1006     DCHECK(rl_src2.wide);
   1007     int64_t lit = mir_graph_->ConstantValueWide(rl_src2);
   1008     if (HandleEasyDivRem64(opcode, is_div, rl_src1, rl_dest, lit)) {
   1009       return;
   1010     }
   1011   }
   1012 
   1013   RegLocation rl_result;
   1014   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   1015   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   1016   GenDivZeroCheck(rl_src2.reg);
   1017   rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, is_div);
   1018   StoreValueWide(rl_dest, rl_result);
   1019 }
   1020 
   1021 void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1,
   1022                              RegLocation rl_src2) {
   1023   RegLocation rl_result;
   1024 
   1025   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   1026   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   1027   rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1028   OpRegRegRegShift(op, rl_result.reg, rl_src1.reg, rl_src2.reg, ENCODE_NO_SHIFT);
   1029   StoreValueWide(rl_dest, rl_result);
   1030 }
   1031 
   1032 void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
   1033   RegLocation rl_result;
   1034 
   1035   rl_src = LoadValueWide(rl_src, kCoreReg);
   1036   rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1037   OpRegRegShift(kOpNeg, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
   1038   StoreValueWide(rl_dest, rl_result);
   1039 }
   1040 
   1041 void Arm64Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
   1042   RegLocation rl_result;
   1043 
   1044   rl_src = LoadValueWide(rl_src, kCoreReg);
   1045   rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1046   OpRegRegShift(kOpMvn, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
   1047   StoreValueWide(rl_dest, rl_result);
   1048 }
   1049 
   1050 void Arm64Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
   1051                                   RegLocation rl_src1, RegLocation rl_src2) {
   1052   switch (opcode) {
   1053     case Instruction::NOT_LONG:
   1054       GenNotLong(rl_dest, rl_src2);
   1055       return;
   1056     case Instruction::ADD_LONG:
   1057     case Instruction::ADD_LONG_2ADDR:
   1058       GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2);
   1059       return;
   1060     case Instruction::SUB_LONG:
   1061     case Instruction::SUB_LONG_2ADDR:
   1062       GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2);
   1063       return;
   1064     case Instruction::MUL_LONG:
   1065     case Instruction::MUL_LONG_2ADDR:
   1066       GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2);
   1067       return;
   1068     case Instruction::DIV_LONG:
   1069     case Instruction::DIV_LONG_2ADDR:
   1070       GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true);
   1071       return;
   1072     case Instruction::REM_LONG:
   1073     case Instruction::REM_LONG_2ADDR:
   1074       GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false);
   1075       return;
   1076     case Instruction::AND_LONG_2ADDR:
   1077     case Instruction::AND_LONG:
   1078       GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2);
   1079       return;
   1080     case Instruction::OR_LONG:
   1081     case Instruction::OR_LONG_2ADDR:
   1082       GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2);
   1083       return;
   1084     case Instruction::XOR_LONG:
   1085     case Instruction::XOR_LONG_2ADDR:
   1086       GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2);
   1087       return;
   1088     case Instruction::NEG_LONG: {
   1089       GenNegLong(rl_dest, rl_src2);
   1090       return;
   1091     }
   1092     default:
   1093       LOG(FATAL) << "Invalid long arith op";
   1094       return;
   1095   }
   1096 }
   1097 
   1098 /*
   1099  * Generate array load
   1100  */
   1101 void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
   1102                              RegLocation rl_index, RegLocation rl_dest, int scale) {
   1103   RegisterClass reg_class = RegClassBySize(size);
   1104   int len_offset = mirror::Array::LengthOffset().Int32Value();
   1105   int data_offset;
   1106   RegLocation rl_result;
   1107   bool constant_index = rl_index.is_const;
   1108   rl_array = LoadValue(rl_array, kRefReg);
   1109   if (!constant_index) {
   1110     rl_index = LoadValue(rl_index, kCoreReg);
   1111   }
   1112 
   1113   if (rl_dest.wide) {
   1114     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   1115   } else {
   1116     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   1117   }
   1118 
   1119   // If index is constant, just fold it into the data offset
   1120   if (constant_index) {
   1121     data_offset += mir_graph_->ConstantValue(rl_index) << scale;
   1122   }
   1123 
   1124   /* null object? */
   1125   GenNullCheck(rl_array.reg, opt_flags);
   1126 
   1127   bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
   1128   RegStorage reg_len;
   1129   if (needs_range_check) {
   1130     reg_len = AllocTemp();
   1131     /* Get len */
   1132     Load32Disp(rl_array.reg, len_offset, reg_len);
   1133     MarkPossibleNullPointerException(opt_flags);
   1134   } else {
   1135     ForceImplicitNullCheck(rl_array.reg, opt_flags);
   1136   }
   1137   if (rl_dest.wide || rl_dest.fp || constant_index) {
   1138     RegStorage reg_ptr;
   1139     if (constant_index) {
   1140       reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
   1141     } else {
   1142       // No special indexed operation, lea + load w/ displacement
   1143       reg_ptr = AllocTempRef();
   1144       OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, As64BitReg(rl_index.reg),
   1145                        EncodeShift(kA64Lsl, scale));
   1146       FreeTemp(rl_index.reg);
   1147     }
   1148     rl_result = EvalLoc(rl_dest, reg_class, true);
   1149 
   1150     if (needs_range_check) {
   1151       if (constant_index) {
   1152         GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
   1153       } else {
   1154         GenArrayBoundsCheck(rl_index.reg, reg_len);
   1155       }
   1156       FreeTemp(reg_len);
   1157     }
   1158     if (rl_result.ref) {
   1159       LoadRefDisp(reg_ptr, data_offset, rl_result.reg, kNotVolatile);
   1160     } else {
   1161       LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, kNotVolatile);
   1162     }
   1163     MarkPossibleNullPointerException(opt_flags);
   1164     if (!constant_index) {
   1165       FreeTemp(reg_ptr);
   1166     }
   1167     if (rl_dest.wide) {
   1168       StoreValueWide(rl_dest, rl_result);
   1169     } else {
   1170       StoreValue(rl_dest, rl_result);
   1171     }
   1172   } else {
   1173     // Offset base, then use indexed load
   1174     RegStorage reg_ptr = AllocTempRef();
   1175     OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
   1176     FreeTemp(rl_array.reg);
   1177     rl_result = EvalLoc(rl_dest, reg_class, true);
   1178 
   1179     if (needs_range_check) {
   1180       GenArrayBoundsCheck(rl_index.reg, reg_len);
   1181       FreeTemp(reg_len);
   1182     }
   1183     if (rl_result.ref) {
   1184       LoadRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale);
   1185     } else {
   1186       LoadBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale, size);
   1187     }
   1188     MarkPossibleNullPointerException(opt_flags);
   1189     FreeTemp(reg_ptr);
   1190     StoreValue(rl_dest, rl_result);
   1191   }
   1192 }
   1193 
   1194 /*
   1195  * Generate array store
   1196  *
   1197  */
   1198 void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
   1199                              RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
   1200   RegisterClass reg_class = RegClassBySize(size);
   1201   int len_offset = mirror::Array::LengthOffset().Int32Value();
   1202   bool constant_index = rl_index.is_const;
   1203 
   1204   int data_offset;
   1205   if (size == k64 || size == kDouble) {
   1206     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   1207   } else {
   1208     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   1209   }
   1210 
   1211   // If index is constant, just fold it into the data offset.
   1212   if (constant_index) {
   1213     data_offset += mir_graph_->ConstantValue(rl_index) << scale;
   1214   }
   1215 
   1216   rl_array = LoadValue(rl_array, kRefReg);
   1217   if (!constant_index) {
   1218     rl_index = LoadValue(rl_index, kCoreReg);
   1219   }
   1220 
   1221   RegStorage reg_ptr;
   1222   bool allocated_reg_ptr_temp = false;
   1223   if (constant_index) {
   1224     reg_ptr = rl_array.reg;
   1225   } else if (IsTemp(rl_array.reg) && !card_mark) {
   1226     Clobber(rl_array.reg);
   1227     reg_ptr = rl_array.reg;
   1228   } else {
   1229     allocated_reg_ptr_temp = true;
   1230     reg_ptr = AllocTempRef();
   1231   }
   1232 
   1233   /* null object? */
   1234   GenNullCheck(rl_array.reg, opt_flags);
   1235 
   1236   bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
   1237   RegStorage reg_len;
   1238   if (needs_range_check) {
   1239     reg_len = AllocTemp();
   1240     // NOTE: max live temps (4) here.
   1241     /* Get len */
   1242     Load32Disp(rl_array.reg, len_offset, reg_len);
   1243     MarkPossibleNullPointerException(opt_flags);
   1244   } else {
   1245     ForceImplicitNullCheck(rl_array.reg, opt_flags);
   1246   }
   1247   /* at this point, reg_ptr points to array, 2 live temps */
   1248   if (rl_src.wide || rl_src.fp || constant_index) {
   1249     if (rl_src.wide) {
   1250       rl_src = LoadValueWide(rl_src, reg_class);
   1251     } else {
   1252       rl_src = LoadValue(rl_src, reg_class);
   1253     }
   1254     if (!constant_index) {
   1255       OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, As64BitReg(rl_index.reg),
   1256                        EncodeShift(kA64Lsl, scale));
   1257     }
   1258     if (needs_range_check) {
   1259       if (constant_index) {
   1260         GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
   1261       } else {
   1262         GenArrayBoundsCheck(rl_index.reg, reg_len);
   1263       }
   1264       FreeTemp(reg_len);
   1265     }
   1266     if (rl_src.ref) {
   1267       StoreRefDisp(reg_ptr, data_offset, rl_src.reg, kNotVolatile);
   1268     } else {
   1269       StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size, kNotVolatile);
   1270     }
   1271     MarkPossibleNullPointerException(opt_flags);
   1272   } else {
   1273     /* reg_ptr -> array data */
   1274     OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
   1275     rl_src = LoadValue(rl_src, reg_class);
   1276     if (needs_range_check) {
   1277       GenArrayBoundsCheck(rl_index.reg, reg_len);
   1278       FreeTemp(reg_len);
   1279     }
   1280     if (rl_src.ref) {
   1281       StoreRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale);
   1282     } else {
   1283       StoreBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale, size);
   1284     }
   1285     MarkPossibleNullPointerException(opt_flags);
   1286   }
   1287   if (allocated_reg_ptr_temp) {
   1288     FreeTemp(reg_ptr);
   1289   }
   1290   if (card_mark) {
   1291     MarkGCCard(rl_src.reg, rl_array.reg);
   1292   }
   1293 }
   1294 
   1295 void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
   1296                                      RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
   1297   OpKind op = kOpBkpt;
  // Per spec, we only care about the low 6 bits of the shift amount.
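  // E.g. a constant shift amount of 68 is treated as 68 & 0x3f == 4.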
   1299   int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
   1300   rl_src = LoadValueWide(rl_src, kCoreReg);
   1301   if (shift_amount == 0) {
   1302     StoreValueWide(rl_dest, rl_src);
   1303     return;
   1304   }
   1305 
   1306   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1307   switch (opcode) {
   1308     case Instruction::SHL_LONG:
   1309     case Instruction::SHL_LONG_2ADDR:
   1310       op = kOpLsl;
   1311       break;
   1312     case Instruction::SHR_LONG:
   1313     case Instruction::SHR_LONG_2ADDR:
   1314       op = kOpAsr;
   1315       break;
   1316     case Instruction::USHR_LONG:
   1317     case Instruction::USHR_LONG_2ADDR:
   1318       op = kOpLsr;
   1319       break;
   1320     default:
   1321       LOG(FATAL) << "Unexpected case";
   1322   }
   1323   OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
   1324   StoreValueWide(rl_dest, rl_result);
   1325 }
   1326 
   1327 void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
   1328                                      RegLocation rl_src1, RegLocation rl_src2) {
   1329   OpKind op = kOpBkpt;
   1330   switch (opcode) {
   1331     case Instruction::ADD_LONG:
   1332     case Instruction::ADD_LONG_2ADDR:
   1333       op = kOpAdd;
   1334       break;
   1335     case Instruction::SUB_LONG:
   1336     case Instruction::SUB_LONG_2ADDR:
   1337       op = kOpSub;
   1338       break;
   1339     case Instruction::AND_LONG:
   1340     case Instruction::AND_LONG_2ADDR:
   1341       op = kOpAnd;
   1342       break;
   1343     case Instruction::OR_LONG:
   1344     case Instruction::OR_LONG_2ADDR:
   1345       op = kOpOr;
   1346       break;
   1347     case Instruction::XOR_LONG:
   1348     case Instruction::XOR_LONG_2ADDR:
   1349       op = kOpXor;
   1350       break;
   1351     default:
   1352       LOG(FATAL) << "Unexpected opcode";
   1353   }
   1354 
   1355   if (op == kOpSub) {
   1356     if (!rl_src2.is_const) {
   1357       return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
   1358     }
   1359   } else {
    // Commutativity: make sure the constant ends up in rl_src2.
   1361     if (!rl_src2.is_const) {
   1362       DCHECK(rl_src1.is_const);
   1363       std::swap(rl_src1, rl_src2);
   1364     }
   1365   }
   1366   DCHECK(rl_src2.is_const);
   1367   int64_t val = mir_graph_->ConstantValueWide(rl_src2);
   1368 
   1369   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   1370   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
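  // If val cannot be encoded as an immediate for this operation, OpRegRegImm64 is expected to
  // materialize it in a temporary register first.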
   1371   OpRegRegImm64(op, rl_result.reg, rl_src1.reg, val);
   1372   StoreValueWide(rl_dest, rl_result);
   1373 }
   1374 
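/*
 * Pop the lowest set bit from reg_mask: advance *reg (a running register cursor that starts
 * at -1) to that register's index and return the mask shifted past it. Worked example:
 * reg_mask = 0b101000 with *reg = -1 yields *reg = 3; a second call yields *reg = 5 and an
 * empty mask.
 */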
   1375 static uint32_t ExtractReg(uint32_t reg_mask, int* reg) {
   1376   // Find first register.
   1377   int first_bit_set = CTZ(reg_mask) + 1;
   1378   *reg = *reg + first_bit_set;
   1379   reg_mask >>= first_bit_set;
   1380   return reg_mask;
   1381 }
   1382 
/**
 * @brief Split a register list into pairs or single registers.
 *
 * Given a list of registers in @p reg_mask, split the list into pairs. Use as follows:
 * @code
 *   int reg1 = -1, reg2 = -1;
 *   while (reg_mask) {
 *     reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
 *     if (UNLIKELY(reg2 < 0)) {
 *       // Single register in reg1.
 *     } else {
 *       // Pair in reg1, reg2.
 *     }
 *   }
 * @endcode
 */
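/*
 * Worked example: reg_mask = 0b1011 (registers 0, 1 and 3) with reg1 = reg2 = -1:
 *   1st call: returns 0b10, reg1 = 1, reg2 = 0  -> pair (0, 1)
 *   2nd call: returns 0,    reg1 = 3, reg2 = -1 -> single register 3
 */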
   1399 static uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) {
   1400   // Find first register.
   1401   int first_bit_set = CTZ(reg_mask) + 1;
   1402   int reg = *reg1 + first_bit_set;
   1403   reg_mask >>= first_bit_set;
   1404 
   1405   if (LIKELY(reg_mask)) {
   1406     // Save the first register, find the second and use the pair opcode.
   1407     int second_bit_set = CTZ(reg_mask) + 1;
   1408     *reg2 = reg;
   1409     reg_mask >>= second_bit_set;
   1410     *reg1 = reg + second_bit_set;
   1411     return reg_mask;
   1412   }
   1413 
   1414   // Use the single opcode, as we just have one register.
   1415   *reg1 = reg;
   1416   *reg2 = -1;
   1417   return reg_mask;
   1418 }
   1419 
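/*
 * The displacement passed to the LIR nodes below is in 8-byte slots (offset >> 3), matching the
 * scaled immediates of str/stp. Illustrative example (the actual registers and offset come from
 * the caller): with base = sp, offset = 24 and reg_mask = {x19, x20, x21}, this emits roughly
 *     stp x19, x20, [sp, #24]
 *     str x21, [sp, #40]
 */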
   1420 static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
   1421   int reg1 = -1, reg2 = -1;
   1422   const int reg_log2_size = 3;
   1423 
   1424   for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
   1425     reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
   1426     if (UNLIKELY(reg2 < 0)) {
   1427       m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
   1428     } else {
   1429       m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
   1430                    RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
   1431     }
   1432   }
   1433 }
   1434 
   1435 // TODO(Arm64): consider using ld1 and st1?
   1436 static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
   1437   int reg1 = -1, reg2 = -1;
   1438   const int reg_log2_size = 3;
   1439 
   1440   for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
   1441     reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
   1442     if (UNLIKELY(reg2 < 0)) {
   1443       m2l->NewLIR3(FWIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
   1444                    offset);
   1445     } else {
   1446       m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
   1447                    RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
   1448     }
   1449   }
   1450 }
   1451 
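/*
 * Resulting layout (sketch): the core spills occupy the topmost core_count slots of the frame,
 * the fp spills sit directly below them, and the rest of the frame lies underneath, down to the
 * newly decremented SP.
 */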
   1452 static int SpillRegsPreSub(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core_reg_mask,
   1453                            uint32_t fp_reg_mask, int frame_size) {
   1454   m2l->OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size);
   1455 
   1456   int core_count = POPCOUNT(core_reg_mask);
   1457 
   1458   if (fp_reg_mask != 0) {
   1459     // Spill FP regs.
   1460     int fp_count = POPCOUNT(fp_reg_mask);
   1461     int spill_offset = frame_size - (core_count + fp_count) * kArm64PointerSize;
   1462     SpillFPRegs(m2l, rs_sp, spill_offset, fp_reg_mask);
   1463   }
   1464 
   1465   if (core_reg_mask != 0) {
   1466     // Spill core regs.
   1467     int spill_offset = frame_size - (core_count * kArm64PointerSize);
   1468     SpillCoreRegs(m2l, rs_sp, spill_offset, core_reg_mask);
   1469   }
   1470 
   1471   return frame_size;
   1472 }
   1473 
   1474 static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core_reg_mask,
   1475                                uint32_t fp_reg_mask, int frame_size) {
  // Spill both core and fp regs at the same time.
  // The very first instruction is an stp with a pre-indexed address, moving the stack pointer
  // down. From then on, we fill upwards. In most cases this generates the same overall number of
  // instructions as SpillRegsPreSub (the exception being an odd number of core spills combined
  // with an even, non-zero number of fp spills), but it is more flexible, as the offsets are
  // guaranteed to stay small.
  //
  // Some demonstrative fill cases : (c) = core, (f) = fp
  // cc    44   cc    44   cc    22   cc    33   fc => 1[1/2]
  // fc => 23   fc => 23   ff => 11   ff => 22
  // ff    11    f    11               f    11
  //
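  // Worked example with illustrative masks: 3 core spills {x19, x20, x21} and 2 fp spills
  // {d8, d9} give combined = 5 and all_offset = 6 slots (48 bytes), and emit roughly:
  //     stp d0,  d8,  [sp, #-48]!   // pre-indexed: drop SP, d0 fills the dummy half
  //     str d9,  [sp, #16]
  //     str x19, [sp, #24]          // upper half of d9's slot pair
  //     stp x20, x21, [sp, #32]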
   1487   int reg1 = -1, reg2 = -1;
   1488   int core_count = POPCOUNT(core_reg_mask);
   1489   int fp_count = POPCOUNT(fp_reg_mask);
   1490 
   1491   int combined = fp_count + core_count;
   1492   int all_offset = RoundUp(combined, 2);  // Needs to be 16B = 2-reg aligned.
   1493 
   1494   int cur_offset = 2;  // What's the starting offset after the first stp? We expect the base slot
   1495                        // to be filled.
   1496 
   1497   // First figure out whether the bottom is FP or core.
   1498   if (fp_count > 0) {
   1499     // Some FP spills.
   1500     //
   1501     // Four cases: (d0 is dummy to fill up stp)
   1502     // 1) Single FP, even number of core -> stp d0, fp_reg
   1503     // 2) Single FP, odd number of core -> stp fp_reg, d0
   1504     // 3) More FP, even number combined -> stp fp_reg1, fp_reg2
   1505     // 4) More FP, odd number combined -> stp d0, fp_reg
   1506     if (fp_count == 1) {
   1507       fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
   1508       DCHECK_EQ(fp_reg_mask, 0U);
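      // In both branches below, the single fp register (reg1) is stored to both halves of the
      // pair slot; the second copy simply fills the dummy half.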
   1509       if (core_count % 2 == 0) {
   1510         m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
   1511                      RegStorage::FloatSolo64(reg1).GetReg(),
   1512                      RegStorage::FloatSolo64(reg1).GetReg(),
   1513                      base.GetReg(), -all_offset);
   1514       } else {
   1515         m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
   1516                      RegStorage::FloatSolo64(reg1).GetReg(),
   1517                      RegStorage::FloatSolo64(reg1).GetReg(),
   1518                      base.GetReg(), -all_offset);
   1519         cur_offset = 0;  // That core reg needs to go into the upper half.
   1520       }
   1521     } else {
   1522       if (combined % 2 == 0) {
   1523         fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
   1524         m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
   1525                      RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset);
   1526       } else {
   1527         fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
   1528         m2l->NewLIR4(WIDE(kA64StpPre4ffXD), rs_d0.GetReg(), RegStorage::FloatSolo64(reg1).GetReg(),
   1529                      base.GetReg(), -all_offset);
   1530       }
   1531     }
   1532   } else {
   1533     // No FP spills.
   1534     //
   1535     // Two cases:
   1536     // 1) Even number of core -> stp core1, core2
   1537     // 2) Odd number of core -> stp xzr, core1
   1538     if (core_count % 2 == 1) {
   1539       core_reg_mask = ExtractReg(core_reg_mask, &reg1);
   1540       m2l->NewLIR4(WIDE(kA64StpPre4rrXD), rs_xzr.GetReg(),
   1541                    RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
   1542     } else {
   1543       core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
   1544       m2l->NewLIR4(WIDE(kA64StpPre4rrXD), RegStorage::Solo64(reg2).GetReg(),
   1545                    RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
   1546     }
   1547   }
   1548 
   1549   if (fp_count != 0) {
   1550     for (; fp_reg_mask != 0;) {
   1551       // Have some FP regs to do.
   1552       fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
   1553       if (UNLIKELY(reg2 < 0)) {
   1554         m2l->NewLIR3(FWIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
   1555                      cur_offset);
   1556         // Do not increment offset here, as the second half will be filled by a core reg.
   1557       } else {
   1558         m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
   1559                      RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset);
   1560         cur_offset += 2;
   1561       }
   1562     }
   1563 
   1564     // Reset counting.
   1565     reg1 = -1;
   1566 
   1567     // If there is an odd number of core registers, we need to store the bottom now.
   1568     if (core_count % 2 == 1) {
   1569       core_reg_mask = ExtractReg(core_reg_mask, &reg1);
   1570       m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(),
   1571                    cur_offset + 1);
   1572       cur_offset += 2;  // Half-slot filled now.
   1573     }
   1574   }
   1575 
   1576   // Spill the rest of the core regs. They are guaranteed to be even.
   1577   DCHECK_EQ(POPCOUNT(core_reg_mask) % 2, 0);
   1578   for (; core_reg_mask != 0; cur_offset += 2) {
   1579     core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
   1580     m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
   1581                  RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset);
   1582   }
   1583 
   1584   DCHECK_EQ(cur_offset, all_offset);
   1585 
   1586   return all_offset * 8;
   1587 }
   1588 
   1589 int Arm64Mir2Lir::SpillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask,
   1590                             int frame_size) {
  // If the frame size is small enough that all offsets would fit into the immediates, use that
  // setup, as it decrements sp early (which helps instruction scheduling) and is no worse,
  // instruction-count wise, than the pre-indexed variant (SpillRegsPreIndexed).
  //
  // This case is also optimal when we have an odd number of core spills and an even (non-zero)
  // number of fp spills.
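  //
  // 63 is the largest scaled displacement the 64-bit stp/ldp forms used for the spills can
  // encode (signed 7-bit immediate, scaled by 8), so a frame of at most 63 * 8 bytes keeps
  // every spill offset directly encodable.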
  if (RoundUp(frame_size, 8) / 8 <= 63) {
   1598     return SpillRegsPreSub(this, base, core_reg_mask, fp_reg_mask, frame_size);
   1599   } else {
   1600     return SpillRegsPreIndexed(this, base, core_reg_mask, fp_reg_mask, frame_size);
   1601   }
   1602 }
   1603 
   1604 static void UnSpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
   1605   int reg1 = -1, reg2 = -1;
   1606   const int reg_log2_size = 3;
   1607 
   1608   for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
   1609     reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
   1610     if (UNLIKELY(reg2 < 0)) {
   1611       m2l->NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
   1612     } else {
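      // ldp of 64-bit registers encodes a signed 7-bit scaled displacement, so the slot offset
      // must not exceed 63.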
   1613       DCHECK_LE(offset, 63);
   1614       m2l->NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
   1615                    RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
   1616     }
   1617   }
   1618 }
   1619 
   1620 static void UnSpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
   1621   int reg1 = -1, reg2 = -1;
   1622   const int reg_log2_size = 3;
   1623 
   1624   for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
   1626     if (UNLIKELY(reg2 < 0)) {
   1627       m2l->NewLIR3(FWIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
   1628                    offset);
   1629     } else {
   1630       m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
   1631                    RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
   1632     }
   1633   }
   1634 }
   1635 
   1636 void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask,
   1637                                int frame_size) {
  // Restore the saved registers and drop the stack frame. Two variants:
  //
  // 1. (Original): Address the saves directly, then drop the whole frame.
  //                Limitation: ldp only encodes a signed 7-bit scaled immediate.
  //
  // 2. (New): Drop the non-save part of the frame first, then proceed as in variant 1 (the
  //           offsets are now guaranteed to be in range), then drop the rest.
  //
  // TODO: In methods with few spills but a huge frame, it would be better to use non-immediate
  //       loads in variant 1.
   1649 
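  // Sketch of variant 2 for a large frame with only core spills (roughly):
  //     add sp, sp, #early_drop                 // drop the non-save part
  //     ldp x.., x.., [sp, #small_offset]       // offsets now fit ldp's immediate
  //     ...
  //     add sp, sp, #(frame_size - early_drop)  // drop the rest
  //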
   1650   // "Magic" constant, 63 (max signed 7b) * 8.
   1651   static constexpr int kMaxFramesizeForOffset = 63 * kArm64PointerSize;
   1652 
   1653   const int num_core_spills = POPCOUNT(core_reg_mask);
   1654   const int num_fp_spills = POPCOUNT(fp_reg_mask);
   1655 
   1656   int early_drop = 0;
   1657 
   1658   if (frame_size > kMaxFramesizeForOffset) {
   1659     // Second variant. Drop the frame part.
   1660 
   1661     // TODO: Always use the first formula, as num_fp_spills would be zero?
   1662     if (fp_reg_mask != 0) {
   1663       early_drop = frame_size - kArm64PointerSize * (num_fp_spills + num_core_spills);
   1664     } else {
   1665       early_drop = frame_size - kArm64PointerSize * num_core_spills;
   1666     }
   1667 
   1668     // Drop needs to be 16B aligned, so that SP keeps aligned.
   1669     early_drop = RoundDown(early_drop, 16);
   1670 
   1671     OpRegImm64(kOpAdd, rs_sp, early_drop);
   1672   }
   1673 
   1674   // Unspill.
   1675   if (fp_reg_mask != 0) {
   1676     int offset = frame_size - early_drop - kArm64PointerSize * (num_fp_spills + num_core_spills);
   1677     UnSpillFPRegs(this, rs_sp, offset, fp_reg_mask);
   1678   }
   1679   if (core_reg_mask != 0) {
   1680     int offset = frame_size - early_drop - kArm64PointerSize * num_core_spills;
   1681     UnSpillCoreRegs(this, rs_sp, offset, core_reg_mask);
   1682   }
   1683 
   1684   // Drop the (rest of) the frame.
   1685   OpRegImm64(kOpAdd, rs_sp, frame_size - early_drop);
   1686 }
   1687 
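// Reverse the bit order of a 32-bit or 64-bit value with a single rbit instruction.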
   1688 bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
   1689   ArmOpcode wide = (size == k64) ? WIDE(0) : UNWIDE(0);
   1690   RegLocation rl_src_i = info->args[0];
   1691   RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
   1692   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg)
                                   : LoadValue(rl_src_i, kCoreReg);
   1694   NewLIR2(kA64Rbit2rr | wide, rl_result.reg.GetReg(), rl_i.reg.GetReg());
   1695   (size == k64) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
   1696   return true;
   1697 }
   1698 
   1699 }  // namespace art
   1700