/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the X86 ISA */

#include "codegen_x86.h"

#include "art_method.h"
#include "base/bit_utils.h"
#include "base/logging.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "mirror/array-inl.h"
#include "x86_lir.h"

namespace art {

/*
 * Compare two 64-bit values
 *    x = y     return  0
 *    x < y     return -1
 *    x > y     return  1
 */
void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  if (cu_->target64) {
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    RegStorage temp_reg = AllocTemp();
    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG);   // result = (src1 > src2) ? 1 : 0
    NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL);  // temp = (src1 < src2) ? 1 : 0
    NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg());
    NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg());

    StoreValue(rl_dest, rl_result);
    FreeTemp(temp_reg);
    return;
  }

  // Prepare for explicit register usage.
  ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
  LoadValueDirectWideFixed(rl_src1, r_tmp1);
  LoadValueDirectWideFixed(rl_src2, r_tmp2);
  // Compute (r1:r0) = (r1:r0) - (r3:r2)
  OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
  OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
  NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL);  // r2 = (r1:r0) < (r3:r2) ? 1 : 0
  NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg());
  OpReg(kOpNeg, rs_r2);         // r2 = -r2
  OpRegReg(kOpOr, rs_r0, rs_r1);   // r0 = high | low - sets ZF
  NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz);  // r0 = (r1:r0) != (r3:r2) ? 1 : 0
  NewLIR2(kX86Movzx8RR, rs_r0.GetReg(), rs_r0.GetReg());
  OpRegReg(kOpOr, rs_r0, rs_r2);   // r0 = r0 | r2
  RegLocation rl_result = LocCReturn();
  StoreValue(rl_dest, rl_result);
}
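
/*
 * Illustrative sketch (not the emitted LIR verbatim) of the branch-free
 * x86-64 sequence above, e.g. for src1 = 5, src2 = 9:
 *   cmp   src1, src2
 *   setg  result.b           ; result = 0  (5 > 9 is false)
 *   setl  temp.b             ; temp   = 1  (5 < 9 is true)
 *   sub   result.b, temp.b   ; result = -1
 *   movsx result, result.b   ; sign-extend to 64 bits
 * Register names are placeholders; the actual assignment is up to
 * EvalLoc()/AllocTemp().
 */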

X86ConditionCode X86ConditionEncoding(ConditionCode cond) {
  switch (cond) {
    case kCondEq: return kX86CondEq;
    case kCondNe: return kX86CondNe;
    case kCondCs: return kX86CondC;
    case kCondCc: return kX86CondNc;
    case kCondUlt: return kX86CondC;
    case kCondUge: return kX86CondNc;
    case kCondMi: return kX86CondS;
    case kCondPl: return kX86CondNs;
    case kCondVs: return kX86CondO;
    case kCondVc: return kX86CondNo;
    case kCondHi: return kX86CondA;
    case kCondLs: return kX86CondBe;
    case kCondGe: return kX86CondGe;
    case kCondLt: return kX86CondL;
    case kCondGt: return kX86CondG;
    case kCondLe: return kX86CondLe;
    case kCondAl:
    case kCondNv: LOG(FATAL) << "Should not reach here";
  }
  return kX86CondO;
}

LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
  NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg());
  X86ConditionCode cc = X86ConditionEncoding(cond);
  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */, cc);
  branch->target = target;
  return branch;
}

LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg,
                                int check_value, LIR* target) {
  if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) {
    // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode.
    NewLIR2(reg.Is64Bit() ? kX86Test64RR : kX86Test32RR, reg.GetReg(), reg.GetReg());
  } else {
    if (reg.Is64Bit()) {
      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value);
    } else {
      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
    }
  }
  X86ConditionCode cc = X86ConditionEncoding(cond);
  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */, cc);
  branch->target = target;
  return branch;
}
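
/*
 * Illustrative sketch (an assumed emitted form, for clarity):
 *   OpCmpImmBranch(kCondEq, eax, 0, t)   ->  test eax, eax ; jz  t
 *   OpCmpImmBranch(kCondLe, eax, 100, t) ->  cmp  eax, 100 ; jle t
 * test-against-self is preferred for a 0 comparand because it is shorter
 * than a cmp with an immediate and sets ZF/SF exactly as cmp with 0 would.
 */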

LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
  // If src or dest is a pair, we'll be using low reg.
  if (r_dest.IsPair()) {
    r_dest = r_dest.GetLow();
  }
  if (r_src.IsPair()) {
    r_src = r_src.GetLow();
  }
  if (r_dest.IsFloat() || r_src.IsFloat())
    return OpFpRegCopy(r_dest, r_src);
  LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR,
                    r_dest.GetReg(), r_src.GetReg());
  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }
  return res;
}

void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    LIR *res = OpRegCopyNoInsert(r_dest, r_src);
    AppendLIR(res);
  }
}

void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    bool dest_fp = r_dest.IsFloat();
    bool src_fp = r_src.IsFloat();
    if (dest_fp) {
      if (src_fp) {
        OpRegCopy(r_dest, r_src);
      } else {
        // TODO: Prevent this from happening in the code. The result is often
        // unused or could have been loaded more easily from memory.
        if (!r_src.IsPair()) {
          DCHECK(!r_dest.IsPair());
          NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg());
        } else {
          NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
          RegStorage r_tmp = AllocTempDouble();
          NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
          NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
          FreeTemp(r_tmp);
        }
      }
    } else {
      if (src_fp) {
        if (!r_dest.IsPair()) {
          DCHECK(!r_src.IsPair());
          NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg());
        } else {
          NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
          RegStorage temp_reg = AllocTempDouble();
          NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
          NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
          NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
        }
      } else {
        DCHECK_EQ(r_dest.IsPair(), r_src.IsPair());
        if (!r_src.IsPair()) {
          // Just copy the register directly.
          OpRegCopy(r_dest, r_src);
        } else {
          // Handle overlap.
          if (r_src.GetHighReg() == r_dest.GetLowReg() &&
              r_src.GetLowReg() == r_dest.GetHighReg()) {
            // Deal with cycles.
            RegStorage temp_reg = AllocTemp();
            OpRegCopy(temp_reg, r_dest.GetHigh());
            OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
            OpRegCopy(r_dest.GetLow(), temp_reg);
            FreeTemp(temp_reg);
          } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
          } else {
            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
          }
        }
      }
    }
  }
}
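
/*
 * Worked example for the pair-overlap cases above (register names purely
 * illustrative): with r_dest = {lo=EAX, hi=EDX} and r_src = {lo=EDX, hi=EAX}
 * the copy is a swap cycle, so a naive lo-then-hi copy would clobber one
 * half. The temp breaks the cycle:
 *   mov temp, EDX   ; temp = dest.hi
 *   mov EDX, EAX    ; dest.hi = dest.lo (== src.hi)
 *   mov EAX, temp   ; dest.lo = old dest.hi (== src.lo)
 */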

void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
                                  int32_t true_val, int32_t false_val, RegStorage rs_dest,
                                  RegisterClass dest_reg_class) {
  DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair());
  DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat());

  // We need this check for correctness; without it we would have to do extra
  // checks in the non-zero/one case below.
  if (true_val == false_val) {
    LoadConstantNoClobber(rs_dest, true_val);
    return;
  }

  const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op);

  const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0);
  if (zero_one_case && IsByteRegister(rs_dest)) {
    if (!dest_intersect) {
      LoadConstantNoClobber(rs_dest, 0);
    }
    OpRegReg(kOpCmp, left_op, right_op);
    // Set the low byte of the result to 0 or 1 from the compare condition code.
    NewLIR2(kX86Set8R, rs_dest.GetReg(),
            X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code)));
    if (dest_intersect) {
      NewLIR2(rs_dest.Is64Bit() ? kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg());
    }
  } else {
    // Be careful: rs_dest may only be written after the cmp, since it can alias
    // one of the operands, and we cannot zero it with xor because that would
    // dirty the condition flags.
    RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false);
    if (temp_reg.Valid()) {
      if (false_val == 0 && dest_intersect) {
        code = FlipComparisonOrder(code);
        std::swap(true_val, false_val);
      }
      if (!dest_intersect) {
        LoadConstantNoClobber(rs_dest, false_val);
      }
      LoadConstantNoClobber(temp_reg, true_val);
      OpRegReg(kOpCmp, left_op, right_op);
      if (dest_intersect) {
        LoadConstantNoClobber(rs_dest, false_val);
        DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
      }
      OpCondRegReg(kOpCmov, code, rs_dest, temp_reg);
      FreeTemp(temp_reg);
    } else {
      // Slow path.
      LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr);
      LoadConstantNoClobber(rs_dest, false_val);
      LIR* that_is_it = NewLIR1(kX86Jmp8, 0);
      LIR* true_case = NewLIR0(kPseudoTargetLabel);
      cmp_branch->target = true_case;
      LoadConstantNoClobber(rs_dest, true_val);
      LIR* end = NewLIR0(kPseudoTargetLabel);
      that_is_it->target = end;
    }
  }
}
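
/*
 * Illustrative sketch (an assumed emitted form, for clarity) of the zero/one
 * fast path above: GenSelectConst32(eax, ecx, kCondLt, 1, 0, bl, ...) would
 * produce roughly
 *   xor  ebx, ebx   ; only when rs_dest does not alias an operand
 *   cmp  eax, ecx
 *   setl bl         ; bl = (eax < ecx) ? 1 : 0
 * i.e. a branch-free select; when the destination aliases an operand, the
 * pre-clearing xor is replaced by a movzx after the setcc.
 */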

void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  UNUSED(bb);
  RegLocation rl_result;
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  // Avoid using float regs here.
  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
  ConditionCode ccode = mir->meta.ccode;

  // The kMirOpSelect has two variants, one for constants and one for moves.
  const bool is_constant_case = (mir->ssa_rep->num_uses == 1);

  if (is_constant_case) {
    int true_val = mir->dalvikInsn.vB;
    int false_val = mir->dalvikInsn.vC;

    // Degenerate case: both constants are the same.
    if (true_val == false_val) {
      rl_result = EvalLoc(rl_dest, result_reg_class, true);
      LoadConstantNoClobber(rl_result.reg, true_val);
    } else {
      // TODO: use GenSelectConst32 and handle additional opcode patterns such as
      // "cmp; setcc; movzx" or "cmp; sbb r0,r0; and r0,$mask; add r0,$literal".
      rl_src = LoadValue(rl_src, src_reg_class);
      rl_result = EvalLoc(rl_dest, result_reg_class, true);
      /*
       * For ccode == kCondEq:
       *
       * 1) When the true case is zero and result_reg is not same as src_reg:
       *     xor result_reg, result_reg
       *     cmp $0, src_reg
       *     mov t1, $false_case
       *     cmovnz result_reg, t1
       * 2) When the false case is zero and result_reg is not same as src_reg:
       *     xor result_reg, result_reg
       *     cmp $0, src_reg
       *     mov t1, $true_case
       *     cmovz result_reg, t1
       * 3) All other cases (we do compare first to set eflags):
       *     cmp $0, src_reg
       *     mov result_reg, $false_case
       *     mov t1, $true_case
       *     cmovz result_reg, t1
       */
      // FIXME: depending on how you use registers you could get a false != mismatch when dealing
      // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
      const bool result_reg_same_as_src =
          (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum());
      const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
      const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
      const bool catch_all_case = !(true_zero_case || false_zero_case);

      if (true_zero_case || false_zero_case) {
        OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
      }

      if (true_zero_case || false_zero_case || catch_all_case) {
        OpRegImm(kOpCmp, rl_src.reg, 0);
      }

      if (catch_all_case) {
        OpRegImm(kOpMov, rl_result.reg, false_val);
      }

      if (true_zero_case || false_zero_case || catch_all_case) {
        ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
        int immediateForTemp = true_zero_case ? false_val : true_val;
        RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
        OpRegImm(kOpMov, temp1_reg, immediateForTemp);

        OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);

        FreeTemp(temp1_reg);
      }
    }
  } else {
    rl_src = LoadValue(rl_src, src_reg_class);
    RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
    RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
    rl_true = LoadValue(rl_true, result_reg_class);
    rl_false = LoadValue(rl_false, result_reg_class);
    rl_result = EvalLoc(rl_dest, result_reg_class, true);

    /*
     * For ccode == kCondEq:
     *
     * 1) When true case is already in place:
     *     cmp $0, src_reg
     *     cmovnz result_reg, false_reg
     * 2) When false case is already in place:
     *     cmp $0, src_reg
     *     cmovz result_reg, true_reg
     * 3) When neither case is in place:
     *     cmp $0, src_reg
     *     mov result_reg, false_reg
     *     cmovz result_reg, true_reg
     */

    // kMirOpSelect is generated only for conditional cases where the comparison is against zero.
    OpRegImm(kOpCmp, rl_src.reg, 0);

    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {
      OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg);
    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {
      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
    } else {
      OpRegCopy(rl_result.reg, rl_false.reg);
      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
    }
  }

  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  LIR* taken = &block_label_list_[bb->taken];
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  ConditionCode ccode = mir->meta.ccode;

  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }
  if (rl_src2.is_const) {
    // Do special compare/branch against simple const operand.
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
    return;
  }

  if (cu_->target64) {
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);

    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
    OpCondBranch(ccode, taken);
    return;
  }

  // Prepare for explicit register usage.
  ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
  LoadValueDirectWideFixed(rl_src1, r_tmp1);
  LoadValueDirectWideFixed(rl_src2, r_tmp2);

  // Swap operands and condition code to prevent use of zero flag.
  if (ccode == kCondLe || ccode == kCondGt) {
    // Compute (r3:r2) = (r3:r2) - (r1:r0)
    OpRegReg(kOpSub, rs_r2, rs_r0);  // r2 = r2 - r0
    OpRegReg(kOpSbc, rs_r3, rs_r1);  // r3 = r3 - r1 - CF
  } else {
    // Compute (r1:r0) = (r1:r0) - (r3:r2)
    OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
    OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
  }
  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = r0 | r1
      break;
    case kCondLe:
      ccode = kCondGe;
      break;
    case kCondGt:
      ccode = kCondLt;
      break;
    case kCondLt:
    case kCondGe:
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}
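
/*
 * Rationale sketch for the swap above (illustrative): after the sub/sbc pair,
 * SF and OF reflect the full signed 64-bit difference, but ZF reflects only
 * the high word, so "le" and "gt" (which need ZF) cannot be tested directly.
 * Swapping the operands turns them into conditions that need SF/OF only:
 *   src1 <= src2  ==  src2 - src1 >= 0  (kCondGe)
 *   src1 >  src2  ==  src2 - src1 <  0  (kCondLt)
 */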

void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                          int64_t val, ConditionCode ccode) {
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  LIR* taken = &block_label_list_[bb->taken];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  bool is_equality_test = ccode == kCondEq || ccode == kCondNe;

  if (cu_->target64) {
    if (is_equality_test && val == 0) {
      // We can simplify the compare against 0 for == and !=.
      NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
    } else if (is_equality_test && val_hi == 0 && val_lo > 0) {
      OpRegImm(kOpCmp, rl_src1.reg, val_lo);
    } else {
      RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
      LoadConstantWide(tmp, val);
      OpRegReg(kOpCmp, rl_src1.reg, tmp);
      FreeTemp(tmp);
    }
    OpCondBranch(ccode, taken);
    return;
  }

  if (is_equality_test && val != 0) {
    rl_src1 = ForceTempWide(rl_src1);
  }
  RegStorage low_reg = rl_src1.reg.GetLow();
  RegStorage high_reg = rl_src1.reg.GetHigh();

  if (is_equality_test) {
    // We can simplify the compare against 0 for == and !=.
    if (val == 0) {
      if (IsTemp(low_reg)) {
        OpRegReg(kOpOr, low_reg, high_reg);
        // We have now changed it; ignore the old values.
        Clobber(rl_src1.reg);
      } else {
        RegStorage t_reg = AllocTemp();
        OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
        FreeTemp(t_reg);
      }
      OpCondBranch(ccode, taken);
      return;
    }

    // Need to compute the actual value for ==, !=.
    OpRegImm(kOpSub, low_reg, val_lo);
    NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
    OpRegReg(kOpOr, high_reg, low_reg);
    Clobber(rl_src1.reg);
  } else if (ccode == kCondLe || ccode == kCondGt) {
    // Swap operands and condition code to prevent use of zero flag.
    RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
    LoadConstantWide(tmp, val);
    OpRegReg(kOpSub, tmp.GetLow(), low_reg);
    OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
    ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
    FreeTemp(tmp);
  } else {
    // We can use a compare for the low word to set CF.
    OpRegImm(kOpCmp, low_reg, val_lo);
    if (IsTemp(high_reg)) {
      NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
      // We have now changed it; ignore the old values.
      Clobber(rl_src1.reg);
    } else {
      // mov temp_reg, high_reg; sbb temp_reg, high_constant
      RegStorage t_reg = AllocTemp();
      OpRegCopy(t_reg, high_reg);
      NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
      FreeTemp(t_reg);
    }
  }

  OpCondBranch(ccode, taken);
}
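
/*
 * Illustrative sketch (an assumed emitted form) of the 32-bit equality path
 * above, e.g. branching on v == 0x0000000500000001LL:
 *   sub  v.lo, 0x00000001
 *   sbb  v.hi, 0x00000005
 *   or   v.hi, v.lo      ; zero iff both halves matched
 *   jz   taken
 * ZF is usable here because the or collapses the full 64-bit difference.
 */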

void X86Mir2Lir::CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long) {
  // It does not make sense to calculate magic and shift for zero divisor.
  DCHECK_NE(divisor, 0);

  /* According to H. S. Warren's Hacker's Delight, Chapter 10, and
   * T. Granlund and P. L. Montgomery's "Division by invariant integers using
   * multiplication", the magic number M and shift S can be calculated in
   * the following way:
   * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
   * where divisor(d) >= 2.
   * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
   * where divisor(d) <= -2.
   * Thus nc can be calculated as:
   * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long
   * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long
   *
   * So the shift p is the smallest p satisfying
   * 2^p > nc * (d - 2^p % d), where d >= 2
   * 2^p > nc * (d + 2^p % d), where d <= -2.
   *
   * The magic number M is calculated by
   * M = (2^p + d - 2^p % d) / d, where d >= 2
   * M = (2^p - d - 2^p % d) / d, where d <= -2.
   *
   * Notice that p is always greater than or equal to 32 (or 64 for long), so
   * we just return p - 32 (or p - 64) as the shift S.
   */

  int64_t p = (is_long) ? 63 : 31;
  const uint64_t exp = (is_long) ? 0x8000000000000000ULL : 0x80000000U;

  // Initialize the computations.
  uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
  uint64_t tmp = exp + ((is_long) ? static_cast<uint64_t>(divisor) >> 63 :
                                    static_cast<uint32_t>(divisor) >> 31);
  uint64_t abs_nc = tmp - 1 - tmp % abs_d;
  uint64_t quotient1 = exp / abs_nc;
  uint64_t remainder1 = exp % abs_nc;
  uint64_t quotient2 = exp / abs_d;
  uint64_t remainder2 = exp % abs_d;

  /*
   * To avoid handling both positive and negative divisors separately,
   * Hacker's Delight introduces a method that treats the two cases together,
   * avoiding duplication.
   */
  uint64_t delta;
  do {
    p++;
    quotient1 = 2 * quotient1;
    remainder1 = 2 * remainder1;
    if (remainder1 >= abs_nc) {
      quotient1++;
      remainder1 = remainder1 - abs_nc;
    }
    quotient2 = 2 * quotient2;
    remainder2 = 2 * remainder2;
    if (remainder2 >= abs_d) {
      quotient2++;
      remainder2 = remainder2 - abs_d;
    }
    delta = abs_d - remainder2;
  } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));

  magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);

  if (!is_long) {
    magic = static_cast<int>(magic);
  }

  shift = (is_long) ? p - 64 : p - 32;
}
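
/*
 * Worked example (a sanity check of the routine above, not code from this
 * file): for a signed 32-bit divide by 7 the algorithm yields
 * M = 0x92492493 (-1840700269) and S = 2. Since M < 0 while the divisor is
 * positive, the generated code must add the numerator back after the high
 * multiply, shift right by 2, and then add the sign bit, as done in
 * GenDivRemLit() below.
 */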

RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) {
  UNUSED(rl_dest, reg_lo, lit, is_div);
  LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
  UNREACHABLE();
}

RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
                                     int imm, bool is_div) {
  // Use a multiply (and fixup) to perform an int div/rem by a constant.
  RegLocation rl_result;

  if (imm == 1) {
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (is_div) {
      // x / 1 == x.
      LoadValueDirectFixed(rl_src, rl_result.reg);
    } else {
      // x % 1 == 0.
      LoadConstantNoClobber(rl_result.reg, 0);
    }
  } else if (imm == -1) {  // handle 0x80000000 / -1 special case.
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (is_div) {
      LoadValueDirectFixed(rl_src, rl_result.reg);

      // Check if numerator is 0.
      OpRegImm(kOpCmp, rl_result.reg, 0);
      LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);

      // Handle 0x80000000 / -1.
      OpRegImm(kOpCmp, rl_result.reg, 0x80000000);
      LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);

      // For x != MIN_INT, x / -1 == -x.
      NewLIR1(kX86Neg32R, rl_result.reg.GetReg());

      // The result register already contains the right value (0x80000000).
      minint_branch->target = NewLIR0(kPseudoTargetLabel);
      branch->target = NewLIR0(kPseudoTargetLabel);
    } else {
      // x % -1 == 0.
      LoadConstantNoClobber(rl_result.reg, 0);
    }
  } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
    // Division using shifting.
    rl_src = LoadValue(rl_src, kCoreReg);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (IsSameReg(rl_result.reg, rl_src.reg)) {
      RegStorage rs_temp = AllocTypedTemp(false, kCoreReg);
      rl_result.reg.SetReg(rs_temp.GetReg());
    }

    // Check if numerator is 0.
    OpRegImm(kOpCmp, rl_src.reg, 0);
    LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
    LoadConstantNoClobber(rl_result.reg, 0);
    LIR* done = NewLIR1(kX86Jmp8, 0);
    branch->target = NewLIR0(kPseudoTargetLabel);

    NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1);
    NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
    OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
    int shift_amount = CTZ(imm);
    OpRegImm(kOpAsr, rl_result.reg, shift_amount);
    if (imm < 0) {
      OpReg(kOpNeg, rl_result.reg);
    }
    done->target = NewLIR0(kPseudoTargetLabel);
  } else {
    CHECK(imm <= -2 || imm >= 2);

    // Use H. S. Warren's Hacker's Delight, Chapter 10, and T. Granlund and
    // P. L. Montgomery's "Division by invariant integers using multiplication".
    int64_t magic;
    int shift;
    CalculateMagicAndShift((int64_t)imm, magic, shift, false /* is_long */);

    /*
     * For imm >= 2,
     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n > 0
     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n < 0.
     * For imm <= -2,
     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n > 0
     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n < 0.
     * We implement this algorithm in the following way:
     * 1. multiply magic number M and numerator n, get the higher 32 bits in EDX
     * 2. if imm > 0 and magic < 0, add numerator to EDX
     *    if imm < 0 and magic > 0, sub numerator from EDX
     * 3. if S != 0, SAR S bits for EDX
     * 4. add 1 to EDX if EDX < 0
     * 5. Thus, EDX is the quotient
     */
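
    // Illustrative sketch (an assumed emitted form) of this path for is_div
    // and imm = 7, using the magic M = 0x92492493, S = 2 from the worked
    // example after CalculateMagicAndShift():
    //   mov  eax, 0x92492493
    //   imul numerator         ; EDX:EAX = M * n
    //   add  edx, numerator    ; step 2: M < 0, imm > 0
    //   sar  edx, 2            ; step 3
    //   mov  eax, edx          ; step 4: add the sign bit
    //   shr  edx, 31
    //   add  edx, eax          ; EDX = quotient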

    FlushReg(rs_r0);
    Clobber(rs_r0);
    LockTemp(rs_r0);
    FlushReg(rs_r2);
    Clobber(rs_r2);
    LockTemp(rs_r2);

    // Assume that the result will be in EDX for divide, and EAX for remainder.
    rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, is_div ? rs_r2 : rs_r0,
                 INVALID_SREG, INVALID_SREG};

    // We need the value at least twice.  Load into a temp.
    rl_src = LoadValue(rl_src, kCoreReg);
    RegStorage numerator_reg = rl_src.reg;

    // Check if numerator is 0.
    OpRegImm(kOpCmp, numerator_reg, 0);
    LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
    // Return result 0 if numerator was 0.
    LoadConstantNoClobber(rl_result.reg, 0);
    LIR* done = NewLIR1(kX86Jmp8, 0);
    branch->target = NewLIR0(kPseudoTargetLabel);

    // EAX = magic.
    LoadConstant(rs_r0, magic);

    // EDX:EAX = magic * numerator.
    NewLIR1(kX86Imul32DaR, numerator_reg.GetReg());

    if (imm > 0 && magic < 0) {
      // Add numerator to EDX.
      DCHECK(numerator_reg.Valid());
      NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg());
    } else if (imm < 0 && magic > 0) {
      DCHECK(numerator_reg.Valid());
      NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg());
    }

    // Do we need the shift?
    if (shift != 0) {
      // Shift EDX by 'shift' bits.
      NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift);
    }

    // Add 1 to EDX if EDX < 0.

    // Move EDX to EAX.
    OpRegCopy(rs_r0, rs_r2);

    // Move sign bit to bit 0, zeroing the rest.
    NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31);

    // EDX = EDX + EAX.
    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg());

    // Quotient is in EDX.
    if (!is_div) {
      // We need to compute the remainder.
      // Remainder is numerator - (quotient * imm).
      DCHECK(numerator_reg.Valid());
      OpRegCopy(rs_r0, numerator_reg);

      // EDX = quotient * imm.
      OpRegRegImm(kOpMul, rs_r2, rs_r2, imm);

      // EAX -= EDX.
      NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg());

      // For this case, return the result in EAX.
    }
    done->target = NewLIR0(kPseudoTargetLabel);
  }

  return rl_result;
}

RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
                                  bool is_div) {
  UNUSED(rl_dest, reg_lo, reg_hi, is_div);
  LOG(FATAL) << "Unexpected use of GenDivRem for x86";
  UNREACHABLE();
}

RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                                  RegLocation rl_src2, bool is_div, int flags) {
  UNUSED(rl_dest);
  // We have to use fixed registers, so flush all the temps.

  // Prepare for explicit register usage.
  ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);

  // Load LHS into EAX.
  LoadValueDirectFixed(rl_src1, rs_r0);

  // Load RHS into ECX (rs_r1).
  LoadValueDirectFixed(rl_src2, rs_r1);

  // Copy LHS sign bit into EDX.
  NewLIR0(kx86Cdq32Da);

  if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
    // Handle division by zero case.
    GenDivZeroCheck(rs_r1);
  }

  // Check if numerator is 0.
  OpRegImm(kOpCmp, rs_r0, 0);
  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);

  // Have to catch 0x80000000/-1 case, or we will get an exception!
  OpRegImm(kOpCmp, rs_r1, -1);
  LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // RHS is -1.
  OpRegImm(kOpCmp, rs_r0, 0x80000000);
  LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  branch->target = NewLIR0(kPseudoTargetLabel);

  // In 0x80000000/-1 case.
  if (!is_div) {
    // For DIV, EAX is already right. For REM, we need EDX to be 0.
    LoadConstantNoClobber(rs_r2, 0);
  }
  LIR* done = NewLIR1(kX86Jmp8, 0);

  // Expected case.
  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
  minint_branch->target = minus_one_branch->target;
  NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg());
  done->target = NewLIR0(kPseudoTargetLabel);

  // Result is in EAX for div and EDX for rem.
  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
  if (!is_div) {
    rl_result.reg.SetReg(rs_r2.GetReg());
  }
  return rl_result;
}
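
/*
 * Background note (general x86 semantics, not specific to this file): cdq
 * sign-extends EAX into EDX, and idiv r32 divides EDX:EAX by its operand,
 * leaving the quotient in EAX and the remainder in EDX. That is why the
 * result location above is EAX for div and EDX for rem.
 */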

static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) {
  return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num);
}

bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);

  if (is_long && !cu_->target64) {
   /*
    * We want to implement the following algorithm
    * mov eax, low part of arg1
    * mov edx, high part of arg1
    * mov ebx, low part of arg2
    * mov ecx, high part of arg2
    * mov edi, eax
    * sub edi, ebx
    * mov edi, edx
    * sbb edi, ecx
    * is_min ? "cmovgel eax, ebx" : "cmovll eax, ebx"
    * is_min ? "cmovgel edx, ecx" : "cmovll edx, ecx"
    *
    * The algorithm above needs 5 registers: a pair for the first operand
    * (which later will be used as the result), a pair for the second operand
    * and a temp register (e.g. 'edi') for intermediate calculations.
    * Ideally we have 6 GP caller-save registers in 32-bit mode. They are:
    * 'eax', 'ebx', 'ecx', 'edx', 'esi' and 'edi'. So there should always be
    * enough registers to operate on. In practice, the register pair 'edi'
    * and 'esi' holds promoted values and sometimes must be treated as
    * 'callee save'. If one of the operands is in the promoted registers,
    * we have enough registers to operate on. Otherwise registers are
    * scarce and we have to save 'edi' before the calculation and restore
    * it afterwards.
    */

    RegLocation rl_src1 = info->args[0];
    RegLocation rl_src2 = info->args[2];
    RegLocation rl_dest = InlineTargetWide(info);

    if (rl_dest.s_reg_low == INVALID_SREG) {
      // Result is unused, the code is dead. Inlining successful, no code generated.
      return true;
    }

    if (PartiallyIntersects(rl_src1, rl_dest) &&
        PartiallyIntersects(rl_src2, rl_dest)) {
      // A special case which we don't want to handle.
      // This is when src1 is mapped on v0 and v1,
      // src2 is mapped on v2, v3,
      // result is mapped on v1, v2
      return false;
    }

    /*
     * If the result register is the same as the second element, then we
     * need to be careful. The reason is that the first copy will
     * inadvertently clobber the second element with the first one thus
     * yielding the wrong result. Thus we do a swap in that case.
     */
    if (Intersects(rl_src2, rl_dest)) {
      std::swap(rl_src1, rl_src2);
    }

    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

    // Pick the first integer as min/max.
    OpRegCopyWide(rl_result.reg, rl_src1.reg);

    /*
     * If the integers are both in the same register, then there is
     * nothing else to do because they are equal and we have already
     * moved one into the result.
     */
    if (mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
        mir_graph_->SRegToVReg(rl_src2.s_reg_low)) {
      StoreValueWide(rl_dest, rl_result);
      return true;
    }

    // Free registers to make some room for the second operand.
    // But don't try to free part of a source which intersects
    // part of result or promoted registers.

    if (IsTemp(rl_src1.reg.GetLow()) &&
       (rl_src1.reg.GetLowReg() != rl_result.reg.GetHighReg()) &&
       (rl_src1.reg.GetLowReg() != rl_result.reg.GetLowReg())) {
      // Is low part temporary and doesn't intersect any parts of result?
      FreeTemp(rl_src1.reg.GetLow());
    }

    if (IsTemp(rl_src1.reg.GetHigh()) &&
       (rl_src1.reg.GetHighReg() != rl_result.reg.GetLowReg()) &&
       (rl_src1.reg.GetHighReg() != rl_result.reg.GetHighReg())) {
      // Is high part temporary and doesn't intersect any parts of result?
      FreeTemp(rl_src1.reg.GetHigh());
    }

    rl_src2 = LoadValueWide(rl_src2, kCoreReg);

    // Do we have a free register for intermediate calculations?
    RegStorage tmp = AllocTemp(false);
    const int kRegSize = cu_->target64 ? 8 : 4;
    if (tmp == RegStorage::InvalidReg()) {
       /*
        * No, will use 'edi'.
        *
        * As mentioned above we have 4 temporary and 2 promotable
        * caller-save registers. Therefore, we assume that a free
        * register can be allocated only if 'esi' and 'edi' are
        * already used as operands. If the number of promotable registers
        * increases from 2 to 4 then our assumption fails and operand
        * data is corrupted.
        * Let's DCHECK it.
        */
       DCHECK(IsTemp(rl_src2.reg.GetLow()) &&
              IsTemp(rl_src2.reg.GetHigh()) &&
              IsTemp(rl_result.reg.GetLow()) &&
              IsTemp(rl_result.reg.GetHigh()));
       tmp = rs_rDI;
       NewLIR1(kX86Push32R, tmp.GetReg());
       cfi_.AdjustCFAOffset(kRegSize);
       // Record cfi only if it is not already spilled.
       if (!CoreSpillMaskContains(tmp.GetReg())) {
         cfi_.RelOffset(DwarfCoreReg(cu_->target64, tmp.GetReg()), 0);
       }
    }

    // Now we are ready to do calculations.
    // Note: mov does not modify EFLAGS, so the borrow produced by the low-word
    // sub survives into the sbc; only the final SF/OF are consumed below.
    OpRegReg(kOpMov, tmp, rl_result.reg.GetLow());
    OpRegReg(kOpSub, tmp, rl_src2.reg.GetLow());
    OpRegReg(kOpMov, tmp, rl_result.reg.GetHigh());
    OpRegReg(kOpSbc, tmp, rl_src2.reg.GetHigh());

    // Pop 'edi' here to break the dependency chain a bit.
    if (tmp == rs_rDI) {
      NewLIR1(kX86Pop32R, tmp.GetReg());
      cfi_.AdjustCFAOffset(-kRegSize);
      if (!CoreSpillMaskContains(tmp.GetReg())) {
        cfi_.Restore(DwarfCoreReg(cu_->target64, tmp.GetReg()));
      }
    } else {
      FreeTemp(tmp);
    }

    // Conditionally move the other integer into the destination register.
    ConditionCode cc = is_min ? kCondGe : kCondLt;
    OpCondRegReg(kOpCmov, cc, rl_result.reg.GetLow(), rl_src2.reg.GetLow());
    OpCondRegReg(kOpCmov, cc, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh());
    FreeTemp(rl_src2.reg);
    StoreValueWide(rl_dest, rl_result);
    return true;
  }

  // Get the two arguments to the invoke and place them in GP registers.
  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);

  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  /*
   * If the result register is the same as the second element, then we need to be careful.
   * The reason is that the first copy will inadvertently clobber the second element with
   * the first one thus yielding the wrong result. Thus we do a swap in that case.
   */
  if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
    std::swap(rl_src1, rl_src2);
  }

  // Pick the first integer as min/max.
  OpRegCopy(rl_result.reg, rl_src1.reg);

  // If the integers are both in the same register, then there is nothing else to do
  // because they are equal and we have already moved one into the result.
  if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) {
    // It is possible we didn't pick correctly so do the actual comparison now.
    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);

    // Conditionally move the other integer into the destination register.
    ConditionCode condition_code = is_min ? kCondGt : kCondLt;
    OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg);
  }

  if (is_long) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
  return true;
}
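
/*
 * Illustrative sketch (an assumed emitted form) of the 32-bit int path above,
 * e.g. min(eax, ecx) with the result register picked as eax:
 *   mov    result, eax   ; elided when result == eax
 *   cmp    eax, ecx
 *   cmovg  result, ecx   ; replace with the smaller value
 * max() uses cmovl instead.
 */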

bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_address;
  if (!cu_->target64) {
    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
    rl_address = LoadValue(rl_src_address, kCoreReg);
  } else {
    rl_address = LoadValueWide(rl_src_address, kCoreReg);
  }
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  // Unaligned access is allowed on x86.
  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
  if (size == k64) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_address;
  if (!cu_->target64) {
    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
    rl_address = LoadValue(rl_src_address, kCoreReg);
  } else {
    rl_address = LoadValueWide(rl_src_address, kCoreReg);
  }
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_value;
  if (size == k64) {
    // Unaligned access is allowed on x86.
    rl_value = LoadValueWide(rl_src_value, kCoreReg);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    // In 32-bit mode, only the EAX..EDX registers can be used with Mov8MR.
    if (!cu_->target64 && size == kSignedByte) {
      rl_src_value = UpdateLocTyped(rl_src_value);
      if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
        RegStorage temp = AllocateByteRegister();
        OpRegCopy(temp, rl_src_value.reg);
        rl_value.reg = temp;
      } else {
        rl_value = LoadValue(rl_src_value, kCoreReg);
      }
    } else {
      rl_value = LoadValue(rl_src_value, kCoreReg);
    }
  }
  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
  return true;
}

void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
  NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset);
}

void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
  DCHECK_EQ(kX86, cu_->instruction_set);
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
  DCHECK_EQ(kX86_64, cu_->instruction_set);
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

static bool IsInReg(X86Mir2Lir *pMir2Lir, const RegLocation &rl, RegStorage reg) {
  return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home);
}

bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  if (!cu_->target64) {
    rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
  }
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]
  const int kRegSize = cu_->target64 ? 8 : 4;

  if (is_long && cu_->target64) {
    // RAX must hold expected for CMPXCHG. Neither rl_new_value nor r_ptr may be in RAX.
    FlushReg(rs_r0q);
    Clobber(rs_r0q);
    LockTemp(rs_r0q);

    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
    RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
    RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
    LoadValueDirectWide(rl_src_expected, rs_r0q);
    NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
            rl_new_value.reg.GetReg());

    // After a store we need to insert barrier in case of potential load. Since the
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
    GenMemBarrier(kAnyAny);

    FreeTemp(rs_r0q);
  } else if (is_long) {
    // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
    FlushAllRegs();
    LockCallTemps();
    RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
    RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
    LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
    LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
    // FIXME: needs 64-bit update.
    const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
    const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
    DCHECK(!obj_in_si || !obj_in_di);
    const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
    const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
    DCHECK(!off_in_si || !off_in_di);
    // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
    RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
    RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
    bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
    bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
    if (push_di) {
      NewLIR1(kX86Push32R, rs_rDI.GetReg());
      MarkTemp(rs_rDI);
      LockTemp(rs_rDI);
      cfi_.AdjustCFAOffset(kRegSize);
      // Record cfi only if it is not already spilled.
      if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
        cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0);
      }
    }
    if (push_si) {
      NewLIR1(kX86Push32R, rs_rSI.GetReg());
      MarkTemp(rs_rSI);
      LockTemp(rs_rSI);
      cfi_.AdjustCFAOffset(kRegSize);
      // Record cfi only if it is not already spilled.
      if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
        cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetReg()), 0);
      }
    }
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
    const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
    if (!obj_in_si && !obj_in_di) {
      LoadWordDisp(rs_rSP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
    }
    if (!off_in_si && !off_in_di) {
      LoadWordDisp(rs_rSP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
    }
    NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);

    // After a store we need to insert a barrier to prevent reordering with either
    // earlier or later memory accesses.  Since
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
    // and it will be associated with the cmpxchg instruction, preventing both.
    GenMemBarrier(kAnyAny);

    if (push_si) {
      FreeTemp(rs_rSI);
      UnmarkTemp(rs_rSI);
      NewLIR1(kX86Pop32R, rs_rSI.GetReg());
      cfi_.AdjustCFAOffset(-kRegSize);
      if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
        cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()));
      }
    }
    if (push_di) {
      FreeTemp(rs_rDI);
      UnmarkTemp(rs_rDI);
      NewLIR1(kX86Pop32R, rs_rDI.GetReg());
      cfi_.AdjustCFAOffset(-kRegSize);
      if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
        cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()));
      }
    }
    FreeCallTemps();
  } else {
    // EAX must hold expected for CMPXCHG. Neither rl_new_value nor r_ptr may be in EAX.
   1227     FlushReg(rs_r0);
   1228     Clobber(rs_r0);
   1229     LockTemp(rs_r0);
   1230 
   1231     RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   1232     RegLocation rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
   1233 
   1234     if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
   1235       // Mark card for object assuming new value is stored.
   1236       FreeTemp(rs_r0);  // Temporarily release EAX for MarkGCCard().
   1237       MarkGCCard(0, rl_new_value.reg, rl_object.reg);
   1238       LockTemp(rs_r0);
   1239     }
   1240 
   1241     RegLocation rl_offset;
   1242     if (cu_->target64) {
   1243       rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
   1244     } else {
   1245       rl_offset = LoadValue(rl_src_offset, kCoreReg);
   1246     }
   1247     LoadValueDirect(rl_src_expected, rs_r0);
   1248     NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
   1249             rl_new_value.reg.GetReg());
   1250 
    1251     // After a store we need a barrier to prevent reordering with both earlier and
    1252     // later memory accesses. Since a locked cmpxchg already has full barrier
    1253     // semantics, only a scheduling barrier is generated here, and it is associated
    1254     // with the cmpxchg instruction, preventing reordering in both directions.
   1255     GenMemBarrier(kAnyAny);
   1256 
   1257     FreeTemp(rs_r0);
   1258   }
   1259 
   1260   // Convert ZF to boolean
   1261   RegLocation rl_dest = InlineTarget(info);  // boolean place for result
   1262   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   1263   RegStorage result_reg = rl_result.reg;
   1264 
   1265   // For 32-bit, SETcc only works with EAX..EDX.
   1266   if (!IsByteRegister(result_reg)) {
   1267     result_reg = AllocateByteRegister();
   1268   }
   1269   NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
   1270   NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
   1271   if (IsTemp(result_reg)) {
   1272     FreeTemp(result_reg);
   1273   }
   1274   StoreValue(rl_dest, rl_result);
   1275   return true;
   1276 }
   1277 
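         // SwapBits performs one round of the classic bit-reversal recurrence with mask 'value':
         //   x = ((x >> shift) & mask) | ((x & mask) << shift)
         // shift == 1 with mask 0x55555555 swaps adjacent bits, shift == 2 with 0x33333333 swaps
         // bit pairs, and shift == 4 with 0x0f0f0f0f swaps nibbles.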
   1278 void X86Mir2Lir::SwapBits(RegStorage result_reg, int shift, int32_t value) {
   1279   RegStorage r_temp = AllocTemp();
   1280   OpRegCopy(r_temp, result_reg);
   1281   OpRegImm(kOpLsr, result_reg, shift);
   1282   OpRegImm(kOpAnd, r_temp, value);
   1283   OpRegImm(kOpAnd, result_reg, value);
   1284   OpRegImm(kOpLsl, r_temp, shift);
   1285   OpRegReg(kOpOr, result_reg, r_temp);
   1286   FreeTemp(r_temp);
   1287 }
   1288 
   1289 void X86Mir2Lir::SwapBits64(RegStorage result_reg, int shift, int64_t value) {
   1290   RegStorage r_temp = AllocTempWide();
   1291   OpRegCopy(r_temp, result_reg);
   1292   OpRegImm(kOpLsr, result_reg, shift);
   1293   RegStorage r_value = AllocTempWide();
   1294   LoadConstantWide(r_value, value);
   1295   OpRegReg(kOpAnd, r_temp, r_value);
   1296   OpRegReg(kOpAnd, result_reg, r_value);
   1297   OpRegImm(kOpLsl, r_temp, shift);
   1298   OpRegReg(kOpOr, result_reg, r_temp);
   1299   FreeTemp(r_temp);
   1300   FreeTemp(r_value);
   1301 }
   1302 
   1303 bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
   1304   RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
   1305   if (rl_dest.s_reg_low == INVALID_SREG) {
    1306     // Result is unused; the code is dead. Inlining is successful, no code generated.
   1307     return true;
   1308   }
   1309   RegLocation rl_src_i = info->args[0];
   1310   RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg)
   1311                                    : LoadValue(rl_src_i, kCoreReg);
   1312   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   1313   if (size == k64) {
   1314     if (cu_->instruction_set == kX86_64) {
    1315       /* Use one bswap to reverse the byte order first, then 3 rounds of bit swapping
    1316          to reverse the bits within each byte of the long value x. Using bswap saves
    1317          instructions over the generic luni implementation, which needs 5 rounds:
    1318          x = bswap x
    1319          x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
    1320          x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
    1321          x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
    1322       */
   1323       OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
   1324       SwapBits64(rl_result.reg, 1, 0x5555555555555555);
   1325       SwapBits64(rl_result.reg, 2, 0x3333333333333333);
   1326       SwapBits64(rl_result.reg, 4, 0x0f0f0f0f0f0f0f0f);
   1327       StoreValueWide(rl_dest, rl_result);
   1328       return true;
   1329     }
   1330     RegStorage r_i_low = rl_i.reg.GetLow();
   1331     if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
   1332       // First REV shall clobber rl_result.reg.GetLowReg(), save the value in a temp for the second
   1333       // REV.
   1334       r_i_low = AllocTemp();
   1335       OpRegCopy(r_i_low, rl_i.reg);
   1336     }
   1337     OpRegReg(kOpRev, rl_result.reg.GetLow(), rl_i.reg.GetHigh());
   1338     OpRegReg(kOpRev, rl_result.reg.GetHigh(), r_i_low);
   1339     // Free up at least one input register if it was a temp. Otherwise we may be in the bad
   1340     // situation of not having a temp available for SwapBits. Make sure it's not overlapping
   1341     // with the output, though.
   1342     if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
   1343       // There's definitely a free temp after this.
   1344       FreeTemp(r_i_low);
   1345     } else {
   1346       // We opportunistically release both here. That saves duplication of the register state
   1347       // lookup (to see if it's actually a temp).
   1348       if (rl_i.reg.GetLowReg() != rl_result.reg.GetHighReg()) {
   1349         FreeTemp(rl_i.reg.GetLow());
   1350       }
   1351       if (rl_i.reg.GetHighReg() != rl_result.reg.GetLowReg() &&
   1352           rl_i.reg.GetHighReg() != rl_result.reg.GetHighReg()) {
   1353         FreeTemp(rl_i.reg.GetHigh());
   1354       }
   1355     }
   1356 
   1357     SwapBits(rl_result.reg.GetLow(), 1, 0x55555555);
   1358     SwapBits(rl_result.reg.GetLow(), 2, 0x33333333);
   1359     SwapBits(rl_result.reg.GetLow(), 4, 0x0f0f0f0f);
   1360     SwapBits(rl_result.reg.GetHigh(), 1, 0x55555555);
   1361     SwapBits(rl_result.reg.GetHigh(), 2, 0x33333333);
   1362     SwapBits(rl_result.reg.GetHigh(), 4, 0x0f0f0f0f);
   1363     StoreValueWide(rl_dest, rl_result);
   1364   } else {
   1365     OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
   1366     SwapBits(rl_result.reg, 1, 0x55555555);
   1367     SwapBits(rl_result.reg, 2, 0x33333333);
   1368     SwapBits(rl_result.reg, 4, 0x0f0f0f0f);
   1369     StoreValue(rl_dest, rl_result);
   1370   }
   1371   return true;
   1372 }
   1373 
   1374 void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   1375   if (cu_->target64) {
   1376     // We can do this directly using RIP addressing.
   1377     ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
   1378     LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, kDummy32BitOffset);
   1379     res->target = target;
   1380     res->flags.fixup = kFixupLoad;
   1381     return;
   1382   }
   1383 
   1384   // Get the PC to a register and get the anchor.
   1385   LIR* anchor;
   1386   RegStorage r_pc = GetPcAndAnchor(&anchor);
   1387 
   1388   // Load the proper value from the literal area.
   1389   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
   1390   LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
   1391   res->operands[4] = WrapPointer(anchor);
   1392   res->target = target;
   1393   res->flags.fixup = kFixupLoad;
   1394 }
   1395 
   1396 bool X86Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
   1397   return dex_cache_arrays_layout_.Valid();
   1398 }
   1399 
   1400 LIR* X86Mir2Lir::OpLoadPc(RegStorage r_dest) {
   1401   DCHECK(!cu_->target64);
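           // 32-bit x86 has no RIP-relative addressing, so materialize the PC with a call/pop
           // pair: the call pushes the address of the following pop, and the pop leaves that
           // address in r_dest.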
   1402   LIR* call = NewLIR1(kX86CallI, 0);
   1403   call->flags.fixup = kFixupLabel;
   1404   LIR* pop = NewLIR1(kX86Pop32R, r_dest.GetReg());
   1405   pop->flags.fixup = kFixupLabel;
   1406   DCHECK(NEXT_LIR(call) == pop);
   1407   return call;
   1408 }
   1409 
   1410 RegStorage X86Mir2Lir::GetPcAndAnchor(LIR** anchor, RegStorage r_tmp) {
   1411   if (pc_rel_base_reg_.Valid()) {
   1412     DCHECK(setup_pc_rel_base_reg_ != nullptr);
   1413     *anchor = NEXT_LIR(setup_pc_rel_base_reg_);
   1414     DCHECK(*anchor != nullptr);
   1415     DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
   1416     pc_rel_base_reg_used_ = true;
   1417     return pc_rel_base_reg_;
   1418   } else {
   1419     RegStorage r_pc = r_tmp.Valid() ? r_tmp : AllocTempRef();
   1420     LIR* load_pc = OpLoadPc(r_pc);
   1421     *anchor = NEXT_LIR(load_pc);
   1422     DCHECK(*anchor != nullptr);
   1423     DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
   1424     return r_pc;
   1425   }
   1426 }
   1427 
   1428 void X86Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest,
   1429                                           bool wide) {
   1430   if (cu_->target64) {
   1431     LIR* mov = NewLIR3(wide ? kX86Mov64RM : kX86Mov32RM, r_dest.GetReg(), kRIPReg,
   1432         kDummy32BitOffset);
   1433     mov->flags.fixup = kFixupLabel;
   1434     mov->operands[3] = WrapPointer(dex_file);
   1435     mov->operands[4] = offset;
   1436     mov->target = mov;  // Used for pc_insn_offset (not used by x86-64 relative patcher).
   1437     dex_cache_access_insns_.push_back(mov);
   1438   } else {
   1439     CHECK(!wide) << "Unsupported";
   1440     // Get the PC to a register and get the anchor. Use r_dest for the temp if needed.
   1441     LIR* anchor;
   1442     RegStorage r_pc = GetPcAndAnchor(&anchor, r_dest);
   1443     LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
   1444     mov->flags.fixup = kFixupLabel;
   1445     mov->operands[3] = WrapPointer(dex_file);
   1446     mov->operands[4] = offset;
   1447     mov->target = anchor;  // Used for pc_insn_offset.
   1448     dex_cache_access_insns_.push_back(mov);
   1449   }
   1450 }
   1451 
   1452 LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
   1453   UNUSED(r_base, count);
   1454   LOG(FATAL) << "Unexpected use of OpVldm for x86";
   1455   UNREACHABLE();
   1456 }
   1457 
   1458 LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) {
   1459   UNUSED(r_base, count);
   1460   LOG(FATAL) << "Unexpected use of OpVstm for x86";
   1461   UNREACHABLE();
   1462 }
   1463 
   1464 void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
   1465                                                RegLocation rl_result, int lit,
   1466                                                int first_bit, int second_bit) {
   1467   UNUSED(lit);
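           // The multiplier has exactly two bits set (at first_bit and second_bit), so
           // src * lit == (src << first_bit) + (src << second_bit): form
           // src + (src << (second_bit - first_bit)), then shift the sum left by first_bit.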
   1468   RegStorage t_reg = AllocTemp();
   1469   OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
   1470   OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
   1471   FreeTemp(t_reg);
   1472   if (first_bit != 0) {
   1473     OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
   1474   }
   1475 }
   1476 
   1477 void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
   1478   if (cu_->target64) {
   1479     DCHECK(reg.Is64Bit());
   1480 
   1481     NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
   1482   } else {
   1483     DCHECK(reg.IsPair());
   1484 
   1485     // We are not supposed to clobber the incoming storage, so allocate a temporary.
   1486     RegStorage t_reg = AllocTemp();
   1487     // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
   1488     OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
   1489     // The temp is no longer needed so free it at this time.
   1490     FreeTemp(t_reg);
   1491   }
   1492 
   1493   // In case of zero, throw ArithmeticException.
   1494   GenDivZeroCheck(kCondEq);
   1495 }
   1496 
   1497 void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index,
   1498                                      RegStorage array_base,
   1499                                      int len_offset) {
   1500   class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
   1501    public:
   1502     ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
   1503                              RegStorage index_in, RegStorage array_base_in, int32_t len_offset_in)
   1504         : LIRSlowPath(m2l, branch_in),
   1505           index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
   1506     }
   1507 
   1508     void Compile() OVERRIDE {
   1509       m2l_->ResetRegPool();
   1510       m2l_->ResetDefTracking();
   1511       GenerateTargetLabel(kPseudoThrowTarget);
   1512 
   1513       RegStorage new_index = index_;
   1514       // Move index out of kArg1, either directly to kArg0, or to kArg2.
    1515       // TODO: clean this up to compare registers by type rather than by raw number.
   1516       if (index_ == m2l_->TargetReg(kArg1, kNotWide)) {
   1517         if (array_base_ == m2l_->TargetReg(kArg0, kRef)) {
   1518           m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_);
   1519           new_index = m2l_->TargetReg(kArg2, kNotWide);
   1520         } else {
   1521           m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_);
   1522           new_index = m2l_->TargetReg(kArg0, kNotWide);
   1523         }
   1524       }
   1525       // Load array length to kArg1.
   1526       X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
   1527       x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
   1528       x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, new_index,
   1529                                        m2l_->TargetReg(kArg1, kNotWide), true);
   1530     }
   1531 
   1532    private:
   1533     const RegStorage index_;
   1534     const RegStorage array_base_;
   1535     const int32_t len_offset_;
   1536   };
   1537 
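           // Compare the index against the array length read straight from memory. The unsigned
           // above-or-equal branch (kCondUge) also rejects negative indexes, which compare as
           // large unsigned values.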
   1538   OpRegMem(kOpCmp, index, array_base, len_offset);
   1539   MarkPossibleNullPointerException(0);
   1540   LIR* branch = OpCondBranch(kCondUge, nullptr);
   1541   AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
   1542                                                     index, array_base, len_offset));
   1543 }
   1544 
   1545 void X86Mir2Lir::GenArrayBoundsCheck(int32_t index,
   1546                                      RegStorage array_base,
   1547                                      int32_t len_offset) {
   1548   class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
   1549    public:
   1550     ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
   1551                              int32_t index_in, RegStorage array_base_in, int32_t len_offset_in)
   1552         : LIRSlowPath(m2l, branch_in),
   1553           index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
   1554     }
   1555 
   1556     void Compile() OVERRIDE {
   1557       m2l_->ResetRegPool();
   1558       m2l_->ResetDefTracking();
   1559       GenerateTargetLabel(kPseudoThrowTarget);
   1560 
   1561       // Load array length to kArg1.
   1562       X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
   1563       x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
   1564       x86_m2l->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_);
   1565       x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, m2l_->TargetReg(kArg0, kNotWide),
   1566                                        m2l_->TargetReg(kArg1, kNotWide), true);
   1567     }
   1568 
   1569    private:
   1570     const int32_t index_;
   1571     const RegStorage array_base_;
   1572     const int32_t len_offset_;
   1573   };
   1574 
   1575   NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
   1576   MarkPossibleNullPointerException(0);
   1577   LIR* branch = OpCondBranch(kCondLs, nullptr);
   1578   AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
   1579                                                     index, array_base, len_offset));
   1580 }
   1581 
   1582 // Test suspend flag, return target of taken suspend branch
   1583 LIR* X86Mir2Lir::OpTestSuspend(LIR* target) {
   1584   if (cu_->target64) {
   1585     OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0);
   1586   } else {
   1587     OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0);
   1588   }
   1589   return OpCondBranch((target == nullptr) ? kCondNe : kCondEq, target);
   1590 }
   1591 
   1592 // Decrement register and branch on condition
   1593 LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
   1594   OpRegImm(kOpSub, reg, 1);
   1595   return OpCondBranch(c_code, target);
   1596 }
   1597 
   1598 bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
   1599                                     RegLocation rl_src, RegLocation rl_dest, int lit) {
   1600   UNUSED(dalvik_opcode, is_div, rl_src, rl_dest, lit);
    1601   LOG(FATAL) << "Unexpected use of SmallLiteralDivRem in x86";
   1602   UNREACHABLE();
   1603 }
   1604 
   1605 bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
   1606   UNUSED(rl_src, rl_dest, lit);
    1607   LOG(FATAL) << "Unexpected use of EasyMultiply in x86";
   1608   UNREACHABLE();
   1609 }
   1610 
   1611 LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
   1612   UNUSED(cond, guide);
   1613   LOG(FATAL) << "Unexpected use of OpIT in x86";
   1614   UNREACHABLE();
   1615 }
   1616 
   1617 void X86Mir2Lir::OpEndIT(LIR* it) {
   1618   UNUSED(it);
   1619   LOG(FATAL) << "Unexpected use of OpEndIT in x86";
   1620   UNREACHABLE();
   1621 }
   1622 
   1623 void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) {
   1624   switch (val) {
   1625     case 0:
   1626       NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
   1627       break;
   1628     case 1:
   1629       OpRegCopy(dest, src);
   1630       break;
   1631     default:
   1632       OpRegRegImm(kOpMul, dest, src, val);
   1633       break;
   1634   }
   1635 }
   1636 
   1637 void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
   1638   UNUSED(sreg);
   1639   // All memory accesses below reference dalvik regs.
   1640   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   1641 
   1642   LIR *m;
   1643   switch (val) {
   1644     case 0:
   1645       NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
   1646       break;
   1647     case 1: {
   1648       const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
   1649       LoadBaseDisp(rs_rSP, displacement, dest, k32, kNotVolatile);
   1650       break;
   1651     }
   1652     default:
   1653       m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
   1654                   rs_rX86_SP_32.GetReg(), displacement, val);
   1655       AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
   1656       break;
   1657   }
   1658 }
   1659 
   1660 void X86Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
   1661                                 RegLocation rl_src2, int flags) {
   1662   if (!cu_->target64) {
    1663     // Some long ops on 32-bit x86 fall back to the generic Mir2Lir implementation.
   1664     switch (opcode) {
   1665       case Instruction::NOT_LONG:
   1666       case Instruction::DIV_LONG:
   1667       case Instruction::DIV_LONG_2ADDR:
   1668       case Instruction::REM_LONG:
   1669       case Instruction::REM_LONG_2ADDR:
   1670         Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
   1671         return;
   1672 
   1673       default:
   1674         // Everything else we can handle.
   1675         break;
   1676     }
   1677   }
   1678 
   1679   switch (opcode) {
   1680     case Instruction::NOT_LONG:
   1681       GenNotLong(rl_dest, rl_src2);
   1682       return;
   1683 
   1684     case Instruction::ADD_LONG:
   1685     case Instruction::ADD_LONG_2ADDR:
   1686       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
   1687       return;
   1688 
   1689     case Instruction::SUB_LONG:
   1690     case Instruction::SUB_LONG_2ADDR:
   1691       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
   1692       return;
   1693 
   1694     case Instruction::MUL_LONG:
   1695     case Instruction::MUL_LONG_2ADDR:
   1696       GenMulLong(opcode, rl_dest, rl_src1, rl_src2, flags);
   1697       return;
   1698 
   1699     case Instruction::DIV_LONG:
   1700     case Instruction::DIV_LONG_2ADDR:
   1701       GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags);
   1702       return;
   1703 
   1704     case Instruction::REM_LONG:
   1705     case Instruction::REM_LONG_2ADDR:
   1706       GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags);
   1707       return;
   1708 
   1709     case Instruction::AND_LONG_2ADDR:
   1710     case Instruction::AND_LONG:
   1711       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
   1712       return;
   1713 
   1714     case Instruction::OR_LONG:
   1715     case Instruction::OR_LONG_2ADDR:
   1716       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
   1717       return;
   1718 
   1719     case Instruction::XOR_LONG:
   1720     case Instruction::XOR_LONG_2ADDR:
   1721       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
   1722       return;
   1723 
   1724     case Instruction::NEG_LONG:
   1725       GenNegLong(rl_dest, rl_src2);
   1726       return;
   1727 
   1728     default:
   1729       LOG(FATAL) << "Invalid long arith op";
   1730       return;
   1731   }
   1732 }
   1733 
   1734 bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val, int flags) {
   1735   // All memory accesses below reference dalvik regs.
   1736   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   1737 
   1738   if (val == 0) {
   1739     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1740     if (cu_->target64) {
   1741       OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
   1742     } else {
   1743       OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow());
   1744       OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());
   1745     }
   1746     StoreValueWide(rl_dest, rl_result);
   1747     return true;
   1748   } else if (val == 1) {
   1749     StoreValueWide(rl_dest, rl_src1);
   1750     return true;
   1751   } else if (val == 2) {
   1752     GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1, flags);
   1753     return true;
   1754   } else if (IsPowerOfTwo(val)) {
   1755     int shift_amount = CTZ(val);
   1756     if (!PartiallyIntersects(rl_src1, rl_dest)) {
   1757       rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   1758       RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1,
   1759                                                 shift_amount, flags);
   1760       StoreValueWide(rl_dest, rl_result);
   1761       return true;
   1762     }
   1763   }
   1764 
   1765   // Okay, on 32b just bite the bullet and do it, still better than the general case.
   1766   if (!cu_->target64) {
   1767     int32_t val_lo = Low32Bits(val);
   1768     int32_t val_hi = High32Bits(val);
   1769     // Prepare for explicit register usage.
   1770     ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
   1771     rl_src1 = UpdateLocWideTyped(rl_src1);
   1772     bool src1_in_reg = rl_src1.location == kLocPhysReg;
   1773     int displacement = SRegOffset(rl_src1.s_reg_low);
   1774 
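             // 64-bit product modulo 2^64: (1H:1L) * (2H:2L) == ((1H*2L + 1L*2H) << 32) + 1L*2L,
             // so three 32-bit multiplies suffice; the 1H*2H term overflows and is dropped.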
   1775     // ECX <- 1H * 2L
   1776     // EAX <- 1L * 2H
   1777     if (src1_in_reg) {
   1778       GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo);
   1779       GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi);
   1780     } else {
   1781       GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
   1782       GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
   1783     }
   1784 
   1785     // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
   1786     NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
   1787 
   1788     // EAX <- 2L
   1789     LoadConstantNoClobber(rs_r0, val_lo);
   1790 
   1791     // EDX:EAX <- 2L * 1L (double precision)
   1792     if (src1_in_reg) {
   1793       NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
   1794     } else {
   1795       LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET);
   1796       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1797                               true /* is_load */, true /* is_64bit */);
   1798     }
   1799 
   1800     // EDX <- EDX + ECX (add high words)
   1801     NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
   1802 
   1803     // Result is EDX:EAX
   1804     RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
   1805                              RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
   1806     StoreValueWide(rl_dest, rl_result);
   1807     return true;
   1808   }
   1809   return false;
   1810 }
   1811 
   1812 void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
   1813                             RegLocation rl_src2, int flags) {
   1814   if (rl_src1.is_const) {
   1815     std::swap(rl_src1, rl_src2);
   1816   }
   1817 
   1818   if (rl_src2.is_const) {
   1819     if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2), flags)) {
   1820       return;
   1821     }
   1822   }
   1823 
   1824   // All memory accesses below reference dalvik regs.
   1825   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   1826 
   1827   if (cu_->target64) {
   1828     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   1829     rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   1830     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1831     if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
   1832         rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
   1833       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
   1834     } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() &&
   1835                rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
   1836       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg());
   1837     } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
   1838                rl_result.reg.GetReg() != rl_src2.reg.GetReg()) {
   1839       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
   1840     } else {
   1841       OpRegCopy(rl_result.reg, rl_src1.reg);
   1842       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
   1843     }
   1844     StoreValueWide(rl_dest, rl_result);
   1845     return;
   1846   }
   1847 
    1848   // Not multiplying by a constant. Do it the hard way.
   1849   // Check for V*V.  We can eliminate a multiply in that case, as 2L*1H == 2H*1L.
   1850   bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
   1851                    mir_graph_->SRegToVReg(rl_src2.s_reg_low);
   1852 
   1853   // Prepare for explicit register usage.
   1854   ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
   1855   rl_src1 = UpdateLocWideTyped(rl_src1);
   1856   rl_src2 = UpdateLocWideTyped(rl_src2);
   1857 
   1858   // At this point, the VRs are in their home locations.
   1859   bool src1_in_reg = rl_src1.location == kLocPhysReg;
   1860   bool src2_in_reg = rl_src2.location == kLocPhysReg;
   1861   const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
   1862 
   1863   // ECX <- 1H
   1864   if (src1_in_reg) {
   1865     NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
   1866   } else {
   1867     LoadBaseDisp(rs_rSP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32,
   1868                  kNotVolatile);
   1869   }
   1870 
   1871   if (is_square) {
   1872     // Take advantage of the fact that the values are the same.
   1873     // ECX <- ECX * 2L  (1H * 2L)
   1874     if (src2_in_reg) {
   1875       NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
   1876     } else {
   1877       int displacement = SRegOffset(rl_src2.s_reg_low);
   1878       LIR* m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP_32.GetReg(),
   1879                        displacement + LOWORD_OFFSET);
   1880       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1881                               true /* is_load */, true /* is_64bit */);
   1882     }
   1883 
   1884     // ECX <- 2*ECX (2H * 1L) + (1H * 2L)
    1885     // ECX <- 2*ECX, i.e. (2H * 1L) + (1H * 2L)
   1886   } else {
   1887     // EAX <- 2H
   1888     if (src2_in_reg) {
   1889       NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
   1890     } else {
   1891       LoadBaseDisp(rs_rSP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32,
   1892                    kNotVolatile);
   1893     }
   1894 
   1895     // EAX <- EAX * 1L  (2H * 1L)
   1896     if (src1_in_reg) {
   1897       NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg());
   1898     } else {
   1899       int displacement = SRegOffset(rl_src1.s_reg_low);
   1900       LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP_32.GetReg(),
   1901                        displacement + LOWORD_OFFSET);
   1902       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1903                               true /* is_load */, true /* is_64bit */);
   1904     }
   1905 
   1906     // ECX <- ECX * 2L  (1H * 2L)
   1907     if (src2_in_reg) {
   1908       NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
   1909     } else {
   1910       int displacement = SRegOffset(rl_src2.s_reg_low);
   1911       LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP_32.GetReg(),
   1912                        displacement + LOWORD_OFFSET);
   1913       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1914                               true /* is_load */, true /* is_64bit */);
   1915     }
   1916 
   1917     // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
   1918     NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
   1919   }
   1920 
   1921   // EAX <- 2L
   1922   if (src2_in_reg) {
   1923     NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
   1924   } else {
   1925     LoadBaseDisp(rs_rSP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32,
   1926                  kNotVolatile);
   1927   }
   1928 
   1929   // EDX:EAX <- 2L * 1L (double precision)
   1930   if (src1_in_reg) {
   1931     NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
   1932   } else {
   1933     int displacement = SRegOffset(rl_src1.s_reg_low);
   1934     LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET);
   1935     AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1936                             true /* is_load */, true /* is_64bit */);
   1937   }
   1938 
   1939   // EDX <- EDX + ECX (add high words)
   1940   NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
   1941 
   1942   // Result is EDX:EAX
   1943   RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
   1944                            RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
   1945   StoreValueWide(rl_dest, rl_result);
   1946 }
   1947 
   1948 void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
   1949                                    Instruction::Code op) {
   1950   DCHECK_EQ(rl_dest.location, kLocPhysReg);
   1951   X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
   1952   if (rl_src.location == kLocPhysReg) {
   1953     // Both operands are in registers.
    1954     // But we must ensure that rl_src is in a register pair.
   1955     if (cu_->target64) {
   1956       NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
   1957     } else {
   1958       rl_src = LoadValueWide(rl_src, kCoreReg);
   1959       if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
   1960         // The registers are the same, so we would clobber it before the use.
   1961         RegStorage temp_reg = AllocTemp();
   1962         OpRegCopy(temp_reg, rl_dest.reg);
   1963         rl_src.reg.SetHighReg(temp_reg.GetReg());
   1964       }
   1965       NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
   1966 
   1967       x86op = GetOpcode(op, rl_dest, rl_src, true);
   1968       NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
   1969     }
   1970     return;
   1971   }
   1972 
   1973   // RHS is in memory.
   1974   DCHECK((rl_src.location == kLocDalvikFrame) ||
   1975          (rl_src.location == kLocCompilerTemp));
   1976   int r_base = rs_rX86_SP_32.GetReg();
   1977   int displacement = SRegOffset(rl_src.s_reg_low);
   1978 
   1979   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   1980   LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
   1981                      r_base, displacement + LOWORD_OFFSET);
   1982   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   1983                           true /* is_load */, true /* is64bit */);
   1984   if (!cu_->target64) {
   1985     x86op = GetOpcode(op, rl_dest, rl_src, true);
   1986     lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
   1987     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
   1988                             true /* is_load */, true /* is64bit */);
   1989   }
   1990 }
   1991 
   1992 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
   1993   rl_dest = UpdateLocWideTyped(rl_dest);
   1994   if (rl_dest.location == kLocPhysReg) {
   1995     // Ensure we are in a register pair
   1996     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1997 
   1998     rl_src = UpdateLocWideTyped(rl_src);
   1999     GenLongRegOrMemOp(rl_result, rl_src, op);
   2000     StoreFinalValueWide(rl_dest, rl_result);
   2001     return;
   2002   } else if (!cu_->target64 && Intersects(rl_src, rl_dest)) {
    2003     // Handle the case when src and dest intersect.
   2004     rl_src = LoadValueWide(rl_src, kCoreReg);
   2005     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2006     rl_src = UpdateLocWideTyped(rl_src);
   2007     GenLongRegOrMemOp(rl_result, rl_src, op);
   2008     StoreFinalValueWide(rl_dest, rl_result);
   2009     return;
   2010   }
   2011 
   2012   // It wasn't in registers, so it better be in memory.
   2013   DCHECK((rl_dest.location == kLocDalvikFrame) ||
   2014          (rl_dest.location == kLocCompilerTemp));
   2015   rl_src = LoadValueWide(rl_src, kCoreReg);
   2016 
   2017   // Operate directly into memory.
   2018   X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
   2019   int r_base = rs_rX86_SP_32.GetReg();
   2020   int displacement = SRegOffset(rl_dest.s_reg_low);
   2021 
   2022   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   2023   LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
   2024                      cu_->target64 ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
   2025   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   2026                           true /* is_load */, true /* is64bit */);
   2027   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   2028                           false /* is_load */, true /* is64bit */);
   2029   if (!cu_->target64) {
   2030     x86op = GetOpcode(op, rl_dest, rl_src, true);
   2031     lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
   2032     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
   2033                             true /* is_load */, true /* is64bit */);
   2034     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
   2035                             false /* is_load */, true /* is64bit */);
   2036   }
   2037 
   2038   int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
   2039   int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
   2040 
   2041   // If the left operand is in memory and the right operand is in a register
   2042   // and both belong to the same dalvik register then we should clobber the
   2043   // right one because it doesn't hold valid data anymore.
   2044   if (v_src_reg == v_dst_reg) {
   2045     Clobber(rl_src.reg);
   2046   }
   2047 }
   2048 
   2049 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
   2050                               RegLocation rl_src2, Instruction::Code op,
   2051                               bool is_commutative) {
   2052   // Is this really a 2 operand operation?
   2053   switch (op) {
   2054     case Instruction::ADD_LONG_2ADDR:
   2055     case Instruction::SUB_LONG_2ADDR:
   2056     case Instruction::AND_LONG_2ADDR:
   2057     case Instruction::OR_LONG_2ADDR:
   2058     case Instruction::XOR_LONG_2ADDR:
   2059       if (GenerateTwoOperandInstructions()) {
   2060         GenLongArith(rl_dest, rl_src2, op);
   2061         return;
   2062       }
   2063       break;
   2064 
   2065     default:
   2066       break;
   2067   }
   2068 
   2069   if (rl_dest.location == kLocPhysReg) {
   2070     RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
   2071 
   2072     // We are about to clobber the LHS, so it needs to be a temp.
   2073     rl_result = ForceTempWide(rl_result);
   2074 
   2075     // Perform the operation using the RHS.
   2076     rl_src2 = UpdateLocWideTyped(rl_src2);
   2077     GenLongRegOrMemOp(rl_result, rl_src2, op);
   2078 
   2079     // And now record that the result is in the temp.
   2080     StoreFinalValueWide(rl_dest, rl_result);
   2081     return;
   2082   }
   2083 
   2084   // It wasn't in registers, so it better be in memory.
   2085   DCHECK((rl_dest.location == kLocDalvikFrame) || (rl_dest.location == kLocCompilerTemp));
   2086   rl_src1 = UpdateLocWideTyped(rl_src1);
   2087   rl_src2 = UpdateLocWideTyped(rl_src2);
   2088 
   2089   // Get one of the source operands into temporary register.
   2090   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   2091   if (cu_->target64) {
   2092     if (IsTemp(rl_src1.reg)) {
   2093       GenLongRegOrMemOp(rl_src1, rl_src2, op);
   2094     } else if (is_commutative) {
   2095       rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   2096       // We need at least one of them to be a temporary.
   2097       if (!IsTemp(rl_src2.reg)) {
   2098         rl_src1 = ForceTempWide(rl_src1);
   2099         GenLongRegOrMemOp(rl_src1, rl_src2, op);
   2100       } else {
   2101         GenLongRegOrMemOp(rl_src2, rl_src1, op);
   2102         StoreFinalValueWide(rl_dest, rl_src2);
   2103         return;
   2104       }
   2105     } else {
   2106       // Need LHS to be the temp.
   2107       rl_src1 = ForceTempWide(rl_src1);
   2108       GenLongRegOrMemOp(rl_src1, rl_src2, op);
   2109     }
   2110   } else {
   2111     if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
   2112       GenLongRegOrMemOp(rl_src1, rl_src2, op);
   2113     } else if (is_commutative) {
   2114       rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   2115       // We need at least one of them to be a temporary.
   2116       if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
   2117         rl_src1 = ForceTempWide(rl_src1);
   2118         GenLongRegOrMemOp(rl_src1, rl_src2, op);
   2119       } else {
   2120         GenLongRegOrMemOp(rl_src2, rl_src1, op);
   2121         StoreFinalValueWide(rl_dest, rl_src2);
   2122         return;
   2123       }
   2124     } else {
   2125       // Need LHS to be the temp.
   2126       rl_src1 = ForceTempWide(rl_src1);
   2127       GenLongRegOrMemOp(rl_src1, rl_src2, op);
   2128     }
   2129   }
   2130 
   2131   StoreFinalValueWide(rl_dest, rl_src1);
   2132 }
   2133 
   2134 void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
   2135   if (cu_->target64) {
   2136     rl_src = LoadValueWide(rl_src, kCoreReg);
   2137     RegLocation rl_result;
   2138     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2139     OpRegCopy(rl_result.reg, rl_src.reg);
   2140     OpReg(kOpNot, rl_result.reg);
   2141     StoreValueWide(rl_dest, rl_result);
   2142   } else {
    2143     LOG(FATAL) << "Unexpected use of GenNotLong()";
   2144   }
   2145 }
   2146 
   2147 void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src,
   2148                                   int64_t imm, bool is_div) {
   2149   if (imm == 0) {
   2150     GenDivZeroException();
   2151   } else if (imm == 1) {
   2152     if (is_div) {
   2153       // x / 1 == x.
   2154       StoreValueWide(rl_dest, rl_src);
   2155     } else {
   2156       // x % 1 == 0.
   2157       RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2158       LoadConstantWide(rl_result.reg, 0);
   2159       StoreValueWide(rl_dest, rl_result);
   2160     }
   2161   } else if (imm == -1) {  // handle 0x8000000000000000 / -1 special case.
   2162     if (is_div) {
   2163       rl_src = LoadValueWide(rl_src, kCoreReg);
   2164       RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2165       RegStorage rs_temp = AllocTempWide();
   2166 
   2167       OpRegCopy(rl_result.reg, rl_src.reg);
   2168       LoadConstantWide(rs_temp, 0x8000000000000000);
   2169 
   2170       // If x == MIN_LONG, return MIN_LONG.
   2171       OpRegReg(kOpCmp, rl_src.reg, rs_temp);
   2172       LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
   2173 
   2174       // For x != MIN_LONG, x / -1 == -x.
   2175       OpReg(kOpNeg, rl_result.reg);
   2176 
   2177       minint_branch->target = NewLIR0(kPseudoTargetLabel);
   2178       FreeTemp(rs_temp);
   2179       StoreValueWide(rl_dest, rl_result);
   2180     } else {
   2181       // x % -1 == 0.
   2182       RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2183       LoadConstantWide(rl_result.reg, 0);
   2184       StoreValueWide(rl_dest, rl_result);
   2185     }
   2186   } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
   2187     // Division using shifting.
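             // Signed division by 2^k must round toward zero, so a negative dividend is biased by
             // (2^k - 1) before the arithmetic shift; the test/cmov below keeps the unbiased value
             // for non-negative dividends and the biased one otherwise.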
   2188     rl_src = LoadValueWide(rl_src, kCoreReg);
   2189     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2190     if (IsSameReg(rl_result.reg, rl_src.reg)) {
   2191       RegStorage rs_temp = AllocTypedTempWide(false, kCoreReg);
   2192       rl_result.reg.SetReg(rs_temp.GetReg());
   2193     }
   2194     LoadConstantWide(rl_result.reg, std::abs(imm) - 1);
   2195     OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
   2196     NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
   2197     OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
   2198     int shift_amount = CTZ(imm);
   2199     OpRegImm(kOpAsr, rl_result.reg, shift_amount);
   2200     if (imm < 0) {
   2201       OpReg(kOpNeg, rl_result.reg);
   2202     }
   2203     StoreValueWide(rl_dest, rl_result);
   2204   } else {
   2205     CHECK(imm <= -2 || imm >= 2);
   2206 
   2207     FlushReg(rs_r0q);
   2208     Clobber(rs_r0q);
   2209     LockTemp(rs_r0q);
   2210     FlushReg(rs_r2q);
   2211     Clobber(rs_r2q);
   2212     LockTemp(rs_r2q);
   2213 
   2214     RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
   2215                              is_div ? rs_r2q : rs_r0q, INVALID_SREG, INVALID_SREG};
   2216 
    2217     // Use H. S. Warren's Hacker's Delight, Chapter 10, and
    2218     // T. Granlund, P. L. Montgomery's "Division by invariant integers using multiplication".
   2219     int64_t magic;
   2220     int shift;
   2221     CalculateMagicAndShift(imm, magic, shift, true /* is_long */);
   2222 
   2223     /*
    2224      * For imm >= 2,
    2225      *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n > 0
    2226      *     int(n/imm) = ceil(n/imm)  = floor(M*n/2^S) + 1, when n < 0.
    2227      * For imm <= -2,
    2228      *     int(n/imm) = ceil(n/imm)  = floor(M*n/2^S) + 1, when n > 0
    2229      *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n < 0.
    2230      * We implement this algorithm as follows:
    2231      * 1. multiply the magic number M by the numerator n; the high 64 bits land in RDX
    2232      * 2. if imm > 0 and magic < 0, add the numerator to RDX
    2233      *    if imm < 0 and magic > 0, subtract the numerator from RDX
    2234      * 3. if S != 0, arithmetic-shift RDX right by S bits
    2235      * 4. add 1 to RDX if RDX < 0
    2236      * 5. RDX then holds the quotient
   2237      */
   2238 
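             // Worked example (constants assumed, not read from CalculateMagicAndShift): dividing
             // by imm == 3 typically uses magic == 0x5555555555555556 and shift == 0, so n == 7
             // leaves 2 in RDX, while n == -7 leaves -3, which step 4 fixes up to -2.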
   2239     // RAX = magic.
   2240     LoadConstantWide(rs_r0q, magic);
   2241 
   2242     // Multiply by numerator.
   2243     RegStorage numerator_reg;
   2244     if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
   2245       // We will need the value later.
   2246       rl_src = LoadValueWide(rl_src, kCoreReg);
   2247       numerator_reg = rl_src.reg;
   2248 
   2249       // RDX:RAX = magic * numerator.
   2250       NewLIR1(kX86Imul64DaR, numerator_reg.GetReg());
   2251     } else {
   2252       // Only need this once.  Multiply directly from the value.
   2253       rl_src = UpdateLocWideTyped(rl_src);
   2254       if (rl_src.location != kLocPhysReg) {
   2255         // Okay, we can do this from memory.
   2256         ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   2257         int displacement = SRegOffset(rl_src.s_reg_low);
   2258         // RDX:RAX = magic * numerator.
   2259         LIR *m = NewLIR2(kX86Imul64DaM, rs_rX86_SP_32.GetReg(), displacement);
   2260         AnnotateDalvikRegAccess(m, displacement >> 2,
   2261                                 true /* is_load */, true /* is_64bit */);
   2262       } else {
   2263         // RDX:RAX = magic * numerator.
   2264         NewLIR1(kX86Imul64DaR, rl_src.reg.GetReg());
   2265       }
   2266     }
   2267 
   2268     if (imm > 0 && magic < 0) {
   2269       // Add numerator to RDX.
   2270       DCHECK(numerator_reg.Valid());
   2271       OpRegReg(kOpAdd, rs_r2q, numerator_reg);
   2272     } else if (imm < 0 && magic > 0) {
   2273       DCHECK(numerator_reg.Valid());
   2274       OpRegReg(kOpSub, rs_r2q, numerator_reg);
   2275     }
   2276 
   2277     // Do we need the shift?
   2278     if (shift != 0) {
   2279       // Shift RDX by 'shift' bits.
   2280       OpRegImm(kOpAsr, rs_r2q, shift);
   2281     }
   2282 
   2283     // Move RDX to RAX.
   2284     OpRegCopyWide(rs_r0q, rs_r2q);
   2285 
   2286     // Move sign bit to bit 0, zeroing the rest.
   2287     OpRegImm(kOpLsr, rs_r2q, 63);
   2288 
   2289     // RDX = RDX + RAX.
   2290     OpRegReg(kOpAdd, rs_r2q, rs_r0q);
   2291 
   2292     // Quotient is in RDX.
   2293     if (!is_div) {
   2294       // We need to compute the remainder.
    2295       // Remainder is numerator - (quotient * imm).
   2296       DCHECK(numerator_reg.Valid());
   2297       OpRegCopyWide(rs_r0q, numerator_reg);
   2298 
   2299       // Imul doesn't support 64-bit imms.
   2300       if (imm > std::numeric_limits<int32_t>::max() ||
   2301           imm < std::numeric_limits<int32_t>::min()) {
   2302         RegStorage rs_temp = AllocTempWide();
   2303         LoadConstantWide(rs_temp, imm);
   2304 
    2305         // RDX = quotient * imm.
   2306         NewLIR2(kX86Imul64RR, rs_r2q.GetReg(), rs_temp.GetReg());
   2307 
   2308         FreeTemp(rs_temp);
   2309       } else {
    2310         // RDX = quotient * imm.
   2311         int short_imm = static_cast<int>(imm);
   2312         NewLIR3(kX86Imul64RRI, rs_r2q.GetReg(), rs_r2q.GetReg(), short_imm);
   2313       }
   2314 
   2315       // RAX -= RDX.
   2316       OpRegReg(kOpSub, rs_r0q, rs_r2q);
   2317 
   2318       // Result in RAX.
   2319     } else {
   2320       // Result in RDX.
   2321     }
   2322     StoreValueWide(rl_dest, rl_result);
   2323     FreeTemp(rs_r0q);
   2324     FreeTemp(rs_r2q);
   2325   }
   2326 }
   2327 
   2328 void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
   2329                                RegLocation rl_src2, bool is_div, int flags) {
   2330   if (!cu_->target64) {
    2331     LOG(FATAL) << "Unexpected use of GenDivRemLong()";
   2332     return;
   2333   }
   2334 
   2335   if (rl_src2.is_const) {
   2336     DCHECK(rl_src2.wide);
   2337     int64_t imm = mir_graph_->ConstantValueWide(rl_src2);
   2338     GenDivRemLongLit(rl_dest, rl_src1, imm, is_div);
   2339     return;
   2340   }
   2341 
   2342   // We have to use fixed registers, so flush all the temps.
   2343   // Prepare for explicit register usage.
   2344   ExplicitTempRegisterLock(this, 4, &rs_r0q, &rs_r1q, &rs_r2q, &rs_r6q);
   2345 
   2346   // Load LHS into RAX.
   2347   LoadValueDirectWideFixed(rl_src1, rs_r0q);
   2348 
   2349   // Load RHS into RCX.
   2350   LoadValueDirectWideFixed(rl_src2, rs_r1q);
   2351 
   2352   // Copy LHS sign bit into RDX.
   2353   NewLIR0(kx86Cqo64Da);
   2354 
   2355   // Handle division by zero case.
   2356   if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
   2357     GenDivZeroCheckWide(rs_r1q);
   2358   }
   2359 
    2360   // Catch the 0x8000000000000000 / -1 case: the quotient (2^63) overflows, so idiv would fault.
   2361   NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
   2362   LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
   2363 
   2364   // RHS is -1.
   2365   LoadConstantWide(rs_r6q, 0x8000000000000000);
   2366   NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
   2367   LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
   2368 
   2369   // In 0x8000000000000000/-1 case.
   2370   if (!is_div) {
    2371     // For DIV, RAX is already right. For REM, we need RDX to be 0.
   2372     NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
   2373   }
   2374   LIR* done = NewLIR1(kX86Jmp8, 0);
   2375 
   2376   // Expected case.
   2377   minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
   2378   minint_branch->target = minus_one_branch->target;
   2379   NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
   2380   done->target = NewLIR0(kPseudoTargetLabel);
   2381 
   2382   // Result is in RAX for div and RDX for rem.
   2383   RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
   2384   if (!is_div) {
   2385     rl_result.reg.SetReg(r2q);
   2386   }
   2387 
   2388   StoreValueWide(rl_dest, rl_result);
   2389 }
   2390 
   2391 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
   2392   rl_src = LoadValueWide(rl_src, kCoreReg);
   2393   RegLocation rl_result;
   2394   if (cu_->target64) {
   2395     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2396     OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
   2397   } else {
   2398     rl_result = ForceTempWide(rl_src);
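             // Negate the 64-bit pair in place: negating the low word sets CF when it was
             // non-zero; adc folds that borrow into the high word, which is then negated, so the
             // pair becomes -(high + borrow):(-low) == -(high:low).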
   2399     OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow());    // rLow = -rLow
   2400     OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0);                   // rHigh = rHigh + CF
   2401     OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());  // rHigh = -rHigh
   2402   }
   2403   StoreValueWide(rl_dest, rl_result);
   2404 }
   2405 
   2406 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) {
   2407   DCHECK_EQ(kX86, cu_->instruction_set);
   2408   X86OpCode opcode = kX86Bkpt;
   2409   switch (op) {
   2410   case kOpCmp: opcode = kX86Cmp32RT;  break;
   2411   case kOpMov: opcode = kX86Mov32RT;  break;
   2412   default:
   2413     LOG(FATAL) << "Bad opcode: " << op;
   2414     break;
   2415   }
   2416   NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
   2417 }
   2418 
   2419 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
   2420   DCHECK_EQ(kX86_64, cu_->instruction_set);
   2421   X86OpCode opcode = kX86Bkpt;
   2422   if (cu_->target64 && r_dest.Is64BitSolo()) {
   2423     switch (op) {
   2424     case kOpCmp: opcode = kX86Cmp64RT;  break;
   2425     case kOpMov: opcode = kX86Mov64RT;  break;
   2426     default:
   2427       LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
   2428       break;
   2429     }
   2430   } else {
   2431     switch (op) {
   2432     case kOpCmp: opcode = kX86Cmp32RT;  break;
   2433     case kOpMov: opcode = kX86Mov32RT;  break;
   2434     default:
   2435       LOG(FATAL) << "Bad opcode: " << op;
   2436       break;
   2437     }
   2438   }
   2439   NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
   2440 }
   2441 
   2442 /*
   2443  * Generate array load
   2444  */
   2445 void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
   2446                              RegLocation rl_index, RegLocation rl_dest, int scale) {
   2447   RegisterClass reg_class = RegClassForFieldLoadStore(size, false);
   2448   int len_offset = mirror::Array::LengthOffset().Int32Value();
   2449   RegLocation rl_result;
   2450   rl_array = LoadValue(rl_array, kRefReg);
   2451 
   2452   int data_offset;
   2453   if (size == k64 || size == kDouble) {
   2454     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   2455   } else {
   2456     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   2457   }
   2458 
   2459   bool constant_index = rl_index.is_const;
   2460   int32_t constant_index_value = 0;
   2461   if (!constant_index) {
   2462     rl_index = LoadValue(rl_index, kCoreReg);
   2463   } else {
   2464     constant_index_value = mir_graph_->ConstantValue(rl_index);
   2465     // If index is constant, just fold it into the data offset
   2466     data_offset += constant_index_value << scale;
    2467     // Treat as a non-indexed access below.
   2468     rl_index.reg = RegStorage::InvalidReg();
   2469   }
   2470 
   2471   /* null object? */
   2472   GenNullCheck(rl_array.reg, opt_flags);
   2473 
   2474   if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
   2475     if (constant_index) {
   2476       GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
   2477     } else {
   2478       GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
   2479     }
   2480   }
   2481   rl_result = EvalLoc(rl_dest, reg_class, true);
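           // The load addresses array + (index << scale) + data_offset. With a constant index the
           // offset was folded into data_offset above and rl_index.reg is invalid, so this becomes
           // a plain base + displacement access.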
   2482   LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
   2483   if ((size == k64) || (size == kDouble)) {
   2484     StoreValueWide(rl_dest, rl_result);
   2485   } else {
   2486     StoreValue(rl_dest, rl_result);
   2487   }
   2488 }
   2489 
   2490 /*
   2491  * Generate array store
   2492  *
   2493  */
   2494 void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
   2495                              RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
   2496   RegisterClass reg_class = RegClassForFieldLoadStore(size, false);
   2497   int len_offset = mirror::Array::LengthOffset().Int32Value();
   2498   int data_offset;
   2499 
   2500   if (size == k64 || size == kDouble) {
   2501     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   2502   } else {
   2503     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   2504   }
   2505 
   2506   rl_array = LoadValue(rl_array, kRefReg);
   2507   bool constant_index = rl_index.is_const;
   2508   int32_t constant_index_value = 0;
   2509   if (!constant_index) {
   2510     rl_index = LoadValue(rl_index, kCoreReg);
   2511   } else {
   2512     // If index is constant, just fold it into the data offset
   2513     constant_index_value = mir_graph_->ConstantValue(rl_index);
   2514     data_offset += constant_index_value << scale;
    2515     // Treat as a non-indexed access below.
   2516     rl_index.reg = RegStorage::InvalidReg();
   2517   }
   2518 
   2519   /* null object? */
   2520   GenNullCheck(rl_array.reg, opt_flags);
   2521 
   2522   if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
   2523     if (constant_index) {
   2524       GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
   2525     } else {
   2526       GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
   2527     }
   2528   }
   2529   if ((size == k64) || (size == kDouble)) {
   2530     rl_src = LoadValueWide(rl_src, reg_class);
   2531   } else {
   2532     rl_src = LoadValue(rl_src, reg_class);
   2533   }
   2534   // If the src reg can't be byte accessed, move it to a temp first.
   2535   if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
   2536     RegStorage temp = AllocTemp();
   2537     OpRegCopy(temp, rl_src.reg);
   2538     StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size, opt_flags);
   2539   } else {
   2540     StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size, opt_flags);
   2541   }
   2542   if (card_mark) {
    2543     // Free rl_index if it's a temp; this ensures there are two free registers for the card mark.
   2544     if (!constant_index) {
   2545       FreeTemp(rl_index.reg);
   2546     }
   2547     MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
   2548   }
   2549 }
   2550 
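         // Shift a 64-bit value by a constant amount and return the result location. On 32-bit
         // targets the shift is decomposed into SHLD/SHRD plus a single 32-bit shift, with
         // amounts of 32 or more handled by moving data between the register halves.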
   2551 RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
   2552                                           RegLocation rl_src, int shift_amount, int flags) {
   2553   UNUSED(flags);
   2554   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2555   if (cu_->target64) {
   2556     OpKind op = static_cast<OpKind>(0);    /* Make gcc happy */
   2557     switch (opcode) {
   2558       case Instruction::SHL_LONG:
   2559       case Instruction::SHL_LONG_2ADDR:
   2560         op = kOpLsl;
   2561         break;
   2562       case Instruction::SHR_LONG:
   2563       case Instruction::SHR_LONG_2ADDR:
   2564         op = kOpAsr;
   2565         break;
   2566       case Instruction::USHR_LONG:
   2567       case Instruction::USHR_LONG_2ADDR:
   2568         op = kOpLsr;
   2569         break;
   2570       default:
   2571         LOG(FATAL) << "Unexpected case";
   2572     }
   2573     OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
   2574   } else {
   2575     switch (opcode) {
   2576       case Instruction::SHL_LONG:
   2577       case Instruction::SHL_LONG_2ADDR:
   2578         DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
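                 // shift == 32 just moves the low word into the high word; shift > 32 also
                 // shifts the moved word; shift < 32 uses SHLD to carry bits from low to high.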
   2579         if (shift_amount == 32) {
   2580           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
   2581           LoadConstant(rl_result.reg.GetLow(), 0);
   2582         } else if (shift_amount > 31) {
   2583           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
   2584           NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
   2585           LoadConstant(rl_result.reg.GetLow(), 0);
   2586         } else {
   2587           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
   2588           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2589           NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
   2590                   shift_amount);
   2591           NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
   2592         }
   2593         break;
   2594       case Instruction::SHR_LONG:
   2595       case Instruction::SHR_LONG_2ADDR:
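                 // Arithmetic shift: for amounts of 32 or more the low word comes from the high
                 // word and the high word is filled with the sign bit (SAR by 31).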
   2596         if (shift_amount == 32) {
   2597           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
   2598           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2599           NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
   2600         } else if (shift_amount > 31) {
   2601           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
   2602           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2603           NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
   2604           NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
   2605         } else {
   2606           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
   2607           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2608           NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
   2609                   shift_amount);
   2610           NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
   2611         }
   2612         break;
   2613       case Instruction::USHR_LONG:
   2614       case Instruction::USHR_LONG_2ADDR:
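                 // Logical shift: for amounts of 32 or more the low word comes from the high
                 // word and the high word becomes zero.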
   2615         if (shift_amount == 32) {
   2616           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
   2617           LoadConstant(rl_result.reg.GetHigh(), 0);
   2618         } else if (shift_amount > 31) {
   2619           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
   2620           NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
   2621           LoadConstant(rl_result.reg.GetHigh(), 0);
   2622         } else {
   2623           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
   2624           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2625           NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
   2626                   shift_amount);
   2627           NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
   2628         }
   2629         break;
   2630       default:
   2631         LOG(FATAL) << "Unexpected case";
   2632     }
   2633   }
   2634   return rl_result;
   2635 }
   2636 
   2637 void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
   2638                                    RegLocation rl_src, RegLocation rl_shift, int flags) {
   2639   // Per spec, we only care about low 6 bits of shift amount.
   2640   int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
   2641   if (shift_amount == 0) {
   2642     rl_src = LoadValueWide(rl_src, kCoreReg);
   2643     StoreValueWide(rl_dest, rl_src);
   2644     return;
   2645   } else if (shift_amount == 1 &&
    2646             (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
   2647     // Need to handle this here to avoid calling StoreValueWide twice.
   2648     GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src, flags);
   2649     return;
   2650   }
   2651   if (PartiallyIntersects(rl_src, rl_dest)) {
   2652     GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
   2653     return;
   2654   }
   2655   rl_src = LoadValueWide(rl_src, kCoreReg);
   2656   RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount, flags);
   2657   StoreValueWide(rl_dest, rl_result);
   2658 }
   2659 
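         // Long arithmetic where at least one operand is a constant. Tries the immediate forms
         // first and falls back to the generic register implementation if the constant cannot be
         // used directly.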
   2660 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
   2661                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
   2662                                    int flags) {
   2663   bool isConstSuccess = false;
   2664   switch (opcode) {
   2665     case Instruction::ADD_LONG:
   2666     case Instruction::AND_LONG:
   2667     case Instruction::OR_LONG:
   2668     case Instruction::XOR_LONG:
   2669       if (rl_src2.is_const) {
   2670         isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
   2671       } else {
   2672         DCHECK(rl_src1.is_const);
   2673         isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
   2674       }
   2675       break;
   2676     case Instruction::SUB_LONG:
   2677     case Instruction::SUB_LONG_2ADDR:
   2678       if (rl_src2.is_const) {
   2679         isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
   2680       } else {
   2681         GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
   2682         isConstSuccess = true;
   2683       }
   2684       break;
   2685     case Instruction::ADD_LONG_2ADDR:
   2686     case Instruction::OR_LONG_2ADDR:
   2687     case Instruction::XOR_LONG_2ADDR:
   2688     case Instruction::AND_LONG_2ADDR:
   2689       if (rl_src2.is_const) {
   2690         if (GenerateTwoOperandInstructions()) {
   2691           isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode);
   2692         } else {
   2693           isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
   2694         }
   2695       } else {
   2696         DCHECK(rl_src1.is_const);
   2697         isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
   2698       }
   2699       break;
   2700     default:
   2701       isConstSuccess = false;
   2702       break;
   2703   }
   2704 
   2705   if (!isConstSuccess) {
   2706     // Default - bail to non-const handler.
   2707     GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
   2708   }
   2709 }
   2710 
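         // Returns true when applying 'op' with this immediate leaves the operand unchanged
         // (AND with -1, OR or XOR with 0), so the corresponding instruction can be skipped.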
   2711 bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
   2712   switch (op) {
   2713     case Instruction::AND_LONG_2ADDR:
   2714     case Instruction::AND_LONG:
   2715       return value == -1;
   2716     case Instruction::OR_LONG:
   2717     case Instruction::OR_LONG_2ADDR:
   2718     case Instruction::XOR_LONG:
   2719     case Instruction::XOR_LONG_2ADDR:
   2720       return value == 0;
   2721     default:
   2722       return false;
   2723   }
   2724 }
   2725 
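         // Select the x86 opcode for a long ALU operation with a register or memory right-hand
         // side. The high half of a 32-bit register pair uses the carry-propagating forms
         // (ADC/SBB) for add and subtract.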
   2726 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
   2727                                 bool is_high_op) {
   2728   bool rhs_in_mem = rhs.location != kLocPhysReg;
   2729   bool dest_in_mem = dest.location != kLocPhysReg;
   2730   bool is64Bit = cu_->target64;
   2731   DCHECK(!rhs_in_mem || !dest_in_mem);
   2732   switch (op) {
   2733     case Instruction::ADD_LONG:
   2734     case Instruction::ADD_LONG_2ADDR:
   2735       if (dest_in_mem) {
   2736         return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR;
   2737       } else if (rhs_in_mem) {
   2738         return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM;
   2739       }
   2740       return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR;
   2741     case Instruction::SUB_LONG:
   2742     case Instruction::SUB_LONG_2ADDR:
   2743       if (dest_in_mem) {
   2744         return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR;
   2745       } else if (rhs_in_mem) {
   2746         return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM;
   2747       }
   2748       return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR;
   2749     case Instruction::AND_LONG_2ADDR:
   2750     case Instruction::AND_LONG:
   2751       if (dest_in_mem) {
   2752         return is64Bit ? kX86And64MR : kX86And32MR;
   2753       }
   2754       if (is64Bit) {
   2755         return rhs_in_mem ? kX86And64RM : kX86And64RR;
   2756       }
   2757       return rhs_in_mem ? kX86And32RM : kX86And32RR;
   2758     case Instruction::OR_LONG:
   2759     case Instruction::OR_LONG_2ADDR:
   2760       if (dest_in_mem) {
   2761         return is64Bit ? kX86Or64MR : kX86Or32MR;
   2762       }
   2763       if (is64Bit) {
   2764         return rhs_in_mem ? kX86Or64RM : kX86Or64RR;
   2765       }
   2766       return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
   2767     case Instruction::XOR_LONG:
   2768     case Instruction::XOR_LONG_2ADDR:
   2769       if (dest_in_mem) {
   2770         return is64Bit ? kX86Xor64MR : kX86Xor32MR;
   2771       }
   2772       if (is64Bit) {
   2773         return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR;
   2774       }
   2775       return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
   2776     default:
   2777       LOG(FATAL) << "Unexpected opcode: " << op;
   2778       return kX86Add32RR;
   2779   }
   2780 }
   2781 
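         // Select the x86 opcode for a long ALU operation with an immediate right-hand side,
         // preferring the sign-extended 8-bit immediate encodings when the value fits.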
   2782 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
   2783                                 int32_t value) {
   2784   bool in_mem = loc.location != kLocPhysReg;
   2785   bool is64Bit = cu_->target64;
   2786   bool byte_imm = IS_SIMM8(value);
   2787   DCHECK(in_mem || !loc.reg.IsFloat());
   2788   switch (op) {
   2789     case Instruction::ADD_LONG:
   2790     case Instruction::ADD_LONG_2ADDR:
   2791       if (byte_imm) {
   2792         if (in_mem) {
   2793           return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
   2794         }
   2795         return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
   2796       }
   2797       if (in_mem) {
   2798         return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI;
   2799       }
   2800       return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI;
   2801     case Instruction::SUB_LONG:
   2802     case Instruction::SUB_LONG_2ADDR:
   2803       if (byte_imm) {
   2804         if (in_mem) {
   2805           return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
   2806         }
   2807         return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
   2808       }
   2809       if (in_mem) {
   2810         return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI;
   2811       }
   2812       return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI;
   2813     case Instruction::AND_LONG_2ADDR:
   2814     case Instruction::AND_LONG:
   2815       if (byte_imm) {
   2816         if (is64Bit) {
   2817           return in_mem ? kX86And64MI8 : kX86And64RI8;
   2818         }
   2819         return in_mem ? kX86And32MI8 : kX86And32RI8;
   2820       }
   2821       if (is64Bit) {
   2822         return in_mem ? kX86And64MI : kX86And64RI;
   2823       }
   2824       return in_mem ? kX86And32MI : kX86And32RI;
   2825     case Instruction::OR_LONG:
   2826     case Instruction::OR_LONG_2ADDR:
   2827       if (byte_imm) {
   2828         if (is64Bit) {
   2829           return in_mem ? kX86Or64MI8 : kX86Or64RI8;
   2830         }
   2831         return in_mem ? kX86Or32MI8 : kX86Or32RI8;
   2832       }
   2833       if (is64Bit) {
   2834         return in_mem ? kX86Or64MI : kX86Or64RI;
   2835       }
   2836       return in_mem ? kX86Or32MI : kX86Or32RI;
   2837     case Instruction::XOR_LONG:
   2838     case Instruction::XOR_LONG_2ADDR:
   2839       if (byte_imm) {
   2840         if (is64Bit) {
   2841           return in_mem ? kX86Xor64MI8 : kX86Xor64RI8;
   2842         }
   2843         return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
   2844       }
   2845       if (is64Bit) {
   2846         return in_mem ? kX86Xor64MI : kX86Xor64RI;
   2847       }
   2848       return in_mem ? kX86Xor32MI : kX86Xor32RI;
   2849     default:
   2850       LOG(FATAL) << "Unexpected opcode: " << op;
   2851       UNREACHABLE();
   2852   }
   2853 }
   2854 
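         // Two-address form: rl_dest = rl_dest op constant. Operates directly on the stack slot
         // when the destination lives in memory. Returns false when the constant cannot be
         // encoded (on 64-bit targets it must fit in 32 bits).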
   2855 bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
   2856   DCHECK(rl_src.is_const);
   2857   int64_t val = mir_graph_->ConstantValueWide(rl_src);
   2858 
   2859   if (cu_->target64) {
    2860     // We can use an immediate only if it fits in 32 bits.
   2861     if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
   2862       return false;
   2863     }
   2864 
   2865     rl_dest = UpdateLocWideTyped(rl_dest);
   2866 
   2867     if ((rl_dest.location == kLocDalvikFrame) ||
   2868         (rl_dest.location == kLocCompilerTemp)) {
   2869       int r_base = rs_rX86_SP_32.GetReg();
   2870       int displacement = SRegOffset(rl_dest.s_reg_low);
   2871 
   2872       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   2873       X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
   2874       LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
   2875       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   2876                               true /* is_load */, true /* is64bit */);
   2877       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   2878                               false /* is_load */, true /* is64bit */);
   2879       return true;
   2880     }
   2881 
   2882     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2883     DCHECK_EQ(rl_result.location, kLocPhysReg);
   2884     DCHECK(!rl_result.reg.IsFloat());
   2885 
   2886     X86OpCode x86op = GetOpcode(op, rl_result, false, val);
   2887     NewLIR2(x86op, rl_result.reg.GetReg(), val);
   2888 
   2889     StoreValueWide(rl_dest, rl_result);
   2890     return true;
   2891   }
   2892 
   2893   int32_t val_lo = Low32Bits(val);
   2894   int32_t val_hi = High32Bits(val);
   2895   rl_dest = UpdateLocWideTyped(rl_dest);
   2896 
   2897   // Can we just do this into memory?
   2898   if ((rl_dest.location == kLocDalvikFrame) ||
   2899       (rl_dest.location == kLocCompilerTemp)) {
   2900     int r_base = rs_rX86_SP_32.GetReg();
   2901     int displacement = SRegOffset(rl_dest.s_reg_low);
   2902 
   2903     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   2904     if (!IsNoOp(op, val_lo)) {
   2905       X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
   2906       LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
   2907       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   2908                               true /* is_load */, true /* is64bit */);
   2909       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   2910                               false /* is_load */, true /* is64bit */);
   2911     }
   2912     if (!IsNoOp(op, val_hi)) {
   2913       X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
   2914       LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi);
   2915       AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
   2916                                 true /* is_load */, true /* is64bit */);
   2917       AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
   2918                                 false /* is_load */, true /* is64bit */);
   2919     }
   2920     return true;
   2921   }
   2922 
   2923   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2924   DCHECK_EQ(rl_result.location, kLocPhysReg);
   2925   DCHECK(!rl_result.reg.IsFloat());
   2926 
   2927   if (!IsNoOp(op, val_lo)) {
   2928     X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
   2929     NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
   2930   }
   2931   if (!IsNoOp(op, val_hi)) {
   2932     X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
   2933     NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
   2934   }
   2935   StoreValueWide(rl_dest, rl_result);
   2936   return true;
   2937 }
   2938 
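         // Three-address form: rl_dest = rl_src1 op constant. Returns false when the constant
         // cannot be encoded, letting the caller fall back to the register path.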
   2939 bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
   2940                                 RegLocation rl_src2, Instruction::Code op) {
   2941   DCHECK(rl_src2.is_const);
   2942   int64_t val = mir_graph_->ConstantValueWide(rl_src2);
   2943 
   2944   if (cu_->target64) {
    2945     // We can use an immediate only if it fits in 32 bits.
   2946     if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
   2947       return false;
   2948     }
   2949     if (rl_dest.location == kLocPhysReg &&
   2950         rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
   2951       X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
   2952       OpRegCopy(rl_dest.reg, rl_src1.reg);
   2953       NewLIR2(x86op, rl_dest.reg.GetReg(), val);
   2954       StoreFinalValueWide(rl_dest, rl_dest);
   2955       return true;
   2956     }
   2957 
   2958     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   2959     // We need the values to be in a temporary
   2960     RegLocation rl_result = ForceTempWide(rl_src1);
   2961 
   2962     X86OpCode x86op = GetOpcode(op, rl_result, false, val);
   2963     NewLIR2(x86op, rl_result.reg.GetReg(), val);
   2964 
   2965     StoreFinalValueWide(rl_dest, rl_result);
   2966     return true;
   2967   }
   2968 
   2969   int32_t val_lo = Low32Bits(val);
   2970   int32_t val_hi = High32Bits(val);
   2971   rl_dest = UpdateLocWideTyped(rl_dest);
   2972   rl_src1 = UpdateLocWideTyped(rl_src1);
   2973 
   2974   // Can we do this directly into the destination registers?
   2975   if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
   2976       rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() &&
   2977       rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) {
   2978     if (!IsNoOp(op, val_lo)) {
   2979       X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
   2980       NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo);
   2981     }
   2982     if (!IsNoOp(op, val_hi)) {
   2983       X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
   2984       NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi);
   2985     }
   2986 
   2987     StoreFinalValueWide(rl_dest, rl_dest);
   2988     return true;
   2989   }
   2990 
   2991   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   2992   DCHECK_EQ(rl_src1.location, kLocPhysReg);
   2993 
   2994   // We need the values to be in a temporary
   2995   RegLocation rl_result = ForceTempWide(rl_src1);
   2996   if (!IsNoOp(op, val_lo)) {
   2997     X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
   2998     NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
   2999   }
   3000   if (!IsNoOp(op, val_hi)) {
   3001     X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
   3002     NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
   3003   }
   3004 
   3005   StoreFinalValueWide(rl_dest, rl_result);
   3006   return true;
   3007 }
   3008 
   3009 // For final classes there are no sub-classes to check and so we can answer the instance-of
   3010 // question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
   3011 void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
   3012                                     RegLocation rl_dest, RegLocation rl_src) {
   3013   RegLocation object = LoadValue(rl_src, kRefReg);
   3014   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   3015   RegStorage result_reg = rl_result.reg;
   3016 
   3017   // For 32-bit, SETcc only works with EAX..EDX.
   3018   RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg;
   3019   if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) {
   3020     result_reg = AllocateByteRegister();
   3021   }
   3022 
   3023   // Assume that there is no match.
   3024   LoadConstant(result_reg, 0);
   3025   LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, nullptr);
   3026 
   3027   // We will use this register to compare to memory below.
   3028   // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode).
   3029   // For this reason, force allocation of a 32 bit register to use, so that the
    3030   // compare to memory will be done using a 32-bit comparison.
   3031   // The LoadRefDisp(s) below will work normally, even in 64 bit mode.
   3032   RegStorage check_class = AllocTemp();
   3033 
   3034   // If Method* is already in a register, we can save a copy.
   3035   RegLocation rl_method = mir_graph_->GetMethodLoc();
   3036   int32_t offset_of_type = mirror::Array::DataOffset(
   3037       sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
   3038       (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
   3039 
   3040   if (rl_method.location == kLocPhysReg) {
   3041     if (use_declaring_class) {
   3042       LoadRefDisp(rl_method.reg, ArtMethod::DeclaringClassOffset().Int32Value(),
   3043                   check_class, kNotVolatile);
   3044     } else {
   3045       LoadRefDisp(rl_method.reg, ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
   3046                   check_class, kNotVolatile);
   3047       LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
   3048     }
   3049   } else {
   3050     LoadCurrMethodDirect(check_class);
   3051     if (use_declaring_class) {
   3052       LoadRefDisp(check_class, ArtMethod::DeclaringClassOffset().Int32Value(),
   3053                   check_class, kNotVolatile);
   3054     } else {
   3055       LoadRefDisp(check_class, ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
   3056                   check_class, kNotVolatile);
   3057       LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
   3058     }
   3059   }
   3060 
   3061   // Compare the computed class to the class in the object.
   3062   DCHECK_EQ(object.location, kLocPhysReg);
   3063   OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value());
   3064 
   3065   // Set the low byte of the result to 0 or 1 from the compare condition code.
   3066   NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq);
   3067 
   3068   LIR* target = NewLIR0(kPseudoTargetLabel);
   3069   null_branchover->target = target;
   3070   FreeTemp(check_class);
   3071   if (IsTemp(result_reg)) {
   3072     OpRegCopy(rl_result.reg, result_reg);
   3073     FreeTemp(result_reg);
   3074   }
   3075   StoreValue(rl_dest, rl_result);
   3076 }
   3077 
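         // 32-bit integer arithmetic. Uses two-address and register/memory forms where the
         // operand locations allow it; div and rem are dispatched to GenDivRem.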
   3078 void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
   3079                                RegLocation rl_lhs, RegLocation rl_rhs, int flags) {
   3080   OpKind op = kOpBkpt;
   3081   bool is_div_rem = false;
   3082   bool unary = false;
   3083   bool shift_op = false;
   3084   bool is_two_addr = false;
   3085   RegLocation rl_result;
   3086   switch (opcode) {
   3087     case Instruction::NEG_INT:
   3088       op = kOpNeg;
   3089       unary = true;
   3090       break;
   3091     case Instruction::NOT_INT:
   3092       op = kOpMvn;
   3093       unary = true;
   3094       break;
   3095     case Instruction::ADD_INT_2ADDR:
   3096       is_two_addr = true;
   3097       FALLTHROUGH_INTENDED;
   3098     case Instruction::ADD_INT:
   3099       op = kOpAdd;
   3100       break;
   3101     case Instruction::SUB_INT_2ADDR:
   3102       is_two_addr = true;
   3103       FALLTHROUGH_INTENDED;
   3104     case Instruction::SUB_INT:
   3105       op = kOpSub;
   3106       break;
   3107     case Instruction::MUL_INT_2ADDR:
   3108       is_two_addr = true;
   3109       FALLTHROUGH_INTENDED;
   3110     case Instruction::MUL_INT:
   3111       op = kOpMul;
   3112       break;
   3113     case Instruction::DIV_INT_2ADDR:
   3114       is_two_addr = true;
   3115       FALLTHROUGH_INTENDED;
   3116     case Instruction::DIV_INT:
   3117       op = kOpDiv;
   3118       is_div_rem = true;
   3119       break;
   3120     /* NOTE: returns in kArg1 */
   3121     case Instruction::REM_INT_2ADDR:
   3122       is_two_addr = true;
   3123       FALLTHROUGH_INTENDED;
   3124     case Instruction::REM_INT:
   3125       op = kOpRem;
   3126       is_div_rem = true;
   3127       break;
   3128     case Instruction::AND_INT_2ADDR:
   3129       is_two_addr = true;
   3130       FALLTHROUGH_INTENDED;
   3131     case Instruction::AND_INT:
   3132       op = kOpAnd;
   3133       break;
   3134     case Instruction::OR_INT_2ADDR:
   3135       is_two_addr = true;
   3136       FALLTHROUGH_INTENDED;
   3137     case Instruction::OR_INT:
   3138       op = kOpOr;
   3139       break;
   3140     case Instruction::XOR_INT_2ADDR:
   3141       is_two_addr = true;
   3142       FALLTHROUGH_INTENDED;
   3143     case Instruction::XOR_INT:
   3144       op = kOpXor;
   3145       break;
   3146     case Instruction::SHL_INT_2ADDR:
   3147       is_two_addr = true;
   3148       FALLTHROUGH_INTENDED;
   3149     case Instruction::SHL_INT:
   3150       shift_op = true;
   3151       op = kOpLsl;
   3152       break;
   3153     case Instruction::SHR_INT_2ADDR:
   3154       is_two_addr = true;
   3155       FALLTHROUGH_INTENDED;
   3156     case Instruction::SHR_INT:
   3157       shift_op = true;
   3158       op = kOpAsr;
   3159       break;
   3160     case Instruction::USHR_INT_2ADDR:
   3161       is_two_addr = true;
   3162       FALLTHROUGH_INTENDED;
   3163     case Instruction::USHR_INT:
   3164       shift_op = true;
   3165       op = kOpLsr;
   3166       break;
   3167     default:
   3168       LOG(FATAL) << "Invalid word arith op: " << opcode;
   3169   }
   3170 
   3171   // Can we convert to a two address instruction?
   3172   if (!is_two_addr &&
   3173         (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
   3174          mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
   3175     is_two_addr = true;
   3176   }
   3177 
   3178   if (!GenerateTwoOperandInstructions()) {
   3179     is_two_addr = false;
   3180   }
   3181 
   3182   // Get the div/rem stuff out of the way.
   3183   if (is_div_rem) {
   3184     rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, flags);
   3185     StoreValue(rl_dest, rl_result);
   3186     return;
   3187   }
   3188 
   3189   // If we generate any memory access below, it will reference a dalvik reg.
   3190   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   3191 
   3192   if (unary) {
   3193     rl_lhs = LoadValue(rl_lhs, kCoreReg);
   3194     rl_result = UpdateLocTyped(rl_dest);
   3195     rl_result = EvalLoc(rl_dest, kCoreReg, true);
   3196     OpRegReg(op, rl_result.reg, rl_lhs.reg);
   3197   } else {
   3198     if (shift_op) {
   3199       // X86 doesn't require masking and must use ECX.
   3200       RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
   3201       LoadValueDirectFixed(rl_rhs, t_reg);
   3202       if (is_two_addr) {
   3203         // Can we do this directly into memory?
   3204         rl_result = UpdateLocTyped(rl_dest);
   3205         if (rl_result.location != kLocPhysReg) {
   3206           // Okay, we can do this into memory
   3207           OpMemReg(op, rl_result, t_reg.GetReg());
   3208           FreeTemp(t_reg);
   3209           return;
   3210         } else if (!rl_result.reg.IsFloat()) {
   3211           // Can do this directly into the result register
   3212           OpRegReg(op, rl_result.reg, t_reg);
   3213           FreeTemp(t_reg);
   3214           StoreFinalValue(rl_dest, rl_result);
   3215           return;
   3216         }
   3217       }
    3218       // Three-address form, or we can't do it directly.
   3219       rl_lhs = LoadValue(rl_lhs, kCoreReg);
   3220       rl_result = EvalLoc(rl_dest, kCoreReg, true);
   3221       OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg);
   3222       FreeTemp(t_reg);
   3223     } else {
   3224       // Multiply is 3 operand only (sort of).
   3225       if (is_two_addr && op != kOpMul) {
   3226         // Can we do this directly into memory?
   3227         rl_result = UpdateLocTyped(rl_dest);
   3228         if (rl_result.location == kLocPhysReg) {
   3229           // Ensure res is in a core reg
   3230           rl_result = EvalLoc(rl_dest, kCoreReg, true);
   3231           // Can we do this from memory directly?
   3232           rl_rhs = UpdateLocTyped(rl_rhs);
   3233           if (rl_rhs.location != kLocPhysReg) {
   3234             OpRegMem(op, rl_result.reg, rl_rhs);
   3235             StoreFinalValue(rl_dest, rl_result);
   3236             return;
   3237           } else if (!rl_rhs.reg.IsFloat()) {
   3238             OpRegReg(op, rl_result.reg, rl_rhs.reg);
   3239             StoreFinalValue(rl_dest, rl_result);
   3240             return;
   3241           }
   3242         }
   3243         rl_rhs = LoadValue(rl_rhs, kCoreReg);
    3244         // rl_rhs and rl_dest may refer to the same VR; in that case rl_dest is
    3245         // in a register after LoadValue while rl_result has not been updated
    3246         // yet, so refresh it here.
   3247         rl_result = UpdateLocTyped(rl_dest);
   3248         if (rl_result.location != kLocPhysReg) {
   3249           // Okay, we can do this into memory.
   3250           OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
   3251           return;
   3252         } else if (!rl_result.reg.IsFloat()) {
   3253           // Can do this directly into the result register.
   3254           OpRegReg(op, rl_result.reg, rl_rhs.reg);
   3255           StoreFinalValue(rl_dest, rl_result);
   3256           return;
   3257         } else {
   3258           rl_lhs = LoadValue(rl_lhs, kCoreReg);
   3259           rl_result = EvalLoc(rl_dest, kCoreReg, true);
   3260           OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
   3261         }
   3262       } else {
   3263         // Try to use reg/memory instructions.
   3264         rl_lhs = UpdateLocTyped(rl_lhs);
   3265         rl_rhs = UpdateLocTyped(rl_rhs);
   3266         // We can't optimize with FP registers.
   3267         if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
   3268           // Something is difficult, so fall back to the standard case.
   3269           rl_lhs = LoadValue(rl_lhs, kCoreReg);
   3270           rl_rhs = LoadValue(rl_rhs, kCoreReg);
   3271           rl_result = EvalLoc(rl_dest, kCoreReg, true);
   3272           OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
   3273         } else {
   3274           // We can optimize by moving to result and using memory operands.
   3275           if (rl_rhs.location != kLocPhysReg) {
   3276             // Force LHS into result.
    3277             // We must be careful with the order here:
    3278             // if rl_dest and rl_lhs point to the same VR, we should load first;
    3279             // if they are different, we should find a register for dest first.
   3280             if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
   3281                 mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
   3282               rl_lhs = LoadValue(rl_lhs, kCoreReg);
   3283               rl_result = EvalLoc(rl_dest, kCoreReg, true);
   3284               // No-op if these are the same.
   3285               OpRegCopy(rl_result.reg, rl_lhs.reg);
   3286             } else {
   3287               rl_result = EvalLoc(rl_dest, kCoreReg, true);
   3288               LoadValueDirect(rl_lhs, rl_result.reg);
   3289             }
   3290             OpRegMem(op, rl_result.reg, rl_rhs);
   3291           } else if (rl_lhs.location != kLocPhysReg) {
   3292             // RHS is in a register; LHS is in memory.
   3293             if (op != kOpSub) {
   3294               // Force RHS into result and operate on memory.
   3295               rl_result = EvalLoc(rl_dest, kCoreReg, true);
   3296               OpRegCopy(rl_result.reg, rl_rhs.reg);
   3297               OpRegMem(op, rl_result.reg, rl_lhs);
   3298             } else {
   3299               // Subtraction isn't commutative.
   3300               rl_lhs = LoadValue(rl_lhs, kCoreReg);
   3301               rl_rhs = LoadValue(rl_rhs, kCoreReg);
   3302               rl_result = EvalLoc(rl_dest, kCoreReg, true);
   3303               OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
   3304             }
   3305           } else {
   3306             // Both are in registers.
   3307             rl_lhs = LoadValue(rl_lhs, kCoreReg);
   3308             rl_rhs = LoadValue(rl_rhs, kCoreReg);
   3309             rl_result = EvalLoc(rl_dest, kCoreReg, true);
   3310             OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
   3311           }
   3312         }
   3313       }
   3314     }
   3315   }
   3316   StoreValue(rl_dest, rl_result);
   3317 }
   3318 
   3319 bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
   3320   // If we have non-core registers, then we can't do good things.
   3321   if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) {
   3322     return false;
   3323   }
   3324   if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) {
   3325     return false;
   3326   }
   3327 
   3328   // Everything will be fine :-).
   3329   return true;
   3330 }
   3331 
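         // On 64-bit targets the int-to-long widening is a single MOVSXD, either
         // register-to-register or straight from the stack slot; 32-bit targets use the
         // portable implementation.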
   3332 void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
   3333   if (!cu_->target64) {
   3334     Mir2Lir::GenIntToLong(rl_dest, rl_src);
   3335     return;
   3336   }
   3337   rl_src = UpdateLocTyped(rl_src);
   3338   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   3339   if (rl_src.location == kLocPhysReg) {
   3340     NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
   3341   } else {
   3342     int displacement = SRegOffset(rl_src.s_reg_low);
   3343     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   3344     LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP_32.GetReg(),
   3345                      displacement + LOWORD_OFFSET);
   3346     AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   3347                             true /* is_load */, true /* is_64bit */);
   3348   }
   3349   StoreValueWide(rl_dest, rl_result);
   3350 }
   3351 
   3352 void X86Mir2Lir::GenLongToInt(RegLocation rl_dest, RegLocation rl_src) {
   3353   rl_src = UpdateLocWide(rl_src);
   3354   rl_src = NarrowRegLoc(rl_src);
   3355   StoreValue(rl_dest, rl_src);
   3356 
   3357   if (cu_->target64) {
    3358     // If src and dest are in the same physical register, StoreValue generates
    3359     // no operation, but we still need an explicit 32-bit mov R, R to clear
    3360     // the upper 32 bits.
   3361     rl_dest = UpdateLoc(rl_dest);
   3362     if (rl_src.location == kLocPhysReg && rl_dest.location == kLocPhysReg
   3363            && IsSameReg(rl_src.reg, rl_dest.reg)) {
   3364         LIR* copy_lir = OpRegCopyNoInsert(rl_dest.reg, rl_dest.reg);
    3365         // Remove the nop flag that OpRegCopyNoInsert sets when src == dest.
   3366         copy_lir->flags.is_nop = false;
   3367         AppendLIR(copy_lir);
   3368     }
   3369   }
   3370 }
   3371 
   3372 void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
   3373                         RegLocation rl_src1, RegLocation rl_shift) {
   3374   if (!cu_->target64) {
    3375     // Long shift operations in 32-bit code. Use shld or shrd to fill one 32-bit register from
    3376     // the other half, then shift the other half. If the shift amount is less than 32 we are done;
    3377     // otherwise move one register to the other and place zero or sign bits in the other.
   3378     LIR* branch;
   3379     FlushAllRegs();
   3380     LockCallTemps();
   3381     LoadValueDirectFixed(rl_shift, rs_rCX);
   3382     RegStorage r_tmp = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
   3383     LoadValueDirectWideFixed(rl_src1, r_tmp);
   3384     switch (opcode) {
   3385       case Instruction::SHL_LONG:
   3386       case Instruction::SHL_LONG_2ADDR:
   3387         NewLIR3(kX86Shld32RRC, r_tmp.GetHighReg(), r_tmp.GetLowReg(), rs_rCX.GetReg());
   3388         NewLIR2(kX86Sal32RC, r_tmp.GetLowReg(), rs_rCX.GetReg());
   3389         NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
   3390         branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
   3391         OpRegCopy(r_tmp.GetHigh(), r_tmp.GetLow());
   3392         LoadConstant(r_tmp.GetLow(), 0);
   3393         branch->target = NewLIR0(kPseudoTargetLabel);
   3394         break;
   3395       case Instruction::SHR_LONG:
   3396       case Instruction::SHR_LONG_2ADDR:
   3397         NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(), rs_rCX.GetReg());
   3398         NewLIR2(kX86Sar32RC, r_tmp.GetHighReg(), rs_rCX.GetReg());
   3399         NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
   3400         branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
   3401         OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh());
   3402         NewLIR2(kX86Sar32RI, r_tmp.GetHighReg(), 31);
   3403         branch->target = NewLIR0(kPseudoTargetLabel);
   3404         break;
   3405       case Instruction::USHR_LONG:
   3406       case Instruction::USHR_LONG_2ADDR:
   3407         NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(),
   3408                rs_rCX.GetReg());
   3409         NewLIR2(kX86Shr32RC, r_tmp.GetHighReg(), rs_rCX.GetReg());
   3410         NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
   3411         branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
   3412         OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh());
   3413         LoadConstant(r_tmp.GetHigh(), 0);
   3414         branch->target = NewLIR0(kPseudoTargetLabel);
   3415         break;
   3416       default:
   3417         LOG(FATAL) << "Unexpected case: " << opcode;
   3418         return;
   3419     }
   3420     RegLocation rl_result = LocCReturnWide();
   3421     StoreValueWide(rl_dest, rl_result);
   3422     return;
   3423   }
   3424 
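           // 64-bit path: select the shift kind, load the shift amount into rCX, and shift in
           // memory or in the result register directly when possible.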
   3425   bool is_two_addr = false;
   3426   OpKind op = kOpBkpt;
   3427   RegLocation rl_result;
   3428 
   3429   switch (opcode) {
   3430     case Instruction::SHL_LONG_2ADDR:
   3431       is_two_addr = true;
   3432       FALLTHROUGH_INTENDED;
   3433     case Instruction::SHL_LONG:
   3434       op = kOpLsl;
   3435       break;
   3436     case Instruction::SHR_LONG_2ADDR:
   3437       is_two_addr = true;
   3438       FALLTHROUGH_INTENDED;
   3439     case Instruction::SHR_LONG:
   3440       op = kOpAsr;
   3441       break;
   3442     case Instruction::USHR_LONG_2ADDR:
   3443       is_two_addr = true;
   3444       FALLTHROUGH_INTENDED;
   3445     case Instruction::USHR_LONG:
   3446       op = kOpLsr;
   3447       break;
   3448     default:
   3449       op = kOpBkpt;
   3450   }
   3451 
   3452   // X86 doesn't require masking and must use ECX.
   3453   RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
   3454   LoadValueDirectFixed(rl_shift, t_reg);
   3455   if (is_two_addr) {
   3456     // Can we do this directly into memory?
   3457     rl_result = UpdateLocWideTyped(rl_dest);
   3458     if (rl_result.location != kLocPhysReg) {
   3459       // Okay, we can do this into memory
   3460       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   3461       OpMemReg(op, rl_result, t_reg.GetReg());
   3462     } else if (!rl_result.reg.IsFloat()) {
   3463       // Can do this directly into the result register
   3464       OpRegReg(op, rl_result.reg, t_reg);
   3465       StoreFinalValueWide(rl_dest, rl_result);
   3466     }
   3467   } else {
    3468     // Three-address form, or we can't do it directly.
   3469     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   3470     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   3471     OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
   3472     StoreFinalValueWide(rl_dest, rl_result);
   3473   }
   3474 
   3475   FreeTemp(t_reg);
   3476 }
   3477 
   3478 }  // namespace art
   3479