/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the X86 ISA */

#include "codegen_x86.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "mirror/art_method.h"
#include "mirror/array.h"
#include "x86_lir.h"

namespace art {

/*
 * Compare two 64-bit values
 *    x = y     return  0
 *    x < y     return -1
 *    x > y     return  1
 */
void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  if (cu_->target64) {
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    RegStorage temp_reg = AllocTemp();
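    // Branchless compare: compute (src1 > src2) - (src1 < src2), which is -1, 0 or 1.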
    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG);   // result = (src1 > src2) ? 1 : 0
    NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL);  // temp = (src1 < src2) ? 1 : 0
    NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg());
    NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg());

    StoreValue(rl_dest, rl_result);
    FreeTemp(temp_reg);
    return;
  }

  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage
  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
  LoadValueDirectWideFixed(rl_src1, r_tmp1);
  LoadValueDirectWideFixed(rl_src2, r_tmp2);
  // Compute (r1:r0) = (r1:r0) - (r3:r2)
  OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
  OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
  NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL);  // r2 = (r1:r0) < (r3:r2) ? 1 : 0
  NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg());
  OpReg(kOpNeg, rs_r2);         // r2 = -r2
  OpRegReg(kOpOr, rs_r0, rs_r1);   // r0 = high | low - sets ZF
  NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz);  // r0 = (r1:r0) != (r3:r2) ? 1 : 0
  NewLIR2(kX86Movzx8RR, rs_r0.GetReg(), rs_r0.GetReg());
  OpRegReg(kOpOr, rs_r0, rs_r2);   // r0 = r0 | r2
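  // r0 now holds 1 (x > y), 0 (x == y) or -1 (x < y): the "!=" bit supplies the 1,
  // and OR-ing in -(x < y), which is 0 or all ones, forces -1 for the less-than case.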
  RegLocation rl_result = LocCReturn();
  StoreValue(rl_dest, rl_result);
}

X86ConditionCode X86ConditionEncoding(ConditionCode cond) {
  switch (cond) {
    case kCondEq: return kX86CondEq;
    case kCondNe: return kX86CondNe;
    case kCondCs: return kX86CondC;
    case kCondCc: return kX86CondNc;
    case kCondUlt: return kX86CondC;
    case kCondUge: return kX86CondNc;
    case kCondMi: return kX86CondS;
    case kCondPl: return kX86CondNs;
    case kCondVs: return kX86CondO;
    case kCondVc: return kX86CondNo;
    case kCondHi: return kX86CondA;
    case kCondLs: return kX86CondBe;
    case kCondGe: return kX86CondGe;
    case kCondLt: return kX86CondL;
    case kCondGt: return kX86CondG;
    case kCondLe: return kX86CondLe;
    case kCondAl:
    case kCondNv: LOG(FATAL) << "Should not reach here";
  }
  return kX86CondO;
}

LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
  NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg());
  X86ConditionCode cc = X86ConditionEncoding(cond);
  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */, cc);
  branch->target = target;
  return branch;
}

LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg,
                                int check_value, LIR* target) {
  if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) {
    // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode
    NewLIR2(reg.Is64Bit() ? kX86Test64RR : kX86Test32RR, reg.GetReg(), reg.GetReg());
  } else {
    if (reg.Is64Bit()) {
      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value);
    } else {
      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
    }
  }
  X86ConditionCode cc = X86ConditionEncoding(cond);
  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */, cc);
  branch->target = target;
  return branch;
}

LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
  // If src or dest is a pair, we'll be using low reg.
  if (r_dest.IsPair()) {
    r_dest = r_dest.GetLow();
  }
  if (r_src.IsPair()) {
    r_src = r_src.GetLow();
  }
  if (r_dest.IsFloat() || r_src.IsFloat())
    return OpFpRegCopy(r_dest, r_src);
  LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR,
                    r_dest.GetReg(), r_src.GetReg());
  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }
  return res;
}

void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
    AppendLIR(res);
  }
}

void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    bool dest_fp = r_dest.IsFloat();
    bool src_fp = r_src.IsFloat();
    if (dest_fp) {
      if (src_fp) {
        OpRegCopy(r_dest, r_src);
      } else {
        // TODO: Prevent this from happening in the code. The result is often
        // unused or could have been loaded more easily from memory.
        if (!r_src.IsPair()) {
          DCHECK(!r_dest.IsPair());
          NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg());
        } else {
          NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
          RegStorage r_tmp = AllocTempDouble();
          NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
          NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
          FreeTemp(r_tmp);
        }
      }
    } else {
      if (src_fp) {
        if (!r_dest.IsPair()) {
          DCHECK(!r_src.IsPair());
          NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg());
        } else {
          NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
          RegStorage temp_reg = AllocTempDouble();
          NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
          NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
          NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
        }
      } else {
        DCHECK_EQ(r_dest.IsPair(), r_src.IsPair());
        if (!r_src.IsPair()) {
          // Just copy the register directly.
          OpRegCopy(r_dest, r_src);
        } else {
          // Handle overlap
          if (r_src.GetHighReg() == r_dest.GetLowReg() &&
              r_src.GetLowReg() == r_dest.GetHighReg()) {
            // Deal with cycles.
            RegStorage temp_reg = AllocTemp();
            OpRegCopy(temp_reg, r_dest.GetHigh());
            OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
            OpRegCopy(r_dest.GetLow(), temp_reg);
            FreeTemp(temp_reg);
          } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
          } else {
            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
          }
        }
      }
    }
  }
}

void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
                                  int32_t true_val, int32_t false_val, RegStorage rs_dest,
                                  int dest_reg_class) {
  DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair());
  DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat());

  // We really need this check for correctness; otherwise we would need to do more
  // checks in the non zero/one case below.
  if (true_val == false_val) {
    LoadConstantNoClobber(rs_dest, true_val);
    return;
  }

  const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op);

  const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0);
  if (zero_one_case && IsByteRegister(rs_dest)) {
    if (!dest_intersect) {
      LoadConstantNoClobber(rs_dest, 0);
    }
    OpRegReg(kOpCmp, left_op, right_op);
    // Set the low byte of the result to 0 or 1 from the compare condition code.
    NewLIR2(kX86Set8R, rs_dest.GetReg(),
            X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code)));
    if (dest_intersect) {
      NewLIR2(rs_dest.Is64Bit() ? kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg());
    }
  } else {
    // Be careful: rs_dest may only be changed after the cmp, since it can be the same as one of
    // the operands, and it cannot be zeroed with xor because that would dirty the condition flags.
    RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false);
    if (temp_reg.Valid()) {
      if (false_val == 0 && dest_intersect) {
        code = FlipComparisonOrder(code);
        std::swap(true_val, false_val);
      }
      if (!dest_intersect) {
        LoadConstantNoClobber(rs_dest, false_val);
      }
      LoadConstantNoClobber(temp_reg, true_val);
      OpRegReg(kOpCmp, left_op, right_op);
      if (dest_intersect) {
        LoadConstantNoClobber(rs_dest, false_val);
        DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
      }
      OpCondRegReg(kOpCmov, code, rs_dest, temp_reg);
      FreeTemp(temp_reg);
    } else {
      // slow path
      LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr);
      LoadConstantNoClobber(rs_dest, false_val);
      LIR* that_is_it = NewLIR1(kX86Jmp8, 0);
      LIR* true_case = NewLIR0(kPseudoTargetLabel);
      cmp_branch->target = true_case;
      LoadConstantNoClobber(rs_dest, true_val);
      LIR* end = NewLIR0(kPseudoTargetLabel);
      that_is_it->target = end;
    }
  }
}

void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  RegLocation rl_result;
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  // Avoid using float regs here.
  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
  ConditionCode ccode = mir->meta.ccode;

  // The kMirOpSelect has two variants, one for constants and one for moves.
  const bool is_constant_case = (mir->ssa_rep->num_uses == 1);

  if (is_constant_case) {
    int true_val = mir->dalvikInsn.vB;
    int false_val = mir->dalvikInsn.vC;

    // Simple degenerate case: both constants are the same.
    if (true_val == false_val) {
      rl_result = EvalLoc(rl_dest, result_reg_class, true);
      LoadConstantNoClobber(rl_result.reg, true_val);
    } else {
      // TODO: use GenSelectConst32 and handle additional opcode patterns such as
      // "cmp; setcc; movzx" or "cmp; sbb r0,r0; and r0,$mask; add r0,$literal".
      rl_src = LoadValue(rl_src, src_reg_class);
      rl_result = EvalLoc(rl_dest, result_reg_class, true);
      /*
       * For ccode == kCondEq:
       *
       * 1) When the true case is zero and result_reg is not same as src_reg:
       *     xor result_reg, result_reg
       *     cmp $0, src_reg
       *     mov t1, $false_case
       *     cmovnz result_reg, t1
       * 2) When the false case is zero and result_reg is not same as src_reg:
       *     xor result_reg, result_reg
       *     cmp $0, src_reg
       *     mov t1, $true_case
       *     cmovz result_reg, t1
       * 3) All other cases (we do compare first to set eflags):
       *     cmp $0, src_reg
       *     mov result_reg, $false_case
       *     mov t1, $true_case
       *     cmovz result_reg, t1
       */
      // FIXME: depending on how you use registers you could get a false != mismatch when dealing
      // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
      const bool result_reg_same_as_src =
          (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum());
      const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
      const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
      const bool catch_all_case = !(true_zero_case || false_zero_case);

      if (true_zero_case || false_zero_case) {
        OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
      }

      if (true_zero_case || false_zero_case || catch_all_case) {
        OpRegImm(kOpCmp, rl_src.reg, 0);
      }

      if (catch_all_case) {
        OpRegImm(kOpMov, rl_result.reg, false_val);
      }

      if (true_zero_case || false_zero_case || catch_all_case) {
        ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
        int immediateForTemp = true_zero_case ? false_val : true_val;
        RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
        OpRegImm(kOpMov, temp1_reg, immediateForTemp);

        OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);

        FreeTemp(temp1_reg);
      }
    }
  } else {
    rl_src = LoadValue(rl_src, src_reg_class);
    RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
    RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
    rl_true = LoadValue(rl_true, result_reg_class);
    rl_false = LoadValue(rl_false, result_reg_class);
    rl_result = EvalLoc(rl_dest, result_reg_class, true);

    /*
     * For ccode == kCondEq:
     *
     * 1) When true case is already in place:
     *     cmp $0, src_reg
     *     cmovnz result_reg, false_reg
     * 2) When false case is already in place:
     *     cmp $0, src_reg
     *     cmovz result_reg, true_reg
     * 3) When neither case is in place:
     *     cmp $0, src_reg
     *     mov result_reg, false_reg
     *     cmovz result_reg, true_reg
     */

    // kMirOpSelect is generated just for conditional cases when comparison is done with zero.
    OpRegImm(kOpCmp, rl_src.reg, 0);

    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {
      OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg);
    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {
      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
    } else {
      OpRegCopy(rl_result.reg, rl_false.reg);
      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
    }
  }

  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  LIR* taken = &block_label_list_[bb->taken];
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  ConditionCode ccode = mir->meta.ccode;

  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }
  if (rl_src2.is_const) {
    // Do special compare/branch against simple const operand
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
    return;
  }

  if (cu_->target64) {
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);

    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
    OpCondBranch(ccode, taken);
    return;
  }

  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage
  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
  LoadValueDirectWideFixed(rl_src1, r_tmp1);
  LoadValueDirectWideFixed(rl_src2, r_tmp2);

  // Swap operands and condition code to prevent use of zero flag.
  if (ccode == kCondLe || ccode == kCondGt) {
    // Compute (r3:r2) = (r3:r2) - (r1:r0)
    OpRegReg(kOpSub, rs_r2, rs_r0);  // r2 = r2 - r0
    OpRegReg(kOpSbc, rs_r3, rs_r1);  // r3 = r3 - r1 - CF
  } else {
    // Compute (r1:r0) = (r1:r0) - (r3:r2)
    OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
    OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
  }
  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = r0 | r1
      break;
    case kCondLe:
      ccode = kCondGe;
      break;
    case kCondGt:
      ccode = kCondLt;
      break;
    case kCondLt:
    case kCondGe:
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}

void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                          int64_t val, ConditionCode ccode) {
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  LIR* taken = &block_label_list_[bb->taken];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  bool is_equality_test = ccode == kCondEq || ccode == kCondNe;

  if (cu_->target64) {
    if (is_equality_test && val == 0) {
      // For ==, != against 0 we can simply test the register against itself.
      NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
    } else if (is_equality_test && val_hi == 0 && val_lo > 0) {
      OpRegImm(kOpCmp, rl_src1.reg, val_lo);
    } else {
      RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
      LoadConstantWide(tmp, val);
      OpRegReg(kOpCmp, rl_src1.reg, tmp);
      FreeTemp(tmp);
    }
    OpCondBranch(ccode, taken);
    return;
  }

  if (is_equality_test && val != 0) {
    rl_src1 = ForceTempWide(rl_src1);
  }
  RegStorage low_reg = rl_src1.reg.GetLow();
  RegStorage high_reg = rl_src1.reg.GetHigh();

  if (is_equality_test) {
    // Comparing for ==, != against 0 simplifies to OR-ing the two halves together.
    if (val == 0) {
      if (IsTemp(low_reg)) {
        OpRegReg(kOpOr, low_reg, high_reg);
        // We have now changed it; ignore the old values.
        Clobber(rl_src1.reg);
      } else {
        RegStorage t_reg = AllocTemp();
        OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
        FreeTemp(t_reg);
      }
      OpCondBranch(ccode, taken);
      return;
    }

    // Need to compute the actual value for ==, !=.
    OpRegImm(kOpSub, low_reg, val_lo);
    NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
    OpRegReg(kOpOr, high_reg, low_reg);
    Clobber(rl_src1.reg);
  } else if (ccode == kCondLe || ccode == kCondGt) {
    // Swap operands and condition code to prevent use of zero flag.
    RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
    LoadConstantWide(tmp, val);
    OpRegReg(kOpSub, tmp.GetLow(), low_reg);
    OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
    ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
    FreeTemp(tmp);
  } else {
    // We can use a compare for the low word to set CF.
    OpRegImm(kOpCmp, low_reg, val_lo);
    if (IsTemp(high_reg)) {
      NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
      // We have now changed it; ignore the old values.
      Clobber(rl_src1.reg);
    } else {
      // mov temp_reg, high_reg; sbb temp_reg, high_constant
      RegStorage t_reg = AllocTemp();
      OpRegCopy(t_reg, high_reg);
      NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
      FreeTemp(t_reg);
    }
  }

  OpCondBranch(ccode, taken);
}

void X86Mir2Lir::CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long) {
  // It does not make sense to calculate magic and shift for zero divisor.
  DCHECK_NE(divisor, 0);

  /* According to H. S. Warren's Hacker's Delight Chapter 10 and
   * T. Granlund, P. L. Montgomery's "Division by invariant integers using multiplication",
   * the magic number M and shift S can be calculated in the following way:
   * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
   * where divisor(d) >= 2.
   * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
   * where divisor(d) <= -2.
   * Thus nc can be calculated like:
   * nc = exp - 1 - exp % d, where d >= 2 and exp = 2^31 for int or 2^63 for long
   * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long
   *
   * So the shift p is the smallest p satisfying
   * 2^p > nc * (d - 2^p % d), where d >= 2
   * 2^p > nc * (d + 2^p % d), where d <= -2.
   *
   * The magic number M is calculated by
   * M = (2^p + d - 2^p % d) / d, where d >= 2
   * M = (2^p - d - 2^p % d) / d, where d <= -2.
   *
   * Notice that p is always bigger than or equal to 32 (resp. 64), so we just return
   * p - 32 (resp. p - 64) as the shift number S.
   */
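  // As a concrete example (easily re-checked against Hacker's Delight): for d = 7 in the
  // 32-bit case, the loop below yields M = 0x92492493 (negative as an int32) and S = 2,
  // the well-known signed magic-number pair for division by 7.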

  int64_t p = (is_long) ? 63 : 31;
  const uint64_t exp = (is_long) ? 0x8000000000000000ULL : 0x80000000U;

  // Initialize the computations.
  uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
  uint64_t tmp = exp + ((is_long) ? static_cast<uint64_t>(divisor) >> 63 :
                                    static_cast<uint32_t>(divisor) >> 31);
  uint64_t abs_nc = tmp - 1 - tmp % abs_d;
  uint64_t quotient1 = exp / abs_nc;
  uint64_t remainder1 = exp % abs_nc;
  uint64_t quotient2 = exp / abs_d;
  uint64_t remainder2 = exp % abs_d;

  /*
   * To avoid handling both positive and negative divisor, Hacker's Delight
   * introduces a method to handle these 2 cases together to avoid duplication.
   */
  uint64_t delta;
  do {
    p++;
    quotient1 = 2 * quotient1;
    remainder1 = 2 * remainder1;
    if (remainder1 >= abs_nc) {
      quotient1++;
      remainder1 = remainder1 - abs_nc;
    }
    quotient2 = 2 * quotient2;
    remainder2 = 2 * remainder2;
    if (remainder2 >= abs_d) {
      quotient2++;
      remainder2 = remainder2 - abs_d;
    }
    delta = abs_d - remainder2;
  } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));

  magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);

  if (!is_long) {
    magic = static_cast<int>(magic);
  }

  shift = (is_long) ? p - 64 : p - 32;
}

RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) {
  LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
  return rl_dest;
}

RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
                                     int imm, bool is_div) {
  // Use a multiply (and fixup) to perform an int div/rem by a constant.
  RegLocation rl_result;

  if (imm == 1) {
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (is_div) {
      // x / 1 == x.
      LoadValueDirectFixed(rl_src, rl_result.reg);
    } else {
      // x % 1 == 0.
      LoadConstantNoClobber(rl_result.reg, 0);
    }
  } else if (imm == -1) {  // handle 0x80000000 / -1 special case.
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (is_div) {
      LoadValueDirectFixed(rl_src, rl_result.reg);
      OpRegImm(kOpCmp, rl_result.reg, 0x80000000);
      LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);

      // for x != MIN_INT, x / -1 == -x.
      NewLIR1(kX86Neg32R, rl_result.reg.GetReg());

      // The result register already contains the right value (0x80000000).
      minint_branch->target = NewLIR0(kPseudoTargetLabel);
    } else {
      // x % -1 == 0.
      LoadConstantNoClobber(rl_result.reg, 0);
    }
  } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
    // Division using shifting.
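    // An arithmetic right shift alone would round a negative dividend toward negative
    // infinity, so bias it by (abs(imm) - 1) first: the lea below computes
    // src + (abs(imm) - 1), and the cmov keeps the unbiased value when src >= 0.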
    rl_src = LoadValue(rl_src, kCoreReg);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (IsSameReg(rl_result.reg, rl_src.reg)) {
      RegStorage rs_temp = AllocTypedTemp(false, kCoreReg);
      rl_result.reg.SetReg(rs_temp.GetReg());
    }
    NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1);
    NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
    OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
    int shift_amount = LowestSetBit(imm);
    OpRegImm(kOpAsr, rl_result.reg, shift_amount);
    if (imm < 0) {
      OpReg(kOpNeg, rl_result.reg);
    }
  } else {
    CHECK(imm <= -2 || imm >= 2);

    // Use H. S. Warren's Hacker's Delight Chapter 10 and
    // T. Granlund, P. L. Montgomery's "Division by invariant integers using multiplication".
    int64_t magic;
    int shift;
    CalculateMagicAndShift((int64_t)imm, magic, shift, false /* is_long */);

    /*
     * For imm >= 2,
     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, while n < 0.
     * For imm <= -2,
     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, while n > 0
     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
     * We implement this algorithm in the following way:
     * 1. multiply magic number m and numerator n, getting the high 32 bits of the result in EDX
     * 2. if imm > 0 and magic < 0, add numerator to EDX
     *    if imm < 0 and magic > 0, subtract numerator from EDX
     * 3. if S != 0, arithmetically shift EDX right by S bits
     * 4. add 1 to EDX if EDX < 0
     * 5. Thus, EDX is the quotient
     */

    FlushReg(rs_r0);
    Clobber(rs_r0);
    LockTemp(rs_r0);
    FlushReg(rs_r2);
    Clobber(rs_r2);
    LockTemp(rs_r2);

    // Assume that the result will be in EDX.
    rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r2, INVALID_SREG, INVALID_SREG};

    // Numerator into EAX.
    RegStorage numerator_reg;
    if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
      // We will need the value later.
      rl_src = LoadValue(rl_src, kCoreReg);
      numerator_reg = rl_src.reg;
      OpRegCopy(rs_r0, numerator_reg);
    } else {
      // Only need this once.  Just put it into EAX.
      LoadValueDirectFixed(rl_src, rs_r0);
    }

    // EDX = magic.
    LoadConstantNoClobber(rs_r2, magic);

    // EDX:EAX = magic * dividend.
    NewLIR1(kX86Imul32DaR, rs_r2.GetReg());

    if (imm > 0 && magic < 0) {
      // Add numerator to EDX.
      DCHECK(numerator_reg.Valid());
      NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg());
    } else if (imm < 0 && magic > 0) {
      DCHECK(numerator_reg.Valid());
      NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg());
    }

    // Do we need the shift?
    if (shift != 0) {
      // Shift EDX by 'shift' bits.
      NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift);
    }

    // Add 1 to EDX if EDX < 0.

    // Move EDX to EAX.
    OpRegCopy(rs_r0, rs_r2);

    // Move sign bit to bit 0, zeroing the rest.
    NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31);

    // EDX = EDX + EAX.
    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg());

    // Quotient is in EDX.
    if (!is_div) {
      // We need to compute the remainder.
      // Remainder is numerator - (quotient * imm).
      DCHECK(numerator_reg.Valid());
      OpRegCopy(rs_r0, numerator_reg);

      // EDX = quotient * imm.
      OpRegRegImm(kOpMul, rs_r2, rs_r2, imm);

      // EAX -= EDX, leaving the remainder in EAX.
      NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg());

      // For this case, return the result in EAX.
      rl_result.reg.SetReg(r0);
    }
  }

  return rl_result;
}

RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
                                  bool is_div) {
  LOG(FATAL) << "Unexpected use of GenDivRem for x86";
  return rl_dest;
}

RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                                  RegLocation rl_src2, bool is_div, bool check_zero) {
  // We have to use fixed registers, so flush all the temps.
  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage.

  // Load LHS into EAX.
  LoadValueDirectFixed(rl_src1, rs_r0);

  // Load RHS into ECX.
  LoadValueDirectFixed(rl_src2, rs_r1);

  // Copy LHS sign bit into EDX.
  NewLIR0(kx86Cdq32Da);

  if (check_zero) {
    // Handle division by zero case.
    GenDivZeroCheck(rs_r1);
  }

  // Have to catch 0x80000000/-1 case, or we will get an exception!
  OpRegImm(kOpCmp, rs_r1, -1);
  LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // RHS is -1.
  OpRegImm(kOpCmp, rs_r0, 0x80000000);
  LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // In 0x80000000/-1 case.
  if (!is_div) {
    // For DIV, EAX is already right. For REM, we need EDX to be 0.
    LoadConstantNoClobber(rs_r2, 0);
  }
  LIR* done = NewLIR1(kX86Jmp8, 0);

  // Expected case.
  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
  minint_branch->target = minus_one_branch->target;
  NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg());
  done->target = NewLIR0(kPseudoTargetLabel);

  // Result is in EAX for div and EDX for rem.
  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
  if (!is_div) {
    rl_result.reg.SetReg(r2);
  }
  return rl_result;
}

bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);

  if (is_long && cu_->instruction_set == kX86) {
    return false;
  }

  // Get the two arguments to the invoke and place them in GP registers.
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);

  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  /*
   * If the result register is the same as the second element, then we need to be careful.
   * The reason is that the first copy will inadvertently clobber the second element with
   * the first one thus yielding the wrong result. Thus we do a swap in that case.
   */
  if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
    std::swap(rl_src1, rl_src2);
  }

  // Pick the first integer as min/max.
  OpRegCopy(rl_result.reg, rl_src1.reg);

  // If the integers are both in the same register, then there is nothing else to do
  // because they are equal and we have already moved one into the result.
  if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) {
    // It is possible we didn't pick correctly so do the actual comparison now.
    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);

    // Conditionally move the other integer into the destination register.
    ConditionCode condition_code = is_min ? kCondGt : kCondLt;
    OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg);
  }

  if (is_long) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_address;
  if (!cu_->target64) {
    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
    rl_address = LoadValue(rl_src_address, kCoreReg);
  } else {
    rl_address = LoadValueWide(rl_src_address, kCoreReg);
  }
  RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  // Unaligned access is allowed on x86.
  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
  if (size == k64) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_address;
  if (!cu_->target64) {
    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
    rl_address = LoadValue(rl_src_address, kCoreReg);
  } else {
    rl_address = LoadValueWide(rl_src_address, kCoreReg);
  }
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_value;
  if (size == k64) {
    // Unaligned access is allowed on x86.
    rl_value = LoadValueWide(rl_src_value, kCoreReg);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    // In 32-bit mode, only the EAX..EDX registers can be used with Mov8MR.
    if (!cu_->target64 && size == kSignedByte) {
      rl_src_value = UpdateLocTyped(rl_src_value, kCoreReg);
      if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
        RegStorage temp = AllocateByteRegister();
        OpRegCopy(temp, rl_src_value.reg);
        rl_value.reg = temp;
      } else {
        rl_value = LoadValue(rl_src_value, kCoreReg);
      }
    } else {
      rl_value = LoadValue(rl_src_value, kCoreReg);
    }
  }
  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
  return true;
}

void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
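  // Computes r_base = reg1 + reg2 * scale + offset in a single LEA.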
  NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset);
}

void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
  DCHECK_EQ(kX86, cu_->instruction_set);
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
  DCHECK_EQ(kX86_64, cu_->instruction_set);
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

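// Returns true when rl is currently held in the given physical register: the register
// numbers match and the value there is live (or reg is rl's home location).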
static bool IsInReg(X86Mir2Lir *pMir2Lir, const RegLocation &rl, RegStorage reg) {
  return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home);
}

bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  if (!cu_->target64) {
    rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
  }
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]

  if (is_long && cu_->target64) {
    // RAX must hold expected for CMPXCHG. Neither rl_new_value nor r_ptr may be in RAX.
    FlushReg(rs_r0q);
    Clobber(rs_r0q);
    LockTemp(rs_r0q);

    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
    RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
    RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
    LoadValueDirectWide(rl_src_expected, rs_r0q);
    NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
            rl_new_value.reg.GetReg());

    // After a store we need to insert a barrier in case of a potential load. Since the
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
    GenMemBarrier(kAnyAny);

    FreeTemp(rs_r0q);
  } else if (is_long) {
    // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
    // TODO: CFI support.
    FlushAllRegs();
    LockCallTemps();
    RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
    RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
    LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
    LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
    // FIXME: needs 64-bit update.
    const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
    const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
    DCHECK(!obj_in_si || !obj_in_di);
    const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
    const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
    DCHECK(!off_in_si || !off_in_di);
    // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
    RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
    RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
    bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
    bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
    if (push_di) {
      NewLIR1(kX86Push32R, rs_rDI.GetReg());
      MarkTemp(rs_rDI);
      LockTemp(rs_rDI);
    }
    if (push_si) {
      NewLIR1(kX86Push32R, rs_rSI.GetReg());
      MarkTemp(rs_rSI);
      LockTemp(rs_rSI);
    }
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
    if (!obj_in_si && !obj_in_di) {
      LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
    }
    if (!off_in_si && !off_in_di) {
      LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
    }
    NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);

    // After a store we need to insert a barrier to prevent reordering with either
    // earlier or later memory accesses.  Since
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
    // and it will be associated with the cmpxchg instruction, preventing both.
    GenMemBarrier(kAnyAny);

    if (push_si) {
      FreeTemp(rs_rSI);
      UnmarkTemp(rs_rSI);
      NewLIR1(kX86Pop32R, rs_rSI.GetReg());
    }
    if (push_di) {
      FreeTemp(rs_rDI);
      UnmarkTemp(rs_rDI);
      NewLIR1(kX86Pop32R, rs_rDI.GetReg());
    }
    FreeCallTemps();
  } else {
    // EAX must hold expected for CMPXCHG. Neither rl_new_value nor r_ptr may be in EAX.
    FlushReg(rs_r0);
    Clobber(rs_r0);
    LockTemp(rs_r0);

    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
    RegLocation rl_new_value = LoadValue(rl_src_new_value);

    if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
      // Mark card for object assuming new value is stored.
      FreeTemp(rs_r0);  // Temporarily release EAX for MarkGCCard().
      MarkGCCard(rl_new_value.reg, rl_object.reg);
      LockTemp(rs_r0);
    }

    RegLocation rl_offset;
    if (cu_->target64) {
      rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
    } else {
      rl_offset = LoadValue(rl_src_offset, kCoreReg);
    }
    LoadValueDirect(rl_src_expected, rs_r0);
    NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
            rl_new_value.reg.GetReg());

    // After a store we need to insert a barrier to prevent reordering with either
    // earlier or later memory accesses.  Since
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
    // and it will be associated with the cmpxchg instruction, preventing both.
    GenMemBarrier(kAnyAny);

    FreeTemp(rs_r0);
  }

  // Convert ZF to boolean.
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage result_reg = rl_result.reg;

  // For 32-bit, SETcc only works with EAX..EDX.
  if (!IsByteRegister(result_reg)) {
    result_reg = AllocateByteRegister();
  }
  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
  NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
  if (IsTemp(result_reg)) {
    FreeTemp(result_reg);
  }
  StoreValue(rl_dest, rl_result);
  return true;
}

LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
  CHECK(base_of_code_ != nullptr);

  // Address the start of the method.
  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
  if (rl_method.wide) {
    LoadValueDirectWideFixed(rl_method, reg);
  } else {
    LoadValueDirectFixed(rl_method, reg);
  }
  store_method_addr_used_ = true;

  // Load the proper value from the literal area.
  // We don't know the proper offset for the value, so pick one that will force
  // 4 byte offset.  We will fix this up in the assembler later to have the right
  // value.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256,
                    0, 0, target);
  res->target = target;
  res->flags.fixup = kFixupLoad;
  return res;
}

LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
  LOG(FATAL) << "Unexpected use of OpVldm for x86";
  return NULL;
}

LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) {
  LOG(FATAL) << "Unexpected use of OpVstm for x86";
  return NULL;
}

void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                               RegLocation rl_result, int lit,
                                               int first_bit, int second_bit) {
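  // lit has exactly two bits set (first_bit and second_bit), so the multiply is
  // strength-reduced to ((src << (second_bit - first_bit)) + src) << first_bit.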
  RegStorage t_reg = AllocTemp();
  OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
  OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
  FreeTemp(t_reg);
  if (first_bit != 0) {
    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
  }
}

void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
  if (cu_->target64) {
    DCHECK(reg.Is64Bit());

    NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
  } else {
    DCHECK(reg.IsPair());

    // We are not supposed to clobber the incoming storage, so allocate a temporary.
    RegStorage t_reg = AllocTemp();
    // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
    OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
    // The temp is no longer needed so free it at this time.
    FreeTemp(t_reg);
  }

  // In case of zero, throw ArithmeticException.
  GenDivZeroCheck(kCondEq);
}

void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index,
                                     RegStorage array_base,
                                     int len_offset) {
  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
   public:
    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch,
                             RegStorage index, RegStorage array_base, int32_t len_offset)
        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch),
          index_(index), array_base_(array_base), len_offset_(len_offset) {
    }

    void Compile() OVERRIDE {
      m2l_->ResetRegPool();
      m2l_->ResetDefTracking();
      GenerateTargetLabel(kPseudoThrowTarget);

      RegStorage new_index = index_;
      // Move index out of kArg1, either directly to kArg0, or to kArg2.
      // TODO: clean up to compare registers by type rather than by register number.
      if (index_ == m2l_->TargetReg(kArg1, kNotWide)) {
        if (array_base_ == m2l_->TargetReg(kArg0, kRef)) {
          m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_);
          new_index = m2l_->TargetReg(kArg2, kNotWide);
        } else {
          m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_);
          new_index = m2l_->TargetReg(kArg0, kNotWide);
        }
      }
      // Load array length to kArg1.
      X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
      x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
      x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, new_index,
                                       m2l_->TargetReg(kArg1, kNotWide), true);
    }

   private:
    const RegStorage index_;
    const RegStorage array_base_;
    const int32_t len_offset_;
  };

  OpRegMem(kOpCmp, index, array_base, len_offset);
  MarkPossibleNullPointerException(0);
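  // Branch when index >= length (unsigned); the unsigned comparison also catches a
  // negative index, which compares as a very large unsigned value.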
  LIR* branch = OpCondBranch(kCondUge, nullptr);
  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                    index, array_base, len_offset));
}

void X86Mir2Lir::GenArrayBoundsCheck(int32_t index,
                                     RegStorage array_base,
                                     int32_t len_offset) {
  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
   public:
    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch,
                             int32_t index, RegStorage array_base, int32_t len_offset)
        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch),
          index_(index), array_base_(array_base), len_offset_(len_offset) {
    }

    void Compile() OVERRIDE {
      m2l_->ResetRegPool();
      m2l_->ResetDefTracking();
      GenerateTargetLabel(kPseudoThrowTarget);

      // Load array length to kArg1.
      X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
      x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
      x86_m2l->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_);
      x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, m2l_->TargetReg(kArg0, kNotWide),
                                       m2l_->TargetReg(kArg1, kNotWide), true);
    }

   private:
    const int32_t index_;
    const RegStorage array_base_;
    const int32_t len_offset_;
  };

  NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
  MarkPossibleNullPointerException(0);
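  // Here the comparison is length vs. index, so branch when length <= index (unsigned),
  // i.e. when the constant index is out of range.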
  LIR* branch = OpCondBranch(kCondLs, nullptr);
  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                    index, array_base, len_offset));
}

// Test suspend flag, return target of taken suspend branch
LIR* X86Mir2Lir::OpTestSuspend(LIR* target) {
  if (cu_->target64) {
    OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0);
  } else {
    OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0);
  }
  return OpCondBranch((target == NULL) ? kCondNe : kCondEq, target);
}

// Decrement register and branch on condition
LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
  OpRegImm(kOpSub, reg, 1);
  return OpCondBranch(c_code, target);
}

bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
  LOG(FATAL) << "Unexpected use of SmallLiteralDivRem in x86";
  return false;
}

bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
  LOG(FATAL) << "Unexpected use of EasyMultiply in x86";
  return false;
}

LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
  LOG(FATAL) << "Unexpected use of OpIT in x86";
  return NULL;
}

void X86Mir2Lir::OpEndIT(LIR* it) {
  LOG(FATAL) << "Unexpected use of OpEndIT in x86";
}

void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) {
  switch (val) {
    case 0:
      NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
      break;
    case 1:
      OpRegCopy(dest, src);
      break;
    default:
      OpRegRegImm(kOpMul, dest, src, val);
      break;
  }
}

void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  LIR* m;
  switch (val) {
    case 0:
      NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
      break;
    case 1:
      LoadBaseDisp(rs_rX86_SP, displacement, dest, k32, kNotVolatile);
      break;
    default:
      m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
                  rs_rX86_SP.GetReg(), displacement, val);
      AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
      break;
  }
}

void X86Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                                RegLocation rl_src2) {
  if (!cu_->target64) {
    1298     // Some long ops on 32-bit x86 fall back to the generic implementation.
   1299     switch (opcode) {
   1300       case Instruction::NOT_LONG:
   1301       case Instruction::DIV_LONG:
   1302       case Instruction::DIV_LONG_2ADDR:
   1303       case Instruction::REM_LONG:
   1304       case Instruction::REM_LONG_2ADDR:
   1305         Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
   1306         return;
   1307 
   1308       default:
   1309         // Everything else we can handle.
   1310         break;
   1311     }
   1312   }
   1313 
   1314   switch (opcode) {
   1315     case Instruction::NOT_LONG:
   1316       GenNotLong(rl_dest, rl_src2);
   1317       return;
   1318 
   1319     case Instruction::ADD_LONG:
   1320     case Instruction::ADD_LONG_2ADDR:
   1321       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
   1322       return;
   1323 
   1324     case Instruction::SUB_LONG:
   1325     case Instruction::SUB_LONG_2ADDR:
   1326       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
   1327       return;
   1328 
   1329     case Instruction::MUL_LONG:
   1330     case Instruction::MUL_LONG_2ADDR:
   1331       GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
   1332       return;
   1333 
   1334     case Instruction::DIV_LONG:
   1335     case Instruction::DIV_LONG_2ADDR:
   1336       GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true);
   1337       return;
   1338 
   1339     case Instruction::REM_LONG:
   1340     case Instruction::REM_LONG_2ADDR:
   1341       GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false);
   1342       return;
   1343 
   1344     case Instruction::AND_LONG_2ADDR:
   1345     case Instruction::AND_LONG:
   1346       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
   1347       return;
   1348 
   1349     case Instruction::OR_LONG:
   1350     case Instruction::OR_LONG_2ADDR:
   1351       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
   1352       return;
   1353 
   1354     case Instruction::XOR_LONG:
   1355     case Instruction::XOR_LONG_2ADDR:
   1356       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
   1357       return;
   1358 
   1359     case Instruction::NEG_LONG:
   1360       GenNegLong(rl_dest, rl_src2);
   1361       return;
   1362 
   1363     default:
   1364       LOG(FATAL) << "Invalid long arith op";
   1365       return;
   1366   }
   1367 }
   1368 
   1369 bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val) {
   1370   // All memory accesses below reference dalvik regs.
   1371   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   1372 
   1373   if (val == 0) {
   1374     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1375     if (cu_->target64) {
   1376       OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
   1377     } else {
   1378       OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow());
   1379       OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());
   1380     }
   1381     StoreValueWide(rl_dest, rl_result);
   1382     return true;
   1383   } else if (val == 1) {
   1384     StoreValueWide(rl_dest, rl_src1);
   1385     return true;
   1386   } else if (val == 2) {
   1387     GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
   1388     return true;
   1389   } else if (IsPowerOfTwo(val)) {
   1390     int shift_amount = LowestSetBit(val);
   1391     if (!BadOverlap(rl_src1, rl_dest)) {
   1392       rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   1393       RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1,
   1394                                                 shift_amount);
   1395       StoreValueWide(rl_dest, rl_result);
   1396       return true;
   1397     }
   1398   }
   1399 
    1400   // Okay, on 32-bit just bite the bullet and do it; still better than the general case.
   1401   if (!cu_->target64) {
   1402     int32_t val_lo = Low32Bits(val);
   1403     int32_t val_hi = High32Bits(val);
   1404     FlushAllRegs();
   1405     LockCallTemps();  // Prepare for explicit register usage.
   1406     rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
   1407     bool src1_in_reg = rl_src1.location == kLocPhysReg;
   1408     int displacement = SRegOffset(rl_src1.s_reg_low);
   1409 
   1410     // ECX <- 1H * 2L
   1411     // EAX <- 1L * 2H
   1412     if (src1_in_reg) {
   1413       GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo);
   1414       GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi);
   1415     } else {
   1416       GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
   1417       GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
   1418     }
   1419 
   1420     // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
   1421     NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
   1422 
   1423     // EAX <- 2L
   1424     LoadConstantNoClobber(rs_r0, val_lo);
   1425 
   1426     // EDX:EAX <- 2L * 1L (double precision)
   1427     if (src1_in_reg) {
   1428       NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
   1429     } else {
   1430       LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET);
   1431       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1432                               true /* is_load */, true /* is_64bit */);
   1433     }
   1434 
   1435     // EDX <- EDX + ECX (add high words)
   1436     NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
   1437 
   1438     // Result is EDX:EAX
   1439     RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
   1440                              RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
   1441     StoreValueWide(rl_dest, rl_result);
   1442     return true;
   1443   }
   1444   return false;
   1445 }
   1446 
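         // Illustration (a sketch, not compiler code; names are invented): the 32-bit
         // paths here and in GenMulLong below decompose a 64-bit multiply into 32-bit
         // pieces. In plain C++ the emitted sequence computes:
         //
         //   uint64_t MulLong(uint32_t x_lo, uint32_t x_hi, uint32_t y_lo, uint32_t y_hi) {
         //     uint32_t cross = x_hi * y_lo + x_lo * y_hi;          // ECX after the add
         //     uint64_t low = static_cast<uint64_t>(x_lo) * y_lo;   // EDX:EAX from mul
         //     return low + (static_cast<uint64_t>(cross) << 32);   // EDX += ECX
         //   }
         //
         // The x_hi * y_hi term is dropped: shifted left by 64 bits, it cannot affect
         // the low 64 bits of the product.
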
   1447 void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
   1448                             RegLocation rl_src2) {
   1449   if (rl_src1.is_const) {
   1450     std::swap(rl_src1, rl_src2);
   1451   }
   1452 
   1453   if (rl_src2.is_const) {
   1454     if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2))) {
   1455       return;
   1456     }
   1457   }
   1458 
   1459   // All memory accesses below reference dalvik regs.
   1460   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   1461 
   1462   if (cu_->target64) {
   1463     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   1464     rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   1465     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1466     if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
   1467         rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
   1468       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
   1469     } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() &&
   1470                rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
   1471       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg());
   1472     } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
   1473                rl_result.reg.GetReg() != rl_src2.reg.GetReg()) {
   1474       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
   1475     } else {
   1476       OpRegCopy(rl_result.reg, rl_src1.reg);
   1477       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
   1478     }
   1479     StoreValueWide(rl_dest, rl_result);
   1480     return;
   1481   }
   1482 
    1483   // Not multiplying by a constant. Do it the hard way.
   1484   // Check for V*V.  We can eliminate a multiply in that case, as 2L*1H == 2H*1L.
   1485   bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
   1486                    mir_graph_->SRegToVReg(rl_src2.s_reg_low);
   1487 
   1488   FlushAllRegs();
   1489   LockCallTemps();  // Prepare for explicit register usage.
   1490   rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
   1491   rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
   1492 
   1493   // At this point, the VRs are in their home locations.
   1494   bool src1_in_reg = rl_src1.location == kLocPhysReg;
   1495   bool src2_in_reg = rl_src2.location == kLocPhysReg;
   1496 
   1497   // ECX <- 1H
   1498   if (src1_in_reg) {
   1499     NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
   1500   } else {
   1501     LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32,
   1502                  kNotVolatile);
   1503   }
   1504 
   1505   if (is_square) {
   1506     // Take advantage of the fact that the values are the same.
   1507     // ECX <- ECX * 2L  (1H * 2L)
   1508     if (src2_in_reg) {
   1509       NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
   1510     } else {
   1511       int displacement = SRegOffset(rl_src2.s_reg_low);
   1512       LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
   1513                        displacement + LOWORD_OFFSET);
   1514       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1515                               true /* is_load */, true /* is_64bit */);
   1516     }
   1517 
   1518     // ECX <- 2*ECX (2H * 1L) + (1H * 2L)
   1519     NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg());
   1520   } else {
   1521     // EAX <- 2H
   1522     if (src2_in_reg) {
   1523       NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
   1524     } else {
   1525       LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32,
   1526                    kNotVolatile);
   1527     }
   1528 
   1529     // EAX <- EAX * 1L  (2H * 1L)
   1530     if (src1_in_reg) {
   1531       NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg());
   1532     } else {
   1533       int displacement = SRegOffset(rl_src1.s_reg_low);
   1534       LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP.GetReg(),
   1535                        displacement + LOWORD_OFFSET);
   1536       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1537                               true /* is_load */, true /* is_64bit */);
   1538     }
   1539 
   1540     // ECX <- ECX * 2L  (1H * 2L)
   1541     if (src2_in_reg) {
   1542       NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
   1543     } else {
   1544       int displacement = SRegOffset(rl_src2.s_reg_low);
   1545       LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
   1546                        displacement + LOWORD_OFFSET);
   1547       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1548                               true /* is_load */, true /* is_64bit */);
   1549     }
   1550 
   1551     // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
   1552     NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
   1553   }
   1554 
   1555   // EAX <- 2L
   1556   if (src2_in_reg) {
   1557     NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
   1558   } else {
   1559     LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32,
   1560                  kNotVolatile);
   1561   }
   1562 
   1563   // EDX:EAX <- 2L * 1L (double precision)
   1564   if (src1_in_reg) {
   1565     NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
   1566   } else {
   1567     int displacement = SRegOffset(rl_src1.s_reg_low);
   1568     LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET);
   1569     AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1570                             true /* is_load */, true /* is_64bit */);
   1571   }
   1572 
   1573   // EDX <- EDX + ECX (add high words)
   1574   NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
   1575 
   1576   // Result is EDX:EAX
   1577   RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
   1578                            RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
   1579   StoreValueWide(rl_dest, rl_result);
   1580 }
   1581 
   1582 void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
   1583                                    Instruction::Code op) {
   1584   DCHECK_EQ(rl_dest.location, kLocPhysReg);
   1585   X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
   1586   if (rl_src.location == kLocPhysReg) {
   1587     // Both operands are in registers.
    1588     // But we must ensure that rl_src is in a register pair.
   1589     if (cu_->target64) {
   1590       NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
   1591     } else {
   1592       rl_src = LoadValueWide(rl_src, kCoreReg);
   1593       if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
   1594         // The registers are the same, so we would clobber it before the use.
   1595         RegStorage temp_reg = AllocTemp();
   1596         OpRegCopy(temp_reg, rl_dest.reg);
   1597         rl_src.reg.SetHighReg(temp_reg.GetReg());
   1598       }
   1599       NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
   1600 
   1601       x86op = GetOpcode(op, rl_dest, rl_src, true);
   1602       NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
    1603       FreeTemp(rl_src.reg);  // Release rl_src's registers (including any temp copy made above).
   1604     }
   1605     return;
   1606   }
   1607 
   1608   // RHS is in memory.
   1609   DCHECK((rl_src.location == kLocDalvikFrame) ||
   1610          (rl_src.location == kLocCompilerTemp));
   1611   int r_base = rs_rX86_SP.GetReg();
   1612   int displacement = SRegOffset(rl_src.s_reg_low);
   1613 
   1614   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   1615   LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
   1616                      r_base, displacement + LOWORD_OFFSET);
   1617   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   1618                           true /* is_load */, true /* is64bit */);
   1619   if (!cu_->target64) {
   1620     x86op = GetOpcode(op, rl_dest, rl_src, true);
   1621     lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
   1622     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
   1623                             true /* is_load */, true /* is64bit */);
   1624   }
   1625 }
   1626 
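         // Illustration (a sketch of the emitted assembly, with assumed word offsets
         // LOWORD_OFFSET = 0 and HIWORD_OFFSET = 4): on a 32-bit target the two NewLIR3
         // calls above lower one long operation into a low/high word pair against the
         // frame. For ADD_LONG with the RHS spilled at [esp + disp]:
         //
         //   add  r_dest_lo, [esp + disp]       ; low word, sets CF
         //   adc  r_dest_hi, [esp + disp + 4]   ; high word consumes the carry
         //
         // GetOpcode() (later in this file) selects the Add32/Adc32 variants so the
         // carry from the low word propagates into the high word.
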
   1627 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
   1628   rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
   1629   if (rl_dest.location == kLocPhysReg) {
   1630     // Ensure we are in a register pair
   1631     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1632 
   1633     rl_src = UpdateLocWideTyped(rl_src, kCoreReg);
   1634     GenLongRegOrMemOp(rl_result, rl_src, op);
   1635     StoreFinalValueWide(rl_dest, rl_result);
   1636     return;
   1637   }
   1638 
   1639   // It wasn't in registers, so it better be in memory.
   1640   DCHECK((rl_dest.location == kLocDalvikFrame) ||
   1641          (rl_dest.location == kLocCompilerTemp));
   1642   rl_src = LoadValueWide(rl_src, kCoreReg);
   1643 
   1644   // Operate directly into memory.
   1645   X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
   1646   int r_base = rs_rX86_SP.GetReg();
   1647   int displacement = SRegOffset(rl_dest.s_reg_low);
   1648 
   1649   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   1650   LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
   1651                      cu_->target64 ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
   1652   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   1653                           true /* is_load */, true /* is64bit */);
   1654   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   1655                           false /* is_load */, true /* is64bit */);
   1656   if (!cu_->target64) {
   1657     x86op = GetOpcode(op, rl_dest, rl_src, true);
   1658     lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
   1659     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
   1660                             true /* is_load */, true /* is64bit */);
   1661     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
   1662                             false /* is_load */, true /* is64bit */);
   1663   }
   1664   FreeTemp(rl_src.reg);
   1665 }
   1666 
   1667 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
   1668                               RegLocation rl_src2, Instruction::Code op,
   1669                               bool is_commutative) {
   1670   // Is this really a 2 operand operation?
   1671   switch (op) {
   1672     case Instruction::ADD_LONG_2ADDR:
   1673     case Instruction::SUB_LONG_2ADDR:
   1674     case Instruction::AND_LONG_2ADDR:
   1675     case Instruction::OR_LONG_2ADDR:
   1676     case Instruction::XOR_LONG_2ADDR:
   1677       if (GenerateTwoOperandInstructions()) {
   1678         GenLongArith(rl_dest, rl_src2, op);
   1679         return;
   1680       }
   1681       break;
   1682 
   1683     default:
   1684       break;
   1685   }
   1686 
   1687   if (rl_dest.location == kLocPhysReg) {
   1688     RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
   1689 
   1690     // We are about to clobber the LHS, so it needs to be a temp.
   1691     rl_result = ForceTempWide(rl_result);
   1692 
   1693     // Perform the operation using the RHS.
   1694     rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
   1695     GenLongRegOrMemOp(rl_result, rl_src2, op);
   1696 
   1697     // And now record that the result is in the temp.
   1698     StoreFinalValueWide(rl_dest, rl_result);
   1699     return;
   1700   }
   1701 
   1702   // It wasn't in registers, so it better be in memory.
   1703   DCHECK((rl_dest.location == kLocDalvikFrame) ||
   1704          (rl_dest.location == kLocCompilerTemp));
   1705   rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
   1706   rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
   1707 
    1708   // Get one of the source operands into a temporary register.
   1709   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   1710   if (cu_->target64) {
   1711     if (IsTemp(rl_src1.reg)) {
   1712       GenLongRegOrMemOp(rl_src1, rl_src2, op);
   1713     } else if (is_commutative) {
   1714       rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   1715       // We need at least one of them to be a temporary.
   1716       if (!IsTemp(rl_src2.reg)) {
   1717         rl_src1 = ForceTempWide(rl_src1);
   1718         GenLongRegOrMemOp(rl_src1, rl_src2, op);
   1719       } else {
   1720         GenLongRegOrMemOp(rl_src2, rl_src1, op);
   1721         StoreFinalValueWide(rl_dest, rl_src2);
   1722         return;
   1723       }
   1724     } else {
   1725       // Need LHS to be the temp.
   1726       rl_src1 = ForceTempWide(rl_src1);
   1727       GenLongRegOrMemOp(rl_src1, rl_src2, op);
   1728     }
   1729   } else {
   1730     if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
   1731       GenLongRegOrMemOp(rl_src1, rl_src2, op);
   1732     } else if (is_commutative) {
   1733       rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   1734       // We need at least one of them to be a temporary.
   1735       if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
   1736         rl_src1 = ForceTempWide(rl_src1);
   1737         GenLongRegOrMemOp(rl_src1, rl_src2, op);
   1738       } else {
   1739         GenLongRegOrMemOp(rl_src2, rl_src1, op);
   1740         StoreFinalValueWide(rl_dest, rl_src2);
   1741         return;
   1742       }
   1743     } else {
   1744       // Need LHS to be the temp.
   1745       rl_src1 = ForceTempWide(rl_src1);
   1746       GenLongRegOrMemOp(rl_src1, rl_src2, op);
   1747     }
   1748   }
   1749 
   1750   StoreFinalValueWide(rl_dest, rl_src1);
   1751 }
   1752 
   1753 void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
   1754   if (cu_->target64) {
   1755     rl_src = LoadValueWide(rl_src, kCoreReg);
   1756     RegLocation rl_result;
   1757     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1758     OpRegCopy(rl_result.reg, rl_src.reg);
   1759     OpReg(kOpNot, rl_result.reg);
   1760     StoreValueWide(rl_dest, rl_result);
   1761   } else {
    1762     LOG(FATAL) << "Unexpected use of GenNotLong()";
   1763   }
   1764 }
   1765 
   1766 void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src,
   1767                                   int64_t imm, bool is_div) {
   1768   if (imm == 0) {
   1769     GenDivZeroException();
   1770   } else if (imm == 1) {
   1771     if (is_div) {
   1772       // x / 1 == x.
   1773       StoreValueWide(rl_dest, rl_src);
   1774     } else {
   1775       // x % 1 == 0.
   1776       RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1777       LoadConstantWide(rl_result.reg, 0);
   1778       StoreValueWide(rl_dest, rl_result);
   1779     }
   1780   } else if (imm == -1) {  // handle 0x8000000000000000 / -1 special case.
   1781     if (is_div) {
   1782       rl_src = LoadValueWide(rl_src, kCoreReg);
   1783       RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1784       RegStorage rs_temp = AllocTempWide();
   1785 
   1786       OpRegCopy(rl_result.reg, rl_src.reg);
   1787       LoadConstantWide(rs_temp, 0x8000000000000000);
   1788 
   1789       // If x == MIN_LONG, return MIN_LONG.
   1790       OpRegReg(kOpCmp, rl_src.reg, rs_temp);
   1791       LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
   1792 
   1793       // For x != MIN_LONG, x / -1 == -x.
   1794       OpReg(kOpNeg, rl_result.reg);
   1795 
   1796       minint_branch->target = NewLIR0(kPseudoTargetLabel);
   1797       FreeTemp(rs_temp);
   1798       StoreValueWide(rl_dest, rl_result);
   1799     } else {
   1800       // x % -1 == 0.
   1801       RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1802       LoadConstantWide(rl_result.reg, 0);
   1803       StoreValueWide(rl_dest, rl_result);
   1804     }
   1805   } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
   1806     // Division using shifting.
   1807     rl_src = LoadValueWide(rl_src, kCoreReg);
   1808     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1809     if (IsSameReg(rl_result.reg, rl_src.reg)) {
   1810       RegStorage rs_temp = AllocTypedTempWide(false, kCoreReg);
   1811       rl_result.reg.SetReg(rs_temp.GetReg());
   1812     }
   1813     LoadConstantWide(rl_result.reg, std::abs(imm) - 1);
   1814     OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
   1815     NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
   1816     OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
   1817     int shift_amount = LowestSetBit(imm);
   1818     OpRegImm(kOpAsr, rl_result.reg, shift_amount);
   1819     if (imm < 0) {
   1820       OpReg(kOpNeg, rl_result.reg);
   1821     }
   1822     StoreValueWide(rl_dest, rl_result);
   1823   } else {
   1824     CHECK(imm <= -2 || imm >= 2);
   1825 
   1826     FlushReg(rs_r0q);
   1827     Clobber(rs_r0q);
   1828     LockTemp(rs_r0q);
   1829     FlushReg(rs_r2q);
   1830     Clobber(rs_r2q);
   1831     LockTemp(rs_r2q);
   1832 
   1833     RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r2q, INVALID_SREG, INVALID_SREG};
   1834 
    1835     // Use H. S. Warren's Hacker's Delight, Chapter 10, and T. Granlund and
    1836     // P. L. Montgomery's "Division by invariant integers using multiplication".
   1837     int64_t magic;
   1838     int shift;
   1839     CalculateMagicAndShift(imm, magic, shift, true /* is_long */);
   1840 
    1841     /*
    1842      * For imm >= 2,
    1843      *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n > 0
    1844      *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n < 0.
    1845      * For imm <= -2,
    1846      *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n > 0
    1847      *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n < 0.
    1848      * We implement this algorithm as follows:
    1849      * 1. multiply the magic number M by the numerator n, leaving the high 64 bits in RDX
    1850      * 2. if imm > 0 and M < 0, add the numerator to RDX;
    1851      *    if imm < 0 and M > 0, subtract the numerator from RDX
    1852      * 3. if S != 0, arithmetically shift RDX right by S bits
    1853      * 4. add 1 to RDX if RDX < 0 (i.e. add RDX's sign bit to it)
    1854      * 5. RDX now holds the quotient
    1855      */
   1856 
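             // Illustration (a sketch, not compiler code): the classic 32-bit instance
             // of this algorithm is division by 7, where M = 0x92492493 (negative as a
             // signed value, so step 2 adds the numerator) and S = 2. In plain C++:
             //
             //   int32_t DivBy7(int32_t n) {
             //     int32_t m = static_cast<int32_t>(0x92492493);    // magic number
             //     int32_t hi = static_cast<int64_t>(m) * n >> 32;  // step 1: high half
             //     hi += n;                                         // step 2: m < 0
             //     hi >>= 2;                                        // step 3: SAR by S
             //     return hi + (static_cast<uint32_t>(hi) >> 31);   // step 4: sign bit
             //   }
             //
             // e.g. n = 100: hi = -43 + 100 = 57; 57 >> 2 = 14 = 100 / 7. This function
             // emits the 64-bit analogue of the same steps below.
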
   1857     // Numerator into RAX.
   1858     RegStorage numerator_reg;
   1859     if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
   1860       // We will need the value later.
   1861       rl_src = LoadValueWide(rl_src, kCoreReg);
   1862       numerator_reg = rl_src.reg;
   1863       OpRegCopyWide(rs_r0q, numerator_reg);
   1864     } else {
   1865       // Only need this once.  Just put it into RAX.
   1866       LoadValueDirectWideFixed(rl_src, rs_r0q);
   1867     }
   1868 
   1869     // RDX = magic.
   1870     LoadConstantWide(rs_r2q, magic);
   1871 
    1872     // RDX:RAX = magic * dividend.
   1873     NewLIR1(kX86Imul64DaR, rs_r2q.GetReg());
   1874 
   1875     if (imm > 0 && magic < 0) {
   1876       // Add numerator to RDX.
   1877       DCHECK(numerator_reg.Valid());
   1878       OpRegReg(kOpAdd, rs_r2q, numerator_reg);
   1879     } else if (imm < 0 && magic > 0) {
   1880       DCHECK(numerator_reg.Valid());
   1881       OpRegReg(kOpSub, rs_r2q, numerator_reg);
   1882     }
   1883 
   1884     // Do we need the shift?
   1885     if (shift != 0) {
   1886       // Shift RDX by 'shift' bits.
   1887       OpRegImm(kOpAsr, rs_r2q, shift);
   1888     }
   1889 
   1890     // Move RDX to RAX.
   1891     OpRegCopyWide(rs_r0q, rs_r2q);
   1892 
   1893     // Move sign bit to bit 0, zeroing the rest.
   1894     OpRegImm(kOpLsr, rs_r2q, 63);
   1895 
   1896     // RDX = RDX + RAX.
   1897     OpRegReg(kOpAdd, rs_r2q, rs_r0q);
   1898 
   1899     // Quotient is in RDX.
   1900     if (!is_div) {
   1901       // We need to compute the remainder.
    1902       // Remainder is numerator - (quotient * imm).
   1903       DCHECK(numerator_reg.Valid());
   1904       OpRegCopyWide(rs_r0q, numerator_reg);
   1905 
   1906       // Imul doesn't support 64-bit imms.
   1907       if (imm > std::numeric_limits<int32_t>::max() ||
   1908           imm < std::numeric_limits<int32_t>::min()) {
   1909         RegStorage rs_temp = AllocTempWide();
   1910         LoadConstantWide(rs_temp, imm);
   1911 
    1912         // RDX = quotient * imm.
   1913         NewLIR2(kX86Imul64RR, rs_r2q.GetReg(), rs_temp.GetReg());
   1914 
   1915         FreeTemp(rs_temp);
   1916       } else {
    1917         // RDX = quotient * imm.
   1918         int short_imm = static_cast<int>(imm);
   1919         NewLIR3(kX86Imul64RRI, rs_r2q.GetReg(), rs_r2q.GetReg(), short_imm);
   1920       }
   1921 
    1922       // RAX -= RDX, leaving the remainder in RAX.
   1923       OpRegReg(kOpSub, rs_r0q, rs_r2q);
   1924 
   1925       // Store result.
   1926       OpRegCopyWide(rl_result.reg, rs_r0q);
   1927     } else {
   1928       // Store result.
   1929       OpRegCopyWide(rl_result.reg, rs_r2q);
   1930     }
   1931     StoreValueWide(rl_dest, rl_result);
   1932     FreeTemp(rs_r0q);
   1933     FreeTemp(rs_r2q);
   1934   }
   1935 }
   1936 
   1937 void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
   1938                                RegLocation rl_src2, bool is_div) {
   1939   if (!cu_->target64) {
    1940     LOG(FATAL) << "Unexpected use of GenDivRemLong()";
   1941     return;
   1942   }
   1943 
   1944   if (rl_src2.is_const) {
   1945     DCHECK(rl_src2.wide);
   1946     int64_t imm = mir_graph_->ConstantValueWide(rl_src2);
   1947     GenDivRemLongLit(rl_dest, rl_src1, imm, is_div);
   1948     return;
   1949   }
   1950 
   1951   // We have to use fixed registers, so flush all the temps.
   1952   FlushAllRegs();
   1953   LockCallTemps();  // Prepare for explicit register usage.
   1954 
   1955   // Load LHS into RAX.
   1956   LoadValueDirectWideFixed(rl_src1, rs_r0q);
   1957 
   1958   // Load RHS into RCX.
   1959   LoadValueDirectWideFixed(rl_src2, rs_r1q);
   1960 
   1961   // Copy LHS sign bit into RDX.
   1962   NewLIR0(kx86Cqo64Da);
   1963 
   1964   // Handle division by zero case.
   1965   GenDivZeroCheckWide(rs_r1q);
   1966 
   1967   // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
   1968   NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
   1969   LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
   1970 
   1971   // RHS is -1.
   1972   LoadConstantWide(rs_r6q, 0x8000000000000000);
   1973   NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
   1974   LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
   1975 
   1976   // In 0x8000000000000000/-1 case.
   1977   if (!is_div) {
    1978     // For DIV, RAX is already right. For REM, we need RDX to be 0.
   1979     NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
   1980   }
   1981   LIR* done = NewLIR1(kX86Jmp8, 0);
   1982 
   1983   // Expected case.
   1984   minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
   1985   minint_branch->target = minus_one_branch->target;
   1986   NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
   1987   done->target = NewLIR0(kPseudoTargetLabel);
   1988 
   1989   // Result is in RAX for div and RDX for rem.
   1990   RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
   1991   if (!is_div) {
   1992     rl_result.reg.SetReg(r2q);
   1993   }
   1994 
   1995   StoreValueWide(rl_dest, rl_result);
   1996 }
   1997 
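         // Illustration: the explicit -1 check above exists because x86 idiv raises a
         // divide-error fault when the quotient does not fit in the destination, and
         // that is exactly what INT64_MIN / -1 would produce:
         //
         //   -(-0x8000000000000000) == 0x8000000000000000 > INT64_MAX
         //
         // Java instead defines Long.MIN_VALUE / -1 == Long.MIN_VALUE and
         // Long.MIN_VALUE % -1 == 0, which is what the branched-around code yields:
         // RAX already holds the dividend for div, and RDX is zeroed for rem.
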
   1998 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
   1999   rl_src = LoadValueWide(rl_src, kCoreReg);
   2000   RegLocation rl_result;
   2001   if (cu_->target64) {
   2002     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2003     OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
   2004   } else {
   2005     rl_result = ForceTempWide(rl_src);
   2006     if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) &&
   2007         ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) {
   2008       // The registers are the same, so we would clobber it before the use.
   2009       RegStorage temp_reg = AllocTemp();
   2010       OpRegCopy(temp_reg, rl_result.reg);
   2011       rl_result.reg.SetHighReg(temp_reg.GetReg());
   2012     }
   2013     OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow());    // rLow = -rLow
   2014     OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0);                   // rHigh = rHigh + CF
   2015     OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());  // rHigh = -rHigh
   2016   }
   2017   StoreValueWide(rl_dest, rl_result);
   2018 }
   2019 
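         // Illustration (a sketch, not compiler code): the neg/adc/neg sequence above
         // is 64-bit two's-complement negation on a 32-bit register pair. In plain C++:
         //
         //   void NegLong(uint32_t& lo, uint32_t& hi) {
         //     uint32_t carry = (lo != 0) ? 1 : 0;  // CF left behind by "neg lo"
         //     lo = 0u - lo;                        // neg lo
         //     hi += carry;                         // adc hi, 0
         //     hi = 0u - hi;                        // neg hi
         //   }
         //
         // e.g. (hi:lo) = 0x00000000:00000001 becomes 0xFFFFFFFF:FFFFFFFF, i.e. -1.
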
   2020 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) {
   2021   DCHECK_EQ(kX86, cu_->instruction_set);
   2022   X86OpCode opcode = kX86Bkpt;
   2023   switch (op) {
   2024   case kOpCmp: opcode = kX86Cmp32RT;  break;
   2025   case kOpMov: opcode = kX86Mov32RT;  break;
   2026   default:
   2027     LOG(FATAL) << "Bad opcode: " << op;
   2028     break;
   2029   }
   2030   NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
   2031 }
   2032 
   2033 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
   2034   DCHECK_EQ(kX86_64, cu_->instruction_set);
   2035   X86OpCode opcode = kX86Bkpt;
   2036   if (cu_->target64 && r_dest.Is64BitSolo()) {
   2037     switch (op) {
   2038     case kOpCmp: opcode = kX86Cmp64RT;  break;
   2039     case kOpMov: opcode = kX86Mov64RT;  break;
   2040     default:
   2041       LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
   2042       break;
   2043     }
   2044   } else {
   2045     switch (op) {
   2046     case kOpCmp: opcode = kX86Cmp32RT;  break;
   2047     case kOpMov: opcode = kX86Mov32RT;  break;
   2048     default:
   2049       LOG(FATAL) << "Bad opcode: " << op;
   2050       break;
   2051     }
   2052   }
   2053   NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
   2054 }
   2055 
   2056 /*
   2057  * Generate array load
   2058  */
   2059 void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
   2060                              RegLocation rl_index, RegLocation rl_dest, int scale) {
   2061   RegisterClass reg_class = RegClassBySize(size);
   2062   int len_offset = mirror::Array::LengthOffset().Int32Value();
   2063   RegLocation rl_result;
   2064   rl_array = LoadValue(rl_array, kRefReg);
   2065 
   2066   int data_offset;
   2067   if (size == k64 || size == kDouble) {
   2068     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   2069   } else {
   2070     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   2071   }
   2072 
   2073   bool constant_index = rl_index.is_const;
   2074   int32_t constant_index_value = 0;
   2075   if (!constant_index) {
   2076     rl_index = LoadValue(rl_index, kCoreReg);
   2077   } else {
   2078     constant_index_value = mir_graph_->ConstantValue(rl_index);
    2079     // If the index is constant, just fold it into the data offset.
    2080     data_offset += constant_index_value << scale;
    2081     // Treat it as a non-indexed access below.
   2082     rl_index.reg = RegStorage::InvalidReg();
   2083   }
   2084 
   2085   /* null object? */
   2086   GenNullCheck(rl_array.reg, opt_flags);
   2087 
   2088   if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
   2089     if (constant_index) {
   2090       GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
   2091     } else {
   2092       GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
   2093     }
   2094   }
   2095   rl_result = EvalLoc(rl_dest, reg_class, true);
   2096   LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
   2097   if ((size == k64) || (size == kDouble)) {
   2098     StoreValueWide(rl_dest, rl_result);
   2099   } else {
   2100     StoreValue(rl_dest, rl_result);
   2101   }
   2102 }
   2103 
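         // Illustration: with a constant index the load above needs no index register;
         // the whole offset folds into the displacement of a single addressing mode:
         //
         //   int32_t disp = data_offset + (constant_index_value << scale);
         //   // mov eax, [array + disp]   instead of   mov eax, [array + index*2^scale + data_offset]
         //
         // Passing RegStorage::InvalidReg() as the index lets LoadBaseIndexedDisp()
         // emit the plain base + displacement form.
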
    2104 /*
    2105  * Generate array store
    2106  * (also marks the GC card when card_mark is set)
    2107  */
   2108 void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
   2109                              RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
   2110   RegisterClass reg_class = RegClassBySize(size);
   2111   int len_offset = mirror::Array::LengthOffset().Int32Value();
   2112   int data_offset;
   2113 
   2114   if (size == k64 || size == kDouble) {
   2115     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   2116   } else {
   2117     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   2118   }
   2119 
   2120   rl_array = LoadValue(rl_array, kRefReg);
   2121   bool constant_index = rl_index.is_const;
   2122   int32_t constant_index_value = 0;
   2123   if (!constant_index) {
   2124     rl_index = LoadValue(rl_index, kCoreReg);
   2125   } else {
    2126     // If the index is constant, just fold it into the data offset.
    2127     constant_index_value = mir_graph_->ConstantValue(rl_index);
    2128     data_offset += constant_index_value << scale;
    2129     // Treat it as a non-indexed access below.
   2130     rl_index.reg = RegStorage::InvalidReg();
   2131   }
   2132 
   2133   /* null object? */
   2134   GenNullCheck(rl_array.reg, opt_flags);
   2135 
   2136   if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
   2137     if (constant_index) {
   2138       GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
   2139     } else {
   2140       GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
   2141     }
   2142   }
   2143   if ((size == k64) || (size == kDouble)) {
   2144     rl_src = LoadValueWide(rl_src, reg_class);
   2145   } else {
   2146     rl_src = LoadValue(rl_src, reg_class);
   2147   }
   2148   // If the src reg can't be byte accessed, move it to a temp first.
   2149   if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
   2150     RegStorage temp = AllocTemp();
   2151     OpRegCopy(temp, rl_src.reg);
   2152     StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size);
   2153   } else {
   2154     StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size);
   2155   }
   2156   if (card_mark) {
    2157     // Free rl_index if it's a temp. Ensures there are 2 free regs for the card mark.
   2158     if (!constant_index) {
   2159       FreeTemp(rl_index.reg);
   2160     }
   2161     MarkGCCard(rl_src.reg, rl_array.reg);
   2162   }
   2163 }
   2164 
   2165 RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
   2166                                           RegLocation rl_src, int shift_amount) {
   2167   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2168   if (cu_->target64) {
   2169     OpKind op = static_cast<OpKind>(0);    /* Make gcc happy */
   2170     switch (opcode) {
   2171       case Instruction::SHL_LONG:
   2172       case Instruction::SHL_LONG_2ADDR:
   2173         op = kOpLsl;
   2174         break;
   2175       case Instruction::SHR_LONG:
   2176       case Instruction::SHR_LONG_2ADDR:
   2177         op = kOpAsr;
   2178         break;
   2179       case Instruction::USHR_LONG:
   2180       case Instruction::USHR_LONG_2ADDR:
   2181         op = kOpLsr;
   2182         break;
   2183       default:
   2184         LOG(FATAL) << "Unexpected case";
   2185     }
   2186     OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
   2187   } else {
   2188     switch (opcode) {
   2189       case Instruction::SHL_LONG:
   2190       case Instruction::SHL_LONG_2ADDR:
   2191         DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
   2192         if (shift_amount == 32) {
   2193           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
   2194           LoadConstant(rl_result.reg.GetLow(), 0);
   2195         } else if (shift_amount > 31) {
   2196           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
   2197           NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
   2198           LoadConstant(rl_result.reg.GetLow(), 0);
   2199         } else {
   2200           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
   2201           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2202           NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
   2203                   shift_amount);
   2204           NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
   2205         }
   2206         break;
   2207       case Instruction::SHR_LONG:
   2208       case Instruction::SHR_LONG_2ADDR:
   2209         if (shift_amount == 32) {
   2210           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
   2211           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2212           NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
   2213         } else if (shift_amount > 31) {
   2214           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
   2215           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2216           NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
   2217           NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
   2218         } else {
   2219           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
   2220           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2221           NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
   2222                   shift_amount);
   2223           NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
   2224         }
   2225         break;
   2226       case Instruction::USHR_LONG:
   2227       case Instruction::USHR_LONG_2ADDR:
   2228         if (shift_amount == 32) {
   2229           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
   2230           LoadConstant(rl_result.reg.GetHigh(), 0);
   2231         } else if (shift_amount > 31) {
   2232           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
   2233           NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
   2234           LoadConstant(rl_result.reg.GetHigh(), 0);
   2235         } else {
   2236           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
   2237           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2238           NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
   2239                   shift_amount);
   2240           NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
   2241         }
   2242         break;
   2243       default:
   2244         LOG(FATAL) << "Unexpected case";
   2245     }
   2246   }
   2247   return rl_result;
   2248 }
   2249 
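         // Illustration (a sketch, not compiler code): for 0 < n < 32 the SHL_LONG case
         // above uses SHLD so bits shifted out of the low word flow into the high word.
         // In plain C++ the shld/sal pair computes:
         //
         //   void ShlLong(uint32_t& lo, uint32_t& hi, int n) {  // assumes 0 < n < 32
         //     hi = (hi << n) | (lo >> (32 - n));  // shld hi, lo, n
         //     lo <<= n;                           // sal lo, n
         //   }
         //
         // For n >= 32 the low word is consumed entirely, so the code instead moves the
         // low word into the high word, shifts it by n - 32, and zeroes the low word.
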
   2250 void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
   2251                                    RegLocation rl_src, RegLocation rl_shift) {
   2252   // Per spec, we only care about low 6 bits of shift amount.
   2253   int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
   2254   if (shift_amount == 0) {
   2255     rl_src = LoadValueWide(rl_src, kCoreReg);
   2256     StoreValueWide(rl_dest, rl_src);
   2257     return;
   2258   } else if (shift_amount == 1 &&
    2259             (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
   2260     // Need to handle this here to avoid calling StoreValueWide twice.
   2261     GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src);
   2262     return;
   2263   }
   2264   if (BadOverlap(rl_src, rl_dest)) {
   2265     GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
   2266     return;
   2267   }
   2268   rl_src = LoadValueWide(rl_src, kCoreReg);
   2269   RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount);
   2270   StoreValueWide(rl_dest, rl_result);
   2271 }
   2272 
   2273 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
   2274                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
   2275   bool isConstSuccess = false;
   2276   switch (opcode) {
   2277     case Instruction::ADD_LONG:
   2278     case Instruction::AND_LONG:
   2279     case Instruction::OR_LONG:
   2280     case Instruction::XOR_LONG:
   2281       if (rl_src2.is_const) {
   2282         isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
   2283       } else {
   2284         DCHECK(rl_src1.is_const);
   2285         isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
   2286       }
   2287       break;
   2288     case Instruction::SUB_LONG:
   2289     case Instruction::SUB_LONG_2ADDR:
   2290       if (rl_src2.is_const) {
   2291         isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
   2292       } else {
   2293         GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
   2294         isConstSuccess = true;
   2295       }
   2296       break;
   2297     case Instruction::ADD_LONG_2ADDR:
   2298     case Instruction::OR_LONG_2ADDR:
   2299     case Instruction::XOR_LONG_2ADDR:
   2300     case Instruction::AND_LONG_2ADDR:
   2301       if (rl_src2.is_const) {
   2302         if (GenerateTwoOperandInstructions()) {
   2303           isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode);
   2304         } else {
   2305           isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
   2306         }
   2307       } else {
   2308         DCHECK(rl_src1.is_const);
   2309         isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
   2310       }
   2311       break;
   2312     default:
   2313       isConstSuccess = false;
   2314       break;
   2315   }
   2316 
   2317   if (!isConstSuccess) {
   2318     // Default - bail to non-const handler.
   2319     GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
   2320   }
   2321 }
   2322 
   2323 bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
   2324   switch (op) {
   2325     case Instruction::AND_LONG_2ADDR:
   2326     case Instruction::AND_LONG:
   2327       return value == -1;
   2328     case Instruction::OR_LONG:
   2329     case Instruction::OR_LONG_2ADDR:
   2330     case Instruction::XOR_LONG:
   2331     case Instruction::XOR_LONG_2ADDR:
   2332       return value == 0;
   2333     default:
   2334       return false;
   2335   }
   2336 }
   2337 
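         // Illustration: IsNoOp() relies on the bitwise identities
         //
         //   x & -1 == x,   x | 0 == x,   x ^ 0 == x
         //
         // so when one 32-bit half of a long immediate is the identity element for the
         // operation, GenLongImm() and GenLongLongImm() below skip emitting an
         // instruction for that half entirely.
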
   2338 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
   2339                                 bool is_high_op) {
   2340   bool rhs_in_mem = rhs.location != kLocPhysReg;
   2341   bool dest_in_mem = dest.location != kLocPhysReg;
   2342   bool is64Bit = cu_->target64;
   2343   DCHECK(!rhs_in_mem || !dest_in_mem);
   2344   switch (op) {
   2345     case Instruction::ADD_LONG:
   2346     case Instruction::ADD_LONG_2ADDR:
   2347       if (dest_in_mem) {
   2348         return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR;
   2349       } else if (rhs_in_mem) {
   2350         return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM;
   2351       }
   2352       return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR;
   2353     case Instruction::SUB_LONG:
   2354     case Instruction::SUB_LONG_2ADDR:
   2355       if (dest_in_mem) {
   2356         return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR;
   2357       } else if (rhs_in_mem) {
   2358         return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM;
   2359       }
   2360       return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR;
   2361     case Instruction::AND_LONG_2ADDR:
   2362     case Instruction::AND_LONG:
   2363       if (dest_in_mem) {
   2364         return is64Bit ? kX86And64MR : kX86And32MR;
   2365       }
   2366       if (is64Bit) {
   2367         return rhs_in_mem ? kX86And64RM : kX86And64RR;
   2368       }
   2369       return rhs_in_mem ? kX86And32RM : kX86And32RR;
   2370     case Instruction::OR_LONG:
   2371     case Instruction::OR_LONG_2ADDR:
   2372       if (dest_in_mem) {
   2373         return is64Bit ? kX86Or64MR : kX86Or32MR;
   2374       }
   2375       if (is64Bit) {
   2376         return rhs_in_mem ? kX86Or64RM : kX86Or64RR;
   2377       }
   2378       return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
   2379     case Instruction::XOR_LONG:
   2380     case Instruction::XOR_LONG_2ADDR:
   2381       if (dest_in_mem) {
   2382         return is64Bit ? kX86Xor64MR : kX86Xor32MR;
   2383       }
   2384       if (is64Bit) {
   2385         return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR;
   2386       }
   2387       return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
   2388     default:
   2389       LOG(FATAL) << "Unexpected opcode: " << op;
   2390       return kX86Add32RR;
   2391   }
   2392 }
   2393 
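         // Illustration: on 32-bit targets the is_high_op flag selects the carry-aware
         // opcode so the two halves still form one 64-bit operation. For ADD_LONG with
         // a memory destination, GetOpcode() returns:
         //
         //   kX86Add32MR for the low word    (add [mem], reg      ; sets CF)
         //   kX86Adc32MR for the high word   (adc [mem + 4], reg  ; adds CF back in)
         //
         // The logical ops (AND/OR/XOR) have no carry to propagate, so both halves use
         // the same opcode, and 64-bit targets need only the single Xxx64 form.
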
   2394 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
   2395                                 int32_t value) {
   2396   bool in_mem = loc.location != kLocPhysReg;
   2397   bool is64Bit = cu_->target64;
   2398   bool byte_imm = IS_SIMM8(value);
   2399   DCHECK(in_mem || !loc.reg.IsFloat());
   2400   switch (op) {
   2401     case Instruction::ADD_LONG:
   2402     case Instruction::ADD_LONG_2ADDR:
   2403       if (byte_imm) {
   2404         if (in_mem) {
   2405           return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
   2406         }
   2407         return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
   2408       }
   2409       if (in_mem) {
   2410         return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI;
   2411       }
   2412       return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI;
   2413     case Instruction::SUB_LONG:
   2414     case Instruction::SUB_LONG_2ADDR:
   2415       if (byte_imm) {
   2416         if (in_mem) {
   2417           return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
   2418         }
   2419         return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
   2420       }
   2421       if (in_mem) {
   2422         return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI;
   2423       }
   2424       return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI;
   2425     case Instruction::AND_LONG_2ADDR:
   2426     case Instruction::AND_LONG:
   2427       if (byte_imm) {
   2428         if (is64Bit) {
   2429           return in_mem ? kX86And64MI8 : kX86And64RI8;
   2430         }
   2431         return in_mem ? kX86And32MI8 : kX86And32RI8;
   2432       }
   2433       if (is64Bit) {
   2434         return in_mem ? kX86And64MI : kX86And64RI;
   2435       }
   2436       return in_mem ? kX86And32MI : kX86And32RI;
   2437     case Instruction::OR_LONG:
   2438     case Instruction::OR_LONG_2ADDR:
   2439       if (byte_imm) {
   2440         if (is64Bit) {
   2441           return in_mem ? kX86Or64MI8 : kX86Or64RI8;
   2442         }
   2443         return in_mem ? kX86Or32MI8 : kX86Or32RI8;
   2444       }
   2445       if (is64Bit) {
   2446         return in_mem ? kX86Or64MI : kX86Or64RI;
   2447       }
   2448       return in_mem ? kX86Or32MI : kX86Or32RI;
   2449     case Instruction::XOR_LONG:
   2450     case Instruction::XOR_LONG_2ADDR:
   2451       if (byte_imm) {
   2452         if (is64Bit) {
   2453           return in_mem ? kX86Xor64MI8 : kX86Xor64RI8;
   2454         }
   2455         return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
   2456       }
   2457       if (is64Bit) {
   2458         return in_mem ? kX86Xor64MI : kX86Xor64RI;
   2459       }
   2460       return in_mem ? kX86Xor32MI : kX86Xor32RI;
   2461     default:
   2462       LOG(FATAL) << "Unexpected opcode: " << op;
   2463       return kX86Add32MI;
   2464   }
   2465 }
   2466 
   2467 bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
   2468   DCHECK(rl_src.is_const);
   2469   int64_t val = mir_graph_->ConstantValueWide(rl_src);
   2470 
   2471   if (cu_->target64) {
    2472     // We can use an immediate only if it fits in 32 bits.
   2473     if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
   2474       return false;
   2475     }
   2476 
   2477     rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
   2478 
   2479     if ((rl_dest.location == kLocDalvikFrame) ||
   2480         (rl_dest.location == kLocCompilerTemp)) {
   2481       int r_base = rs_rX86_SP.GetReg();
   2482       int displacement = SRegOffset(rl_dest.s_reg_low);
   2483 
   2484       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   2485       X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
   2486       LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
   2487       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   2488                               true /* is_load */, true /* is64bit */);
   2489       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   2490                               false /* is_load */, true /* is64bit */);
   2491       return true;
   2492     }
   2493 
   2494     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2495     DCHECK_EQ(rl_result.location, kLocPhysReg);
   2496     DCHECK(!rl_result.reg.IsFloat());
   2497 
   2498     X86OpCode x86op = GetOpcode(op, rl_result, false, val);
   2499     NewLIR2(x86op, rl_result.reg.GetReg(), val);
   2500 
   2501     StoreValueWide(rl_dest, rl_result);
   2502     return true;
   2503   }
   2504 
   2505   int32_t val_lo = Low32Bits(val);
   2506   int32_t val_hi = High32Bits(val);
   2507   rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
   2508 
   2509   // Can we just do this into memory?
   2510   if ((rl_dest.location == kLocDalvikFrame) ||
   2511       (rl_dest.location == kLocCompilerTemp)) {
   2512     int r_base = rs_rX86_SP.GetReg();
   2513     int displacement = SRegOffset(rl_dest.s_reg_low);
   2514 
   2515     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   2516     if (!IsNoOp(op, val_lo)) {
   2517       X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
   2518       LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
   2519       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   2520                               true /* is_load */, true /* is64bit */);
   2521       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   2522                               false /* is_load */, true /* is64bit */);
   2523     }
   2524     if (!IsNoOp(op, val_hi)) {
   2525       X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
   2526       LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi);
    2527       AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
    2528                               true /* is_load */, true /* is64bit */);
    2529       AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
    2530                               false /* is_load */, true /* is64bit */);
   2531     }
   2532     return true;
   2533   }
   2534 
   2535   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2536   DCHECK_EQ(rl_result.location, kLocPhysReg);
   2537   DCHECK(!rl_result.reg.IsFloat());
   2538 
   2539   if (!IsNoOp(op, val_lo)) {
   2540     X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
   2541     NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
   2542   }
   2543   if (!IsNoOp(op, val_hi)) {
   2544     X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
   2545     NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
   2546   }
   2547   StoreValueWide(rl_dest, rl_result);
   2548   return true;
   2549 }
   2550 
   2551 bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
   2552                                 RegLocation rl_src2, Instruction::Code op) {
   2553   DCHECK(rl_src2.is_const);
   2554   int64_t val = mir_graph_->ConstantValueWide(rl_src2);
   2555 
   2556   if (cu_->target64) {
    2557     // We can use an immediate only if it fits in 32 bits.
   2558     if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
   2559       return false;
   2560     }
   2561     if (rl_dest.location == kLocPhysReg &&
   2562         rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
   2563       X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
   2564       OpRegCopy(rl_dest.reg, rl_src1.reg);
   2565       NewLIR2(x86op, rl_dest.reg.GetReg(), val);
   2566       StoreFinalValueWide(rl_dest, rl_dest);
   2567       return true;
   2568     }
   2569 
   2570     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    2571     // We need the value to be in a temporary register.
   2572     RegLocation rl_result = ForceTempWide(rl_src1);
   2573 
   2574     X86OpCode x86op = GetOpcode(op, rl_result, false, val);
   2575     NewLIR2(x86op, rl_result.reg.GetReg(), val);
   2576 
   2577     StoreFinalValueWide(rl_dest, rl_result);
   2578     return true;
   2579   }
   2580 
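           // 32-bit target: handle the low and high halves of the constant separately,
           // preferring to work directly in the destination registers when possible.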
   2581   int32_t val_lo = Low32Bits(val);
   2582   int32_t val_hi = High32Bits(val);
   2583   rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
   2584   rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
   2585 
   2586   // Can we do this directly into the destination registers?
   2587   if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
   2588       rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() &&
   2589       rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) {
   2590     if (!IsNoOp(op, val_lo)) {
   2591       X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
   2592       NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo);
   2593     }
   2594     if (!IsNoOp(op, val_hi)) {
   2595       X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
   2596       NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi);
   2597     }
   2598 
   2599     StoreFinalValueWide(rl_dest, rl_dest);
   2600     return true;
   2601   }
   2602 
   2603   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   2604   DCHECK_EQ(rl_src1.location, kLocPhysReg);
   2605 
    2606   // We need the values to be in temporary registers.
   2607   RegLocation rl_result = ForceTempWide(rl_src1);
   2608   if (!IsNoOp(op, val_lo)) {
   2609     X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
   2610     NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
   2611   }
   2612   if (!IsNoOp(op, val_hi)) {
   2613     X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
   2614     NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
   2615   }
   2616 
   2617   StoreFinalValueWide(rl_dest, rl_result);
   2618   return true;
   2619 }
   2620 
   2621 // For final classes there are no sub-classes to check and so we can answer the instance-of
   2622 // question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
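         // The generated sequence is roughly:
         //   result = 0
         //   if (object == null) goto done
         //   check_class = <declaring class, or the type loaded from the dex cache>
         //   cmp check_class, [object + Object::ClassOffset()]
         //   sete result   (SET8R on a byte-addressable register)
         // done: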
   2623 void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
   2624                                     RegLocation rl_dest, RegLocation rl_src) {
   2625   RegLocation object = LoadValue(rl_src, kRefReg);
   2626   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2627   RegStorage result_reg = rl_result.reg;
   2628 
   2629   // For 32-bit, SETcc only works with EAX..EDX.
   2630   RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg;
   2631   if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) {
   2632     result_reg = AllocateByteRegister();
   2633   }
   2634 
   2635   // Assume that there is no match.
   2636   LoadConstant(result_reg, 0);
   2637   LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
   2638 
   2639   // We will use this register to compare to memory below.
   2640   // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode).
   2641   // For this reason, force allocation of a 32 bit register to use, so that the
    2642   // compare to memory will be done using a 32 bit comparison.
   2643   // The LoadRefDisp(s) below will work normally, even in 64 bit mode.
   2644   RegStorage check_class = AllocTemp();
   2645 
   2646   // If Method* is already in a register, we can save a copy.
   2647   RegLocation rl_method = mir_graph_->GetMethodLoc();
   2648   int32_t offset_of_type = mirror::Array::DataOffset(
   2649       sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
   2650       (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
   2651 
   2652   if (rl_method.location == kLocPhysReg) {
   2653     if (use_declaring_class) {
   2654       LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
   2655                   check_class, kNotVolatile);
   2656     } else {
   2657       LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
   2658                   check_class, kNotVolatile);
   2659       LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
   2660     }
   2661   } else {
   2662     LoadCurrMethodDirect(check_class);
   2663     if (use_declaring_class) {
   2664       LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
   2665                   check_class, kNotVolatile);
   2666     } else {
   2667       LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
   2668                   check_class, kNotVolatile);
   2669       LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
   2670     }
   2671   }
   2672 
   2673   // Compare the computed class to the class in the object.
   2674   DCHECK_EQ(object.location, kLocPhysReg);
   2675   OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value());
   2676 
   2677   // Set the low byte of the result to 0 or 1 from the compare condition code.
   2678   NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq);
   2679 
   2680   LIR* target = NewLIR0(kPseudoTargetLabel);
   2681   null_branchover->target = target;
   2682   FreeTemp(check_class);
   2683   if (IsTemp(result_reg)) {
   2684     OpRegCopy(rl_result.reg, result_reg);
   2685     FreeTemp(result_reg);
   2686   }
   2687   StoreValue(rl_dest, rl_result);
   2688 }
   2689 
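         // Generate an integer ALU, shift, or div/rem operation. Prefers the x86
         // two-address (in-place) and reg/mem forms where legal, and falls back to
         // loading both operands and using the three-address form.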
   2690 void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
   2691                             RegLocation rl_lhs, RegLocation rl_rhs) {
   2692   OpKind op = kOpBkpt;
   2693   bool is_div_rem = false;
   2694   bool unary = false;
   2695   bool shift_op = false;
   2696   bool is_two_addr = false;
   2697   RegLocation rl_result;
   2698   switch (opcode) {
   2699     case Instruction::NEG_INT:
   2700       op = kOpNeg;
   2701       unary = true;
   2702       break;
   2703     case Instruction::NOT_INT:
   2704       op = kOpMvn;
   2705       unary = true;
   2706       break;
   2707     case Instruction::ADD_INT_2ADDR:
   2708       is_two_addr = true;
   2709       // Fallthrough
   2710     case Instruction::ADD_INT:
   2711       op = kOpAdd;
   2712       break;
   2713     case Instruction::SUB_INT_2ADDR:
   2714       is_two_addr = true;
   2715       // Fallthrough
   2716     case Instruction::SUB_INT:
   2717       op = kOpSub;
   2718       break;
   2719     case Instruction::MUL_INT_2ADDR:
   2720       is_two_addr = true;
   2721       // Fallthrough
   2722     case Instruction::MUL_INT:
   2723       op = kOpMul;
   2724       break;
   2725     case Instruction::DIV_INT_2ADDR:
   2726       is_two_addr = true;
   2727       // Fallthrough
   2728     case Instruction::DIV_INT:
   2729       op = kOpDiv;
   2730       is_div_rem = true;
   2731       break;
   2732     /* NOTE: returns in kArg1 */
   2733     case Instruction::REM_INT_2ADDR:
   2734       is_two_addr = true;
   2735       // Fallthrough
   2736     case Instruction::REM_INT:
   2737       op = kOpRem;
   2738       is_div_rem = true;
   2739       break;
   2740     case Instruction::AND_INT_2ADDR:
   2741       is_two_addr = true;
   2742       // Fallthrough
   2743     case Instruction::AND_INT:
   2744       op = kOpAnd;
   2745       break;
   2746     case Instruction::OR_INT_2ADDR:
   2747       is_two_addr = true;
   2748       // Fallthrough
   2749     case Instruction::OR_INT:
   2750       op = kOpOr;
   2751       break;
   2752     case Instruction::XOR_INT_2ADDR:
   2753       is_two_addr = true;
   2754       // Fallthrough
   2755     case Instruction::XOR_INT:
   2756       op = kOpXor;
   2757       break;
   2758     case Instruction::SHL_INT_2ADDR:
   2759       is_two_addr = true;
   2760       // Fallthrough
   2761     case Instruction::SHL_INT:
   2762       shift_op = true;
   2763       op = kOpLsl;
   2764       break;
   2765     case Instruction::SHR_INT_2ADDR:
   2766       is_two_addr = true;
   2767       // Fallthrough
   2768     case Instruction::SHR_INT:
   2769       shift_op = true;
   2770       op = kOpAsr;
   2771       break;
   2772     case Instruction::USHR_INT_2ADDR:
   2773       is_two_addr = true;
   2774       // Fallthrough
   2775     case Instruction::USHR_INT:
   2776       shift_op = true;
   2777       op = kOpLsr;
   2778       break;
   2779     default:
   2780       LOG(FATAL) << "Invalid word arith op: " << opcode;
   2781   }
   2782 
   2783   // Can we convert to a two address instruction?
   2784   if (!is_two_addr &&
   2785         (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
   2786          mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
   2787     is_two_addr = true;
   2788   }
   2789 
   2790   if (!GenerateTwoOperandInstructions()) {
   2791     is_two_addr = false;
   2792   }
   2793 
   2794   // Get the div/rem stuff out of the way.
   2795   if (is_div_rem) {
   2796     rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true);
   2797     StoreValue(rl_dest, rl_result);
   2798     return;
   2799   }
   2800 
   2801   // If we generate any memory access below, it will reference a dalvik reg.
   2802   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   2803 
   2804   if (unary) {
   2805     rl_lhs = LoadValue(rl_lhs, kCoreReg);
   2806     rl_result = UpdateLocTyped(rl_dest, kCoreReg);
   2807     rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2808     OpRegReg(op, rl_result.reg, rl_lhs.reg);
   2809   } else {
   2810     if (shift_op) {
   2811       // X86 doesn't require masking and must use ECX.
   2812       RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
   2813       LoadValueDirectFixed(rl_rhs, t_reg);
   2814       if (is_two_addr) {
   2815         // Can we do this directly into memory?
   2817         rl_result = UpdateLocTyped(rl_dest, kCoreReg);
   2818         if (rl_result.location != kLocPhysReg) {
   2819           // Okay, we can do this into memory
   2820           OpMemReg(op, rl_result, t_reg.GetReg());
   2821           FreeTemp(t_reg);
   2822           return;
   2823         } else if (!rl_result.reg.IsFloat()) {
   2824           // Can do this directly into the result register
   2825           OpRegReg(op, rl_result.reg, t_reg);
   2826           FreeTemp(t_reg);
   2827           StoreFinalValue(rl_dest, rl_result);
   2828           return;
   2829         }
   2830       }
    2831       // Three-address form, or we can't do it directly.
   2832       rl_lhs = LoadValue(rl_lhs, kCoreReg);
   2833       rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2834       OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg);
   2835       FreeTemp(t_reg);
   2836     } else {
   2837       // Multiply is 3 operand only (sort of).
   2838       if (is_two_addr && op != kOpMul) {
   2839         // Can we do this directly into memory?
   2840         rl_result = UpdateLocTyped(rl_dest, kCoreReg);
   2841         if (rl_result.location == kLocPhysReg) {
   2842           // Ensure res is in a core reg
   2843           rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2844           // Can we do this from memory directly?
   2845           rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
   2846           if (rl_rhs.location != kLocPhysReg) {
   2847             OpRegMem(op, rl_result.reg, rl_rhs);
   2848             StoreFinalValue(rl_dest, rl_result);
   2849             return;
   2850           } else if (!rl_rhs.reg.IsFloat()) {
   2851             OpRegReg(op, rl_result.reg, rl_rhs.reg);
   2852             StoreFinalValue(rl_dest, rl_result);
   2853             return;
   2854           }
   2855         }
   2856         rl_rhs = LoadValue(rl_rhs, kCoreReg);
    2857         // rl_rhs and rl_dest may be the same VR; in that case rl_dest is
    2858         // already in a register after the LoadValue above, while rl_result
    2859         // is stale, so refresh it.
   2860         rl_result = UpdateLocTyped(rl_dest, kCoreReg);
   2861         if (rl_result.location != kLocPhysReg) {
   2862           // Okay, we can do this into memory.
   2863           OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
   2864           return;
   2865         } else if (!rl_result.reg.IsFloat()) {
   2866           // Can do this directly into the result register.
   2867           OpRegReg(op, rl_result.reg, rl_rhs.reg);
   2868           StoreFinalValue(rl_dest, rl_result);
   2869           return;
   2870         } else {
   2871           rl_lhs = LoadValue(rl_lhs, kCoreReg);
   2872           rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2873           OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
   2874         }
   2875       } else {
   2876         // Try to use reg/memory instructions.
   2877         rl_lhs = UpdateLocTyped(rl_lhs, kCoreReg);
   2878         rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
   2879         // We can't optimize with FP registers.
   2880         if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
   2881           // Something is difficult, so fall back to the standard case.
   2882           rl_lhs = LoadValue(rl_lhs, kCoreReg);
   2883           rl_rhs = LoadValue(rl_rhs, kCoreReg);
   2884           rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2885           OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
   2886         } else {
   2887           // We can optimize by moving to result and using memory operands.
   2888           if (rl_rhs.location != kLocPhysReg) {
   2889             // Force LHS into result.
    2890             // Be careful with the order here:
    2891             // if rl_dest and rl_lhs point to the same VR, we must load first;
    2892             // if they are different, find a register for dest first.
   2893             if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
   2894                 mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
   2895               rl_lhs = LoadValue(rl_lhs, kCoreReg);
   2896               rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2897               // No-op if these are the same.
   2898               OpRegCopy(rl_result.reg, rl_lhs.reg);
   2899             } else {
   2900               rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2901               LoadValueDirect(rl_lhs, rl_result.reg);
   2902             }
   2903             OpRegMem(op, rl_result.reg, rl_rhs);
   2904           } else if (rl_lhs.location != kLocPhysReg) {
   2905             // RHS is in a register; LHS is in memory.
   2906             if (op != kOpSub) {
   2907               // Force RHS into result and operate on memory.
   2908               rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2909               OpRegCopy(rl_result.reg, rl_rhs.reg);
   2910               OpRegMem(op, rl_result.reg, rl_lhs);
   2911             } else {
   2912               // Subtraction isn't commutative.
   2913               rl_lhs = LoadValue(rl_lhs, kCoreReg);
   2914               rl_rhs = LoadValue(rl_rhs, kCoreReg);
   2915               rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2916               OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
   2917             }
   2918           } else {
   2919             // Both are in registers.
   2920             rl_lhs = LoadValue(rl_lhs, kCoreReg);
   2921             rl_rhs = LoadValue(rl_rhs, kCoreReg);
   2922             rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2923             OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
   2924           }
   2925         }
   2926       }
   2927     }
   2928   }
   2929   StoreValue(rl_dest, rl_result);
   2930 }
   2931 
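         // The reg/mem strategies in GenArithOpInt require core registers; check that
         // neither operand currently lives in a floating-point register.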
   2932 bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
    2933   // If either value lives in a floating-point register, we can't use reg/mem forms.
   2934   if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) {
   2935     return false;
   2936   }
   2937   if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) {
   2938     return false;
   2939   }
   2940 
   2941   // Everything will be fine :-).
   2942   return true;
   2943 }
   2944 
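         // int-to-long is a single movsxd on 64-bit targets, taking its source either
         // from a register or straight from the Dalvik frame slot; 32-bit targets use
         // the portable implementation.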
   2945 void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
   2946   if (!cu_->target64) {
   2947     Mir2Lir::GenIntToLong(rl_dest, rl_src);
   2948     return;
   2949   }
   2950   rl_src = UpdateLocTyped(rl_src, kCoreReg);
   2951   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2952   if (rl_src.location == kLocPhysReg) {
   2953     NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
   2954   } else {
   2955     int displacement = SRegOffset(rl_src.s_reg_low);
   2956     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   2957     LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(),
   2958                      displacement + LOWORD_OFFSET);
   2959     AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   2960                             true /* is_load */, true /* is_64bit */);
   2961   }
   2962   StoreValueWide(rl_dest, rl_result);
   2963 }
   2964 
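         // Long shifts by a variable amount (64-bit targets only; 32-bit targets use
         // the portable implementation). The shift count is materialized in rCX, the
         // only count register the x86 shift instructions accept.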
   2965 void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
   2966                         RegLocation rl_src1, RegLocation rl_shift) {
   2967   if (!cu_->target64) {
   2968     Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
   2969     return;
   2970   }
   2971 
   2972   bool is_two_addr = false;
   2973   OpKind op = kOpBkpt;
   2974   RegLocation rl_result;
   2975 
   2976   switch (opcode) {
   2977     case Instruction::SHL_LONG_2ADDR:
   2978       is_two_addr = true;
   2979       // Fallthrough
   2980     case Instruction::SHL_LONG:
   2981       op = kOpLsl;
   2982       break;
   2983     case Instruction::SHR_LONG_2ADDR:
   2984       is_two_addr = true;
   2985       // Fallthrough
   2986     case Instruction::SHR_LONG:
   2987       op = kOpAsr;
   2988       break;
   2989     case Instruction::USHR_LONG_2ADDR:
   2990       is_two_addr = true;
   2991       // Fallthrough
   2992     case Instruction::USHR_LONG:
   2993       op = kOpLsr;
   2994       break;
   2995     default:
    2996       LOG(FATAL) << "Unexpected shift opcode: " << opcode;
   2997   }
   2998 
   2999   // X86 doesn't require masking and must use ECX.
   3000   RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
   3001   LoadValueDirectFixed(rl_shift, t_reg);
   3002   if (is_two_addr) {
   3003     // Can we do this directly into memory?
   3004     rl_result = UpdateLocWideTyped(rl_dest, kCoreReg);
   3005     if (rl_result.location != kLocPhysReg) {
   3006       // Okay, we can do this into memory
   3007       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   3008       OpMemReg(op, rl_result, t_reg.GetReg());
   3009     } else if (!rl_result.reg.IsFloat()) {
   3010       // Can do this directly into the result register
   3011       OpRegReg(op, rl_result.reg, t_reg);
   3012       StoreFinalValueWide(rl_dest, rl_result);
   3013     }
   3014   } else {
    3015     // Three-address form, or we can't do it directly.
   3016     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   3017     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   3018     OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
   3019     StoreFinalValueWide(rl_dest, rl_result);
   3020   }
   3021 
   3022   FreeTemp(t_reg);
   3023 }
   3024 
   3025 }  // namespace art
   3026