/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the X86 ISA */

#include "codegen_x86.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "mirror/art_method.h"
#include "mirror/array.h"
#include "x86_lir.h"

namespace art {

/*
 * Compare two 64-bit values
 *    x = y     return  0
 *    x < y     return -1
 *    x > y     return  1
 */
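// A rough sketch (illustrative only, not the exact encoding) of what the
// 64-bit path below emits:
//   cmp   src1, src2
//   setg  result8          // result = (src1 > src2) ? 1 : 0
//   setl  temp8            // temp   = (src1 < src2) ? 1 : 0
//   sub   result8, temp8   // result = -1, 0 or 1
//   movsx result, result8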
void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  if (cu_->target64) {
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    RegStorage temp_reg = AllocTemp();
    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG);  // result = (src1 > src2) ? 1 : 0
    NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL);  // temp = (src1 < src2) ? 1 : 0
    NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg());
    NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg());

    StoreValue(rl_dest, rl_result);
    FreeTemp(temp_reg);
    return;
  }

  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage.
  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
  LoadValueDirectWideFixed(rl_src1, r_tmp1);
  LoadValueDirectWideFixed(rl_src2, r_tmp2);
  // Compute (r1:r0) = (r1:r0) - (r3:r2).
  OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
  OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
  NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL);  // r2 = (r1:r0) < (r3:r2) ? 1 : 0
  NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg());
  OpReg(kOpNeg, rs_r2);  // r2 = -r2
  OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = high | low - sets ZF
  NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz);  // r0 = (r1:r0) != (r3:r2) ? 1 : 0
  NewLIR2(kX86Movzx8RR, r0, r0);
  OpRegReg(kOpOr, rs_r0, rs_r2);  // r0 = r0 | r2
  RegLocation rl_result = LocCReturn();
  StoreValue(rl_dest, rl_result);
}

X86ConditionCode X86ConditionEncoding(ConditionCode cond) {
  switch (cond) {
    case kCondEq: return kX86CondEq;
    case kCondNe: return kX86CondNe;
    case kCondCs: return kX86CondC;
    case kCondCc: return kX86CondNc;
    case kCondUlt: return kX86CondC;
    case kCondUge: return kX86CondNc;
    case kCondMi: return kX86CondS;
    case kCondPl: return kX86CondNs;
    case kCondVs: return kX86CondO;
    case kCondVc: return kX86CondNo;
    case kCondHi: return kX86CondA;
    case kCondLs: return kX86CondBe;
    case kCondGe: return kX86CondGe;
    case kCondLt: return kX86CondL;
    case kCondGt: return kX86CondG;
    case kCondLe: return kX86CondLe;
    case kCondAl:
    case kCondNv: LOG(FATAL) << "Should not reach here";
  }
  return kX86CondO;
}

LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
  NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg());
  X86ConditionCode cc = X86ConditionEncoding(cond);
  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */, cc);
  branch->target = target;
  return branch;
}

LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg,
                                int check_value, LIR* target) {
  if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) {
    // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode.
    NewLIR2(reg.Is64Bit() ? kX86Test64RR : kX86Test32RR, reg.GetReg(), reg.GetReg());
  } else {
    if (reg.Is64Bit()) {
      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value);
    } else {
      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
    }
  }
  X86ConditionCode cc = X86ConditionEncoding(cond);
  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */, cc);
  branch->target = target;
  return branch;
}

LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
  // If src or dest is a pair, we'll be using low reg.
  if (r_dest.IsPair()) {
    r_dest = r_dest.GetLow();
  }
  if (r_src.IsPair()) {
    r_src = r_src.GetLow();
  }
  if (r_dest.IsFloat() || r_src.IsFloat())
    return OpFpRegCopy(r_dest, r_src);
  LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR,
                    r_dest.GetReg(), r_src.GetReg());
  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }
  return res;
}

void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
    AppendLIR(res);
  }
}

void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    bool dest_fp = r_dest.IsFloat();
    bool src_fp = r_src.IsFloat();
    if (dest_fp) {
      if (src_fp) {
        OpRegCopy(r_dest, r_src);
      } else {
        // TODO: Prevent this from happening in the code. The result is often
        // unused or could have been loaded more easily from memory.
        if (!r_src.IsPair()) {
          DCHECK(!r_dest.IsPair());
          NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg());
        } else {
          NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
          RegStorage r_tmp = AllocTempDouble();
          NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
          NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
          FreeTemp(r_tmp);
        }
      }
    } else {
      if (src_fp) {
        if (!r_dest.IsPair()) {
          DCHECK(!r_src.IsPair());
          NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg());
        } else {
          NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
          RegStorage temp_reg = AllocTempDouble();
          NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
          NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
          NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
        }
      } else {
        DCHECK_EQ(r_dest.IsPair(), r_src.IsPair());
        if (!r_src.IsPair()) {
          // Just copy the register directly.
          OpRegCopy(r_dest, r_src);
        } else {
          // Handle overlap.
          if (r_src.GetHighReg() == r_dest.GetLowReg() &&
              r_src.GetLowReg() == r_dest.GetHighReg()) {
            // Deal with cycles.
            RegStorage temp_reg = AllocTemp();
            OpRegCopy(temp_reg, r_dest.GetHigh());
            OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
            OpRegCopy(r_dest.GetLow(), temp_reg);
            FreeTemp(temp_reg);
          } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
          } else {
            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
          }
        }
      }
    }
  }
}

void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
                                  int32_t true_val, int32_t false_val, RegStorage rs_dest,
                                  int dest_reg_class) {
  DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair());
  DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat());

  // We really need this check for correctness; otherwise we would need additional checks in
  // the non zero/one case below.
  if (true_val == false_val) {
    LoadConstantNoClobber(rs_dest, true_val);
    return;
  }

  const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op);

  const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0);
  if (zero_one_case && IsByteRegister(rs_dest)) {
    if (!dest_intersect) {
      LoadConstantNoClobber(rs_dest, 0);
    }
    OpRegReg(kOpCmp, left_op, right_op);
    // Set the low byte of the result to 0 or 1 from the compare condition code.
    NewLIR2(kX86Set8R, rs_dest.GetReg(),
            X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code)));
    if (dest_intersect) {
      NewLIR2(rs_dest.Is64Bit() ? kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg());
    }
  } else {
    // Be careful: rs_dest may be changed only after the cmp, because it can be the same register
    // as one of the operands, and we cannot zero it with xor because that would dirty the
    // condition flags.
    RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false);
    if (temp_reg.Valid()) {
      if (false_val == 0 && dest_intersect) {
        code = FlipComparisonOrder(code);
        std::swap(true_val, false_val);
      }
      if (!dest_intersect) {
        LoadConstantNoClobber(rs_dest, false_val);
      }
      LoadConstantNoClobber(temp_reg, true_val);
      OpRegReg(kOpCmp, left_op, right_op);
      if (dest_intersect) {
        LoadConstantNoClobber(rs_dest, false_val);
        DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
      }
      OpCondRegReg(kOpCmov, code, rs_dest, temp_reg);
      FreeTemp(temp_reg);
    } else {
      // Slow path: branch over the two constant loads.
      LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr);
      LoadConstantNoClobber(rs_dest, false_val);
      LIR* that_is_it = NewLIR1(kX86Jmp8, 0);
      LIR* true_case = NewLIR0(kPseudoTargetLabel);
      cmp_branch->target = true_case;
      LoadConstantNoClobber(rs_dest, true_val);
      LIR* end = NewLIR0(kPseudoTargetLabel);
      that_is_it->target = end;
    }
  }
}

void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  RegLocation rl_result;
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  // Avoid using float regs here.
  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
  ConditionCode ccode = mir->meta.ccode;

  // The kMirOpSelect has two variants, one for constants and one for moves.
  const bool is_constant_case = (mir->ssa_rep->num_uses == 1);

  if (is_constant_case) {
    int true_val = mir->dalvikInsn.vB;
    int false_val = mir->dalvikInsn.vC;

    // Simplest case: both constants are equal, so no compare is needed.
    if (true_val == false_val) {
      rl_result = EvalLoc(rl_dest, result_reg_class, true);
      LoadConstantNoClobber(rl_result.reg, true_val);
    } else {
      // TODO: use GenSelectConst32 and handle additional opcode patterns such as
      // "cmp; setcc; movzx" or "cmp; sbb r0,r0; and r0,$mask; add r0,$literal".
      rl_src = LoadValue(rl_src, src_reg_class);
      rl_result = EvalLoc(rl_dest, result_reg_class, true);
      /*
       * For ccode == kCondEq:
       *
       * 1) When the true case is zero and result_reg is not same as src_reg:
       *     xor result_reg, result_reg
       *     cmp $0, src_reg
       *     mov t1, $false_case
       *     cmovnz result_reg, t1
       * 2) When the false case is zero and result_reg is not same as src_reg:
       *     xor result_reg, result_reg
       *     cmp $0, src_reg
       *     mov t1, $true_case
       *     cmovz result_reg, t1
       * 3) All other cases (we do compare first to set eflags):
       *     cmp $0, src_reg
       *     mov result_reg, $false_case
       *     mov t1, $true_case
       *     cmovz result_reg, t1
       */
      // FIXME: depending on how you use registers you could get a false != mismatch when dealing
      // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
      const bool result_reg_same_as_src =
          (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum());
      const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
      const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
      const bool catch_all_case = !(true_zero_case || false_zero_case);

      if (true_zero_case || false_zero_case) {
        OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
      }

      if (true_zero_case || false_zero_case || catch_all_case) {
        OpRegImm(kOpCmp, rl_src.reg, 0);
      }

      if (catch_all_case) {
        OpRegImm(kOpMov, rl_result.reg, false_val);
      }

      if (true_zero_case || false_zero_case || catch_all_case) {
        ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
        int immediateForTemp = true_zero_case ? false_val : true_val;
        RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
        OpRegImm(kOpMov, temp1_reg, immediateForTemp);

        OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);

        FreeTemp(temp1_reg);
      }
    }
  } else {
    rl_src = LoadValue(rl_src, src_reg_class);
    RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
    RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
    rl_true = LoadValue(rl_true, result_reg_class);
    rl_false = LoadValue(rl_false, result_reg_class);
    rl_result = EvalLoc(rl_dest, result_reg_class, true);

    /*
     * For ccode == kCondEq:
     *
     * 1) When the true case is already in place:
     *     cmp $0, src_reg
     *     cmovnz result_reg, false_reg
     * 2) When the false case is already in place:
     *     cmp $0, src_reg
     *     cmovz result_reg, true_reg
     * 3) When neither case is in place:
     *     cmp $0, src_reg
     *     mov result_reg, false_reg
     *     cmovz result_reg, true_reg
     */

    // kMirOpSelect is generated just for conditional cases when comparison is done with zero.
    OpRegImm(kOpCmp, rl_src.reg, 0);

    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {
      OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg);
    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {
      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
    } else {
      OpRegCopy(rl_result.reg, rl_false.reg);
      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
    }
  }

  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  LIR* taken = &block_label_list_[bb->taken];
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  ConditionCode ccode = mir->meta.ccode;

  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }
  if (rl_src2.is_const) {
    // Do special compare/branch against simple const operand.
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
    return;
  }

  if (cu_->target64) {
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);

    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
    OpCondBranch(ccode, taken);
    return;
  }

  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage.
  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
  LoadValueDirectWideFixed(rl_src1, r_tmp1);
  LoadValueDirectWideFixed(rl_src2, r_tmp2);

  // Swap operands and condition code to prevent use of zero flag.
  if (ccode == kCondLe || ccode == kCondGt) {
    // Compute (r3:r2) = (r3:r2) - (r1:r0).
    OpRegReg(kOpSub, rs_r2, rs_r0);  // r2 = r2 - r0
    OpRegReg(kOpSbc, rs_r3, rs_r1);  // r3 = r3 - r1 - CF
  } else {
    // Compute (r1:r0) = (r1:r0) - (r3:r2).
    OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
    OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
  }
  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = r0 | r1
      break;
    case kCondLe:
      ccode = kCondGe;
      break;
    case kCondGt:
      ccode = kCondLt;
      break;
    case kCondLt:
    case kCondGe:
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}

void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                          int64_t val, ConditionCode ccode) {
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  LIR* taken = &block_label_list_[bb->taken];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  bool is_equality_test = ccode == kCondEq || ccode == kCondNe;

  if (cu_->target64) {
    if (is_equality_test && val == 0) {
      // We can simplify the compare against 0 for == and !=.
      NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
    } else if (is_equality_test && val_hi == 0 && val_lo > 0) {
      OpRegImm(kOpCmp, rl_src1.reg, val_lo);
    } else {
      RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
      LoadConstantWide(tmp, val);
      OpRegReg(kOpCmp, rl_src1.reg, tmp);
      FreeTemp(tmp);
    }
    OpCondBranch(ccode, taken);
    return;
  }

  if (is_equality_test && val != 0) {
    rl_src1 = ForceTempWide(rl_src1);
  }
  RegStorage low_reg = rl_src1.reg.GetLow();
  RegStorage high_reg = rl_src1.reg.GetHigh();

  if (is_equality_test) {
    // We can simplify the compare against 0 for == and !=.
    if (val == 0) {
      if (IsTemp(low_reg)) {
        OpRegReg(kOpOr, low_reg, high_reg);
        // We have now changed it; ignore the old values.
        Clobber(rl_src1.reg);
      } else {
        RegStorage t_reg = AllocTemp();
        OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
        FreeTemp(t_reg);
      }
      OpCondBranch(ccode, taken);
      return;
    }

    // Need to compute the actual value for ==, !=.
    OpRegImm(kOpSub, low_reg, val_lo);
    NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
    OpRegReg(kOpOr, high_reg, low_reg);
    Clobber(rl_src1.reg);
  } else if (ccode == kCondLe || ccode == kCondGt) {
    // Swap operands and condition code to prevent use of zero flag.
    RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
    LoadConstantWide(tmp, val);
    OpRegReg(kOpSub, tmp.GetLow(), low_reg);
    OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
    ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
    FreeTemp(tmp);
  } else {
    // We can use a compare for the low word to set CF.
    OpRegImm(kOpCmp, low_reg, val_lo);
    if (IsTemp(high_reg)) {
      NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
      // We have now changed it; ignore the old values.
      Clobber(rl_src1.reg);
    } else {
      // mov temp_reg, high_reg; sbb temp_reg, high_constant
      RegStorage t_reg = AllocTemp();
      OpRegCopy(t_reg, high_reg);
      NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
      FreeTemp(t_reg);
    }
  }

  OpCondBranch(ccode, taken);
}

void X86Mir2Lir::CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long) {
  // It does not make sense to calculate magic and shift for a zero divisor.
  DCHECK_NE(divisor, 0);

  /* According to H. S. Warren's Hacker's Delight Chapter 10 and
   * T. Granlund and P. L. Montgomery's "Division by invariant integers using multiplication",
   * the magic number M and shift S can be calculated in the following way:
   * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
   * where divisor(d) >= 2.
   * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
   * where divisor(d) <= -2.
   * Thus nc can be calculated like:
   * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long
   * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long
   *
   * So the shift p is the smallest p satisfying
   * 2^p > nc * (d - 2^p % d), where d >= 2
   * 2^p > nc * (d + 2^p % d), where d <= -2.
   *
   * The magic number M is calculated by
   * M = (2^p + d - 2^p % d) / d, where d >= 2
   * M = (2^p - d - 2^p % d) / d, where d <= -2.
   *
   * Notice that p is always greater than or equal to 32 (64 for long), so we just return
   * p - 32 (p - 64 for long) as the shift number S.
   */
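  // Worked example (a standard Hacker's Delight constant, stated here for
  // illustration): for a 32-bit divisor d == 7 the loop below ends with
  // p == 34, giving magic == 0x92492493 (negative as an int32_t, which is why
  // the caller adds the numerator back) and shift == p - 32 == 2.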

  int64_t p = (is_long) ? 63 : 31;
  const uint64_t exp = (is_long) ? 0x8000000000000000ULL : 0x80000000U;

  // Initialize the computations.
  uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
  uint64_t tmp = exp + ((is_long) ? static_cast<uint64_t>(divisor) >> 63 :
                                    static_cast<uint32_t>(divisor) >> 31);
  uint64_t abs_nc = tmp - 1 - tmp % abs_d;
  uint64_t quotient1 = exp / abs_nc;
  uint64_t remainder1 = exp % abs_nc;
  uint64_t quotient2 = exp / abs_d;
  uint64_t remainder2 = exp % abs_d;

  /*
   * To avoid handling both positive and negative divisors separately, Hacker's Delight
   * introduces a method to handle the two cases together and avoid duplication.
   */
  uint64_t delta;
  do {
    p++;
    quotient1 = 2 * quotient1;
    remainder1 = 2 * remainder1;
    if (remainder1 >= abs_nc) {
      quotient1++;
      remainder1 = remainder1 - abs_nc;
    }
    quotient2 = 2 * quotient2;
    remainder2 = 2 * remainder2;
    if (remainder2 >= abs_d) {
      quotient2++;
      remainder2 = remainder2 - abs_d;
    }
    delta = abs_d - remainder2;
  } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));

  magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);

  if (!is_long) {
    magic = static_cast<int>(magic);
  }

  shift = (is_long) ? p - 64 : p - 32;
}

RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) {
  LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
  return rl_dest;
}

RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
                                     int imm, bool is_div) {
  // Use a multiply (and fixup) to perform an int div/rem by a constant.
  RegLocation rl_result;

  if (imm == 1) {
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (is_div) {
      // x / 1 == x.
      LoadValueDirectFixed(rl_src, rl_result.reg);
    } else {
      // x % 1 == 0.
      LoadConstantNoClobber(rl_result.reg, 0);
    }
  } else if (imm == -1) {  // Handle the 0x80000000 / -1 special case.
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (is_div) {
      LoadValueDirectFixed(rl_src, rl_result.reg);
      OpRegImm(kOpCmp, rl_result.reg, 0x80000000);
      LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);

      // For x != MIN_INT, x / -1 == -x.
      NewLIR1(kX86Neg32R, rl_result.reg.GetReg());

      // The result register already contains the right value (0x80000000).
      minint_branch->target = NewLIR0(kPseudoTargetLabel);
    } else {
      // x % -1 == 0.
      LoadConstantNoClobber(rl_result.reg, 0);
    }
  } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
    // Division using shifting.
    rl_src = LoadValue(rl_src, kCoreReg);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (IsSameReg(rl_result.reg, rl_src.reg)) {
      RegStorage rs_temp = AllocTypedTemp(false, kCoreReg);
      rl_result.reg.SetReg(rs_temp.GetReg());
    }
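    // The lea/test/cmov/sar sequence below is the usual round-toward-zero
    // fixup for signed division by 2^k: an arithmetic shift alone would round
    // a negative dividend toward negative infinity, so the biased value
    // (x + 2^k - 1) is shifted instead when x is negative. A rough sketch:
    //   lea    result, [src + 2^k - 1]
    //   test   src, src
    //   cmovpl result, src   // keep the unbiased value when src >= 0
    //   sar    result, k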
    NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1);
    NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
    OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
    int shift_amount = LowestSetBit(imm);
    OpRegImm(kOpAsr, rl_result.reg, shift_amount);
    if (imm < 0) {
      OpReg(kOpNeg, rl_result.reg);
    }
  } else {
    CHECK(imm <= -2 || imm >= 2);

    // Use H. S. Warren's Hacker's Delight Chapter 10 and
    // T. Granlund and P. L. Montgomery's "Division by invariant integers using multiplication".
    int64_t magic;
    int shift;
    CalculateMagicAndShift((int64_t)imm, magic, shift, false /* is_long */);

    /*
     * For imm >= 2,
     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, while n < 0.
     * For imm <= -2,
     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, while n > 0
     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
     * We implement this algorithm in the following way:
     * 1. multiply magic number m and numerator n, get the higher 32-bit result in EDX
     * 2. if imm > 0 and magic < 0, add numerator to EDX
     *    if imm < 0 and magic > 0, sub numerator from EDX
     * 3. if S != 0, SAR S bits for EDX
     * 4. add 1 to EDX if EDX < 0
     * 5. Thus, EDX is the quotient
     */
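
    // For instance (an illustrative sketch, not the exact emitted code),
    // n / 7 uses magic == 0x92492493 (< 0) and shift == 2, so the steps
    // above become roughly:
    //   mov  eax, n
    //   mov  edx, 0x92492493
    //   imul edx            // EDX:EAX = magic * n (step 1)
    //   add  edx, n         // magic < 0, imm > 0 (step 2)
    //   sar  edx, 2         // step 3
    //   mov  eax, edx       // step 4: add EDX's sign bit to EDX
    //   shr  edx, 31
    //   add  edx, eax       // EDX = quotient (step 5)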

    FlushReg(rs_r0);
    Clobber(rs_r0);
    LockTemp(rs_r0);
    FlushReg(rs_r2);
    Clobber(rs_r2);
    LockTemp(rs_r2);

    // Assume that the result will be in EDX.
    rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r2, INVALID_SREG, INVALID_SREG};

    // Numerator into EAX.
    RegStorage numerator_reg;
    if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
      // We will need the value later.
      rl_src = LoadValue(rl_src, kCoreReg);
      numerator_reg = rl_src.reg;
      OpRegCopy(rs_r0, numerator_reg);
    } else {
      // Only need this once.  Just put it into EAX.
      LoadValueDirectFixed(rl_src, rs_r0);
    }

    // EDX = magic.
    LoadConstantNoClobber(rs_r2, magic);

    // EDX:EAX = magic * dividend.
    NewLIR1(kX86Imul32DaR, rs_r2.GetReg());

    if (imm > 0 && magic < 0) {
      // Add numerator to EDX.
      DCHECK(numerator_reg.Valid());
      NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg());
    } else if (imm < 0 && magic > 0) {
      DCHECK(numerator_reg.Valid());
      NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg());
    }

    // Do we need the shift?
    if (shift != 0) {
      // Shift EDX by 'shift' bits.
      NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift);
    }

    // Add 1 to EDX if EDX < 0.

    // Move EDX to EAX.
    OpRegCopy(rs_r0, rs_r2);

    // Move sign bit to bit 0, zeroing the rest.
    NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31);

    // EDX = EDX + EAX.
    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg());

    // Quotient is in EDX.
    if (!is_div) {
      // We need to compute the remainder.
      // Remainder is numerator - (quotient * imm).
      DCHECK(numerator_reg.Valid());
      OpRegCopy(rs_r0, numerator_reg);

      // EDX = quotient * imm.
      OpRegRegImm(kOpMul, rs_r2, rs_r2, imm);

      // EAX -= EDX, leaving the remainder in EAX.
      NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg());

      // For this case, return the result in EAX.
      rl_result.reg.SetReg(r0);
    }
  }

  return rl_result;
}

RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
                                  bool is_div) {
  LOG(FATAL) << "Unexpected use of GenDivRem for x86";
  return rl_dest;
}

RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                                  RegLocation rl_src2, bool is_div, bool check_zero) {
  // We have to use fixed registers, so flush all the temps.
  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage.

  // Load LHS into EAX.
  LoadValueDirectFixed(rl_src1, rs_r0);

  // Load RHS into ECX.
  LoadValueDirectFixed(rl_src2, rs_r1);

  // Copy LHS sign bit into EDX.
  NewLIR0(kx86Cdq32Da);

  if (check_zero) {
    // Handle division by zero case.
    GenDivZeroCheck(rs_r1);
  }

  // Have to catch 0x80000000/-1 case, or we will get an exception!
  OpRegImm(kOpCmp, rs_r1, -1);
  LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // RHS is -1.
  OpRegImm(kOpCmp, rs_r0, 0x80000000);
  LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // In the 0x80000000/-1 case.
  if (!is_div) {
    // For DIV, EAX is already right. For REM, we need EDX to be 0.
    LoadConstantNoClobber(rs_r2, 0);
  }
  LIR* done = NewLIR1(kX86Jmp8, 0);

  // Expected case.
  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
  minint_branch->target = minus_one_branch->target;
  NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg());
  done->target = NewLIR0(kPseudoTargetLabel);

  // Result is in EAX for div and EDX for rem.
  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
  if (!is_div) {
    rl_result.reg.SetReg(r2);
  }
  return rl_result;
}

bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);

  if (is_long && cu_->instruction_set == kX86) {
    return false;
  }

  // Get the two arguments to the invoke and place them in GP registers.
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);

  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  /*
   * If the result register is the same as the second element, then we need to be careful.
   * The reason is that the first copy would inadvertently clobber the second element with
   * the first one, yielding the wrong result. Thus we swap the operands in that case.
   */
  if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
    std::swap(rl_src1, rl_src2);
  }

  // Pick the first integer as min/max.
  OpRegCopy(rl_result.reg, rl_src1.reg);

  // If the integers are both in the same register, then there is nothing else to do
  // because they are equal and we have already moved one into the result.
  if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) {
    // It is possible we didn't pick correctly so do the actual comparison now.
    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);

    // Conditionally move the other integer into the destination register.
    ConditionCode condition_code = is_min ? kCondGt : kCondLt;
    OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg);
  }

  if (is_long) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_address;
  if (!cu_->target64) {
    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
    rl_address = LoadValue(rl_src_address, kCoreReg);
  } else {
    rl_address = LoadValueWide(rl_src_address, kCoreReg);
  }
  RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  // Unaligned access is allowed on x86.
  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
  if (size == k64) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_address;
  if (!cu_->target64) {
    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
    rl_address = LoadValue(rl_src_address, kCoreReg);
  } else {
    rl_address = LoadValueWide(rl_src_address, kCoreReg);
  }
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_value;
  if (size == k64) {
    // Unaligned access is allowed on x86.
    rl_value = LoadValueWide(rl_src_value, kCoreReg);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    // In 32-bit mode, only the EAX..EDX registers can be used with Mov8MR.
    if (!cu_->target64 && size == kSignedByte) {
      rl_src_value = UpdateLocTyped(rl_src_value, kCoreReg);
      if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
        RegStorage temp = AllocateByteRegister();
        OpRegCopy(temp, rl_src_value.reg);
        rl_value.reg = temp;
      } else {
        rl_value = LoadValue(rl_src_value, kCoreReg);
      }
    } else {
      rl_value = LoadValue(rl_src_value, kCoreReg);
    }
  }
  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
  return true;
}

void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
  NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset);
}

void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
  DCHECK_EQ(kX86, cu_->instruction_set);
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
  DCHECK_EQ(kX86_64, cu_->instruction_set);
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

static bool IsInReg(X86Mir2Lir* pMir2Lir, const RegLocation& rl, RegStorage reg) {
  return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home);
}

bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  if (!cu_->target64) {
    rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
  }
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5].
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7].
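
  // A note on the LOCK CMPXCHG semantics relied on below (x86 architectural
  // behavior): the accumulator (EAX/RAX, or EDX:EAX for the 8-byte form)
  // holds the expected value; on a match the new value is stored and ZF is
  // set, otherwise ZF is cleared and the current memory value is loaded into
  // the accumulator. The ZF-to-boolean conversion at the end of this method
  // depends on exactly that.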

  if (is_long && cu_->target64) {
    // RAX must hold expected for CMPXCHG. Neither rl_new_value nor r_ptr may be in RAX.
    FlushReg(rs_r0q);
    Clobber(rs_r0q);
    LockTemp(rs_r0q);

    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
    RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
    RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
    LoadValueDirectWide(rl_src_expected, rs_r0q);
    NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
            rl_new_value.reg.GetReg());

    // After a store we need to insert a barrier in case of a potential load. Since the
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
    GenMemBarrier(kAnyAny);

    FreeTemp(rs_r0q);
  } else if (is_long) {
    // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
    // TODO: CFI support.
    FlushAllRegs();
    LockCallTemps();
    RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
    RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
    LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
    LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
    // FIXME: needs 64-bit update.
    const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
    const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
    DCHECK(!obj_in_si || !obj_in_di);
    const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
    const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
    DCHECK(!off_in_si || !off_in_di);
    // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
    RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
    RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
    bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
    bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
    if (push_di) {
      NewLIR1(kX86Push32R, rs_rDI.GetReg());
      MarkTemp(rs_rDI);
      LockTemp(rs_rDI);
    }
    if (push_si) {
      NewLIR1(kX86Push32R, rs_rSI.GetReg());
      MarkTemp(rs_rSI);
      LockTemp(rs_rSI);
    }
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
    if (!obj_in_si && !obj_in_di) {
      LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
    }
    if (!off_in_si && !off_in_di) {
      LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
    }
    NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);

    // After a store we need to insert a barrier to prevent reordering with either
    // earlier or later memory accesses.  Since
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
    // and it will be associated with the cmpxchg instruction, preventing both.
    GenMemBarrier(kAnyAny);

    if (push_si) {
      FreeTemp(rs_rSI);
      UnmarkTemp(rs_rSI);
      NewLIR1(kX86Pop32R, rs_rSI.GetReg());
    }
    if (push_di) {
      FreeTemp(rs_rDI);
      UnmarkTemp(rs_rDI);
      NewLIR1(kX86Pop32R, rs_rDI.GetReg());
    }
    FreeCallTemps();
  } else {
    // EAX must hold expected for CMPXCHG. Neither rl_new_value nor r_ptr may be in EAX.
    FlushReg(rs_r0);
    Clobber(rs_r0);
    LockTemp(rs_r0);

    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
    RegLocation rl_new_value = LoadValue(rl_src_new_value);

    if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
      // Mark card for object assuming new value is stored.
      FreeTemp(rs_r0);  // Temporarily release EAX for MarkGCCard().
      MarkGCCard(rl_new_value.reg, rl_object.reg);
      LockTemp(rs_r0);
    }

    RegLocation rl_offset;
    if (cu_->target64) {
      rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
    } else {
      rl_offset = LoadValue(rl_src_offset, kCoreReg);
    }
    LoadValueDirect(rl_src_expected, rs_r0);
    NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
            rl_new_value.reg.GetReg());

    // After a store we need to insert a barrier to prevent reordering with either
    // earlier or later memory accesses.  Since
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
    // and it will be associated with the cmpxchg instruction, preventing both.
    GenMemBarrier(kAnyAny);

    FreeTemp(rs_r0);
  }

  // Convert ZF to boolean.
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage result_reg = rl_result.reg;

  // For 32-bit, SETcc only works with EAX..EDX.
  if (!IsByteRegister(result_reg)) {
    result_reg = AllocateByteRegister();
  }
  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
  NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
  if (IsTemp(result_reg)) {
    FreeTemp(result_reg);
  }
  StoreValue(rl_dest, rl_result);
  return true;
}

LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
  CHECK(base_of_code_ != nullptr);

  // Address the start of the method.
  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
  if (rl_method.wide) {
    LoadValueDirectWideFixed(rl_method, reg);
  } else {
    LoadValueDirectFixed(rl_method, reg);
  }
  store_method_addr_used_ = true;

  // Load the proper value from the literal area.
  // We don't know the proper offset for the value, so pick one that will force
  // a 4-byte offset.  We will fix this up in the assembler later to have the
  // right value.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256,
                    0, 0, target);
  res->target = target;
  res->flags.fixup = kFixupLoad;
  return res;
}

LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
  LOG(FATAL) << "Unexpected use of OpVldm for x86";
  return NULL;
}

LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) {
  LOG(FATAL) << "Unexpected use of OpVstm for x86";
  return NULL;
}

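// Strength-reduces a multiply by a constant for which the caller has found
// two set bits, lit == (1 << second_bit) + (1 << first_bit) (an added note;
// the decomposition is the caller's). The product is computed as
// (src + (src << (second_bit - first_bit))) << first_bit, i.e. a shift, an
// add and (when first_bit != 0) one more shift instead of an imul.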
void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                               RegLocation rl_result, int lit,
                                               int first_bit, int second_bit) {
  RegStorage t_reg = AllocTemp();
  OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
  OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
  FreeTemp(t_reg);
  if (first_bit != 0) {
    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
  }
}

void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
  if (cu_->target64) {
    DCHECK(reg.Is64Bit());

    NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
  } else {
    DCHECK(reg.IsPair());

    // We are not supposed to clobber the incoming storage, so allocate a temporary.
    RegStorage t_reg = AllocTemp();
    // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
    OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
    // The temp is no longer needed so free it at this time.
    FreeTemp(t_reg);
  }

  // In case of zero, throw ArithmeticException.
  GenDivZeroCheck(kCondEq);
}

void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index,
                                     RegStorage array_base,
                                     int len_offset) {
  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
   public:
    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch,
                             RegStorage index, RegStorage array_base, int32_t len_offset)
        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch),
          index_(index), array_base_(array_base), len_offset_(len_offset) {
    }

    void Compile() OVERRIDE {
      m2l_->ResetRegPool();
      m2l_->ResetDefTracking();
      GenerateTargetLabel(kPseudoThrowTarget);

      RegStorage new_index = index_;
      // Move index out of kArg1, either directly to kArg0, or to kArg2.
      // TODO: clean-up to check not a number but with type
      if (index_ == m2l_->TargetReg(kArg1, kNotWide)) {
        if (array_base_ == m2l_->TargetReg(kArg0, kRef)) {
          m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_);
          new_index = m2l_->TargetReg(kArg2, kNotWide);
        } else {
          m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_);
          new_index = m2l_->TargetReg(kArg0, kNotWide);
        }
      }
      // Load array length to kArg1.
      X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
      x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
      x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, new_index,
                                       m2l_->TargetReg(kArg1, kNotWide), true);
    }

   private:
    const RegStorage index_;
    const RegStorage array_base_;
    const int32_t len_offset_;
  };

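  // A single unsigned compare covers both bounds checks: a negative index,
  // reinterpreted as unsigned, is larger than any possible array length, so
  // one branch on "unsigned >=" (kCondUge) catches index < 0 and
  // index >= length at once.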
  OpRegMem(kOpCmp, index, array_base, len_offset);
  MarkPossibleNullPointerException(0);
  LIR* branch = OpCondBranch(kCondUge, nullptr);
  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                    index, array_base, len_offset));
}

void X86Mir2Lir::GenArrayBoundsCheck(int32_t index,
                                     RegStorage array_base,
                                     int32_t len_offset) {
  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
   public:
    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch,
                             int32_t index, RegStorage array_base, int32_t len_offset)
        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch),
          index_(index), array_base_(array_base), len_offset_(len_offset) {
    }

    void Compile() OVERRIDE {
      m2l_->ResetRegPool();
      m2l_->ResetDefTracking();
      GenerateTargetLabel(kPseudoThrowTarget);

      // Load array length to kArg1.
      X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
      x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
      x86_m2l->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_);
      x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, m2l_->TargetReg(kArg0, kNotWide),
                                       m2l_->TargetReg(kArg1, kNotWide), true);
    }

   private:
    const int32_t index_;
    const RegStorage array_base_;
    const int32_t len_offset_;
  };

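  // Same unsigned-compare trick as above, but here the operand order is
  // reversed (memory length vs. immediate index), so the taken condition is
  // "unsigned <=" (kCondLs): length <= index means out of bounds.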
  NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
  MarkPossibleNullPointerException(0);
  LIR* branch = OpCondBranch(kCondLs, nullptr);
  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                    index, array_base, len_offset));
}

// Test suspend flag, return target of taken suspend branch.
LIR* X86Mir2Lir::OpTestSuspend(LIR* target) {
  if (cu_->target64) {
    OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0);
  } else {
    OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0);
  }
  return OpCondBranch((target == NULL) ? kCondNe : kCondEq, target);
}

// Decrement register and branch on condition.
LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
  OpRegImm(kOpSub, reg, 1);
  return OpCondBranch(c_code, target);
}

bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
  LOG(FATAL) << "Unexpected use of SmallLiteralDivRem in x86";
  return false;
}

bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
  LOG(FATAL) << "Unexpected use of EasyMultiply in x86";
  return false;
}

LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
  LOG(FATAL) << "Unexpected use of OpIT in x86";
  return NULL;
}

void X86Mir2Lir::OpEndIT(LIR* it) {
  LOG(FATAL) << "Unexpected use of OpEndIT in x86";
}

   1261 void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) {
   1262   switch (val) {
   1263     case 0:
   1264       NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
   1265       break;
   1266     case 1:
   1267       OpRegCopy(dest, src);
   1268       break;
   1269     default:
   1270       OpRegRegImm(kOpMul, dest, src, val);
   1271       break;
   1272   }
   1273 }
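
/*
 * The same case split in plain C++, as a sketch: multiplying by 0 or 1 needs
 * no imul at all. The helper name is illustrative.
 *
 *   static int32_t ImulByConst(int32_t src, int32_t val) {
 *     if (val == 0) return 0;    // xor dest, dest
 *     if (val == 1) return src;  // mov dest, src
 *     return src * val;          // imul
 *   }
 */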
   1274 
   1275 void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
   1276   // All memory accesses below reference dalvik regs.
   1277   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   1278 
   1279   LIR *m;
   1280   switch (val) {
   1281     case 0:
   1282       NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
   1283       break;
   1284     case 1:
   1285       LoadBaseDisp(rs_rX86_SP, displacement, dest, k32, kNotVolatile);
   1286       break;
   1287     default:
   1288       m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
   1289                   rs_rX86_SP.GetReg(), displacement, val);
   1290       AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
   1291       break;
   1292   }
   1293 }
   1294 
   1295 void X86Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
   1296                                 RegLocation rl_src2) {
   1297   if (!cu_->target64) {
    1298     // Some long ops fall back to the generic implementation on 32-bit x86.
   1299     switch (opcode) {
   1300       case Instruction::NOT_LONG:
   1301       case Instruction::DIV_LONG:
   1302       case Instruction::DIV_LONG_2ADDR:
   1303       case Instruction::REM_LONG:
   1304       case Instruction::REM_LONG_2ADDR:
   1305         Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
   1306         return;
   1307 
   1308       default:
   1309         // Everything else we can handle.
   1310         break;
   1311     }
   1312   }
   1313 
   1314   switch (opcode) {
   1315     case Instruction::NOT_LONG:
   1316       GenNotLong(rl_dest, rl_src2);
   1317       return;
   1318 
   1319     case Instruction::ADD_LONG:
   1320     case Instruction::ADD_LONG_2ADDR:
   1321       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
   1322       return;
   1323 
   1324     case Instruction::SUB_LONG:
   1325     case Instruction::SUB_LONG_2ADDR:
   1326       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
   1327       return;
   1328 
   1329     case Instruction::MUL_LONG:
   1330     case Instruction::MUL_LONG_2ADDR:
   1331       GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
   1332       return;
   1333 
   1334     case Instruction::DIV_LONG:
   1335     case Instruction::DIV_LONG_2ADDR:
   1336       GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true);
   1337       return;
   1338 
   1339     case Instruction::REM_LONG:
   1340     case Instruction::REM_LONG_2ADDR:
   1341       GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false);
   1342       return;
   1343 
   1344     case Instruction::AND_LONG_2ADDR:
   1345     case Instruction::AND_LONG:
   1346       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
   1347       return;
   1348 
   1349     case Instruction::OR_LONG:
   1350     case Instruction::OR_LONG_2ADDR:
   1351       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
   1352       return;
   1353 
   1354     case Instruction::XOR_LONG:
   1355     case Instruction::XOR_LONG_2ADDR:
   1356       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
   1357       return;
   1358 
   1359     case Instruction::NEG_LONG:
   1360       GenNegLong(rl_dest, rl_src2);
   1361       return;
   1362 
   1363     default:
   1364       LOG(FATAL) << "Invalid long arith op";
   1365       return;
   1366   }
   1367 }
   1368 
   1369 bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val) {
   1370   // All memory accesses below reference dalvik regs.
   1371   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   1372 
   1373   if (val == 0) {
   1374     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1375     if (cu_->target64) {
   1376       OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
   1377     } else {
   1378       OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow());
   1379       OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());
   1380     }
   1381     StoreValueWide(rl_dest, rl_result);
   1382     return true;
   1383   } else if (val == 1) {
   1384     StoreValueWide(rl_dest, rl_src1);
   1385     return true;
   1386   } else if (val == 2) {
   1387     GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
   1388     return true;
   1389   } else if (IsPowerOfTwo(val)) {
   1390     int shift_amount = LowestSetBit(val);
   1391     if (!BadOverlap(rl_src1, rl_dest)) {
   1392       rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   1393       RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1,
   1394                                                 shift_amount);
   1395       StoreValueWide(rl_dest, rl_result);
   1396       return true;
   1397     }
   1398   }
   1399 
    1400   // Okay, on 32-bit just bite the bullet and do it; still better than the general case.
   1401   if (!cu_->target64) {
   1402     int32_t val_lo = Low32Bits(val);
   1403     int32_t val_hi = High32Bits(val);
   1404     FlushAllRegs();
   1405     LockCallTemps();  // Prepare for explicit register usage.
   1406     rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
   1407     bool src1_in_reg = rl_src1.location == kLocPhysReg;
   1408     int displacement = SRegOffset(rl_src1.s_reg_low);
   1409 
   1410     // ECX <- 1H * 2L
   1411     // EAX <- 1L * 2H
   1412     if (src1_in_reg) {
   1413       GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo);
   1414       GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi);
   1415     } else {
   1416       GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
   1417       GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
   1418     }
   1419 
   1420     // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
   1421     NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
   1422 
   1423     // EAX <- 2L
   1424     LoadConstantNoClobber(rs_r0, val_lo);
   1425 
   1426     // EDX:EAX <- 2L * 1L (double precision)
   1427     if (src1_in_reg) {
   1428       NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
   1429     } else {
   1430       LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET);
   1431       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1432                               true /* is_load */, true /* is_64bit */);
   1433     }
   1434 
   1435     // EDX <- EDX + ECX (add high words)
   1436     NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
   1437 
   1438     // Result is EDX:EAX
   1439     RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
   1440                              RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
   1441     StoreValueWide(rl_dest, rl_result);
   1442     return true;
   1443   }
   1444   return false;
   1445 }
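
/*
 * A sketch of the 32-bit decomposition used above. Only the low 64 bits of
 * the product are kept, so the hi1 * val_hi term (which only affects bits
 * above 63) is dropped and the two cross terms are summed into the high
 * word. Names are illustrative.
 *
 *   #include <cstdint>
 *   static uint64_t MulLongConstLow64(uint64_t src, uint64_t val) {
 *     uint32_t lo1 = static_cast<uint32_t>(src);
 *     uint32_t hi1 = static_cast<uint32_t>(src >> 32);
 *     uint32_t val_lo = static_cast<uint32_t>(val);
 *     uint32_t val_hi = static_cast<uint32_t>(val >> 32);
 *     uint64_t cross = static_cast<uint64_t>(hi1) * val_lo +
 *                      static_cast<uint64_t>(lo1) * val_hi;  // ECX
 *     uint64_t low = static_cast<uint64_t>(lo1) * val_lo;    // EDX:EAX
 *     return low + (cross << 32);  // add ECX into the high word
 *   }
 */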
   1446 
   1447 void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
   1448                             RegLocation rl_src2) {
   1449   if (rl_src1.is_const) {
   1450     std::swap(rl_src1, rl_src2);
   1451   }
   1452 
   1453   if (rl_src2.is_const) {
   1454     if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2))) {
   1455       return;
   1456     }
   1457   }
   1458 
   1459   // All memory accesses below reference dalvik regs.
   1460   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   1461 
   1462   if (cu_->target64) {
   1463     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   1464     rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   1465     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1466     if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
   1467         rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
   1468       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
   1469     } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() &&
   1470                rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
   1471       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg());
   1472     } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
   1473                rl_result.reg.GetReg() != rl_src2.reg.GetReg()) {
   1474       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
   1475     } else {
   1476       OpRegCopy(rl_result.reg, rl_src1.reg);
   1477       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
   1478     }
   1479     StoreValueWide(rl_dest, rl_result);
   1480     return;
   1481   }
   1482 
    1483   // Not multiplying by a constant. Do it the hard way.
   1484   // Check for V*V.  We can eliminate a multiply in that case, as 2L*1H == 2H*1L.
   1485   bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
   1486                    mir_graph_->SRegToVReg(rl_src2.s_reg_low);
   1487 
   1488   FlushAllRegs();
   1489   LockCallTemps();  // Prepare for explicit register usage.
   1490   rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
   1491   rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
   1492 
   1493   // At this point, the VRs are in their home locations.
   1494   bool src1_in_reg = rl_src1.location == kLocPhysReg;
   1495   bool src2_in_reg = rl_src2.location == kLocPhysReg;
   1496 
   1497   // ECX <- 1H
   1498   if (src1_in_reg) {
   1499     NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
   1500   } else {
   1501     LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32,
   1502                  kNotVolatile);
   1503   }
   1504 
   1505   if (is_square) {
   1506     // Take advantage of the fact that the values are the same.
   1507     // ECX <- ECX * 2L  (1H * 2L)
   1508     if (src2_in_reg) {
   1509       NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
   1510     } else {
   1511       int displacement = SRegOffset(rl_src2.s_reg_low);
   1512       LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
   1513                        displacement + LOWORD_OFFSET);
   1514       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1515                               true /* is_load */, true /* is_64bit */);
   1516     }
   1517 
   1518     // ECX <- 2*ECX (2H * 1L) + (1H * 2L)
   1519     NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg());
   1520   } else {
   1521     // EAX <- 2H
   1522     if (src2_in_reg) {
   1523       NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
   1524     } else {
   1525       LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32,
   1526                    kNotVolatile);
   1527     }
   1528 
   1529     // EAX <- EAX * 1L  (2H * 1L)
   1530     if (src1_in_reg) {
   1531       NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg());
   1532     } else {
   1533       int displacement = SRegOffset(rl_src1.s_reg_low);
   1534       LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP.GetReg(),
   1535                        displacement + LOWORD_OFFSET);
   1536       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1537                               true /* is_load */, true /* is_64bit */);
   1538     }
   1539 
   1540     // ECX <- ECX * 2L  (1H * 2L)
   1541     if (src2_in_reg) {
   1542       NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
   1543     } else {
   1544       int displacement = SRegOffset(rl_src2.s_reg_low);
   1545       LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
   1546                        displacement + LOWORD_OFFSET);
   1547       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1548                               true /* is_load */, true /* is_64bit */);
   1549     }
   1550 
   1551     // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
   1552     NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
   1553   }
   1554 
   1555   // EAX <- 2L
   1556   if (src2_in_reg) {
   1557     NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
   1558   } else {
   1559     LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32,
   1560                  kNotVolatile);
   1561   }
   1562 
   1563   // EDX:EAX <- 2L * 1L (double precision)
   1564   if (src1_in_reg) {
   1565     NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
   1566   } else {
   1567     int displacement = SRegOffset(rl_src1.s_reg_low);
   1568     LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET);
   1569     AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
   1570                             true /* is_load */, true /* is_64bit */);
   1571   }
   1572 
   1573   // EDX <- EDX + ECX (add high words)
   1574   NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
   1575 
   1576   // Result is EDX:EAX
   1577   RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
   1578                            RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
   1579   StoreValueWide(rl_dest, rl_result);
   1580 }
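
/*
 * A sketch of the squaring shortcut above: when both operands name the same
 * virtual register, the cross terms 1H*2L and 2H*1L are equal, so a single
 * multiply plus a doubling add replaces two multiplies.
 *
 *   #include <cstdint>
 *   static uint64_t SquareLow64(uint32_t lo, uint32_t hi) {
 *     uint32_t cross = static_cast<uint32_t>(
 *         static_cast<uint64_t>(hi) * lo) * 2u;  // imul ; add ecx, ecx
 *     return static_cast<uint64_t>(lo) * lo +
 *            (static_cast<uint64_t>(cross) << 32);
 *   }
 */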
   1581 
   1582 void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
   1583                                    Instruction::Code op) {
   1584   DCHECK_EQ(rl_dest.location, kLocPhysReg);
   1585   X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
   1586   if (rl_src.location == kLocPhysReg) {
   1587     // Both operands are in registers.
    1588     // But we must ensure that rl_src is in a register pair.
   1589     if (cu_->target64) {
   1590       NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
   1591     } else {
   1592       rl_src = LoadValueWide(rl_src, kCoreReg);
   1593       if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
   1594         // The registers are the same, so we would clobber it before the use.
   1595         RegStorage temp_reg = AllocTemp();
   1596         OpRegCopy(temp_reg, rl_dest.reg);
   1597         rl_src.reg.SetHighReg(temp_reg.GetReg());
   1598       }
   1599       NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
   1600 
   1601       x86op = GetOpcode(op, rl_dest, rl_src, true);
   1602       NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
    1603       FreeTemp(rl_src.reg);  // Free the source pair, including any temp substituted above.
   1604     }
   1605     return;
   1606   }
   1607 
   1608   // RHS is in memory.
   1609   DCHECK((rl_src.location == kLocDalvikFrame) ||
   1610          (rl_src.location == kLocCompilerTemp));
   1611   int r_base = rs_rX86_SP.GetReg();
   1612   int displacement = SRegOffset(rl_src.s_reg_low);
   1613 
   1614   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   1615   LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
   1616                      r_base, displacement + LOWORD_OFFSET);
   1617   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   1618                           true /* is_load */, true /* is64bit */);
   1619   if (!cu_->target64) {
   1620     x86op = GetOpcode(op, rl_dest, rl_src, true);
   1621     lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
   1622     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
   1623                             true /* is_load */, true /* is64bit */);
   1624   }
   1625 }
   1626 
   1627 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
   1628   rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
   1629   if (rl_dest.location == kLocPhysReg) {
   1630     // Ensure we are in a register pair
   1631     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1632 
   1633     rl_src = UpdateLocWideTyped(rl_src, kCoreReg);
   1634     GenLongRegOrMemOp(rl_result, rl_src, op);
   1635     StoreFinalValueWide(rl_dest, rl_result);
   1636     return;
   1637   }
   1638 
    1639   // It wasn't in registers, so it had better be in memory.
   1640   DCHECK((rl_dest.location == kLocDalvikFrame) ||
   1641          (rl_dest.location == kLocCompilerTemp));
   1642   rl_src = LoadValueWide(rl_src, kCoreReg);
   1643 
   1644   // Operate directly into memory.
   1645   X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
   1646   int r_base = rs_rX86_SP.GetReg();
   1647   int displacement = SRegOffset(rl_dest.s_reg_low);
   1648 
   1649   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   1650   LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
   1651                      cu_->target64 ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
   1652   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   1653                           true /* is_load */, true /* is64bit */);
   1654   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   1655                           false /* is_load */, true /* is64bit */);
   1656   if (!cu_->target64) {
   1657     x86op = GetOpcode(op, rl_dest, rl_src, true);
   1658     lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
   1659     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
   1660                             true /* is_load */, true /* is64bit */);
   1661     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
   1662                             false /* is_load */, true /* is64bit */);
   1663   }
   1664 
   1665   int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
   1666   int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
   1667 
   1668   // If the left operand is in memory and the right operand is in a register
   1669   // and both belong to the same dalvik register then we should clobber the
   1670   // right one because it doesn't hold valid data anymore.
   1671   if (v_src_reg == v_dst_reg) {
   1672     Clobber(rl_src.reg);
   1673   }
   1674 }
   1675 
   1676 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
   1677                               RegLocation rl_src2, Instruction::Code op,
   1678                               bool is_commutative) {
    1679   // Is this really a two-operand operation?
   1680   switch (op) {
   1681     case Instruction::ADD_LONG_2ADDR:
   1682     case Instruction::SUB_LONG_2ADDR:
   1683     case Instruction::AND_LONG_2ADDR:
   1684     case Instruction::OR_LONG_2ADDR:
   1685     case Instruction::XOR_LONG_2ADDR:
   1686       if (GenerateTwoOperandInstructions()) {
   1687         GenLongArith(rl_dest, rl_src2, op);
   1688         return;
   1689       }
   1690       break;
   1691 
   1692     default:
   1693       break;
   1694   }
   1695 
   1696   if (rl_dest.location == kLocPhysReg) {
   1697     RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
   1698 
   1699     // We are about to clobber the LHS, so it needs to be a temp.
   1700     rl_result = ForceTempWide(rl_result);
   1701 
   1702     // Perform the operation using the RHS.
   1703     rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
   1704     GenLongRegOrMemOp(rl_result, rl_src2, op);
   1705 
   1706     // And now record that the result is in the temp.
   1707     StoreFinalValueWide(rl_dest, rl_result);
   1708     return;
   1709   }
   1710 
    1711   // It wasn't in registers, so it had better be in memory.
   1712   DCHECK((rl_dest.location == kLocDalvikFrame) ||
   1713          (rl_dest.location == kLocCompilerTemp));
   1714   rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
   1715   rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
   1716 
   1717   // Get one of the source operands into temporary register.
   1718   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   1719   if (cu_->target64) {
   1720     if (IsTemp(rl_src1.reg)) {
   1721       GenLongRegOrMemOp(rl_src1, rl_src2, op);
   1722     } else if (is_commutative) {
   1723       rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   1724       // We need at least one of them to be a temporary.
   1725       if (!IsTemp(rl_src2.reg)) {
   1726         rl_src1 = ForceTempWide(rl_src1);
   1727         GenLongRegOrMemOp(rl_src1, rl_src2, op);
   1728       } else {
   1729         GenLongRegOrMemOp(rl_src2, rl_src1, op);
   1730         StoreFinalValueWide(rl_dest, rl_src2);
   1731         return;
   1732       }
   1733     } else {
   1734       // Need LHS to be the temp.
   1735       rl_src1 = ForceTempWide(rl_src1);
   1736       GenLongRegOrMemOp(rl_src1, rl_src2, op);
   1737     }
   1738   } else {
   1739     if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
   1740       GenLongRegOrMemOp(rl_src1, rl_src2, op);
   1741     } else if (is_commutative) {
   1742       rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   1743       // We need at least one of them to be a temporary.
   1744       if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
   1745         rl_src1 = ForceTempWide(rl_src1);
   1746         GenLongRegOrMemOp(rl_src1, rl_src2, op);
   1747       } else {
   1748         GenLongRegOrMemOp(rl_src2, rl_src1, op);
   1749         StoreFinalValueWide(rl_dest, rl_src2);
   1750         return;
   1751       }
   1752     } else {
   1753       // Need LHS to be the temp.
   1754       rl_src1 = ForceTempWide(rl_src1);
   1755       GenLongRegOrMemOp(rl_src1, rl_src2, op);
   1756     }
   1757   }
   1758 
   1759   StoreFinalValueWide(rl_dest, rl_src1);
   1760 }
   1761 
   1762 void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
   1763   if (cu_->target64) {
   1764     rl_src = LoadValueWide(rl_src, kCoreReg);
   1765     RegLocation rl_result;
   1766     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1767     OpRegCopy(rl_result.reg, rl_src.reg);
   1768     OpReg(kOpNot, rl_result.reg);
   1769     StoreValueWide(rl_dest, rl_result);
   1770   } else {
    1771     LOG(FATAL) << "Unexpected use of GenNotLong()";
   1772   }
   1773 }
   1774 
   1775 void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src,
   1776                                   int64_t imm, bool is_div) {
   1777   if (imm == 0) {
   1778     GenDivZeroException();
   1779   } else if (imm == 1) {
   1780     if (is_div) {
   1781       // x / 1 == x.
   1782       StoreValueWide(rl_dest, rl_src);
   1783     } else {
   1784       // x % 1 == 0.
   1785       RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1786       LoadConstantWide(rl_result.reg, 0);
   1787       StoreValueWide(rl_dest, rl_result);
   1788     }
    1789   } else if (imm == -1) {  // Handle the 0x8000000000000000 / -1 special case.
   1790     if (is_div) {
   1791       rl_src = LoadValueWide(rl_src, kCoreReg);
   1792       RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1793       RegStorage rs_temp = AllocTempWide();
   1794 
   1795       OpRegCopy(rl_result.reg, rl_src.reg);
   1796       LoadConstantWide(rs_temp, 0x8000000000000000);
   1797 
   1798       // If x == MIN_LONG, return MIN_LONG.
   1799       OpRegReg(kOpCmp, rl_src.reg, rs_temp);
   1800       LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
   1801 
   1802       // For x != MIN_LONG, x / -1 == -x.
   1803       OpReg(kOpNeg, rl_result.reg);
   1804 
   1805       minint_branch->target = NewLIR0(kPseudoTargetLabel);
   1806       FreeTemp(rs_temp);
   1807       StoreValueWide(rl_dest, rl_result);
   1808     } else {
   1809       // x % -1 == 0.
   1810       RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1811       LoadConstantWide(rl_result.reg, 0);
   1812       StoreValueWide(rl_dest, rl_result);
   1813     }
   1814   } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
   1815     // Division using shifting.
   1816     rl_src = LoadValueWide(rl_src, kCoreReg);
   1817     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   1818     if (IsSameReg(rl_result.reg, rl_src.reg)) {
   1819       RegStorage rs_temp = AllocTypedTempWide(false, kCoreReg);
   1820       rl_result.reg.SetReg(rs_temp.GetReg());
   1821     }
   1822     LoadConstantWide(rl_result.reg, std::abs(imm) - 1);
   1823     OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
   1824     NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
   1825     OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
   1826     int shift_amount = LowestSetBit(imm);
   1827     OpRegImm(kOpAsr, rl_result.reg, shift_amount);
   1828     if (imm < 0) {
   1829       OpReg(kOpNeg, rl_result.reg);
   1830     }
   1831     StoreValueWide(rl_dest, rl_result);
   1832   } else {
   1833     CHECK(imm <= -2 || imm >= 2);
   1834 
   1835     FlushReg(rs_r0q);
   1836     Clobber(rs_r0q);
   1837     LockTemp(rs_r0q);
   1838     FlushReg(rs_r2q);
   1839     Clobber(rs_r2q);
   1840     LockTemp(rs_r2q);
   1841 
   1842     RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r2q, INVALID_SREG, INVALID_SREG};
   1843 
    1844     // Use H. S. Warren's Hacker's Delight, Chapter 10, and
    1845     // T. Granlund and P. L. Montgomery's "Division by Invariant Integers using Multiplication".
   1846     int64_t magic;
   1847     int shift;
   1848     CalculateMagicAndShift(imm, magic, shift, true /* is_long */);
   1849 
    1850     /*
    1851      * For imm >= 2,
    1852      *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n > 0
    1853      *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n < 0.
    1854      * For imm <= -2,
    1855      *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n > 0
    1856      *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n < 0.
    1857      * We implement this algorithm as follows:
    1858      * 1. Multiply the magic number M by the numerator n; the high 64 bits land in RDX.
    1859      * 2. If imm > 0 and M < 0, add the numerator to RDX;
    1860      *    if imm < 0 and M > 0, subtract the numerator from RDX.
    1861      * 3. If S != 0, arithmetic-shift RDX right by S bits.
    1862      * 4. Add 1 to RDX if RDX < 0.
    1863      * 5. RDX now holds the quotient.
    1864      */
   1865 
   1866     // Numerator into RAX.
   1867     RegStorage numerator_reg;
   1868     if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
   1869       // We will need the value later.
   1870       rl_src = LoadValueWide(rl_src, kCoreReg);
   1871       numerator_reg = rl_src.reg;
   1872       OpRegCopyWide(rs_r0q, numerator_reg);
   1873     } else {
   1874       // Only need this once.  Just put it into RAX.
   1875       LoadValueDirectWideFixed(rl_src, rs_r0q);
   1876     }
   1877 
   1878     // RDX = magic.
   1879     LoadConstantWide(rs_r2q, magic);
   1880 
    1881     // RDX:RAX = magic * numerator.
   1882     NewLIR1(kX86Imul64DaR, rs_r2q.GetReg());
   1883 
   1884     if (imm > 0 && magic < 0) {
   1885       // Add numerator to RDX.
   1886       DCHECK(numerator_reg.Valid());
   1887       OpRegReg(kOpAdd, rs_r2q, numerator_reg);
   1888     } else if (imm < 0 && magic > 0) {
   1889       DCHECK(numerator_reg.Valid());
   1890       OpRegReg(kOpSub, rs_r2q, numerator_reg);
   1891     }
   1892 
   1893     // Do we need the shift?
   1894     if (shift != 0) {
   1895       // Shift RDX by 'shift' bits.
   1896       OpRegImm(kOpAsr, rs_r2q, shift);
   1897     }
   1898 
   1899     // Move RDX to RAX.
   1900     OpRegCopyWide(rs_r0q, rs_r2q);
   1901 
   1902     // Move sign bit to bit 0, zeroing the rest.
   1903     OpRegImm(kOpLsr, rs_r2q, 63);
   1904 
   1905     // RDX = RDX + RAX.
   1906     OpRegReg(kOpAdd, rs_r2q, rs_r0q);
   1907 
   1908     // Quotient is in RDX.
   1909     if (!is_div) {
   1910       // We need to compute the remainder.
    1911       // Remainder is numerator - (quotient * imm).
   1912       DCHECK(numerator_reg.Valid());
   1913       OpRegCopyWide(rs_r0q, numerator_reg);
   1914 
   1915       // Imul doesn't support 64-bit imms.
   1916       if (imm > std::numeric_limits<int32_t>::max() ||
   1917           imm < std::numeric_limits<int32_t>::min()) {
   1918         RegStorage rs_temp = AllocTempWide();
   1919         LoadConstantWide(rs_temp, imm);
   1920 
    1921         // RDX = quotient * imm.
   1922         NewLIR2(kX86Imul64RR, rs_r2q.GetReg(), rs_temp.GetReg());
   1923 
   1924         FreeTemp(rs_temp);
   1925       } else {
    1926         // RDX = quotient * imm.
   1927         int short_imm = static_cast<int>(imm);
   1928         NewLIR3(kX86Imul64RRI, rs_r2q.GetReg(), rs_r2q.GetReg(), short_imm);
   1929       }
   1930 
    1931       // RAX -= RDX, leaving the remainder in RAX.
   1932       OpRegReg(kOpSub, rs_r0q, rs_r2q);
   1933 
   1934       // Store result.
   1935       OpRegCopyWide(rl_result.reg, rs_r0q);
   1936     } else {
   1937       // Store result.
   1938       OpRegCopyWide(rl_result.reg, rs_r2q);
   1939     }
   1940     StoreValueWide(rl_dest, rl_result);
   1941     FreeTemp(rs_r0q);
   1942     FreeTemp(rs_r2q);
   1943   }
   1944 }
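
/*
 * Minimal sketches of the two fast paths above, assuming __int128 support.
 * The constants in DivBy3 are the illustrative values CalculateMagicAndShift
 * would produce for imm = 3 (magic = 0x5555555555555556, shift = 0; this
 * magic is positive, so no corrective add or sub of the numerator is
 * needed). Helper names are not ART APIs.
 *
 *   #include <cstdint>
 *   static int64_t DivBy3(int64_t n) {
 *     const int64_t magic = 0x5555555555555556LL;
 *     __int128 product = static_cast<__int128>(magic) * n;  // imul: RDX:RAX
 *     int64_t q = static_cast<int64_t>(product >> 64);      // high half (RDX)
 *     q += static_cast<uint64_t>(q) >> 63;                  // +1 if estimate < 0
 *     return q;
 *   }
 *
 *   // Positive power-of-two divisor (1 << k); a negative divisor negates
 *   // the result afterwards, as in the code above.
 *   static int64_t DivByPow2(int64_t n, int k) {
 *     int64_t biased = n + ((int64_t{1} << k) - 1);  // add abs(imm) - 1
 *     int64_t rounded = (n >= 0) ? n : biased;       // test ; cmovpl
 *     return rounded >> k;                           // sar
 *   }
 */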
   1945 
   1946 void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
   1947                                RegLocation rl_src2, bool is_div) {
   1948   if (!cu_->target64) {
    1949     LOG(FATAL) << "Unexpected use of GenDivRemLong()";
   1950     return;
   1951   }
   1952 
   1953   if (rl_src2.is_const) {
   1954     DCHECK(rl_src2.wide);
   1955     int64_t imm = mir_graph_->ConstantValueWide(rl_src2);
   1956     GenDivRemLongLit(rl_dest, rl_src1, imm, is_div);
   1957     return;
   1958   }
   1959 
   1960   // We have to use fixed registers, so flush all the temps.
   1961   FlushAllRegs();
   1962   LockCallTemps();  // Prepare for explicit register usage.
   1963 
   1964   // Load LHS into RAX.
   1965   LoadValueDirectWideFixed(rl_src1, rs_r0q);
   1966 
   1967   // Load RHS into RCX.
   1968   LoadValueDirectWideFixed(rl_src2, rs_r1q);
   1969 
   1970   // Copy LHS sign bit into RDX.
   1971   NewLIR0(kx86Cqo64Da);
   1972 
   1973   // Handle division by zero case.
   1974   GenDivZeroCheckWide(rs_r1q);
   1975 
    1976   // Have to catch the 0x8000000000000000 / -1 case, or we will get an exception!
   1977   NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
   1978   LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
   1979 
   1980   // RHS is -1.
   1981   LoadConstantWide(rs_r6q, 0x8000000000000000);
   1982   NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
   1983   LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
   1984 
   1985   // In 0x8000000000000000/-1 case.
   1986   if (!is_div) {
    1987     // For DIV, RAX is already right. For REM, we need RDX to be 0.
   1988     NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
   1989   }
   1990   LIR* done = NewLIR1(kX86Jmp8, 0);
   1991 
   1992   // Expected case.
   1993   minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
   1994   minint_branch->target = minus_one_branch->target;
   1995   NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
   1996   done->target = NewLIR0(kPseudoTargetLabel);
   1997 
   1998   // Result is in RAX for div and RDX for rem.
   1999   RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
   2000   if (!is_div) {
   2001     rl_result.reg.SetReg(r2q);
   2002   }
   2003 
   2004   StoreValueWide(rl_dest, rl_result);
   2005 }
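
/*
 * A sketch of the corner case guarded above: Java requires
 * Long.MIN_VALUE / -1 == Long.MIN_VALUE (with remainder 0), while x86 idiv
 * raises a #DE fault on that input, so the generated code steps around the
 * idiv. The helper is illustrative.
 *
 *   #include <cstdint>
 *   static int64_t JavaDivLong(int64_t num, int64_t den) {  // den != 0
 *     if (den == -1 && num == INT64_MIN) {
 *       return INT64_MIN;  // idiv would fault here
 *     }
 *     return num / den;
 *   }
 */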
   2006 
   2007 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
   2008   rl_src = LoadValueWide(rl_src, kCoreReg);
   2009   RegLocation rl_result;
   2010   if (cu_->target64) {
   2011     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2012     OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
   2013   } else {
   2014     rl_result = ForceTempWide(rl_src);
   2015     OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow());    // rLow = -rLow
   2016     OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0);                   // rHigh = rHigh + CF
   2017     OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());  // rHigh = -rHigh
   2018   }
   2019   StoreValueWide(rl_dest, rl_result);
   2020 }
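
/*
 * A sketch of the pairwise negation above: "neg" of the low word leaves
 * CF == (low != 0); adding that carry into the high word before negating it
 * produces the two's complement of the full 64-bit value.
 *
 *   #include <cstdint>
 *   static void NegLongPair(uint32_t* lo, uint32_t* hi) {
 *     uint32_t carry = (*lo != 0) ? 1u : 0u;  // CF after "neg lo"
 *     *lo = 0u - *lo;
 *     *hi = 0u - (*hi + carry);               // adc hi, 0 ; neg hi
 *   }
 */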
   2021 
   2022 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) {
   2023   DCHECK_EQ(kX86, cu_->instruction_set);
   2024   X86OpCode opcode = kX86Bkpt;
   2025   switch (op) {
   2026   case kOpCmp: opcode = kX86Cmp32RT;  break;
   2027   case kOpMov: opcode = kX86Mov32RT;  break;
   2028   default:
   2029     LOG(FATAL) << "Bad opcode: " << op;
   2030     break;
   2031   }
   2032   NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
   2033 }
   2034 
   2035 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
   2036   DCHECK_EQ(kX86_64, cu_->instruction_set);
   2037   X86OpCode opcode = kX86Bkpt;
   2038   if (cu_->target64 && r_dest.Is64BitSolo()) {
   2039     switch (op) {
   2040     case kOpCmp: opcode = kX86Cmp64RT;  break;
   2041     case kOpMov: opcode = kX86Mov64RT;  break;
   2042     default:
   2043       LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
   2044       break;
   2045     }
   2046   } else {
   2047     switch (op) {
   2048     case kOpCmp: opcode = kX86Cmp32RT;  break;
   2049     case kOpMov: opcode = kX86Mov32RT;  break;
   2050     default:
   2051       LOG(FATAL) << "Bad opcode: " << op;
   2052       break;
   2053     }
   2054   }
   2055   NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
   2056 }
   2057 
   2058 /*
   2059  * Generate array load
   2060  */
   2061 void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
   2062                              RegLocation rl_index, RegLocation rl_dest, int scale) {
   2063   RegisterClass reg_class = RegClassBySize(size);
   2064   int len_offset = mirror::Array::LengthOffset().Int32Value();
   2065   RegLocation rl_result;
   2066   rl_array = LoadValue(rl_array, kRefReg);
   2067 
   2068   int data_offset;
   2069   if (size == k64 || size == kDouble) {
   2070     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   2071   } else {
   2072     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   2073   }
   2074 
   2075   bool constant_index = rl_index.is_const;
   2076   int32_t constant_index_value = 0;
   2077   if (!constant_index) {
   2078     rl_index = LoadValue(rl_index, kCoreReg);
   2079   } else {
   2080     constant_index_value = mir_graph_->ConstantValue(rl_index);
   2081     // If index is constant, just fold it into the data offset
   2082     data_offset += constant_index_value << scale;
    2083     // Treat as non-array below.
   2084     rl_index.reg = RegStorage::InvalidReg();
   2085   }
   2086 
   2087   /* null object? */
   2088   GenNullCheck(rl_array.reg, opt_flags);
   2089 
   2090   if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
   2091     if (constant_index) {
   2092       GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
   2093     } else {
   2094       GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
   2095     }
   2096   }
   2097   rl_result = EvalLoc(rl_dest, reg_class, true);
   2098   LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
   2099   if ((size == k64) || (size == kDouble)) {
   2100     StoreValueWide(rl_dest, rl_result);
   2101   } else {
   2102     StoreValue(rl_dest, rl_result);
   2103   }
   2104 }
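
/*
 * A sketch of the addressing arithmetic: with a constant index the scaled
 * index folds into the displacement, so the load needs no index register.
 * The helper is illustrative.
 *
 *   #include <cstdint>
 *   static uintptr_t ElementAddress(uintptr_t array, int32_t index,
 *                                   int scale, int32_t data_offset) {
 *     // Variable index: [array + (index << scale) + data_offset].
 *     // Constant index: [array + data_offset'], where
 *     // data_offset' = data_offset + (index << scale).
 *     return array + (static_cast<uintptr_t>(index) << scale) + data_offset;
 *   }
 */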
   2105 
   2106 /*
   2107  * Generate array store
   2108  *
   2109  */
   2110 void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
   2111                              RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
   2112   RegisterClass reg_class = RegClassBySize(size);
   2113   int len_offset = mirror::Array::LengthOffset().Int32Value();
   2114   int data_offset;
   2115 
   2116   if (size == k64 || size == kDouble) {
   2117     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   2118   } else {
   2119     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   2120   }
   2121 
   2122   rl_array = LoadValue(rl_array, kRefReg);
   2123   bool constant_index = rl_index.is_const;
   2124   int32_t constant_index_value = 0;
   2125   if (!constant_index) {
   2126     rl_index = LoadValue(rl_index, kCoreReg);
   2127   } else {
   2128     // If index is constant, just fold it into the data offset
   2129     constant_index_value = mir_graph_->ConstantValue(rl_index);
   2130     data_offset += constant_index_value << scale;
    2131     // Treat as non-array below.
   2132     rl_index.reg = RegStorage::InvalidReg();
   2133   }
   2134 
   2135   /* null object? */
   2136   GenNullCheck(rl_array.reg, opt_flags);
   2137 
   2138   if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
   2139     if (constant_index) {
   2140       GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
   2141     } else {
   2142       GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
   2143     }
   2144   }
   2145   if ((size == k64) || (size == kDouble)) {
   2146     rl_src = LoadValueWide(rl_src, reg_class);
   2147   } else {
   2148     rl_src = LoadValue(rl_src, reg_class);
   2149   }
   2150   // If the src reg can't be byte accessed, move it to a temp first.
   2151   if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
   2152     RegStorage temp = AllocTemp();
   2153     OpRegCopy(temp, rl_src.reg);
   2154     StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size);
   2155   } else {
   2156     StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size);
   2157   }
   2158   if (card_mark) {
    2159     // Free rl_index if it's a temp. Ensures there are 2 free regs for the card mark.
   2160     if (!constant_index) {
   2161       FreeTemp(rl_index.reg);
   2162     }
   2163     MarkGCCard(rl_src.reg, rl_array.reg);
   2164   }
   2165 }
   2166 
   2167 RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
   2168                                           RegLocation rl_src, int shift_amount) {
   2169   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2170   if (cu_->target64) {
   2171     OpKind op = static_cast<OpKind>(0);    /* Make gcc happy */
   2172     switch (opcode) {
   2173       case Instruction::SHL_LONG:
   2174       case Instruction::SHL_LONG_2ADDR:
   2175         op = kOpLsl;
   2176         break;
   2177       case Instruction::SHR_LONG:
   2178       case Instruction::SHR_LONG_2ADDR:
   2179         op = kOpAsr;
   2180         break;
   2181       case Instruction::USHR_LONG:
   2182       case Instruction::USHR_LONG_2ADDR:
   2183         op = kOpLsr;
   2184         break;
   2185       default:
   2186         LOG(FATAL) << "Unexpected case";
   2187     }
   2188     OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
   2189   } else {
   2190     switch (opcode) {
   2191       case Instruction::SHL_LONG:
   2192       case Instruction::SHL_LONG_2ADDR:
   2193         DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
   2194         if (shift_amount == 32) {
   2195           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
   2196           LoadConstant(rl_result.reg.GetLow(), 0);
   2197         } else if (shift_amount > 31) {
   2198           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
   2199           NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
   2200           LoadConstant(rl_result.reg.GetLow(), 0);
   2201         } else {
   2202           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
   2203           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2204           NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
   2205                   shift_amount);
   2206           NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
   2207         }
   2208         break;
   2209       case Instruction::SHR_LONG:
   2210       case Instruction::SHR_LONG_2ADDR:
   2211         if (shift_amount == 32) {
   2212           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
   2213           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2214           NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
   2215         } else if (shift_amount > 31) {
   2216           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
   2217           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2218           NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
   2219           NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
   2220         } else {
   2221           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
   2222           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2223           NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
   2224                   shift_amount);
   2225           NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
   2226         }
   2227         break;
   2228       case Instruction::USHR_LONG:
   2229       case Instruction::USHR_LONG_2ADDR:
   2230         if (shift_amount == 32) {
   2231           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
   2232           LoadConstant(rl_result.reg.GetHigh(), 0);
   2233         } else if (shift_amount > 31) {
   2234           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
   2235           NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
   2236           LoadConstant(rl_result.reg.GetHigh(), 0);
   2237         } else {
   2238           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
   2239           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
   2240           NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
   2241                   shift_amount);
   2242           NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
   2243         }
   2244         break;
   2245       default:
   2246         LOG(FATAL) << "Unexpected case";
   2247     }
   2248   }
   2249   return rl_result;
   2250 }
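
/*
 * A sketch of the three SHL ranges above for a 64-bit value held in two
 * 32-bit halves; SHR and USHR are mirrored with sar/shr and shrd. The shift
 * is assumed already reduced to 1..63 (0 is handled by the caller).
 *
 *   #include <cstdint>
 *   static uint64_t ShlLongPair(uint32_t lo, uint32_t hi, int n) {
 *     uint32_t out_lo, out_hi;
 *     if (n == 32) {
 *       out_hi = lo; out_lo = 0;              // mov ; zero
 *     } else if (n > 31) {
 *       out_hi = lo << (n - 32); out_lo = 0;  // mov ; sal ; zero
 *     } else {
 *       out_hi = (hi << n) | (lo >> (32 - n));  // shld hi, lo, n
 *       out_lo = lo << n;                       // sal lo, n
 *     }
 *     return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
 *   }
 */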
   2251 
   2252 void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
   2253                                    RegLocation rl_src, RegLocation rl_shift) {
   2254   // Per spec, we only care about low 6 bits of shift amount.
   2255   int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
   2256   if (shift_amount == 0) {
   2257     rl_src = LoadValueWide(rl_src, kCoreReg);
   2258     StoreValueWide(rl_dest, rl_src);
   2259     return;
   2260   } else if (shift_amount == 1 &&
    2261             (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
   2262     // Need to handle this here to avoid calling StoreValueWide twice.
   2263     GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src);
   2264     return;
   2265   }
   2266   if (BadOverlap(rl_src, rl_dest)) {
   2267     GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
   2268     return;
   2269   }
   2270   rl_src = LoadValueWide(rl_src, kCoreReg);
   2271   RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount);
   2272   StoreValueWide(rl_dest, rl_result);
   2273 }
   2274 
   2275 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
   2276                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
   2277   bool isConstSuccess = false;
   2278   switch (opcode) {
   2279     case Instruction::ADD_LONG:
   2280     case Instruction::AND_LONG:
   2281     case Instruction::OR_LONG:
   2282     case Instruction::XOR_LONG:
   2283       if (rl_src2.is_const) {
   2284         isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
   2285       } else {
   2286         DCHECK(rl_src1.is_const);
   2287         isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
   2288       }
   2289       break;
   2290     case Instruction::SUB_LONG:
   2291     case Instruction::SUB_LONG_2ADDR:
   2292       if (rl_src2.is_const) {
   2293         isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
   2294       } else {
   2295         GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
   2296         isConstSuccess = true;
   2297       }
   2298       break;
   2299     case Instruction::ADD_LONG_2ADDR:
   2300     case Instruction::OR_LONG_2ADDR:
   2301     case Instruction::XOR_LONG_2ADDR:
   2302     case Instruction::AND_LONG_2ADDR:
   2303       if (rl_src2.is_const) {
   2304         if (GenerateTwoOperandInstructions()) {
   2305           isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode);
   2306         } else {
   2307           isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
   2308         }
   2309       } else {
   2310         DCHECK(rl_src1.is_const);
   2311         isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
   2312       }
   2313       break;
   2314     default:
   2315       isConstSuccess = false;
   2316       break;
   2317   }
   2318 
   2319   if (!isConstSuccess) {
   2320     // Default - bail to non-const handler.
   2321     GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
   2322   }
   2323 }
   2324 
   2325 bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
   2326   switch (op) {
   2327     case Instruction::AND_LONG_2ADDR:
   2328     case Instruction::AND_LONG:
   2329       return value == -1;
   2330     case Instruction::OR_LONG:
   2331     case Instruction::OR_LONG_2ADDR:
   2332     case Instruction::XOR_LONG:
   2333     case Instruction::XOR_LONG_2ADDR:
   2334       return value == 0;
   2335     default:
   2336       return false;
   2337   }
   2338 }
   2339 
   2340 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
   2341                                 bool is_high_op) {
   2342   bool rhs_in_mem = rhs.location != kLocPhysReg;
   2343   bool dest_in_mem = dest.location != kLocPhysReg;
   2344   bool is64Bit = cu_->target64;
   2345   DCHECK(!rhs_in_mem || !dest_in_mem);
   2346   switch (op) {
   2347     case Instruction::ADD_LONG:
   2348     case Instruction::ADD_LONG_2ADDR:
   2349       if (dest_in_mem) {
   2350         return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR;
   2351       } else if (rhs_in_mem) {
   2352         return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM;
   2353       }
   2354       return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR;
   2355     case Instruction::SUB_LONG:
   2356     case Instruction::SUB_LONG_2ADDR:
   2357       if (dest_in_mem) {
   2358         return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR;
   2359       } else if (rhs_in_mem) {
   2360         return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM;
   2361       }
   2362       return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR;
   2363     case Instruction::AND_LONG_2ADDR:
   2364     case Instruction::AND_LONG:
   2365       if (dest_in_mem) {
   2366         return is64Bit ? kX86And64MR : kX86And32MR;
   2367       }
   2368       if (is64Bit) {
   2369         return rhs_in_mem ? kX86And64RM : kX86And64RR;
   2370       }
   2371       return rhs_in_mem ? kX86And32RM : kX86And32RR;
   2372     case Instruction::OR_LONG:
   2373     case Instruction::OR_LONG_2ADDR:
   2374       if (dest_in_mem) {
   2375         return is64Bit ? kX86Or64MR : kX86Or32MR;
   2376       }
   2377       if (is64Bit) {
   2378         return rhs_in_mem ? kX86Or64RM : kX86Or64RR;
   2379       }
   2380       return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
   2381     case Instruction::XOR_LONG:
   2382     case Instruction::XOR_LONG_2ADDR:
   2383       if (dest_in_mem) {
   2384         return is64Bit ? kX86Xor64MR : kX86Xor32MR;
   2385       }
   2386       if (is64Bit) {
   2387         return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR;
   2388       }
   2389       return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
   2390     default:
   2391       LOG(FATAL) << "Unexpected opcode: " << op;
   2392       return kX86Add32RR;
   2393   }
   2394 }
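
/*
 * A sketch of why the high-word opcode differs on 32-bit: a long add over a
 * register pair is an add of the low words followed by an adc of the high
 * words, which folds in the carry out of the low half (sub/sbb mirror this).
 *
 *   #include <cstdint>
 *   static uint64_t AddLongPair(uint64_t a, uint64_t b) {
 *     uint32_t lo = static_cast<uint32_t>(a) + static_cast<uint32_t>(b);  // add
 *     uint32_t carry = (lo < static_cast<uint32_t>(a)) ? 1u : 0u;         // CF
 *     uint32_t hi = static_cast<uint32_t>(a >> 32) +
 *                   static_cast<uint32_t>(b >> 32) + carry;               // adc
 *     return (static_cast<uint64_t>(hi) << 32) | lo;
 *   }
 */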
   2395 
   2396 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
   2397                                 int32_t value) {
   2398   bool in_mem = loc.location != kLocPhysReg;
   2399   bool is64Bit = cu_->target64;
   2400   bool byte_imm = IS_SIMM8(value);
   2401   DCHECK(in_mem || !loc.reg.IsFloat());
   2402   switch (op) {
   2403     case Instruction::ADD_LONG:
   2404     case Instruction::ADD_LONG_2ADDR:
   2405       if (byte_imm) {
   2406         if (in_mem) {
   2407           return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
   2408         }
   2409         return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
   2410       }
   2411       if (in_mem) {
   2412         return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI;
   2413       }
   2414       return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI;
   2415     case Instruction::SUB_LONG:
   2416     case Instruction::SUB_LONG_2ADDR:
   2417       if (byte_imm) {
   2418         if (in_mem) {
   2419           return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
   2420         }
   2421         return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
   2422       }
   2423       if (in_mem) {
   2424         return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI;
   2425       }
   2426       return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI;
   2427     case Instruction::AND_LONG_2ADDR:
   2428     case Instruction::AND_LONG:
   2429       if (byte_imm) {
   2430         if (is64Bit) {
   2431           return in_mem ? kX86And64MI8 : kX86And64RI8;
   2432         }
   2433         return in_mem ? kX86And32MI8 : kX86And32RI8;
   2434       }
   2435       if (is64Bit) {
   2436         return in_mem ? kX86And64MI : kX86And64RI;
   2437       }
   2438       return in_mem ? kX86And32MI : kX86And32RI;
   2439     case Instruction::OR_LONG:
   2440     case Instruction::OR_LONG_2ADDR:
   2441       if (byte_imm) {
   2442         if (is64Bit) {
   2443           return in_mem ? kX86Or64MI8 : kX86Or64RI8;
   2444         }
   2445         return in_mem ? kX86Or32MI8 : kX86Or32RI8;
   2446       }
   2447       if (is64Bit) {
   2448         return in_mem ? kX86Or64MI : kX86Or64RI;
   2449       }
   2450       return in_mem ? kX86Or32MI : kX86Or32RI;
   2451     case Instruction::XOR_LONG:
   2452     case Instruction::XOR_LONG_2ADDR:
   2453       if (byte_imm) {
   2454         if (is64Bit) {
   2455           return in_mem ? kX86Xor64MI8 : kX86Xor64RI8;
   2456         }
   2457         return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
   2458       }
   2459       if (is64Bit) {
   2460         return in_mem ? kX86Xor64MI : kX86Xor64RI;
   2461       }
   2462       return in_mem ? kX86Xor32MI : kX86Xor32RI;
   2463     default:
   2464       LOG(FATAL) << "Unexpected opcode: " << op;
   2465       return kX86Add32MI;
   2466   }
   2467 }
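
/*
 * A sketch of the byte-immediate predicate behind the *I8 choices above:
 * x86 encodes an immediate that fits in a sign-extended byte in one byte,
 * giving a shorter instruction.
 *
 *   #include <cstdint>
 *   static bool IsSimm8(int32_t v) {
 *     return v >= -128 && v <= 127;
 *   }
 */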
   2468 
   2469 bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
   2470   DCHECK(rl_src.is_const);
   2471   int64_t val = mir_graph_->ConstantValueWide(rl_src);
   2472 
   2473   if (cu_->target64) {
    2474     // We can use the immediate form only if the value fits in 32 bits.
   2475     if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
   2476       return false;
   2477     }
   2478 
   2479     rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
   2480 
   2481     if ((rl_dest.location == kLocDalvikFrame) ||
   2482         (rl_dest.location == kLocCompilerTemp)) {
   2483       int r_base = rs_rX86_SP.GetReg();
   2484       int displacement = SRegOffset(rl_dest.s_reg_low);
   2485 
   2486       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   2487       X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
   2488       LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
   2489       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   2490                               true /* is_load */, true /* is64bit */);
   2491       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   2492                               false /* is_load */, true /* is64bit */);
   2493       return true;
   2494     }
   2495 
   2496     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2497     DCHECK_EQ(rl_result.location, kLocPhysReg);
   2498     DCHECK(!rl_result.reg.IsFloat());
   2499 
   2500     X86OpCode x86op = GetOpcode(op, rl_result, false, val);
   2501     NewLIR2(x86op, rl_result.reg.GetReg(), val);
   2502 
   2503     StoreValueWide(rl_dest, rl_result);
   2504     return true;
   2505   }
   2506 
   2507   int32_t val_lo = Low32Bits(val);
   2508   int32_t val_hi = High32Bits(val);
   2509   rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
   2510 
   2511   // Can we just do this into memory?
   2512   if ((rl_dest.location == kLocDalvikFrame) ||
   2513       (rl_dest.location == kLocCompilerTemp)) {
   2514     int r_base = rs_rX86_SP.GetReg();
   2515     int displacement = SRegOffset(rl_dest.s_reg_low);
   2516 
   2517     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   2518     if (!IsNoOp(op, val_lo)) {
   2519       X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
   2520       LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
   2521       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   2522                               true /* is_load */, true /* is64bit */);
   2523       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
   2524                               false /* is_load */, true /* is64bit */);
   2525     }
   2526     if (!IsNoOp(op, val_hi)) {
   2527       X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
   2528       LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi);
   2529       AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
   2530                                 true /* is_load */, true /* is64bit */);
   2531       AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
   2532                                 false /* is_load */, true /* is64bit */);
   2533     }
   2534     return true;
   2535   }
   2536 
   2537   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   2538   DCHECK_EQ(rl_result.location, kLocPhysReg);
   2539   DCHECK(!rl_result.reg.IsFloat());
   2540 
   2541   if (!IsNoOp(op, val_lo)) {
   2542     X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
   2543     NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
   2544   }
   2545   if (!IsNoOp(op, val_hi)) {
   2546     X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
   2547     NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
   2548   }
   2549   StoreValueWide(rl_dest, rl_result);
   2550   return true;
   2551 }
   2552 
   2553 bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
   2554                                 RegLocation rl_src2, Instruction::Code op) {
   2555   DCHECK(rl_src2.is_const);
   2556   int64_t val = mir_graph_->ConstantValueWide(rl_src2);
   2557 
   2558   if (cu_->target64) {
    2559     // We can use the immediate form only if the value fits in 32 bits.
   2560     if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
   2561       return false;
   2562     }
   2563     if (rl_dest.location == kLocPhysReg &&
   2564         rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
   2565       X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
   2566       OpRegCopy(rl_dest.reg, rl_src1.reg);
   2567       NewLIR2(x86op, rl_dest.reg.GetReg(), val);
   2568       StoreFinalValueWide(rl_dest, rl_dest);
   2569       return true;
   2570     }
   2571 
   2572     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    2573     // We need the value in a temporary, since it is modified in place.
   2574     RegLocation rl_result = ForceTempWide(rl_src1);
   2575 
   2576     X86OpCode x86op = GetOpcode(op, rl_result, false, val);
   2577     NewLIR2(x86op, rl_result.reg.GetReg(), val);
   2578 
   2579     StoreFinalValueWide(rl_dest, rl_result);
   2580     return true;
   2581   }
   2582 
   2583   int32_t val_lo = Low32Bits(val);
   2584   int32_t val_hi = High32Bits(val);
   2585   rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
   2586   rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
   2587 
   2588   // Can we do this directly into the destination registers?
   2589   if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
   2590       rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() &&
   2591       rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) {
   2592     if (!IsNoOp(op, val_lo)) {
   2593       X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
   2594       NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo);
   2595     }
   2596     if (!IsNoOp(op, val_hi)) {
   2597       X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
   2598       NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi);
   2599     }
   2600 
   2601     StoreFinalValueWide(rl_dest, rl_dest);
   2602     return true;
   2603   }
   2604 
   2605   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   2606   DCHECK_EQ(rl_src1.location, kLocPhysReg);
   2607 
    2608   // We need the value in a temporary, since its halves are modified in place.
   2609   RegLocation rl_result = ForceTempWide(rl_src1);
   2610   if (!IsNoOp(op, val_lo)) {
   2611     X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
   2612     NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
   2613   }
   2614   if (!IsNoOp(op, val_hi)) {
   2615     X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
   2616     NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
   2617   }
   2618 
   2619   StoreFinalValueWide(rl_dest, rl_result);
   2620   return true;
   2621 }
   2622 
   2623 // For final classes there are no sub-classes to check and so we can answer the instance-of
   2624 // question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
   2625 void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
   2626                                     RegLocation rl_dest, RegLocation rl_src) {
   2627   RegLocation object = LoadValue(rl_src, kRefReg);
   2628   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2629   RegStorage result_reg = rl_result.reg;
   2630 
    2631   // For 32-bit, SETcc only works with byte-addressable registers (EAX..EBX).
   2632   RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg;
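           // Reallocate if the result aliases the object (it is zeroed before the
           // null check below) or is not byte-addressable for SETcc.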
   2633   if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) {
   2634     result_reg = AllocateByteRegister();
   2635   }
   2636 
   2637   // Assume that there is no match.
   2638   LoadConstant(result_reg, 0);
   2639   LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
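           // A null reference is never an instance of anything; the 0 stands.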
   2640 
    2641   // We will use this register to compare to memory below.
    2642   // References are 32-bit in memory and 64-bit in registers (in 64-bit mode).
    2643   // For this reason, force allocation of a 32-bit register, so that the
    2644   // compare to memory is done as a 32-bit comparison.
    2645   // The LoadRefDisp(s) below will work normally, even in 64-bit mode.
   2646   RegStorage check_class = AllocTemp();
   2647 
   2648   // If Method* is already in a register, we can save a copy.
   2649   RegLocation rl_method = mir_graph_->GetMethodLoc();
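           // Byte offset of entry type_idx in the dex cache resolved-types array.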
   2650   int32_t offset_of_type = mirror::Array::DataOffset(
   2651       sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
   2652       (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
   2653 
   2654   if (rl_method.location == kLocPhysReg) {
   2655     if (use_declaring_class) {
   2656       LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
   2657                   check_class, kNotVolatile);
   2658     } else {
   2659       LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
   2660                   check_class, kNotVolatile);
   2661       LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
   2662     }
   2663   } else {
   2664     LoadCurrMethodDirect(check_class);
   2665     if (use_declaring_class) {
   2666       LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
   2667                   check_class, kNotVolatile);
   2668     } else {
   2669       LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
   2670                   check_class, kNotVolatile);
   2671       LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
   2672     }
   2673   }
   2674 
   2675   // Compare the computed class to the class in the object.
   2676   DCHECK_EQ(object.location, kLocPhysReg);
   2677   OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value());
   2678 
   2679   // Set the low byte of the result to 0 or 1 from the compare condition code.
   2680   NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq);
   2681 
   2682   LIR* target = NewLIR0(kPseudoTargetLabel);
   2683   null_branchover->target = target;
   2684   FreeTemp(check_class);
   2685   if (IsTemp(result_reg)) {
   2686     OpRegCopy(rl_result.reg, result_reg);
   2687     FreeTemp(result_reg);
   2688   }
   2689   StoreValue(rl_dest, rl_result);
   2690 }
   2691 
   2692 void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
   2693                             RegLocation rl_lhs, RegLocation rl_rhs) {
   2694   OpKind op = kOpBkpt;
   2695   bool is_div_rem = false;
   2696   bool unary = false;
   2697   bool shift_op = false;
   2698   bool is_two_addr = false;
   2699   RegLocation rl_result;
   2700   switch (opcode) {
   2701     case Instruction::NEG_INT:
   2702       op = kOpNeg;
   2703       unary = true;
   2704       break;
   2705     case Instruction::NOT_INT:
   2706       op = kOpMvn;
   2707       unary = true;
   2708       break;
   2709     case Instruction::ADD_INT_2ADDR:
   2710       is_two_addr = true;
   2711       // Fallthrough
   2712     case Instruction::ADD_INT:
   2713       op = kOpAdd;
   2714       break;
   2715     case Instruction::SUB_INT_2ADDR:
   2716       is_two_addr = true;
   2717       // Fallthrough
   2718     case Instruction::SUB_INT:
   2719       op = kOpSub;
   2720       break;
   2721     case Instruction::MUL_INT_2ADDR:
   2722       is_two_addr = true;
   2723       // Fallthrough
   2724     case Instruction::MUL_INT:
   2725       op = kOpMul;
   2726       break;
   2727     case Instruction::DIV_INT_2ADDR:
   2728       is_two_addr = true;
   2729       // Fallthrough
   2730     case Instruction::DIV_INT:
   2731       op = kOpDiv;
   2732       is_div_rem = true;
   2733       break;
   2734     /* NOTE: returns in kArg1 */
   2735     case Instruction::REM_INT_2ADDR:
   2736       is_two_addr = true;
   2737       // Fallthrough
   2738     case Instruction::REM_INT:
   2739       op = kOpRem;
   2740       is_div_rem = true;
   2741       break;
   2742     case Instruction::AND_INT_2ADDR:
   2743       is_two_addr = true;
   2744       // Fallthrough
   2745     case Instruction::AND_INT:
   2746       op = kOpAnd;
   2747       break;
   2748     case Instruction::OR_INT_2ADDR:
   2749       is_two_addr = true;
   2750       // Fallthrough
   2751     case Instruction::OR_INT:
   2752       op = kOpOr;
   2753       break;
   2754     case Instruction::XOR_INT_2ADDR:
   2755       is_two_addr = true;
   2756       // Fallthrough
   2757     case Instruction::XOR_INT:
   2758       op = kOpXor;
   2759       break;
   2760     case Instruction::SHL_INT_2ADDR:
   2761       is_two_addr = true;
   2762       // Fallthrough
   2763     case Instruction::SHL_INT:
   2764       shift_op = true;
   2765       op = kOpLsl;
   2766       break;
   2767     case Instruction::SHR_INT_2ADDR:
   2768       is_two_addr = true;
   2769       // Fallthrough
   2770     case Instruction::SHR_INT:
   2771       shift_op = true;
   2772       op = kOpAsr;
   2773       break;
   2774     case Instruction::USHR_INT_2ADDR:
   2775       is_two_addr = true;
   2776       // Fallthrough
   2777     case Instruction::USHR_INT:
   2778       shift_op = true;
   2779       op = kOpLsr;
   2780       break;
   2781     default:
   2782       LOG(FATAL) << "Invalid word arith op: " << opcode;
   2783   }
   2784 
   2785   // Can we convert to a two address instruction?
   2786   if (!is_two_addr &&
   2787         (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
   2788          mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
   2789     is_two_addr = true;
   2790   }
   2791 
   2792   if (!GenerateTwoOperandInstructions()) {
   2793     is_two_addr = false;
   2794   }
   2795 
   2796   // Get the div/rem stuff out of the way.
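           // x86 idiv leaves the quotient in EAX and the remainder in EDX;
           // GenDivRem handles the fixed-register setup and the divide-by-zero check.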
   2797   if (is_div_rem) {
   2798     rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true);
   2799     StoreValue(rl_dest, rl_result);
   2800     return;
   2801   }
   2802 
   2803   // If we generate any memory access below, it will reference a dalvik reg.
   2804   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   2805 
   2806   if (unary) {
   2807     rl_lhs = LoadValue(rl_lhs, kCoreReg);
   2808     rl_result = UpdateLocTyped(rl_dest, kCoreReg);
   2809     rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2810     OpRegReg(op, rl_result.reg, rl_lhs.reg);
   2811   } else {
   2812     if (shift_op) {
    2813       // x86 masks shift counts in hardware (as Dalvik requires); the count must be in ECX (CL).
   2814       RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
   2815       LoadValueDirectFixed(rl_rhs, t_reg);
   2816       if (is_two_addr) {
   2817         // Can we do this directly into memory?
   2818         rl_rhs = LoadValue(rl_rhs, kCoreReg);
   2819         rl_result = UpdateLocTyped(rl_dest, kCoreReg);
   2820         if (rl_result.location != kLocPhysReg) {
   2821           // Okay, we can do this into memory
   2822           OpMemReg(op, rl_result, t_reg.GetReg());
   2823           FreeTemp(t_reg);
   2824           return;
   2825         } else if (!rl_result.reg.IsFloat()) {
   2826           // Can do this directly into the result register
   2827           OpRegReg(op, rl_result.reg, t_reg);
   2828           FreeTemp(t_reg);
   2829           StoreFinalValue(rl_dest, rl_result);
   2830           return;
   2831         }
   2832       }
    2833       // Three-address form, or we can't do it directly.
   2834       rl_lhs = LoadValue(rl_lhs, kCoreReg);
   2835       rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2836       OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg);
   2837       FreeTemp(t_reg);
   2838     } else {
   2839       // Multiply is 3 operand only (sort of).
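                 // There is no imul form with a memory destination, so multiplies
                 // cannot use the in-memory two-address path below.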
   2840       if (is_two_addr && op != kOpMul) {
   2841         // Can we do this directly into memory?
   2842         rl_result = UpdateLocTyped(rl_dest, kCoreReg);
   2843         if (rl_result.location == kLocPhysReg) {
   2844           // Ensure res is in a core reg
   2845           rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2846           // Can we do this from memory directly?
   2847           rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
   2848           if (rl_rhs.location != kLocPhysReg) {
   2849             OpRegMem(op, rl_result.reg, rl_rhs);
   2850             StoreFinalValue(rl_dest, rl_result);
   2851             return;
   2852           } else if (!rl_rhs.reg.IsFloat()) {
   2853             OpRegReg(op, rl_result.reg, rl_rhs.reg);
   2854             StoreFinalValue(rl_dest, rl_result);
   2855             return;
   2856           }
   2857         }
   2858         rl_rhs = LoadValue(rl_rhs, kCoreReg);
    2859         // rl_rhs and rl_dest may be the same VR; in that case rl_dest
    2860         // is in a register after LoadValue, but rl_result has not been
    2861         // updated yet, so refresh it here.
   2862         rl_result = UpdateLocTyped(rl_dest, kCoreReg);
   2863         if (rl_result.location != kLocPhysReg) {
   2864           // Okay, we can do this into memory.
   2865           OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
   2866           return;
   2867         } else if (!rl_result.reg.IsFloat()) {
   2868           // Can do this directly into the result register.
   2869           OpRegReg(op, rl_result.reg, rl_rhs.reg);
   2870           StoreFinalValue(rl_dest, rl_result);
   2871           return;
   2872         } else {
   2873           rl_lhs = LoadValue(rl_lhs, kCoreReg);
   2874           rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2875           OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
   2876         }
   2877       } else {
   2878         // Try to use reg/memory instructions.
   2879         rl_lhs = UpdateLocTyped(rl_lhs, kCoreReg);
   2880         rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
   2881         // We can't optimize with FP registers.
   2882         if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
    2883           // An operand is in an FP register, so fall back to the standard case.
   2884           rl_lhs = LoadValue(rl_lhs, kCoreReg);
   2885           rl_rhs = LoadValue(rl_rhs, kCoreReg);
   2886           rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2887           OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
   2888         } else {
   2889           // We can optimize by moving to result and using memory operands.
   2890           if (rl_rhs.location != kLocPhysReg) {
   2891             // Force LHS into result.
    2892             // We must be careful with evaluation order here:
    2893             // if rl_dest and rl_lhs point to the same VR, load first;
    2894             // if they differ, find a register for dest first.
   2895             if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
   2896                 mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
   2897               rl_lhs = LoadValue(rl_lhs, kCoreReg);
   2898               rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2899               // No-op if these are the same.
   2900               OpRegCopy(rl_result.reg, rl_lhs.reg);
   2901             } else {
   2902               rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2903               LoadValueDirect(rl_lhs, rl_result.reg);
   2904             }
   2905             OpRegMem(op, rl_result.reg, rl_rhs);
   2906           } else if (rl_lhs.location != kLocPhysReg) {
   2907             // RHS is in a register; LHS is in memory.
   2908             if (op != kOpSub) {
   2909               // Force RHS into result and operate on memory.
   2910               rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2911               OpRegCopy(rl_result.reg, rl_rhs.reg);
   2912               OpRegMem(op, rl_result.reg, rl_lhs);
   2913             } else {
   2914               // Subtraction isn't commutative.
   2915               rl_lhs = LoadValue(rl_lhs, kCoreReg);
   2916               rl_rhs = LoadValue(rl_rhs, kCoreReg);
   2917               rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2918               OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
   2919             }
   2920           } else {
   2921             // Both are in registers.
   2922             rl_lhs = LoadValue(rl_lhs, kCoreReg);
   2923             rl_rhs = LoadValue(rl_rhs, kCoreReg);
   2924             rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2925             OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
   2926           }
   2927         }
   2928       }
   2929     }
   2930   }
   2931   StoreValue(rl_dest, rl_result);
   2932 }
   2933 
   2934 bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
    2935   // If either operand is in a floating-point register, we can't use the reg/mem forms.
   2936   if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) {
   2937     return false;
   2938   }
   2939   if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) {
   2940     return false;
   2941   }
   2942 
   2943   // Everything will be fine :-).
   2944   return true;
   2945 }
   2946 
   2947 void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
   2948   if (!cu_->target64) {
   2949     Mir2Lir::GenIntToLong(rl_dest, rl_src);
   2950     return;
   2951   }
   2952   rl_src = UpdateLocTyped(rl_src, kCoreReg);
   2953   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   2954   if (rl_src.location == kLocPhysReg) {
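             // MOVSXD sign-extends the 32-bit source register into the 64-bit result.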
   2955     NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
   2956   } else {
   2957     int displacement = SRegOffset(rl_src.s_reg_low);
   2958     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   2959     LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(),
   2960                      displacement + LOWORD_OFFSET);
   2961     AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
    2962                             true /* is_load */, true /* is64bit */);
   2963   }
   2964   StoreValueWide(rl_dest, rl_result);
   2965 }
   2966 
   2967 void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
   2968                         RegLocation rl_src1, RegLocation rl_shift) {
   2969   if (!cu_->target64) {
   2970     Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
   2971     return;
   2972   }
   2973 
   2974   bool is_two_addr = false;
   2975   OpKind op = kOpBkpt;
   2976   RegLocation rl_result;
   2977 
   2978   switch (opcode) {
   2979     case Instruction::SHL_LONG_2ADDR:
   2980       is_two_addr = true;
   2981       // Fallthrough
   2982     case Instruction::SHL_LONG:
   2983       op = kOpLsl;
   2984       break;
   2985     case Instruction::SHR_LONG_2ADDR:
   2986       is_two_addr = true;
   2987       // Fallthrough
   2988     case Instruction::SHR_LONG:
   2989       op = kOpAsr;
   2990       break;
   2991     case Instruction::USHR_LONG_2ADDR:
   2992       is_two_addr = true;
   2993       // Fallthrough
   2994     case Instruction::USHR_LONG:
   2995       op = kOpLsr;
   2996       break;
   2997     default:
   2998       op = kOpBkpt;
   2999   }
   3000 
    3001   // x86 masks shift counts in hardware (as Dalvik requires); the count must be in ECX (CL).
   3002   RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
   3003   LoadValueDirectFixed(rl_shift, t_reg);
   3004   if (is_two_addr) {
   3005     // Can we do this directly into memory?
   3006     rl_result = UpdateLocWideTyped(rl_dest, kCoreReg);
   3007     if (rl_result.location != kLocPhysReg) {
   3008       // Okay, we can do this into memory
   3009       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   3010       OpMemReg(op, rl_result, t_reg.GetReg());
   3011     } else if (!rl_result.reg.IsFloat()) {
   3012       // Can do this directly into the result register
   3013       OpRegReg(op, rl_result.reg, t_reg);
   3014       StoreFinalValueWide(rl_dest, rl_result);
   3015     }
   3016   } else {
    3017     // Three-address form, or we can't do it directly.
   3018     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   3019     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   3020     OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
   3021     StoreFinalValueWide(rl_dest, rl_result);
   3022   }
   3023 
   3024   FreeTemp(t_reg);
   3025 }
   3026 
   3027 }  // namespace art
   3028