/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "codegen_x86.h"

#include "base/logging.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "x86_lir.h"

namespace art {

void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode,
                                 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  X86OpCode op = kX86Nop;
  RegLocation rl_result;

  /*
   * Don't attempt to optimize register usage since these opcodes call out to
   * the handlers.
   */
  switch (opcode) {
    case Instruction::ADD_FLOAT_2ADDR:
    case Instruction::ADD_FLOAT:
      op = kX86AddssRR;
      break;
    case Instruction::SUB_FLOAT_2ADDR:
    case Instruction::SUB_FLOAT:
      op = kX86SubssRR;
      break;
    case Instruction::DIV_FLOAT_2ADDR:
    case Instruction::DIV_FLOAT:
      op = kX86DivssRR;
      break;
    case Instruction::MUL_FLOAT_2ADDR:
    case Instruction::MUL_FLOAT:
      op = kX86MulssRR;
      break;
    case Instruction::REM_FLOAT_2ADDR:
    case Instruction::REM_FLOAT:
      GenRemFP(rl_dest, rl_src1, rl_src2, false /* is_double */);
      return;
    case Instruction::NEG_FLOAT:
      GenNegFloat(rl_dest, rl_src1);
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  rl_src1 = LoadValue(rl_src1, kFPReg);
  rl_src2 = LoadValue(rl_src2, kFPReg);
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  RegStorage r_dest = rl_result.reg;
  RegStorage r_src1 = rl_src1.reg;
  RegStorage r_src2 = rl_src2.reg;
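  // The scalar SSE ops used here are two-operand and destructive: "addss xmm1, xmm2"
  // computes xmm1 = xmm1 op xmm2. If the destination aliases src2, save src2 in a
  // temp first so the copy of src1 into the destination does not clobber it.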
  if (r_dest == r_src2) {
    r_src2 = AllocTempSingle();
    OpRegCopy(r_src2, r_dest);
  }
  OpRegCopy(r_dest, r_src1);
  NewLIR2(op, r_dest.GetReg(), r_src2.GetReg());
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  DCHECK(rl_dest.wide);
  DCHECK(rl_dest.fp);
  DCHECK(rl_src1.wide);
  DCHECK(rl_src1.fp);
  DCHECK(rl_src2.wide);
  DCHECK(rl_src2.fp);
  X86OpCode op = kX86Nop;
  RegLocation rl_result;

  switch (opcode) {
    case Instruction::ADD_DOUBLE_2ADDR:
    case Instruction::ADD_DOUBLE:
      op = kX86AddsdRR;
      break;
    case Instruction::SUB_DOUBLE_2ADDR:
    case Instruction::SUB_DOUBLE:
      op = kX86SubsdRR;
      break;
    case Instruction::DIV_DOUBLE_2ADDR:
    case Instruction::DIV_DOUBLE:
      op = kX86DivsdRR;
      break;
    case Instruction::MUL_DOUBLE_2ADDR:
    case Instruction::MUL_DOUBLE:
      op = kX86MulsdRR;
      break;
    case Instruction::REM_DOUBLE_2ADDR:
    case Instruction::REM_DOUBLE:
      GenRemFP(rl_dest, rl_src1, rl_src2, true /* is_double */);
      return;
    case Instruction::NEG_DOUBLE:
      GenNegDouble(rl_dest, rl_src1);
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  rl_src1 = LoadValueWide(rl_src1, kFPReg);
  rl_src2 = LoadValueWide(rl_src2, kFPReg);
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  if (rl_result.reg == rl_src2.reg) {
    rl_src2.reg = AllocTempDouble();
    OpRegCopy(rl_src2.reg, rl_result.reg);
  }
  OpRegCopy(rl_result.reg, rl_src1.reg);
  NewLIR2(op, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
  StoreValueWide(rl_dest, rl_result);
}

void X86Mir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
                                            int32_t constant) {
  // TODO: need x86 implementation.
  UNUSED(rl_dest, rl_src1, constant);
  LOG(FATAL) << "Unimplemented GenMultiplyByConstantFloat in x86";
}

void X86Mir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
                                             int64_t constant) {
  // TODO: need x86 implementation.
  UNUSED(rl_dest, rl_src1, constant);
  LOG(FATAL) << "Unimplemented GenMultiplyByConstantDouble in x86";
}

void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) {
  // Compute offsets to the source and destination VRs on the stack.
  int src_v_reg_offset = SRegOffset(rl_src.s_reg_low);
  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);

  // Update the in-register state of the source.
  rl_src = UpdateLocWide(rl_src);

  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  // If the source is in a physical register, then put it in its location on the stack.
  if (rl_src.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src.reg);

    if (reg_info != nullptr && reg_info->IsTemp()) {
      // Calling FlushSpecificReg because it will only write back the VR if it is dirty.
      FlushSpecificReg(reg_info);
      // ResetDef to prevent NullifyRange from removing stores.
      ResetDef(rl_src.reg);
    } else {
      // It must have been register promoted if it is not a temp but is still in a physical
      // register. Since we need it to be in memory to convert, we place it there now.
      const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
      StoreBaseDisp(rs_rSP, src_v_reg_offset, rl_src.reg, k64, kNotVolatile);
    }
  }

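  // 32-bit x86 has no SSE instruction that converts a 64-bit integer to a float or
  // double, so the conversion goes through the x87 unit: fild loads the 64-bit
  // integer from the VR's home location and converts it in one step.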
  // Push the source virtual register onto the x87 stack.
  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, rs_rX86_SP_32.GetReg(),
                              src_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, true /* is64bit */);

  // Now pop the x87 stack and store the result in the destination VR's stack location.
  int opcode = is_double ? kX86Fstp64M : kX86Fstp32M;
  int displacement = is_double ? dest_v_reg_offset + LOWORD_OFFSET : dest_v_reg_offset;
  LIR *fstp = NewLIR2NoDest(opcode, rs_rX86_SP_32.GetReg(), displacement);
  AnnotateDalvikRegAccess(fstp, displacement >> 2, false /* is_load */, is_double);

  /*
   * The result is in a physical register if it was in a temp or was register
   * promoted. For that reason it is enough to check if it is in a physical
   * register. If it is, then we must do all of the bookkeeping necessary to
   * invalidate the temp (if needed) and load into the promoted register (if needed).
   * If the result's location is in memory, then we do not need to do anything
   * more since the fstp has already placed the correct value in memory.
   */
  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest) : UpdateLocTyped(rl_dest);
  if (rl_result.location == kLocPhysReg) {
    /*
     * We already know that the result is in a physical register but do not know if it is the
     * right class. So we call EvalLoc(Wide) first, which will ensure that it gets moved to the
     * correct register class.
     */
    rl_result = EvalLoc(rl_dest, kFPReg, true);
    const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
    if (is_double) {
      LoadBaseDisp(rs_rSP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);
      StoreFinalValueWide(rl_dest, rl_result);
    } else {
      Load32Disp(rs_rSP, dest_v_reg_offset, rl_result.reg);
      StoreFinalValue(rl_dest, rl_result);
    }
  }
}

void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
                               RegLocation rl_src) {
  RegisterClass rcSrc = kFPReg;
  X86OpCode op = kX86Nop;
  RegLocation rl_result;
  switch (opcode) {
    case Instruction::INT_TO_FLOAT:
      rcSrc = kCoreReg;
      op = kX86Cvtsi2ssRR;
      break;
    case Instruction::DOUBLE_TO_FLOAT:
      rcSrc = kFPReg;
      op = kX86Cvtsd2ssRR;
      break;
    case Instruction::FLOAT_TO_DOUBLE:
      rcSrc = kFPReg;
      op = kX86Cvtss2sdRR;
      break;
    case Instruction::INT_TO_DOUBLE:
      rcSrc = kCoreReg;
      op = kX86Cvtsi2sdRR;
      break;
    case Instruction::FLOAT_TO_INT: {
      rl_src = LoadValue(rl_src, kFPReg);
      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
      ClobberSReg(rl_dest.s_reg_low);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      RegStorage temp_reg = AllocTempSingle();

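      // cvttss2si writes the "integer indefinite" value 0x80000000 for a NaN or
      // out-of-range input, but Java semantics require saturation: INT_MAX on positive
      // overflow and 0 for NaN. Compare the source against (float)INT_MAX: comiss
      // clears CF (jae) when src >= the bound and sets PF (jp) when either operand is
      // NaN. Negative overflow needs no special case, since 0x80000000 is already
      // INT_MIN.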
      LoadConstant(rl_result.reg, 0x7fffffff);
      NewLIR2(kX86Cvtsi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
      NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
      NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
      branch_normal->target = NewLIR0(kPseudoTargetLabel);
      StoreValue(rl_dest, rl_result);
      return;
    }
    case Instruction::DOUBLE_TO_INT: {
      rl_src = LoadValueWide(rl_src, kFPReg);
      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
      ClobberSReg(rl_dest.s_reg_low);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      RegStorage temp_reg = AllocTempDouble();

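      // Same saturation scheme as FLOAT_TO_INT above, using the double-precision
      // compare and truncating convert.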
      LoadConstant(rl_result.reg, 0x7fffffff);
      NewLIR2(kX86Cvtsi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
      NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
      NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
      branch_normal->target = NewLIR0(kPseudoTargetLabel);
      StoreValue(rl_dest, rl_result);
      return;
    }
    case Instruction::LONG_TO_DOUBLE:
      if (cu_->target64) {
        rcSrc = kCoreReg;
        op = kX86Cvtsqi2sdRR;
        break;
      }
      GenLongToFP(rl_dest, rl_src, true /* is_double */);
      return;
    case Instruction::LONG_TO_FLOAT:
      if (cu_->target64) {
        rcSrc = kCoreReg;
        op = kX86Cvtsqi2ssRR;
        break;
      }
      GenLongToFP(rl_dest, rl_src, false /* is_double */);
      return;
    case Instruction::FLOAT_TO_LONG:
      if (cu_->target64) {
        rl_src = LoadValue(rl_src, kFPReg);
        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
        ClobberSReg(rl_dest.s_reg_low);
        rl_result = EvalLoc(rl_dest, kCoreReg, true);
        RegStorage temp_reg = AllocTempSingle();

        // Load LONG_MAX (0x7fffffffffffffff) into rl_result; it is both the comparison
        // bound and the positive-overflow result.
        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
        NewLIR2(kX86Cvtsqi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
        NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
        NewLIR2(kX86Cvttss2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
        branch_normal->target = NewLIR0(kPseudoTargetLabel);
        StoreValueWide(rl_dest, rl_result);
      } else {
        CheckEntrypointTypes<kQuickF2l, int64_t, float>();  // int64_t -> kCoreReg
        GenConversionCall(kQuickF2l, rl_dest, rl_src, kCoreReg);
      }
      return;
    case Instruction::DOUBLE_TO_LONG:
      if (cu_->target64) {
        rl_src = LoadValueWide(rl_src, kFPReg);
        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
        ClobberSReg(rl_dest.s_reg_low);
        rl_result = EvalLoc(rl_dest, kCoreReg, true);
        RegStorage temp_reg = AllocTempDouble();

        // Load LONG_MAX (0x7fffffffffffffff) into rl_result; it is both the comparison
        // bound and the positive-overflow result.
        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
        NewLIR2(kX86Cvtsqi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
        NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
        NewLIR2(kX86Cvttsd2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
        branch_normal->target = NewLIR0(kPseudoTargetLabel);
        StoreValueWide(rl_dest, rl_result);
      } else {
        CheckEntrypointTypes<kQuickD2l, int64_t, double>();  // int64_t -> kCoreReg
        GenConversionCall(kQuickD2l, rl_dest, rl_src, kCoreReg);
      }
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  // At this point, target will be either float or double.
  DCHECK(rl_dest.fp);
  if (rl_src.wide) {
    rl_src = LoadValueWide(rl_src, rcSrc);
  } else {
    rl_src = LoadValue(rl_src, rcSrc);
  }
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
  if (rl_dest.wide) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
}

void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                          bool is_double) {
  // Compute offsets to the source and destination VRs on the stack.
  int src1_v_reg_offset = SRegOffset(rl_src1.s_reg_low);
  int src2_v_reg_offset = SRegOffset(rl_src2.s_reg_low);
  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);

  // Update the in-register state of the sources.
  rl_src1 = is_double ? UpdateLocWide(rl_src1) : UpdateLoc(rl_src1);
  rl_src2 = is_double ? UpdateLocWide(rl_src2) : UpdateLoc(rl_src2);

  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  // If a source is in a physical register, then put it in its location on the stack.
  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
  if (rl_src1.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src1.reg);

    if (reg_info != nullptr && reg_info->IsTemp()) {
      // Calling FlushSpecificReg because it will only write back the VR if it is dirty.
      FlushSpecificReg(reg_info);
      // ResetDef to prevent NullifyRange from removing stores.
      ResetDef(rl_src1.reg);
    } else {
      // It must have been register promoted if it is not a temp but is still in a physical
      // register. Since we need it to be in memory to convert, we place it there now.
      StoreBaseDisp(rs_rSP, src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32,
                    kNotVolatile);
    }
  }

  if (rl_src2.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src2.reg);
    if (reg_info != nullptr && reg_info->IsTemp()) {
      FlushSpecificReg(reg_info);
      ResetDef(rl_src2.reg);
    } else {
      StoreBaseDisp(rs_rSP, src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32,
                    kNotVolatile);
    }
  }

  int fld_opcode = is_double ? kX86Fld64M : kX86Fld32M;

  // Push the source virtual registers onto the x87 stack.
  LIR *fld_2 = NewLIR2NoDest(fld_opcode, rs_rSP.GetReg(),
                             src2_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fld_2, (src2_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, is_double /* is64bit */);

  LIR *fld_1 = NewLIR2NoDest(fld_opcode, rs_rSP.GetReg(),
                             src1_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fld_1, (src1_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, is_double /* is64bit */);

  FlushReg(rs_rAX);
  Clobber(rs_rAX);
  LockTemp(rs_rAX);

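  // fprem produces only a partial remainder: each iteration reduces the exponent
  // difference by at most 63, so loop until the FPU reports completion. The
  // "reduction incomplete" flag is C2, bit 10 (0x400) of the FPU status word, which
  // fnstsw copies into AX (the reason rAX was flushed and locked above).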
  LIR* retry = NewLIR0(kPseudoTargetLabel);

  // Compute the partial remainder of ST(0) / ST(1) and leave it in ST(0).
  NewLIR0(kX86Fprem);

  // Move the FPU status word to AX.
  NewLIR0(kX86Fstsw16R);

  // Check if the reduction is complete (C2 clear).
  OpRegImm(kOpAnd, rs_rAX, 0x400);

  // If not, continue computing the remainder.
  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
  branch->target = retry;

  FreeTemp(rs_rAX);

  // Now store the result in the destination VR's stack location.
  int displacement = dest_v_reg_offset + LOWORD_OFFSET;
  int opcode = is_double ? kX86Fst64M : kX86Fst32M;
  LIR *fst = NewLIR2NoDest(opcode, rs_rSP.GetReg(), displacement);
  AnnotateDalvikRegAccess(fst, displacement >> 2, false /* is_load */, is_double /* is64bit */);

  // Pop ST(0) and ST(1) off the x87 stack (fucompp pops twice).
  NewLIR0(kX86Fucompp);

  /*
   * The result is in a physical register if it was in a temp or was register
   * promoted. For that reason it is enough to check if it is in a physical
   * register. If it is, then we must do all of the bookkeeping necessary to
   * invalidate the temp (if needed) and load into the promoted register (if needed).
   * If the result's location is in memory, then we do not need to do anything
   * more since the fst has already placed the correct value in memory.
   */
  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest) : UpdateLocTyped(rl_dest);
  if (rl_result.location == kLocPhysReg) {
    rl_result = EvalLoc(rl_dest, kFPReg, true);
    if (is_double) {
      LoadBaseDisp(rs_rSP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);
      StoreFinalValueWide(rl_dest, rl_result);
    } else {
      Load32Disp(rs_rSP, dest_v_reg_offset, rl_result.reg);
      StoreFinalValue(rl_dest, rl_result);
    }
  }
}

void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest,
                          RegLocation rl_src1, RegLocation rl_src2) {
  bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
  bool unordered_gt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT);
  if (single) {
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
  } else {
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
  }
  // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
  ClobberSReg(rl_dest.s_reg_low);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
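  // ucomis sets CF when src1 < src2 or the compare is unordered, and ZF when equal or
  // unordered. Materialize 1 only for "above" (CF=0, ZF=0, i.e. src1 > src2), then
  // "sbb 0" subtracts CF, turning "below or unordered" into -1. For cmpg a NaN must
  // yield 1 instead, so the preset 1 is kept by branching on PF, which is set only
  // for an unordered compare.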
  LoadConstantNoClobber(rl_result.reg, unordered_gt ? 1 : 0);
  if (single) {
    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  } else {
    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
  LIR* branch = nullptr;
  if (unordered_gt) {
    branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
  }
  // If the result reg can't be byte accessed, use a jump and move instead of a set.
  if (!IsByteRegister(rl_result.reg)) {
    LIR* branch2 = nullptr;
    if (unordered_gt) {
      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x0);
    } else {
      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondBe);
      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x1);
    }
    branch2->target = NewLIR0(kPseudoTargetLabel);
  } else {
    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondA /* above - unsigned > */);
  }
  NewLIR2(kX86Sbb32RI, rl_result.reg.GetReg(), 0);
  if (unordered_gt) {
    branch->target = NewLIR0(kPseudoTargetLabel);
  }
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
                                     bool is_double) {
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  LIR* branch = nullptr;
  RegLocation rl_src1;
  RegLocation rl_src2;
  if (is_double) {
    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  } else {
    rl_src1 = mir_graph_->GetSrc(mir, 0);
    rl_src2 = mir_graph_->GetSrc(mir, 1);
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
  ConditionCode ccode = mir->meta.ccode;
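  // ucomis reports an unordered compare (a NaN operand) as ZF=PF=CF=1, so the
  // unsigned condition codes below would misread NaN. Where the bias requires it, an
  // explicit parity branch (PF is set only when unordered) routes the NaN case to the
  // correct successor before the main branch is emitted.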
  switch (ccode) {
    case kCondEq:
      if (!gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      break;
    case kCondNe:
      if (!gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      break;
    case kCondLt:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      ccode = kCondUlt;
      break;
    case kCondLe:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      ccode = kCondLs;
      break;
    case kCondGt:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      ccode = kCondHi;
      break;
    case kCondGe:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}

void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;
  rl_src = LoadValue(rl_src, kCoreReg);
  rl_result = EvalLoc(rl_dest, kCoreReg, true);
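  // Negate by flipping the IEEE sign bit in a core register: adding 0x80000000
  // inverts bit 31 (the carry out is discarded), which is exactly float negation.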
  OpRegRegImm(kOpAdd, rl_result.reg, rl_src.reg, 0x80000000);
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;
  rl_src = LoadValueWide(rl_src, kCoreReg);
  if (cu_->target64) {
    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    OpRegCopy(rl_result.reg, rl_src.reg);
    // Flip the sign bit (bit 63): rotate it down into bit 0, xor it, rotate it back.
    // This avoids materializing a 64-bit immediate mask for a single xor.
    NewLIR2(kX86Rol64RI, rl_result.reg.GetReg(), 1);
    NewLIR2(kX86Xor64RI, rl_result.reg.GetReg(), 1);
    NewLIR2(kX86Ror64RI, rl_result.reg.GetReg(), 1);
  } else {
    // On 32-bit, the sign bit lives in the high word; flip it there.
    rl_result = ForceTempWide(rl_src);
    OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), 0x80000000);
  }
  StoreValueWide(rl_dest, rl_result);
}

bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) {
  RegLocation rl_dest = InlineTargetWide(info);  // double result location
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  RegLocation rl_src = info->args[0];
  rl_src = LoadValueWide(rl_src, kFPReg);
  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
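  // sqrtsd is correctly rounded per IEEE 754, which is exactly the Math.sqrt()
  // contract, so a single instruction can replace the runtime call.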
  NewLIR2(kX86SqrtsdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
  StoreValueWide(rl_dest, rl_result);
  return true;
}

bool X86Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
  // Get the argument.
  RegLocation rl_src = info->args[0];

  // Get the inlined intrinsic target virtual register.
  RegLocation rl_dest = InlineTarget(info);

  // Get the virtual register numbers.
  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);

  // Check if the argument is the same as the inlined intrinsic target.
  if (v_src_reg == v_dst_reg) {
    rl_src = UpdateLoc(rl_src);

    // Check if the argument is in a physical register.
    if (rl_src.location == kLocPhysReg) {
      rl_src = LoadValue(rl_src, kCoreReg);
      // Clear the sign bit: abs(x) for an IEEE float is x & 0x7fffffff.
      OpRegImm(kOpAnd, rl_src.reg, 0x7fffffff);
      StoreValue(rl_dest, rl_src);
      return true;
    }
    // The argument is in memory.
    DCHECK((rl_src.location == kLocDalvikFrame) ||
           (rl_src.location == kLocCompilerTemp));

    // Operate directly on memory.
    int displacement = SRegOffset(rl_dest.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP_32.GetReg(), displacement, 0x7fffffff);
    AnnotateDalvikRegAccess(lir, displacement >> 2, false /* is_load */, false /* is_64bit */);
    AnnotateDalvikRegAccess(lir, displacement >> 2, true /* is_load */, false /* is_64bit */);
    return true;
  } else {
    rl_src = LoadValue(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
    StoreValue(rl_dest, rl_result);
    return true;
  }
}

bool X86Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
  RegLocation rl_src = info->args[0];
  RegLocation rl_dest = InlineTargetWide(info);
  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  if (cu_->target64) {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegCopyWide(rl_result.reg, rl_src.reg);
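    // Clear the sign bit (bit 63) without a 64-bit immediate mask: shift it out the
    // top, then shift back in a zero.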
    OpRegImm(kOpLsl, rl_result.reg, 1);
    OpRegImm(kOpLsr, rl_result.reg, 1);
    StoreValueWide(rl_dest, rl_result);
    return true;
  }
  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
  rl_src = UpdateLocWide(rl_src);

  // Check if the argument is in a physical XMM register.
  if (rl_src.location == kLocPhysReg && rl_src.reg.IsFloat()) {
    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
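    // Clear the sign bit in the XMM register with pand. The mask must live in a
    // register: reuse the result register unless it aliases the source, in which
    // case a temp is needed.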
    if (rl_result.reg != rl_src.reg) {
      LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
    } else {
      RegStorage sign_mask = AllocTempDouble();
      LoadConstantWide(sign_mask, 0x7fffffffffffffff);
      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), sign_mask.GetReg());
      FreeTemp(sign_mask);
    }
    StoreValueWide(rl_dest, rl_result);
    return true;
  } else if (v_src_reg == v_dst_reg) {
    // The argument is the same as the inlined intrinsic target.
    // Check if the argument is in a physical register.
    if (rl_src.location == kLocPhysReg) {
      rl_src = LoadValueWide(rl_src, kCoreReg);
      // Only the high word holds the sign bit.
      OpRegImm(kOpAnd, rl_src.reg.GetHigh(), 0x7fffffff);
      StoreValueWide(rl_dest, rl_src);
      return true;
    }
    // The argument is in memory.
    DCHECK((rl_src.location == kLocDalvikFrame) ||
           (rl_src.location == kLocCompilerTemp));

    // Operate directly on memory; only the high word needs the AND.
    int displacement = SRegOffset(rl_dest.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP_32.GetReg(), displacement + HIWORD_OFFSET,
                       0x7fffffff);
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */,
                            true /* is_64bit */);
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, false /* is_load */,
                            true /* is_64bit */);
    return true;
  } else {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegCopyWide(rl_result.reg, rl_src.reg);
    OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
    StoreValueWide(rl_dest, rl_result);
    return true;
  }
}

bool X86Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
  if (is_double) {
    RegLocation rl_dest = InlineTargetWide(info);
    if (rl_dest.s_reg_low == INVALID_SREG) {
      // Result is unused, the code is dead. Inlining successful, no code generated.
      return true;
    }
    RegLocation rl_src1 = LoadValueWide(info->args[0], kFPReg);
    RegLocation rl_src2 = LoadValueWide(info->args[2], kFPReg);
    RegLocation rl_result = EvalLocWide(rl_dest, kFPReg, true);

    // Avoid src2 corruption by OpRegCopyWide.
    if (rl_result.reg == rl_src2.reg) {
      std::swap(rl_src2.reg, rl_src1.reg);
    }

    OpRegCopyWide(rl_result.reg, rl_src1.reg);
    NewLIR2(kX86UcomisdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    // If either arg is NaN, return NaN.
    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
    // Min/Max branches.
    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0.
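    // OR-ing the operands yields -0.0 if either is -0.0 (correct for min); AND-ing
    // yields +0.0 if either is +0.0 (correct for max). Equal non-zero values have
    // identical bit patterns, so the operation is then a no-op.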
    NewLIR2((is_min) ? kX86OrpdRR : kX86AndpdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
    // Handle NaN.
    branch_nan->target = NewLIR0(kPseudoTargetLabel);
    // 0x7ff8000000000000 is the canonical double NaN (Double.NaN).
    LoadConstantWide(rl_result.reg, INT64_C(0x7ff8000000000000));
    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
    // Handle Min/Max. Copy the greater/lesser value from src2.
    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
    OpRegCopyWide(rl_result.reg, rl_src2.reg);
    // The right operand is already in the result reg.
    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
    // Exit.
    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
    StoreValueWide(rl_dest, rl_result);
  } else {
    RegLocation rl_dest = InlineTarget(info);
    if (rl_dest.s_reg_low == INVALID_SREG) {
      // Result is unused, the code is dead. Inlining successful, no code generated.
      return true;
    }
    RegLocation rl_src1 = LoadValue(info->args[0], kFPReg);
    RegLocation rl_src2 = LoadValue(info->args[1], kFPReg);
    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);

    // Avoid src2 corruption by OpRegCopy.
    if (rl_result.reg == rl_src2.reg) {
      std::swap(rl_src2.reg, rl_src1.reg);
    }

    OpRegCopy(rl_result.reg, rl_src1.reg);
    NewLIR2(kX86UcomissRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    // If either arg is NaN, return NaN.
    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
    // Min/Max branches.
    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
    // If equal, resolve ±0 as above: orps for min, andps for max.
    NewLIR2((is_min) ? kX86OrpsRR : kX86AndpsRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
    // Handle NaN.
    branch_nan->target = NewLIR0(kPseudoTargetLabel);
    // 0x7fc00000 is the canonical float NaN (Float.NaN).
    LoadConstantNoClobber(rl_result.reg, 0x7fc00000);
    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
    // Handle Min/Max. Copy the greater/lesser value from src2.
    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
    OpRegCopy(rl_result.reg, rl_src2.reg);
    // The right operand is already in the result reg.
    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
    // Exit.
    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

}  // namespace art