/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "codegen_x86.h"

#include "base/logging.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "x86_lir.h"

namespace art {

void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode,
                                 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  X86OpCode op = kX86Nop;
  RegLocation rl_result;

  /*
   * Don't attempt to optimize register usage since these opcodes call out to
   * the handlers.
   */
  switch (opcode) {
    case Instruction::ADD_FLOAT_2ADDR:
    case Instruction::ADD_FLOAT:
      op = kX86AddssRR;
      break;
    case Instruction::SUB_FLOAT_2ADDR:
    case Instruction::SUB_FLOAT:
      op = kX86SubssRR;
      break;
    case Instruction::DIV_FLOAT_2ADDR:
    case Instruction::DIV_FLOAT:
      op = kX86DivssRR;
      break;
    case Instruction::MUL_FLOAT_2ADDR:
    case Instruction::MUL_FLOAT:
      op = kX86MulssRR;
      break;
    case Instruction::REM_FLOAT_2ADDR:
    case Instruction::REM_FLOAT:
      GenRemFP(rl_dest, rl_src1, rl_src2, false /* is_double */);
      return;
    case Instruction::NEG_FLOAT:
      GenNegFloat(rl_dest, rl_src1);
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  rl_src1 = LoadValue(rl_src1, kFPReg);
  rl_src2 = LoadValue(rl_src2, kFPReg);
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  RegStorage r_dest = rl_result.reg;
  RegStorage r_src1 = rl_src1.reg;
  RegStorage r_src2 = rl_src2.reg;
  if (r_dest == r_src2) {
    r_src2 = AllocTempSingle();
    OpRegCopy(r_src2, r_dest);
  }
  OpRegCopy(r_dest, r_src1);
  NewLIR2(op, r_dest.GetReg(), r_src2.GetReg());
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  DCHECK(rl_dest.wide);
  DCHECK(rl_dest.fp);
  DCHECK(rl_src1.wide);
  DCHECK(rl_src1.fp);
  DCHECK(rl_src2.wide);
  DCHECK(rl_src2.fp);
  X86OpCode op = kX86Nop;
  RegLocation rl_result;

  switch (opcode) {
    case Instruction::ADD_DOUBLE_2ADDR:
    case Instruction::ADD_DOUBLE:
      op = kX86AddsdRR;
      break;
    case Instruction::SUB_DOUBLE_2ADDR:
    case Instruction::SUB_DOUBLE:
      op = kX86SubsdRR;
      break;
    case Instruction::DIV_DOUBLE_2ADDR:
    case Instruction::DIV_DOUBLE:
      op = kX86DivsdRR;
      break;
    case Instruction::MUL_DOUBLE_2ADDR:
    case Instruction::MUL_DOUBLE:
      op = kX86MulsdRR;
      break;
    case Instruction::REM_DOUBLE_2ADDR:
    case Instruction::REM_DOUBLE:
      GenRemFP(rl_dest, rl_src1, rl_src2, true /* is_double */);
      return;
    case Instruction::NEG_DOUBLE:
      GenNegDouble(rl_dest, rl_src1);
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  rl_src1 = LoadValueWide(rl_src1, kFPReg);
  rl_src2 = LoadValueWide(rl_src2, kFPReg);
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  if (rl_result.reg == rl_src2.reg) {
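    // The result was assigned the same XMM register as src2; copying src1 into the result below
    // would clobber src2, so move src2 aside to a temp first.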
    rl_src2.reg = AllocTempDouble();
    OpRegCopy(rl_src2.reg, rl_result.reg);
  }
  OpRegCopy(rl_result.reg, rl_src1.reg);
  NewLIR2(op, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
  StoreValueWide(rl_dest, rl_result);
}

void X86Mir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
                                            int32_t constant) {
  // TODO: need x86 implementation.
  UNUSED(rl_dest, rl_src1, constant);
  LOG(FATAL) << "Unimplemented GenMultiplyByConstantFloat in x86";
}

void X86Mir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
                                             int64_t constant) {
  // TODO: need x86 implementation.
  UNUSED(rl_dest, rl_src1, constant);
  LOG(FATAL) << "Unimplemented GenMultiplyByConstantDouble in x86";
}

void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) {
  // Compute offsets to the source and destination VRs on stack.
  int src_v_reg_offset = SRegOffset(rl_src.s_reg_low);
  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);

  // Update the in-register state of source.
  rl_src = UpdateLocWide(rl_src);

  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  // If the source is in a physical register, then put it in its location on the stack.
  if (rl_src.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src.reg);

    if (reg_info != nullptr && reg_info->IsTemp()) {
      // Calling FlushSpecificReg because it will only write back VR if it is dirty.
      FlushSpecificReg(reg_info);
      // ResetDef to prevent NullifyRange from removing stores.
      ResetDef(rl_src.reg);
    } else {
      // It must have been register promoted if it is not a temp but is still in a physical
      // register. Since we need it to be in memory to convert, we place it there now.
      const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
      StoreBaseDisp(rs_rSP, src_v_reg_offset, rl_src.reg, k64, kNotVolatile);
    }
  }

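  // Used for the 32-bit long-to-float/double conversions: SSE there has no conversion from a
  // 64-bit integer, so the long is pushed through the x87 unit (fild from the frame slot,
  // fstp back out as a float or double).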
  // Push the source virtual register onto the x87 stack.
  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, rs_rX86_SP_32.GetReg(),
                              src_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, true /* is64bit */);

  // Now pop off x87 stack and store it in the destination VR's stack location.
  int opcode = is_double ? kX86Fstp64M : kX86Fstp32M;
  int displacement = is_double ? dest_v_reg_offset + LOWORD_OFFSET : dest_v_reg_offset;
  LIR *fstp = NewLIR2NoDest(opcode, rs_rX86_SP_32.GetReg(), displacement);
  AnnotateDalvikRegAccess(fstp, displacement >> 2, false /* is_load */, is_double);

  /*
   * The result is in a physical register if it was in a temp or was register
   * promoted. For that reason it is enough to check if it is in physical
   * register. If it is, then we must do all of the bookkeeping necessary to
   * invalidate temp (if needed) and load in promoted register (if needed).
   * If the result's location is in memory, then we do not need to do anything
   * more since the fstp has already placed the correct value in memory.
   */
  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest) : UpdateLocTyped(rl_dest);
  if (rl_result.location == kLocPhysReg) {
    /*
     * We already know that the result is in a physical register but do not know if it is the
     * right class. So we call EvalLoc(Wide) first which will ensure that it will get moved to the
     * correct register class.
     */
    rl_result = EvalLoc(rl_dest, kFPReg, true);
    const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
    if (is_double) {
      LoadBaseDisp(rs_rSP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);

      StoreFinalValueWide(rl_dest, rl_result);
    } else {
      Load32Disp(rs_rSP, dest_v_reg_offset, rl_result.reg);

      StoreFinalValue(rl_dest, rl_result);
    }
  }
}

void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
                               RegLocation rl_src) {
  RegisterClass rcSrc = kFPReg;
  X86OpCode op = kX86Nop;
  RegLocation rl_result;
  switch (opcode) {
    case Instruction::INT_TO_FLOAT:
      rcSrc = kCoreReg;
      op = kX86Cvtsi2ssRR;
      break;
    case Instruction::DOUBLE_TO_FLOAT:
      rcSrc = kFPReg;
      op = kX86Cvtsd2ssRR;
      break;
    case Instruction::FLOAT_TO_DOUBLE:
      rcSrc = kFPReg;
      op = kX86Cvtss2sdRR;
      break;
    case Instruction::INT_TO_DOUBLE:
      rcSrc = kCoreReg;
      op = kX86Cvtsi2sdRR;
      break;
    case Instruction::FLOAT_TO_INT: {
      rl_src = LoadValue(rl_src, kFPReg);
      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
      ClobberSReg(rl_dest.s_reg_low);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      RegStorage temp_reg = AllocTempSingle();

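      // Dex f2i semantics: NaN must convert to 0 and positive overflow must clamp to INT_MAX,
      // but cvttss2si returns 0x80000000 for both. Pre-load INT_MAX, compare the source against
      // (float)INT_MAX (which rounds up to 2^31), and only truncate on the ordinary path;
      // negative overflow already truncates to 0x80000000 == INT_MIN, so it needs no fix-up.
      // The DOUBLE_TO_INT and 64-bit *_TO_LONG cases below follow the same pattern.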
      LoadConstant(rl_result.reg, 0x7fffffff);
      NewLIR2(kX86Cvtsi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
      NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
      NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
      branch_normal->target = NewLIR0(kPseudoTargetLabel);
      StoreValue(rl_dest, rl_result);
      return;
    }
    case Instruction::DOUBLE_TO_INT: {
      rl_src = LoadValueWide(rl_src, kFPReg);
      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
      ClobberSReg(rl_dest.s_reg_low);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      RegStorage temp_reg = AllocTempDouble();

      LoadConstant(rl_result.reg, 0x7fffffff);
      NewLIR2(kX86Cvtsi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
      NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
      NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
      branch_normal->target = NewLIR0(kPseudoTargetLabel);
      StoreValue(rl_dest, rl_result);
      return;
    }
    case Instruction::LONG_TO_DOUBLE:
      if (cu_->target64) {
        rcSrc = kCoreReg;
        op = kX86Cvtsqi2sdRR;
        break;
      }
      GenLongToFP(rl_dest, rl_src, true /* is_double */);
      return;
    case Instruction::LONG_TO_FLOAT:
      if (cu_->target64) {
        rcSrc = kCoreReg;
        op = kX86Cvtsqi2ssRR;
        break;
      }
      GenLongToFP(rl_dest, rl_src, false /* is_double */);
      return;
    case Instruction::FLOAT_TO_LONG:
      if (cu_->target64) {
        rl_src = LoadValue(rl_src, kFPReg);
        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
        ClobberSReg(rl_dest.s_reg_low);
        rl_result = EvalLoc(rl_dest, kCoreReg, true);
        RegStorage temp_reg = AllocTempSingle();

        // Set rl_result to 0x7fffffffffffffff.
        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
        NewLIR2(kX86Cvtsqi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
        NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
        NewLIR2(kX86Cvttss2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
        branch_normal->target = NewLIR0(kPseudoTargetLabel);
        StoreValueWide(rl_dest, rl_result);
      } else {
        CheckEntrypointTypes<kQuickF2l, int64_t, float>();  // int64_t -> kCoreReg
        GenConversionCall(kQuickF2l, rl_dest, rl_src, kCoreReg);
      }
      return;
    case Instruction::DOUBLE_TO_LONG:
      if (cu_->target64) {
        rl_src = LoadValueWide(rl_src, kFPReg);
        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
        ClobberSReg(rl_dest.s_reg_low);
        rl_result = EvalLoc(rl_dest, kCoreReg, true);
        RegStorage temp_reg = AllocTempDouble();

        // Set rl_result to 0x7fffffffffffffff.
        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
        NewLIR2(kX86Cvtsqi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
        NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
        NewLIR2(kX86Cvttsd2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
        branch_normal->target = NewLIR0(kPseudoTargetLabel);
        StoreValueWide(rl_dest, rl_result);
      } else {
        CheckEntrypointTypes<kQuickD2l, int64_t, double>();  // int64_t -> kCoreReg
        GenConversionCall(kQuickD2l, rl_dest, rl_src, kCoreReg);
      }
      return;
    default:
      LOG(INFO) << "Unexpected opcode: " << opcode;
  }
  // At this point, target will be either float or double.
  DCHECK(rl_dest.fp);
  if (rl_src.wide) {
    rl_src = LoadValueWide(rl_src, rcSrc);
  } else {
    rl_src = LoadValue(rl_src, rcSrc);
  }
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
  if (rl_dest.wide) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
}

void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double) {
  // Compute offsets to the source and destination VRs on stack.
  int src1_v_reg_offset = SRegOffset(rl_src1.s_reg_low);
  int src2_v_reg_offset = SRegOffset(rl_src2.s_reg_low);
  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);

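  // fprem-based implementation: spill both operands to their Dalvik frame slots, load them onto
  // the x87 stack with fld, run fprem until the FPU status word reports a complete reduction,
  // then store the remainder back to the destination slot.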
  // Update the in-register state of sources.
  rl_src1 = is_double ? UpdateLocWide(rl_src1) : UpdateLoc(rl_src1);
  rl_src2 = is_double ? UpdateLocWide(rl_src2) : UpdateLoc(rl_src2);

  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  // If a source is in a physical register, then put it in its location on the stack.
  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
  if (rl_src1.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src1.reg);

    if (reg_info != nullptr && reg_info->IsTemp()) {
      // Calling FlushSpecificReg because it will only write back VR if it is dirty.
      FlushSpecificReg(reg_info);
      // ResetDef to prevent NullifyRange from removing stores.
      ResetDef(rl_src1.reg);
    } else {
      // It must have been register promoted if it is not a temp but is still in a physical
      // register. Since we need it to be in memory to convert, we place it there now.
      StoreBaseDisp(rs_rSP, src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32,
                    kNotVolatile);
    }
  }

  if (rl_src2.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src2.reg);
    if (reg_info != nullptr && reg_info->IsTemp()) {
      FlushSpecificReg(reg_info);
      ResetDef(rl_src2.reg);
    } else {
      StoreBaseDisp(rs_rSP, src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32,
                    kNotVolatile);
    }
  }

  int fld_opcode = is_double ? kX86Fld64M : kX86Fld32M;

  // Push the source virtual registers onto the x87 stack.
  LIR *fld_2 = NewLIR2NoDest(fld_opcode, rs_rSP.GetReg(),
                             src2_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fld_2, (src2_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, is_double /* is64bit */);

  LIR *fld_1 = NewLIR2NoDest(fld_opcode, rs_rSP.GetReg(),
                             src1_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fld_1, (src1_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, is_double /* is64bit */);

  FlushReg(rs_rAX);
  Clobber(rs_rAX);
  LockTemp(rs_rAX);

  LIR* retry = NewLIR0(kPseudoTargetLabel);

  // Compute the partial remainder of ST(0) / ST(1) and leave it in ST(0).
  NewLIR0(kX86Fprem);

  // Move FPU status word to AX.
  NewLIR0(kX86Fstsw16R);

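  // 0x400 is the C2 condition flag of the FPU status word; fprem leaves it set while the
  // reduction is still incomplete.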
  // Check if reduction is complete.
  OpRegImm(kOpAnd, rs_rAX, 0x400);

  // If not, continue computing the remainder.
  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
  branch->target = retry;

  FreeTemp(rs_rAX);

  // Now store result in the destination VR's stack location.
  int displacement = dest_v_reg_offset + LOWORD_OFFSET;
  int opcode = is_double ? kX86Fst64M : kX86Fst32M;
  LIR *fst = NewLIR2NoDest(opcode, rs_rSP.GetReg(), displacement);
  AnnotateDalvikRegAccess(fst, displacement >> 2, false /* is_load */, is_double /* is64bit */);

  // Pop ST(1) and ST(0).
  NewLIR0(kX86Fucompp);

  /*
   * The result is in a physical register if it was in a temp or was register
   * promoted. For that reason it is enough to check if it is in physical
   * register. If it is, then we must do all of the bookkeeping necessary to
   * invalidate temp (if needed) and load in promoted register (if needed).
   * If the result's location is in memory, then we do not need to do anything
   * more since the fstp has already placed the correct value in memory.
   */
  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest) : UpdateLocTyped(rl_dest);
  if (rl_result.location == kLocPhysReg) {
    rl_result = EvalLoc(rl_dest, kFPReg, true);
    if (is_double) {
      LoadBaseDisp(rs_rSP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);
      StoreFinalValueWide(rl_dest, rl_result);
    } else {
      Load32Disp(rs_rSP, dest_v_reg_offset, rl_result.reg);
      StoreFinalValue(rl_dest, rl_result);
    }
  }
}

void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest,
                          RegLocation rl_src1, RegLocation rl_src2) {
  bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
  bool unordered_gt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT);
  if (single) {
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
  } else {
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
  }
  // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
  ClobberSReg(rl_dest.s_reg_low);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  LoadConstantNoClobber(rl_result.reg, unordered_gt ? 1 : 0);
  if (single) {
    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  } else {
    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
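  // ucomiss/ucomisd set the flags like an unsigned compare, with ZF = PF = CF = 1 when either
  // operand is NaN. The code below turns the flags into -1/0/1: set-above (or the equivalent
  // jump-and-move) leaves 1 only for src1 > src2, and the trailing sbb of the carry turns
  // "below" (and the unordered case of the CMPL variants) into -1.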
  LIR* branch = nullptr;
  if (unordered_gt) {
    branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
  }
  // If the result reg can't be byte accessed, use a jump and move instead of a set.
  if (!IsByteRegister(rl_result.reg)) {
    LIR* branch2 = nullptr;
    if (unordered_gt) {
      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x0);
    } else {
      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondBe);
      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x1);
    }
    branch2->target = NewLIR0(kPseudoTargetLabel);
  } else {
    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondA /* above - unsigned > */);
  }
  NewLIR2(kX86Sbb32RI, rl_result.reg.GetReg(), 0);
  if (unordered_gt) {
    branch->target = NewLIR0(kPseudoTargetLabel);
  }
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
                                     bool is_double) {
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  LIR* branch = nullptr;
  RegLocation rl_src1;
  RegLocation rl_src2;
  if (is_double) {
    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  } else {
    rl_src1 = mir_graph_->GetSrc(mir, 0);
    rl_src2 = mir_graph_->GetSrc(mir, 1);
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
  ConditionCode ccode = mir->meta.ccode;
  switch (ccode) {
    case kCondEq:
      if (!gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      break;
    case kCondNe:
      if (!gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      break;
    case kCondLt:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      ccode = kCondUlt;
      break;
    case kCondLe:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      ccode = kCondLs;
      break;
    case kCondGt:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      ccode = kCondHi;
      break;
    case kCondGe:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}

void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;
  rl_src = LoadValue(rl_src, kCoreReg);
  rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegRegImm(kOpAdd, rl_result.reg, rl_src.reg, 0x80000000);
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;
  rl_src = LoadValueWide(rl_src, kCoreReg);
  if (cu_->target64) {
    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    OpRegCopy(rl_result.reg, rl_src.reg);
    // Flip sign bit.
    NewLIR2(kX86Rol64RI, rl_result.reg.GetReg(), 1);
    NewLIR2(kX86Xor64RI, rl_result.reg.GetReg(), 1);
    NewLIR2(kX86Ror64RI, rl_result.reg.GetReg(), 1);
  } else {
    rl_result = ForceTempWide(rl_src);
    OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), 0x80000000);
  }
  StoreValueWide(rl_dest, rl_result);
}

bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) {
  RegLocation rl_dest = InlineTargetWide(info);  // double place for result
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  RegLocation rl_src = info->args[0];
  rl_src = LoadValueWide(rl_src, kFPReg);
  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
  NewLIR2(kX86SqrtsdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
  StoreValueWide(rl_dest, rl_result);
  return true;
}

bool X86Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
  // Get the argument.
  RegLocation rl_src = info->args[0];

  // Get the inlined intrinsic target virtual register.
  RegLocation rl_dest = InlineTarget(info);

  // Get the virtual register number.
  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);

  // If the argument is the same as the inlined intrinsic target.
  if (v_src_reg == v_dst_reg) {
    rl_src = UpdateLoc(rl_src);

    // If the argument is in a physical register.
    if (rl_src.location == kLocPhysReg) {
      rl_src = LoadValue(rl_src, kCoreReg);
      OpRegImm(kOpAnd, rl_src.reg, 0x7fffffff);
      StoreValue(rl_dest, rl_src);
      return true;
    }
    // The argument is in memory.
    DCHECK((rl_src.location == kLocDalvikFrame) ||
           (rl_src.location == kLocCompilerTemp));

    // Operate directly into memory.
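    // The value is not live in a register, so clear the IEEE-754 sign bit in place with an
    // and-immediate on the frame slot instead of loading it first.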
    int displacement = SRegOffset(rl_dest.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP_32.GetReg(), displacement, 0x7fffffff);
    AnnotateDalvikRegAccess(lir, displacement >> 2, false /* is_load */, false /* is_64bit */);
    AnnotateDalvikRegAccess(lir, displacement >> 2, true /* is_load */, false /* is_64bit */);
    return true;
  } else {
    rl_src = LoadValue(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
    StoreValue(rl_dest, rl_result);
    return true;
  }
}

bool X86Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
  RegLocation rl_src = info->args[0];
  RegLocation rl_dest = InlineTargetWide(info);
  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  if (cu_->target64) {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegCopyWide(rl_result.reg, rl_src.reg);
    OpRegImm(kOpLsl, rl_result.reg, 1);
    OpRegImm(kOpLsr, rl_result.reg, 1);
    StoreValueWide(rl_dest, rl_result);
    return true;
  }
  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
  rl_src = UpdateLocWide(rl_src);

  // If the argument is in a physical XMM register.
  if (rl_src.location == kLocPhysReg && rl_src.reg.IsFloat()) {
    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
    if (rl_result.reg != rl_src.reg) {
      LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
    } else {
      RegStorage sign_mask = AllocTempDouble();
      LoadConstantWide(sign_mask, 0x7fffffffffffffff);
      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), sign_mask.GetReg());
      FreeTemp(sign_mask);
    }
    StoreValueWide(rl_dest, rl_result);
    return true;
  } else if (v_src_reg == v_dst_reg) {
    // The argument is the same as the inlined intrinsic target.
    // If the argument is in a physical register.
    if (rl_src.location == kLocPhysReg) {
      rl_src = LoadValueWide(rl_src, kCoreReg);
      OpRegImm(kOpAnd, rl_src.reg.GetHigh(), 0x7fffffff);
      StoreValueWide(rl_dest, rl_src);
      return true;
    }
    // The argument is in memory.
    DCHECK((rl_src.location == kLocDalvikFrame) ||
           (rl_src.location == kLocCompilerTemp));

    // Operate directly into memory.
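    // Only the high word of the wide VR holds the sign bit, so an and-immediate on the word at
    // displacement + HIWORD_OFFSET is enough.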
    int displacement = SRegOffset(rl_dest.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP_32.GetReg(), displacement + HIWORD_OFFSET,
                       0x7fffffff);
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */,
                            true /* is_64bit */);
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, false /* is_load */,
                            true /* is_64bit */);
    return true;
  } else {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegCopyWide(rl_result.reg, rl_src.reg);
    OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
    StoreValueWide(rl_dest, rl_result);
    return true;
  }
}

bool X86Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
  if (is_double) {
    RegLocation rl_dest = InlineTargetWide(info);
    if (rl_dest.s_reg_low == INVALID_SREG) {
      // Result is unused, the code is dead. Inlining successful, no code generated.
      return true;
    }
    RegLocation rl_src1 = LoadValueWide(info->args[0], kFPReg);
    RegLocation rl_src2 = LoadValueWide(info->args[2], kFPReg);
    RegLocation rl_result = EvalLocWide(rl_dest, kFPReg, true);

    // Avoid src2 corruption by OpRegCopyWide.
    if (rl_result.reg == rl_src2.reg) {
      std::swap(rl_src2.reg, rl_src1.reg);
    }

    OpRegCopyWide(rl_result.reg, rl_src1.reg);
    NewLIR2(kX86UcomisdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    // If either arg is NaN, return NaN.
    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
    // Min/Max branches.
    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0.
    NewLIR2((is_min) ? kX86OrpdRR : kX86AndpdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
    // Handle NaN.
    branch_nan->target = NewLIR0(kPseudoTargetLabel);
    LoadConstantWide(rl_result.reg, INT64_C(0x7ff8000000000000));

    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
    // Handle Min/Max. Copy greater/lesser value from src2.
    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
    OpRegCopyWide(rl_result.reg, rl_src2.reg);
    // Right operand is already in result reg.
    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
    // Exit.
    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
    StoreValueWide(rl_dest, rl_result);
  } else {
    RegLocation rl_dest = InlineTarget(info);
    if (rl_dest.s_reg_low == INVALID_SREG) {
      // Result is unused, the code is dead. Inlining successful, no code generated.
      return true;
    }
    RegLocation rl_src1 = LoadValue(info->args[0], kFPReg);
    RegLocation rl_src2 = LoadValue(info->args[1], kFPReg);
    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);

    // Avoid src2 corruption by OpRegCopy.
    if (rl_result.reg == rl_src2.reg) {
      std::swap(rl_src2.reg, rl_src1.reg);
    }

    OpRegCopy(rl_result.reg, rl_src1.reg);
    NewLIR2(kX86UcomissRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    // If either arg is NaN, return NaN.
    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
    // Min/Max branches.
    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0.
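    // The ±0.0 encodings differ only in the sign bit: orps keeps a set sign bit (min yields
    // -0.0), while andps clears it unless both operands are -0.0 (max yields +0.0).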
    NewLIR2((is_min) ? kX86OrpsRR : kX86AndpsRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
    // Handle NaN.
    branch_nan->target = NewLIR0(kPseudoTargetLabel);
    LoadConstantNoClobber(rl_result.reg, 0x7fc00000);
    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
    // Handle Min/Max. Copy greater/lesser value from src2.
    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
    OpRegCopy(rl_result.reg, rl_src2.reg);
    // Right operand is already in result reg.
    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
    // Exit.
    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

}  // namespace art