/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the X86 ISA */

#include "codegen_x86.h"

#include "art_method.h"
#include "base/bit_utils.h"
#include "base/logging.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "mirror/array-inl.h"
#include "x86_lir.h"

namespace art {

/*
 * Compare two 64-bit values
 *    x = y     return  0
 *    x < y     return -1
 *    x > y     return  1
 */
void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  if (cu_->target64) {
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    RegStorage temp_reg = AllocTemp();
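    // (Added note) The sequence below is the branch-free sign idiom
    //   result = (src1 > src2) - (src1 < src2);
    // e.g. for src1 = 5, src2 = 9: setg -> 0, setl -> 1, 0 - 1 == -1,
    // which the final movsx sign-extends to 64 bits.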
    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG);  // result = (src1 > src2) ? 1 : 0
    NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL);  // temp = (src1 >= src2) ? 0 : 1
    NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg());
    NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg());

    StoreValue(rl_dest, rl_result);
    FreeTemp(temp_reg);
    return;
  }

  // Prepare for explicit register usage
  ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
  LoadValueDirectWideFixed(rl_src1, r_tmp1);
  LoadValueDirectWideFixed(rl_src2, r_tmp2);
  // Compute (r1:r0) = (r1:r0) - (r3:r2)
  OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
  OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
  NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL);  // r2 = (r1:r0) < (r3:r2) ? 1 : 0
  NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg());
  OpReg(kOpNeg, rs_r2);  // r2 = -r2
  OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = high | low - sets ZF
  NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz);  // r0 = (r1:r0) != (r3:r2) ? 1 : 0
  NewLIR2(kX86Movzx8RR, r0, r0);
  OpRegReg(kOpOr, rs_r0, rs_r2);  // r0 = r0 | r2
  RegLocation rl_result = LocCReturn();
  StoreValue(rl_dest, rl_result);
}

X86ConditionCode X86ConditionEncoding(ConditionCode cond) {
  switch (cond) {
    case kCondEq: return kX86CondEq;
    case kCondNe: return kX86CondNe;
    case kCondCs: return kX86CondC;
    case kCondCc: return kX86CondNc;
    case kCondUlt: return kX86CondC;
    case kCondUge: return kX86CondNc;
    case kCondMi: return kX86CondS;
    case kCondPl: return kX86CondNs;
    case kCondVs: return kX86CondO;
    case kCondVc: return kX86CondNo;
    case kCondHi: return kX86CondA;
    case kCondLs: return kX86CondBe;
    case kCondGe: return kX86CondGe;
    case kCondLt: return kX86CondL;
    case kCondGt: return kX86CondG;
    case kCondLe: return kX86CondLe;
    case kCondAl:
    case kCondNv: LOG(FATAL) << "Should not reach here";
  }
  return kX86CondO;
}

LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
  NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg());
  X86ConditionCode cc = X86ConditionEncoding(cond);
  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */, cc);
  branch->target = target;
  return branch;
}
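// (Added note) For equality tests against zero, OpCmpImmBranch below emits
// TEST reg,reg instead of CMP reg,0: both set ZF identically for kCondEq and
// kCondNe, and TEST has the shorter encoding (no immediate byte).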
LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg,
                                int check_value, LIR* target) {
  if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) {
    // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode
    NewLIR2(reg.Is64Bit() ? kX86Test64RR : kX86Test32RR, reg.GetReg(), reg.GetReg());
  } else {
    if (reg.Is64Bit()) {
      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value);
    } else {
      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
    }
  }
  X86ConditionCode cc = X86ConditionEncoding(cond);
  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */, cc);
  branch->target = target;
  return branch;
}

LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
  // If src or dest is a pair, we'll be using low reg.
  if (r_dest.IsPair()) {
    r_dest = r_dest.GetLow();
  }
  if (r_src.IsPair()) {
    r_src = r_src.GetLow();
  }
  if (r_dest.IsFloat() || r_src.IsFloat())
    return OpFpRegCopy(r_dest, r_src);
  LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR,
                    r_dest.GetReg(), r_src.GetReg());
  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }
  return res;
}

void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    LIR *res = OpRegCopyNoInsert(r_dest, r_src);
    AppendLIR(res);
  }
}

void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    bool dest_fp = r_dest.IsFloat();
    bool src_fp = r_src.IsFloat();
    if (dest_fp) {
      if (src_fp) {
        OpRegCopy(r_dest, r_src);
      } else {
        // TODO: Prevent this from happening in the code. The result is often
        // unused or could have been loaded more easily from memory.
        if (!r_src.IsPair()) {
          DCHECK(!r_dest.IsPair());
          NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg());
        } else {
          NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
          RegStorage r_tmp = AllocTempDouble();
          NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
          NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
          FreeTemp(r_tmp);
        }
      }
    } else {
      if (src_fp) {
        if (!r_dest.IsPair()) {
          DCHECK(!r_src.IsPair());
          NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg());
        } else {
          NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
          RegStorage temp_reg = AllocTempDouble();
          NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
          NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
          NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
        }
      } else {
        DCHECK_EQ(r_dest.IsPair(), r_src.IsPair());
        if (!r_src.IsPair()) {
          // Just copy the register directly.
          OpRegCopy(r_dest, r_src);
        } else {
          // Handle overlap
          if (r_src.GetHighReg() == r_dest.GetLowReg() &&
              r_src.GetLowReg() == r_dest.GetHighReg()) {
            // Deal with cycles.
            RegStorage temp_reg = AllocTemp();
            OpRegCopy(temp_reg, r_dest.GetHigh());
            OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
            OpRegCopy(r_dest.GetLow(), temp_reg);
            FreeTemp(temp_reg);
          } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
          } else {
            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
          }
        }
      }
    }
  }
}

void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
                                  int32_t true_val, int32_t false_val, RegStorage rs_dest,
                                  RegisterClass dest_reg_class) {
  DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair());
  DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat());

  // We really need this check for correctness; otherwise we would need extra
  // checks below in the non zero/one case.
  if (true_val == false_val) {
    LoadConstantNoClobber(rs_dest, true_val);
    return;
  }

  const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op);

  const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0);
  if (zero_one_case && IsByteRegister(rs_dest)) {
    if (!dest_intersect) {
      LoadConstantNoClobber(rs_dest, 0);
    }
    OpRegReg(kOpCmp, left_op, right_op);
    // Set the low byte of the result to 0 or 1 from the compare condition code.
    NewLIR2(kX86Set8R, rs_dest.GetReg(),
            X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code)));
    if (dest_intersect) {
      NewLIR2(rs_dest.Is64Bit() ? kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg());
    }
  } else {
    // Be careful: rs_dest can be changed only after the cmp, because it may overlap one of the
    // operands, and we cannot clear it with xor because that would dirty the condition flags.
    RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false);
    if (temp_reg.Valid()) {
      if (false_val == 0 && dest_intersect) {
        code = FlipComparisonOrder(code);
        std::swap(true_val, false_val);
      }
      if (!dest_intersect) {
        LoadConstantNoClobber(rs_dest, false_val);
      }
      LoadConstantNoClobber(temp_reg, true_val);
      OpRegReg(kOpCmp, left_op, right_op);
      if (dest_intersect) {
        LoadConstantNoClobber(rs_dest, false_val);
        DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
      }
      OpCondRegReg(kOpCmov, code, rs_dest, temp_reg);
      FreeTemp(temp_reg);
    } else {
      // Slow path: branch over the two constant loads.
      LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr);
      LoadConstantNoClobber(rs_dest, false_val);
      LIR* that_is_it = NewLIR1(kX86Jmp8, 0);
      LIR* true_case = NewLIR0(kPseudoTargetLabel);
      cmp_branch->target = true_case;
      LoadConstantNoClobber(rs_dest, true_val);
      LIR* end = NewLIR0(kPseudoTargetLabel);
      that_is_it->target = end;
    }
  }
}

void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  UNUSED(bb);
  RegLocation rl_result;
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  // Avoid using float regs here.
  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
  ConditionCode ccode = mir->meta.ccode;

  // The kMirOpSelect has two variants, one for constants and one for moves.
  const bool is_constant_case = (mir->ssa_rep->num_uses == 1);

  if (is_constant_case) {
    int true_val = mir->dalvikInsn.vB;
    int false_val = mir->dalvikInsn.vC;

    // Simplest case: both constants are equal.
    if (true_val == false_val) {
      rl_result = EvalLoc(rl_dest, result_reg_class, true);
      LoadConstantNoClobber(rl_result.reg, true_val);
    } else {
      // TODO: use GenSelectConst32 and handle additional opcode patterns such as
      // "cmp; setcc; movzx" or "cmp; sbb r0,r0; and r0,$mask; add r0,$literal".
      rl_src = LoadValue(rl_src, src_reg_class);
      rl_result = EvalLoc(rl_dest, result_reg_class, true);
      /*
       * For ccode == kCondEq:
       *
       * 1) When the true case is zero and result_reg is not same as src_reg:
       *     xor result_reg, result_reg
       *     cmp $0, src_reg
       *     mov t1, $false_case
       *     cmovnz result_reg, t1
       * 2) When the false case is zero and result_reg is not same as src_reg:
       *     xor result_reg, result_reg
       *     cmp $0, src_reg
       *     mov t1, $true_case
       *     cmovz result_reg, t1
       * 3) All other cases (we do compare first to set eflags):
       *     cmp $0, src_reg
       *     mov result_reg, $false_case
       *     mov t1, $true_case
       *     cmovz result_reg, t1
       */
      // FIXME: depending on how you use registers you could get a false != mismatch when dealing
      // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
      const bool result_reg_same_as_src =
          (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum());
      const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
      const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
      const bool catch_all_case = !(true_zero_case || false_zero_case);

      if (true_zero_case || false_zero_case) {
        OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
      }

      if (true_zero_case || false_zero_case || catch_all_case) {
        OpRegImm(kOpCmp, rl_src.reg, 0);
      }

      if (catch_all_case) {
        OpRegImm(kOpMov, rl_result.reg, false_val);
      }

      if (true_zero_case || false_zero_case || catch_all_case) {
        ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
        int immediateForTemp = true_zero_case ? false_val : true_val;
        RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
        OpRegImm(kOpMov, temp1_reg, immediateForTemp);

        OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);

        FreeTemp(temp1_reg);
      }
    }
  } else {
    rl_src = LoadValue(rl_src, src_reg_class);
    RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
    RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
    rl_true = LoadValue(rl_true, result_reg_class);
    rl_false = LoadValue(rl_false, result_reg_class);
    rl_result = EvalLoc(rl_dest, result_reg_class, true);

    /*
     * For ccode == kCondEq:
     *
     * 1) When the true case is already in place:
     *     cmp $0, src_reg
     *     cmovnz result_reg, false_reg
     * 2) When the false case is already in place:
     *     cmp $0, src_reg
     *     cmovz result_reg, true_reg
     * 3) When neither case is in place:
     *     cmp $0, src_reg
     *     mov result_reg, false_reg
     *     cmovz result_reg, true_reg
     */

    // kMirOpSelect is generated just for conditional cases when comparison is done with zero.
    OpRegImm(kOpCmp, rl_src.reg, 0);

    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {
      OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg);
    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {
      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
    } else {
      OpRegCopy(rl_result.reg, rl_false.reg);
      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
    }
  }

  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  LIR* taken = &block_label_list_[bb->taken];
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  ConditionCode ccode = mir->meta.ccode;

  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }
  if (rl_src2.is_const) {
    // Do special compare/branch against simple const operand
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
    return;
  }

  if (cu_->target64) {
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);

    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
    OpCondBranch(ccode, taken);
    return;
  }

  // Prepare for explicit register usage
  ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
  LoadValueDirectWideFixed(rl_src1, r_tmp1);
  LoadValueDirectWideFixed(rl_src2, r_tmp2);

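  // (Added note) After the SUB/SBC pair only SF, OF and CF describe the full
  // 64-bit difference; ZF reflects just the high word.  Signed kCondLt/kCondGe
  // (SF != OF / SF == OF) are therefore usable directly, while kCondLe/kCondGt
  // would also need a trustworthy ZF, hence the operand swap below.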
  // Swap operands and condition code to prevent use of zero flag.
  if (ccode == kCondLe || ccode == kCondGt) {
    // Compute (r3:r2) = (r3:r2) - (r1:r0)
    OpRegReg(kOpSub, rs_r2, rs_r0);  // r2 = r2 - r0
    OpRegReg(kOpSbc, rs_r3, rs_r1);  // r3 = r3 - r1 - CF
  } else {
    // Compute (r1:r0) = (r1:r0) - (r3:r2)
    OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
    OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
  }
  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = r0 | r1
      break;
    case kCondLe:
      ccode = kCondGe;
      break;
    case kCondGt:
      ccode = kCondLt;
      break;
    case kCondLt:
    case kCondGe:
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}

void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                          int64_t val, ConditionCode ccode) {
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  LIR* taken = &block_label_list_[bb->taken];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  bool is_equality_test = ccode == kCondEq || ccode == kCondNe;

  if (cu_->target64) {
    if (is_equality_test && val == 0) {
      // We can simplify the comparison for ==, != against 0 into a test.
      NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
    } else if (is_equality_test && val_hi == 0 && val_lo > 0) {
      OpRegImm(kOpCmp, rl_src1.reg, val_lo);
    } else {
      RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
      LoadConstantWide(tmp, val);
      OpRegReg(kOpCmp, rl_src1.reg, tmp);
      FreeTemp(tmp);
    }
    OpCondBranch(ccode, taken);
    return;
  }

  if (is_equality_test && val != 0) {
    rl_src1 = ForceTempWide(rl_src1);
  }
  RegStorage low_reg = rl_src1.reg.GetLow();
  RegStorage high_reg = rl_src1.reg.GetHigh();

  if (is_equality_test) {
    // We can simplify the comparison for ==, != against 0.
    if (val == 0) {
      if (IsTemp(low_reg)) {
        OpRegReg(kOpOr, low_reg, high_reg);
        // We have now changed it; ignore the old values.
        Clobber(rl_src1.reg);
      } else {
        RegStorage t_reg = AllocTemp();
        OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
        FreeTemp(t_reg);
      }
      OpCondBranch(ccode, taken);
      return;
    }

    // Need to compute the actual value for ==, !=.
    OpRegImm(kOpSub, low_reg, val_lo);
    NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
    OpRegReg(kOpOr, high_reg, low_reg);
    Clobber(rl_src1.reg);
  } else if (ccode == kCondLe || ccode == kCondGt) {
    // Swap operands and condition code to prevent use of zero flag.
    RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
    LoadConstantWide(tmp, val);
    OpRegReg(kOpSub, tmp.GetLow(), low_reg);
    OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
    ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
    FreeTemp(tmp);
  } else {
    // We can use a compare for the low word to set CF.
    OpRegImm(kOpCmp, low_reg, val_lo);
    if (IsTemp(high_reg)) {
      NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
      // We have now changed it; ignore the old values.
      Clobber(rl_src1.reg);
    } else {
      // mov temp_reg, high_reg; sbb temp_reg, high_constant
      RegStorage t_reg = AllocTemp();
      OpRegCopy(t_reg, high_reg);
      NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
      FreeTemp(t_reg);
    }
  }

  OpCondBranch(ccode, taken);
}

void X86Mir2Lir::CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long) {
  // It does not make sense to calculate magic and shift for zero divisor.
  DCHECK_NE(divisor, 0);

  /* According to H. S. Warren's Hacker's Delight Chapter 10 and
   * T. Granlund and P. L. Montgomery's Division by invariant integers using multiplication,
   * the magic number M and shift S can be calculated in the following way:
   * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
   * where divisor(d) >= 2.
   * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
   * where divisor(d) <= -2.
   * Thus nc can be calculated like:
   * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long
   * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long
   *
   * So the shift p is the smallest p satisfying
   * 2^p > nc * (d - 2^p % d), where d >= 2
   * 2^p > nc * (d + 2^p % d), where d <= -2.
   *
   * The magic number M is calculated by
   * M = (2^p + d - 2^p % d) / d, where d >= 2
   * M = (2^p - d - 2^p % d) / d, where d <= -2.
   *
   * Notice that p is always greater than or equal to 32/64, so we just return
   * p - 32 / p - 64 as the shift number S.
   */
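  /*
   * (Added worked example, not in the original source.) For the 32-bit
   * divisor d = 7 the loop below stops at p = 34, giving
   *   M = (2^34 + 7 - 2^34 % 7) / 7 = 2454267027 = 0x92492493
   * (negative when viewed as int32_t) and S = p - 32 = 2.  Since d > 0 and
   * M < 0, the consumer GenDivRemLit() adds the numerator back into EDX
   * after the high multiply.
   */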
  int64_t p = (is_long) ? 63 : 31;
  const uint64_t exp = (is_long) ? 0x8000000000000000ULL : 0x80000000U;

  // Initialize the computations.
  uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
  uint64_t tmp = exp + ((is_long) ? static_cast<uint64_t>(divisor) >> 63 :
                                    static_cast<uint32_t>(divisor) >> 31);
  uint64_t abs_nc = tmp - 1 - tmp % abs_d;
  uint64_t quotient1 = exp / abs_nc;
  uint64_t remainder1 = exp % abs_nc;
  uint64_t quotient2 = exp / abs_d;
  uint64_t remainder2 = exp % abs_d;

  /*
   * To avoid handling both positive and negative divisor, Hacker's Delight
   * introduces a method to handle these 2 cases together to avoid duplication.
   */
  uint64_t delta;
  do {
    p++;
    quotient1 = 2 * quotient1;
    remainder1 = 2 * remainder1;
    if (remainder1 >= abs_nc) {
      quotient1++;
      remainder1 = remainder1 - abs_nc;
    }
    quotient2 = 2 * quotient2;
    remainder2 = 2 * remainder2;
    if (remainder2 >= abs_d) {
      quotient2++;
      remainder2 = remainder2 - abs_d;
    }
    delta = abs_d - remainder2;
  } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));

  magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);

  if (!is_long) {
    magic = static_cast<int>(magic);
  }

  shift = (is_long) ? p - 64 : p - 32;
}

RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) {
  UNUSED(rl_dest, reg_lo, lit, is_div);
  LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
  UNREACHABLE();
}

RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
                                     int imm, bool is_div) {
  // Use a multiply (and fixup) to perform an int div/rem by a constant.
  RegLocation rl_result;

  if (imm == 1) {
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (is_div) {
      // x / 1 == x.
      LoadValueDirectFixed(rl_src, rl_result.reg);
    } else {
      // x % 1 == 0.
      LoadConstantNoClobber(rl_result.reg, 0);
    }
  } else if (imm == -1) {  // handle 0x80000000 / -1 special case.
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (is_div) {
      LoadValueDirectFixed(rl_src, rl_result.reg);

      // Check if numerator is 0
      OpRegImm(kOpCmp, rl_result.reg, 0);
      LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);

      // handle 0x80000000 / -1
      OpRegImm(kOpCmp, rl_result.reg, 0x80000000);
      LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);

      // for x != MIN_INT, x / -1 == -x.
      NewLIR1(kX86Neg32R, rl_result.reg.GetReg());

      // The result register already contains the right value (0x80000000).
      minint_branch->target = NewLIR0(kPseudoTargetLabel);
      branch->target = NewLIR0(kPseudoTargetLabel);
    } else {
      // x % -1 == 0.
      LoadConstantNoClobber(rl_result.reg, 0);
    }
  } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
    // Division using shifting.
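    // (Added note) Java division truncates toward zero while SAR computes
    // floor, so negative numerators need a bias of (abs(imm) - 1) added
    // first; e.g. for imm == 4, (-7 + 3) >> 2 == -1 (correct) whereas
    // -7 >> 2 == -2.  The LEA below materializes the biased value and the
    // CMOV keeps the unbiased one for non-negative numerators.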
    rl_src = LoadValue(rl_src, kCoreReg);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (IsSameReg(rl_result.reg, rl_src.reg)) {
      RegStorage rs_temp = AllocTypedTemp(false, kCoreReg);
      rl_result.reg.SetReg(rs_temp.GetReg());
    }

    // Check if numerator is 0
    OpRegImm(kOpCmp, rl_src.reg, 0);
    LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
    LoadConstantNoClobber(rl_result.reg, 0);
    LIR* done = NewLIR1(kX86Jmp8, 0);
    branch->target = NewLIR0(kPseudoTargetLabel);

    NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1);
    NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
    OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
    int shift_amount = CTZ(imm);
    OpRegImm(kOpAsr, rl_result.reg, shift_amount);
    if (imm < 0) {
      OpReg(kOpNeg, rl_result.reg);
    }
    done->target = NewLIR0(kPseudoTargetLabel);
  } else {
    CHECK(imm <= -2 || imm >= 2);

    // Use H. S. Warren's Hacker's Delight Chapter 10 and
    // T. Granlund and P. L. Montgomery's Division by invariant integers using multiplication.
    int64_t magic;
    int shift;
    CalculateMagicAndShift((int64_t)imm, magic, shift, false /* is_long */);

    /*
     * For imm >= 2,
     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, while n < 0.
     * For imm <= -2,
     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, while n > 0
     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
     * We implement this algorithm in the following way:
     * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX
     * 2. if imm > 0 and magic < 0, add numerator to EDX
     *    if imm < 0 and magic > 0, sub numerator from EDX
     * 3. if S != 0, SAR S bits for EDX
     * 4. add 1 to EDX if EDX < 0
     * 5. Thus, EDX is the quotient
     */

    FlushReg(rs_r0);
    Clobber(rs_r0);
    LockTemp(rs_r0);
    FlushReg(rs_r2);
    Clobber(rs_r2);
    LockTemp(rs_r2);

    // Assume that the result will be in EDX for divide, and EAX for remainder.
    rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, is_div ? rs_r2 : rs_r0,
                 INVALID_SREG, INVALID_SREG};

    // We need the value at least twice. Load into a temp.
    rl_src = LoadValue(rl_src, kCoreReg);
    RegStorage numerator_reg = rl_src.reg;

    // Check if numerator is 0.
    OpRegImm(kOpCmp, numerator_reg, 0);
    LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
    // Return result 0 if numerator was 0.
    LoadConstantNoClobber(rl_result.reg, 0);
    LIR* done = NewLIR1(kX86Jmp8, 0);
    branch->target = NewLIR0(kPseudoTargetLabel);

    // EAX = magic.
    LoadConstant(rs_r0, magic);

    // EDX:EAX = magic * numerator.
    NewLIR1(kX86Imul32DaR, numerator_reg.GetReg());

    if (imm > 0 && magic < 0) {
      // Add numerator to EDX.
      DCHECK(numerator_reg.Valid());
      NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg());
    } else if (imm < 0 && magic > 0) {
      DCHECK(numerator_reg.Valid());
      NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg());
    }

    // Do we need the shift?
    if (shift != 0) {
      // Shift EDX by 'shift' bits.
      NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift);
    }

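    // (Added note) Step 4 of the algorithm above: the arithmetic shift rounds
    // toward negative infinity, while Java division truncates toward zero, so
    // a negative intermediate quotient is corrected by adding its own sign
    // bit; e.g. an intermediate -3 becomes -3 + 1 == -2.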
    // Add 1 to EDX if EDX < 0.

    // Move EDX to EAX.
    OpRegCopy(rs_r0, rs_r2);

    // Move sign bit to bit 0, zeroing the rest.
    NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31);

    // EDX = EDX + EAX.
    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg());

    // Quotient is in EDX.
    if (!is_div) {
      // We need to compute the remainder.
      // Remainder is numerator - (quotient * imm).
      DCHECK(numerator_reg.Valid());
      OpRegCopy(rs_r0, numerator_reg);

      // EDX = quotient * imm.
      OpRegRegImm(kOpMul, rs_r2, rs_r2, imm);

      // EAX -= EDX.
      NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg());

      // For this case, return the result in EAX.
    }
    done->target = NewLIR0(kPseudoTargetLabel);
  }

  return rl_result;
}

RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
                                  bool is_div) {
  UNUSED(rl_dest, reg_lo, reg_hi, is_div);
  LOG(FATAL) << "Unexpected use of GenDivRem for x86";
  UNREACHABLE();
}

RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                                  RegLocation rl_src2, bool is_div, int flags) {
  UNUSED(rl_dest);
  // We have to use fixed registers, so flush all the temps.

  // Prepare for explicit register usage.
  ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);

  // Load LHS into EAX.
  LoadValueDirectFixed(rl_src1, rs_r0);

  // Load RHS into ECX.
  LoadValueDirectFixed(rl_src2, rs_r1);

  // Copy LHS sign bit into EDX.
  NewLIR0(kx86Cdq32Da);

  if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
    // Handle division by zero case.
    GenDivZeroCheck(rs_r1);
  }

  // Check if numerator is 0
  OpRegImm(kOpCmp, rs_r0, 0);
  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);

  // Have to catch 0x80000000/-1 case, or we will get an exception!
  OpRegImm(kOpCmp, rs_r1, -1);
  LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // RHS is -1.
  OpRegImm(kOpCmp, rs_r0, 0x80000000);
  LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  branch->target = NewLIR0(kPseudoTargetLabel);

  // In 0x80000000/-1 case.
  if (!is_div) {
    // For DIV, EAX is already right. For REM, we need EDX to be 0.
    LoadConstantNoClobber(rs_r2, 0);
  }
  LIR* done = NewLIR1(kX86Jmp8, 0);

  // Expected case.
  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
  minint_branch->target = minus_one_branch->target;
  NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg());
  done->target = NewLIR0(kPseudoTargetLabel);

  // Result is in EAX for div and EDX for rem.
  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
  if (!is_div) {
    rl_result.reg.SetReg(r2);
  }
  return rl_result;
}

static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) {
  return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num);
}

bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);

  if (is_long && !cu_->target64) {
    /*
     * We want to implement the following algorithm
     * mov eax, low part of arg1
     * mov edx, high part of arg1
     * mov ebx, low part of arg2
     * mov ecx, high part of arg2
     * mov edi, eax
     * sub edi, ebx
     * mov edi, edx
     * sbb edi, ecx
     * is_min ? "cmovgel eax, ebx" : "cmovll eax, ebx"
     * is_min ? "cmovgel edx, ecx" : "cmovll edx, ecx"
     *
     * The algorithm above needs 5 registers: a pair for the first operand
     * (which later will be used as result), a pair for the second operand
     * and a temp register (e.g. 'edi') for intermediate calculations.
     * Ideally we have 6 GP caller-save registers in 32-bit mode. They are:
     * 'eax', 'ebx', 'ecx', 'edx', 'esi' and 'edi'. So there should be
     * always enough registers to operate on. Practically, there is a pair
     * of registers 'edi' and 'esi' which hold promoted values and
     * sometimes should be treated as 'callee save'. If one of the operands
     * is in the promoted registers then we have enough registers to
     * operate on. Otherwise there is a lack of resources and we have to
     * save 'edi' before the calculations and restore it after.
     */

    RegLocation rl_src1 = info->args[0];
    RegLocation rl_src2 = info->args[2];
    RegLocation rl_dest = InlineTargetWide(info);

    if (rl_dest.s_reg_low == INVALID_SREG) {
      // Result is unused, the code is dead. Inlining successful, no code generated.
      return true;
    }

    if (PartiallyIntersects(rl_src1, rl_dest) &&
        PartiallyIntersects(rl_src2, rl_dest)) {
      // A special case which we don't want to handle.
      // This is when src1 is mapped on v0 and v1,
      // src2 is mapped on v2, v3,
      // result is mapped on v1, v2
      return false;
    }

    /*
     * If the result register is the same as the second element, then we
     * need to be careful. The reason is that the first copy will
     * inadvertently clobber the second element with the first one thus
     * yielding the wrong result. Thus we do a swap in that case.
     */
    if (Intersects(rl_src2, rl_dest)) {
      std::swap(rl_src1, rl_src2);
    }

    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

    // Pick the first integer as min/max.
    OpRegCopyWide(rl_result.reg, rl_src1.reg);

    /*
     * If the integers are both in the same register, then there is
     * nothing else to do because they are equal and we have already
     * moved one into the result.
     */
    if (mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
        mir_graph_->SRegToVReg(rl_src2.s_reg_low)) {
      StoreValueWide(rl_dest, rl_result);
      return true;
    }

    // Free registers to make some room for the second operand.
    // But don't try to free part of a source which intersects
    // part of result or promoted registers.

    if (IsTemp(rl_src1.reg.GetLow()) &&
        (rl_src1.reg.GetLowReg() != rl_result.reg.GetHighReg()) &&
        (rl_src1.reg.GetLowReg() != rl_result.reg.GetLowReg())) {
      // Is low part temporary and doesn't intersect any parts of result?
      FreeTemp(rl_src1.reg.GetLow());
    }

    if (IsTemp(rl_src1.reg.GetHigh()) &&
        (rl_src1.reg.GetHighReg() != rl_result.reg.GetLowReg()) &&
        (rl_src1.reg.GetHighReg() != rl_result.reg.GetHighReg())) {
      // Is high part temporary and doesn't intersect any parts of result?
      FreeTemp(rl_src1.reg.GetHigh());
    }

    rl_src2 = LoadValueWide(rl_src2, kCoreReg);

    // Do we have a free register for intermediate calculations?
    RegStorage tmp = AllocTemp(false);
    const int kRegSize = cu_->target64 ? 8 : 4;
    if (tmp == RegStorage::InvalidReg()) {
      /*
       * No, will use 'edi'.
       *
       * As mentioned above we have 4 temporary and 2 promotable
       * caller-save registers. Therefore, we assume that a free
       * register can be allocated only if 'esi' and 'edi' are
       * already used as operands. If the number of promotable registers
       * increases from 2 to 4 then our assumption fails and operand
       * data is corrupted.
       * Let's DCHECK it.
       */
      DCHECK(IsTemp(rl_src2.reg.GetLow()) &&
             IsTemp(rl_src2.reg.GetHigh()) &&
             IsTemp(rl_result.reg.GetLow()) &&
             IsTemp(rl_result.reg.GetHigh()));
      tmp = rs_rDI;
      NewLIR1(kX86Push32R, tmp.GetReg());
      cfi_.AdjustCFAOffset(kRegSize);
      // Record cfi only if it is not already spilled.
      if (!CoreSpillMaskContains(tmp.GetReg())) {
        cfi_.RelOffset(DwarfCoreReg(cu_->target64, tmp.GetReg()), 0);
      }
    }

    // Now we are ready to do the calculations.
    OpRegReg(kOpMov, tmp, rl_result.reg.GetLow());
    OpRegReg(kOpSub, tmp, rl_src2.reg.GetLow());
    OpRegReg(kOpMov, tmp, rl_result.reg.GetHigh());
    OpRegReg(kOpSbc, tmp, rl_src2.reg.GetHigh());

    // Let's put the pop of 'edi' here to break the dependency chain a bit.
    if (tmp == rs_rDI) {
      NewLIR1(kX86Pop32R, tmp.GetReg());
      cfi_.AdjustCFAOffset(-kRegSize);
      if (!CoreSpillMaskContains(tmp.GetReg())) {
        cfi_.Restore(DwarfCoreReg(cu_->target64, tmp.GetReg()));
      }
    } else {
      FreeTemp(tmp);
    }

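    // (Added note) Both halves are selected on the same flags: the SUB/SBC
    // pair above computed (result - src2) as a full 64-bit difference, so a
    // single signed condition drives the CMOV of both the low and high words.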
    // Conditionally move the other integer into the destination register.
    ConditionCode cc = is_min ? kCondGe : kCondLt;
    OpCondRegReg(kOpCmov, cc, rl_result.reg.GetLow(), rl_src2.reg.GetLow());
    OpCondRegReg(kOpCmov, cc, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh());
    FreeTemp(rl_src2.reg);
    StoreValueWide(rl_dest, rl_result);
    return true;
  }

  // Get the two arguments to the invoke and place them in GP registers.
  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);

  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  /*
   * If the result register is the same as the second element, then we need to be careful.
   * The reason is that the first copy will inadvertently clobber the second element with
   * the first one thus yielding the wrong result. Thus we do a swap in that case.
   */
  if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
    std::swap(rl_src1, rl_src2);
  }

  // Pick the first integer as min/max.
  OpRegCopy(rl_result.reg, rl_src1.reg);

  // If the integers are both in the same register, then there is nothing else to do
  // because they are equal and we have already moved one into the result.
  if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) {
    // It is possible we didn't pick correctly so do the actual comparison now.
    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);

    // Conditionally move the other integer into the destination register.
    ConditionCode condition_code = is_min ? kCondGt : kCondLt;
    OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg);
  }

  if (is_long) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_address;
  if (!cu_->target64) {
    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
    rl_address = LoadValue(rl_src_address, kCoreReg);
  } else {
    rl_address = LoadValueWide(rl_src_address, kCoreReg);
  }
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  // Unaligned access is allowed on x86.
  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
  if (size == k64) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_address;
  if (!cu_->target64) {
    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
    rl_address = LoadValue(rl_src_address, kCoreReg);
  } else {
    rl_address = LoadValueWide(rl_src_address, kCoreReg);
  }
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_value;
  if (size == k64) {
    // Unaligned access is allowed on x86.
    rl_value = LoadValueWide(rl_src_value, kCoreReg);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    // In 32-bit mode, only the EAX..EDX registers can be used with Mov8MR.
    if (!cu_->target64 && size == kSignedByte) {
      rl_src_value = UpdateLocTyped(rl_src_value);
      if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
        RegStorage temp = AllocateByteRegister();
        OpRegCopy(temp, rl_src_value.reg);
        rl_value.reg = temp;
      } else {
        rl_value = LoadValue(rl_src_value, kCoreReg);
      }
    } else {
      rl_value = LoadValue(rl_src_value, kCoreReg);
    }
  }
  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
  return true;
}

void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
  NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset);
}

void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
  DCHECK_EQ(kX86, cu_->instruction_set);
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
  DCHECK_EQ(kX86_64, cu_->instruction_set);
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

static bool IsInReg(X86Mir2Lir *pMir2Lir, const RegLocation &rl, RegStorage reg) {
  return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home);
}

bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  if (!cu_->target64) {
    rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
  }
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]
  const int kRegSize = cu_->target64 ? 8 : 4;

  if (is_long && cu_->target64) {
    // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX.
    FlushReg(rs_r0q);
    Clobber(rs_r0q);
    LockTemp(rs_r0q);

    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
    RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
    RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
    LoadValueDirectWide(rl_src_expected, rs_r0q);
    NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
            rl_new_value.reg.GetReg());

    // After a store we need to insert barrier in case of potential load. Since the
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
    GenMemBarrier(kAnyAny);

    FreeTemp(rs_r0q);
  } else if (is_long) {
    // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
    FlushAllRegs();
    LockCallTemps();
    RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
    RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
    LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
    LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
    // FIXME: needs 64-bit update.
    const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
    const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
    DCHECK(!obj_in_si || !obj_in_di);
    const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
    const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
    DCHECK(!off_in_si || !off_in_di);
    // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
    RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
    RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
    bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
    bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
    if (push_di) {
      NewLIR1(kX86Push32R, rs_rDI.GetReg());
      MarkTemp(rs_rDI);
      LockTemp(rs_rDI);
      cfi_.AdjustCFAOffset(kRegSize);
      // Record cfi only if it is not already spilled.
      if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
        cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0);
      }
    }
    if (push_si) {
      NewLIR1(kX86Push32R, rs_rSI.GetReg());
      MarkTemp(rs_rSI);
      LockTemp(rs_rSI);
      cfi_.AdjustCFAOffset(kRegSize);
      // Record cfi only if it is not already spilled.
      if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
        cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetReg()), 0);
      }
    }
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
    const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
    if (!obj_in_si && !obj_in_di) {
      LoadWordDisp(rs_rSP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
    }
    if (!off_in_si && !off_in_di) {
      LoadWordDisp(rs_rSP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
    }
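    // (Added note) kX86LockCmpxchg64A is x86 CMPXCHG8B: it compares EDX:EAX
    // with the 64-bit memory operand and, when equal, stores ECX:EBX into it,
    // which is exactly the register layout r_tmp1/r_tmp2 were loaded with
    // above.  ZF is set on success and converted to a boolean result below.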
    NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);

    // After a store we need to insert barrier to prevent reordering with either
    // earlier or later memory accesses. Since
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
    // and it will be associated with the cmpxchg instruction, preventing both.
    GenMemBarrier(kAnyAny);

    if (push_si) {
      FreeTemp(rs_rSI);
      UnmarkTemp(rs_rSI);
      NewLIR1(kX86Pop32R, rs_rSI.GetReg());
      cfi_.AdjustCFAOffset(-kRegSize);
      if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
        cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()));
      }
    }
    if (push_di) {
      FreeTemp(rs_rDI);
      UnmarkTemp(rs_rDI);
      NewLIR1(kX86Pop32R, rs_rDI.GetReg());
      cfi_.AdjustCFAOffset(-kRegSize);
      if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
        cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()));
      }
    }
    FreeCallTemps();
  } else {
    // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX.
    FlushReg(rs_r0);
    Clobber(rs_r0);
    LockTemp(rs_r0);

    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
    RegLocation rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);

    if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
      // Mark card for object assuming new value is stored.
      FreeTemp(rs_r0);  // Temporarily release EAX for MarkGCCard().
      MarkGCCard(0, rl_new_value.reg, rl_object.reg);
      LockTemp(rs_r0);
    }

    RegLocation rl_offset;
    if (cu_->target64) {
      rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
    } else {
      rl_offset = LoadValue(rl_src_offset, kCoreReg);
    }
    LoadValueDirect(rl_src_expected, rs_r0);
    NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
            rl_new_value.reg.GetReg());

    // After a store we need to insert barrier to prevent reordering with either
    // earlier or later memory accesses. Since
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
    // and it will be associated with the cmpxchg instruction, preventing both.
    GenMemBarrier(kAnyAny);

    FreeTemp(rs_r0);
  }

  // Convert ZF to boolean
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage result_reg = rl_result.reg;

  // For 32-bit, SETcc only works with EAX..EDX.
  if (!IsByteRegister(result_reg)) {
    result_reg = AllocateByteRegister();
  }
  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
  NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
  if (IsTemp(result_reg)) {
    FreeTemp(result_reg);
  }
  StoreValue(rl_dest, rl_result);
  return true;
}

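// (Added note) SwapBits computes ((x >> shift) & value) | ((x & value) << shift),
// the standard building block of bit reversal; e.g. shift == 1 with mask
// 0x55555555 exchanges every pair of adjacent bits.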
void X86Mir2Lir::SwapBits(RegStorage result_reg, int shift, int32_t value) {
  RegStorage r_temp = AllocTemp();
  OpRegCopy(r_temp, result_reg);
  OpRegImm(kOpLsr, result_reg, shift);
  OpRegImm(kOpAnd, r_temp, value);
  OpRegImm(kOpAnd, result_reg, value);
  OpRegImm(kOpLsl, r_temp, shift);
  OpRegReg(kOpOr, result_reg, r_temp);
  FreeTemp(r_temp);
}

void X86Mir2Lir::SwapBits64(RegStorage result_reg, int shift, int64_t value) {
  RegStorage r_temp = AllocTempWide();
  OpRegCopy(r_temp, result_reg);
  OpRegImm(kOpLsr, result_reg, shift);
  RegStorage r_value = AllocTempWide();
  LoadConstantWide(r_value, value);
  OpRegReg(kOpAnd, r_temp, r_value);
  OpRegReg(kOpAnd, result_reg, r_value);
  OpRegImm(kOpLsl, r_temp, shift);
  OpRegReg(kOpOr, result_reg, r_temp);
  FreeTemp(r_temp);
  FreeTemp(r_value);
}

bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  RegLocation rl_src_i = info->args[0];
  RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg)
                                   : LoadValue(rl_src_i, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (size == k64) {
    if (cu_->instruction_set == kX86_64) {
      /* Use one bswap instruction to reverse byte order first and then use 3 rounds of
         swapping bits to reverse bits in a long number x. Using bswap to save instructions
         compared to generic luni implementation which has 5 rounds of swapping bits.
         x = bswap x
         x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
         x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
         x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
      */
      OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
      SwapBits64(rl_result.reg, 1, 0x5555555555555555);
      SwapBits64(rl_result.reg, 2, 0x3333333333333333);
      SwapBits64(rl_result.reg, 4, 0x0f0f0f0f0f0f0f0f);
      StoreValueWide(rl_dest, rl_result);
      return true;
    }
    RegStorage r_i_low = rl_i.reg.GetLow();
    if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
      // First REV shall clobber rl_result.reg.GetLowReg(), save the value in a temp for the second
      // REV.
      r_i_low = AllocTemp();
      OpRegCopy(r_i_low, rl_i.reg);
    }
    OpRegReg(kOpRev, rl_result.reg.GetLow(), rl_i.reg.GetHigh());
    OpRegReg(kOpRev, rl_result.reg.GetHigh(), r_i_low);
    // Free up at least one input register if it was a temp. Otherwise we may be in the bad
    // situation of not having a temp available for SwapBits. Make sure it's not overlapping
    // with the output, though.
    if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
      // There's definitely a free temp after this.
      FreeTemp(r_i_low);
    } else {
      // We opportunistically release both here. That saves duplication of the register state
      // lookup (to see if it's actually a temp).
      if (rl_i.reg.GetLowReg() != rl_result.reg.GetHighReg()) {
        FreeTemp(rl_i.reg.GetLow());
      }
      if (rl_i.reg.GetHighReg() != rl_result.reg.GetLowReg() &&
          rl_i.reg.GetHighReg() != rl_result.reg.GetHighReg()) {
        FreeTemp(rl_i.reg.GetHigh());
      }
    }

    SwapBits(rl_result.reg.GetLow(), 1, 0x55555555);
    SwapBits(rl_result.reg.GetLow(), 2, 0x33333333);
    SwapBits(rl_result.reg.GetLow(), 4, 0x0f0f0f0f);
    SwapBits(rl_result.reg.GetHigh(), 1, 0x55555555);
    SwapBits(rl_result.reg.GetHigh(), 2, 0x33333333);
    SwapBits(rl_result.reg.GetHigh(), 4, 0x0f0f0f0f);
    StoreValueWide(rl_dest, rl_result);
  } else {
    OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
    SwapBits(rl_result.reg, 1, 0x55555555);
    SwapBits(rl_result.reg, 2, 0x33333333);
    SwapBits(rl_result.reg, 4, 0x0f0f0f0f);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
  if (cu_->target64) {
    // We can do this directly using RIP addressing.
    ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
    LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, kDummy32BitOffset);
    res->target = target;
    res->flags.fixup = kFixupLoad;
    return;
  }

  // Get the PC to a register and get the anchor.
  LIR* anchor;
  RegStorage r_pc = GetPcAndAnchor(&anchor);

  // Load the proper value from the literal area.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
  res->operands[4] = WrapPointer(anchor);
  res->target = target;
  res->flags.fixup = kFixupLoad;
}

bool X86Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
  return dex_cache_arrays_layout_.Valid();
}

LIR* X86Mir2Lir::OpLoadPc(RegStorage r_dest) {
  DCHECK(!cu_->target64);
  LIR* call = NewLIR1(kX86CallI, 0);
  call->flags.fixup = kFixupLabel;
  LIR* pop = NewLIR1(kX86Pop32R, r_dest.GetReg());
  pop->flags.fixup = kFixupLabel;
  DCHECK(NEXT_LIR(call) == pop);
  return call;
}

RegStorage X86Mir2Lir::GetPcAndAnchor(LIR** anchor, RegStorage r_tmp) {
  if (pc_rel_base_reg_.Valid()) {
    DCHECK(setup_pc_rel_base_reg_ != nullptr);
    *anchor = NEXT_LIR(setup_pc_rel_base_reg_);
    DCHECK(*anchor != nullptr);
    DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
    pc_rel_base_reg_used_ = true;
    return pc_rel_base_reg_;
  } else {
    RegStorage r_pc = r_tmp.Valid() ? r_tmp : AllocTempRef();
    LIR* load_pc = OpLoadPc(r_pc);
    *anchor = NEXT_LIR(load_pc);
    DCHECK(*anchor != nullptr);
    DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
    return r_pc;
  }
}

void X86Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest,
                                          bool wide) {
  if (cu_->target64) {
    LIR* mov = NewLIR3(wide ? kX86Mov64RM : kX86Mov32RM, r_dest.GetReg(), kRIPReg,
                       kDummy32BitOffset);
    mov->flags.fixup = kFixupLabel;
    mov->operands[3] = WrapPointer(dex_file);
    mov->operands[4] = offset;
    mov->target = mov;  // Used for pc_insn_offset (not used by x86-64 relative patcher).
    dex_cache_access_insns_.push_back(mov);
  } else {
    CHECK(!wide) << "Unsupported";
    // Get the PC to a register and get the anchor. Use r_dest for the temp if needed.
    LIR* anchor;
    RegStorage r_pc = GetPcAndAnchor(&anchor, r_dest);
    LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
    mov->flags.fixup = kFixupLabel;
    mov->operands[3] = WrapPointer(dex_file);
    mov->operands[4] = offset;
    mov->target = anchor;  // Used for pc_insn_offset.
    dex_cache_access_insns_.push_back(mov);
  }
}

LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
  UNUSED(r_base, count);
  LOG(FATAL) << "Unexpected use of OpVldm for x86";
  UNREACHABLE();
}

LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) {
  UNUSED(r_base, count);
  LOG(FATAL) << "Unexpected use of OpVstm for x86";
  UNREACHABLE();
}

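// (Added note) For a literal with exactly two bits set,
// lit == (1 << first_bit) + (1 << second_bit), the helper below computes
// src * lit as ((src << (second_bit - first_bit)) + src) << first_bit;
// e.g. for lit == 10 (bits 1 and 3): ((src << 2) + src) << 1 == src * 10.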
void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                               RegLocation rl_result, int lit,
                                               int first_bit, int second_bit) {
  UNUSED(lit);
  RegStorage t_reg = AllocTemp();
  OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
  OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
  FreeTemp(t_reg);
  if (first_bit != 0) {
    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
  }
}

void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
  if (cu_->target64) {
    DCHECK(reg.Is64Bit());

    NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
  } else {
    DCHECK(reg.IsPair());

    // We are not supposed to clobber the incoming storage, so allocate a temporary.
    RegStorage t_reg = AllocTemp();
    // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
    OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
    // The temp is no longer needed so free it at this time.
    FreeTemp(t_reg);
  }

  // In case of zero, throw ArithmeticException.
  GenDivZeroCheck(kCondEq);
}

void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index,
                                     RegStorage array_base,
                                     int len_offset) {
  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
   public:
    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
                             RegStorage index_in, RegStorage array_base_in, int32_t len_offset_in)
        : LIRSlowPath(m2l, branch_in),
          index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
    }

    void Compile() OVERRIDE {
      m2l_->ResetRegPool();
      m2l_->ResetDefTracking();
      GenerateTargetLabel(kPseudoThrowTarget);

      RegStorage new_index = index_;
      // Move index out of kArg1, either directly to kArg0, or to kArg2.
      // TODO: clean-up to check not a number but with type
      if (index_ == m2l_->TargetReg(kArg1, kNotWide)) {
        if (array_base_ == m2l_->TargetReg(kArg0, kRef)) {
          m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_);
          new_index = m2l_->TargetReg(kArg2, kNotWide);
        } else {
          m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_);
          new_index = m2l_->TargetReg(kArg0, kNotWide);
        }
      }
      // Load array length to kArg1.
      X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
      x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
      x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, new_index,
                                       m2l_->TargetReg(kArg1, kNotWide), true);
    }

   private:
    const RegStorage index_;
    const RegStorage array_base_;
    const int32_t len_offset_;
  };

  OpRegMem(kOpCmp, index, array_base, len_offset);
  MarkPossibleNullPointerException(0);
  LIR* branch = OpCondBranch(kCondUge, nullptr);
  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                    index, array_base, len_offset));
}

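// (Added note) Both bounds checks here rely on an unsigned comparison: a
// negative index, viewed as unsigned, is larger than any valid array length,
// so the single kCondUge/kCondLs branch covers index < 0 and index >= length
// at once.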
1526 X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_); 1527 x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_); 1528 x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, new_index, 1529 m2l_->TargetReg(kArg1, kNotWide), true); 1530 } 1531 1532 private: 1533 const RegStorage index_; 1534 const RegStorage array_base_; 1535 const int32_t len_offset_; 1536 }; 1537 1538 OpRegMem(kOpCmp, index, array_base, len_offset); 1539 MarkPossibleNullPointerException(0); 1540 LIR* branch = OpCondBranch(kCondUge, nullptr); 1541 AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch, 1542 index, array_base, len_offset)); 1543 } 1544 1545 void X86Mir2Lir::GenArrayBoundsCheck(int32_t index, 1546 RegStorage array_base, 1547 int32_t len_offset) { 1548 class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath { 1549 public: 1550 ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in, 1551 int32_t index_in, RegStorage array_base_in, int32_t len_offset_in) 1552 : LIRSlowPath(m2l, branch_in), 1553 index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) { 1554 } 1555 1556 void Compile() OVERRIDE { 1557 m2l_->ResetRegPool(); 1558 m2l_->ResetDefTracking(); 1559 GenerateTargetLabel(kPseudoThrowTarget); 1560 1561 // Load array length to kArg1. 1562 X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_); 1563 x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_); 1564 x86_m2l->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_); 1565 x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, m2l_->TargetReg(kArg0, kNotWide), 1566 m2l_->TargetReg(kArg1, kNotWide), true); 1567 } 1568 1569 private: 1570 const int32_t index_; 1571 const RegStorage array_base_; 1572 const int32_t len_offset_; 1573 }; 1574 1575 NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index); 1576 MarkPossibleNullPointerException(0); 1577 LIR* branch = OpCondBranch(kCondLs, nullptr); 1578 AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch, 1579 index, array_base, len_offset)); 1580 } 1581 1582 // Test suspend flag, return target of taken suspend branch 1583 LIR* X86Mir2Lir::OpTestSuspend(LIR* target) { 1584 if (cu_->target64) { 1585 OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0); 1586 } else { 1587 OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0); 1588 } 1589 return OpCondBranch((target == nullptr) ? 
kCondNe : kCondEq, target);
1590 }
1591
1592 // Decrement register and branch on condition
1593 LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
1594 OpRegImm(kOpSub, reg, 1);
1595 return OpCondBranch(c_code, target);
1596 }
1597
1598 bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
1599 RegLocation rl_src, RegLocation rl_dest, int lit) {
1600 UNUSED(dalvik_opcode, is_div, rl_src, rl_dest, lit);
1601 LOG(FATAL) << "Unexpected use of SmallLiteralDivRem in x86";
1602 UNREACHABLE();
1603 }
1604
1605 bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
1606 UNUSED(rl_src, rl_dest, lit);
1607 LOG(FATAL) << "Unexpected use of EasyMultiply in x86";
1608 UNREACHABLE();
1609 }
1610
1611 LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
1612 UNUSED(cond, guide);
1613 LOG(FATAL) << "Unexpected use of OpIT in x86";
1614 UNREACHABLE();
1615 }
1616
1617 void X86Mir2Lir::OpEndIT(LIR* it) {
1618 UNUSED(it);
1619 LOG(FATAL) << "Unexpected use of OpEndIT in x86";
1620 UNREACHABLE();
1621 }
1622
1623 void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) {
1624 switch (val) {
1625 case 0:
1626 NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
1627 break;
1628 case 1:
1629 OpRegCopy(dest, src);
1630 break;
1631 default:
1632 OpRegRegImm(kOpMul, dest, src, val);
1633 break;
1634 }
1635 }
1636
1637 void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
1638 UNUSED(sreg);
1639 // All memory accesses below reference dalvik regs.
1640 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1641
1642 LIR *m;
1643 switch (val) {
1644 case 0:
1645 NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
1646 break;
1647 case 1: {
1648 const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
1649 LoadBaseDisp(rs_rSP, displacement, dest, k32, kNotVolatile);
1650 break;
1651 }
1652 default:
1653 m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
1654 rs_rX86_SP_32.GetReg(), displacement, val);
1655 AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
1656 break;
1657 }
1658 }
1659
1660 void X86Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
1661 RegLocation rl_src2, int flags) {
1662 if (!cu_->target64) {
1663 // Some 32-bit x86 long ops fall back to the generic Mir2Lir implementation.
1664 switch (opcode) {
1665 case Instruction::NOT_LONG:
1666 case Instruction::DIV_LONG:
1667 case Instruction::DIV_LONG_2ADDR:
1668 case Instruction::REM_LONG:
1669 case Instruction::REM_LONG_2ADDR:
1670 Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
1671 return;
1672
1673 default:
1674 // Everything else we can handle.
1675 break; 1676 } 1677 } 1678 1679 switch (opcode) { 1680 case Instruction::NOT_LONG: 1681 GenNotLong(rl_dest, rl_src2); 1682 return; 1683 1684 case Instruction::ADD_LONG: 1685 case Instruction::ADD_LONG_2ADDR: 1686 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); 1687 return; 1688 1689 case Instruction::SUB_LONG: 1690 case Instruction::SUB_LONG_2ADDR: 1691 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false); 1692 return; 1693 1694 case Instruction::MUL_LONG: 1695 case Instruction::MUL_LONG_2ADDR: 1696 GenMulLong(opcode, rl_dest, rl_src1, rl_src2, flags); 1697 return; 1698 1699 case Instruction::DIV_LONG: 1700 case Instruction::DIV_LONG_2ADDR: 1701 GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags); 1702 return; 1703 1704 case Instruction::REM_LONG: 1705 case Instruction::REM_LONG_2ADDR: 1706 GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags); 1707 return; 1708 1709 case Instruction::AND_LONG_2ADDR: 1710 case Instruction::AND_LONG: 1711 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); 1712 return; 1713 1714 case Instruction::OR_LONG: 1715 case Instruction::OR_LONG_2ADDR: 1716 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); 1717 return; 1718 1719 case Instruction::XOR_LONG: 1720 case Instruction::XOR_LONG_2ADDR: 1721 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); 1722 return; 1723 1724 case Instruction::NEG_LONG: 1725 GenNegLong(rl_dest, rl_src2); 1726 return; 1727 1728 default: 1729 LOG(FATAL) << "Invalid long arith op"; 1730 return; 1731 } 1732 } 1733 1734 bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val, int flags) { 1735 // All memory accesses below reference dalvik regs. 1736 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); 1737 1738 if (val == 0) { 1739 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); 1740 if (cu_->target64) { 1741 OpRegReg(kOpXor, rl_result.reg, rl_result.reg); 1742 } else { 1743 OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow()); 1744 OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); 1745 } 1746 StoreValueWide(rl_dest, rl_result); 1747 return true; 1748 } else if (val == 1) { 1749 StoreValueWide(rl_dest, rl_src1); 1750 return true; 1751 } else if (val == 2) { 1752 GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1, flags); 1753 return true; 1754 } else if (IsPowerOfTwo(val)) { 1755 int shift_amount = CTZ(val); 1756 if (!PartiallyIntersects(rl_src1, rl_dest)) { 1757 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 1758 RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1, 1759 shift_amount, flags); 1760 StoreValueWide(rl_dest, rl_result); 1761 return true; 1762 } 1763 } 1764 1765 // Okay, on 32b just bite the bullet and do it, still better than the general case. 1766 if (!cu_->target64) { 1767 int32_t val_lo = Low32Bits(val); 1768 int32_t val_hi = High32Bits(val); 1769 // Prepare for explicit register usage. 
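    // A sketch of the identity the code below relies on for a 64x64->64
    // multiply by a constant (hi2:lo2 = val_hi:val_lo):
    //   (hi1:lo1) * (hi2:lo2) mod 2^64
    //       = lo1*lo2 + ((lo1*hi2 + hi1*lo2) << 32)
    // so one widening 32x32->64 MUL plus two truncating IMULs suffice.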
1770 ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2); 1771 rl_src1 = UpdateLocWideTyped(rl_src1); 1772 bool src1_in_reg = rl_src1.location == kLocPhysReg; 1773 int displacement = SRegOffset(rl_src1.s_reg_low); 1774 1775 // ECX <- 1H * 2L 1776 // EAX <- 1L * 2H 1777 if (src1_in_reg) { 1778 GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo); 1779 GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi); 1780 } else { 1781 GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo); 1782 GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi); 1783 } 1784 1785 // ECX <- ECX + EAX (2H * 1L) + (1H * 2L) 1786 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg()); 1787 1788 // EAX <- 2L 1789 LoadConstantNoClobber(rs_r0, val_lo); 1790 1791 // EDX:EAX <- 2L * 1L (double precision) 1792 if (src1_in_reg) { 1793 NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg()); 1794 } else { 1795 LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET); 1796 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1797 true /* is_load */, true /* is_64bit */); 1798 } 1799 1800 // EDX <- EDX + ECX (add high words) 1801 NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg()); 1802 1803 // Result is EDX:EAX 1804 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, 1805 RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG}; 1806 StoreValueWide(rl_dest, rl_result); 1807 return true; 1808 } 1809 return false; 1810 } 1811 1812 void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, 1813 RegLocation rl_src2, int flags) { 1814 if (rl_src1.is_const) { 1815 std::swap(rl_src1, rl_src2); 1816 } 1817 1818 if (rl_src2.is_const) { 1819 if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2), flags)) { 1820 return; 1821 } 1822 } 1823 1824 // All memory accesses below reference dalvik regs. 1825 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); 1826 1827 if (cu_->target64) { 1828 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 1829 rl_src2 = LoadValueWide(rl_src2, kCoreReg); 1830 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); 1831 if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() && 1832 rl_result.reg.GetReg() == rl_src2.reg.GetReg()) { 1833 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); 1834 } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() && 1835 rl_result.reg.GetReg() == rl_src2.reg.GetReg()) { 1836 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg()); 1837 } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() && 1838 rl_result.reg.GetReg() != rl_src2.reg.GetReg()) { 1839 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg()); 1840 } else { 1841 OpRegCopy(rl_result.reg, rl_src1.reg); 1842 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg()); 1843 } 1844 StoreValueWide(rl_dest, rl_result); 1845 return; 1846 } 1847 1848 // Not multiplying by a constant. Do it the hard way 1849 // Check for V*V. We can eliminate a multiply in that case, as 2L*1H == 2H*1L. 1850 bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) == 1851 mir_graph_->SRegToVReg(rl_src2.s_reg_low); 1852 1853 // Prepare for explicit register usage. 1854 ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2); 1855 rl_src1 = UpdateLocWideTyped(rl_src1); 1856 rl_src2 = UpdateLocWideTyped(rl_src2); 1857 1858 // At this point, the VRs are in their home locations. 
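  // Note: the one-operand MUL used below (kX86Mul32DaR / kX86Mul32DaM)
  // implicitly leaves its double-precision result in EDX:EAX, which is why
  // EAX, ECX and EDX were locked for explicit use above.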
1859 bool src1_in_reg = rl_src1.location == kLocPhysReg; 1860 bool src2_in_reg = rl_src2.location == kLocPhysReg; 1861 const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; 1862 1863 // ECX <- 1H 1864 if (src1_in_reg) { 1865 NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg()); 1866 } else { 1867 LoadBaseDisp(rs_rSP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32, 1868 kNotVolatile); 1869 } 1870 1871 if (is_square) { 1872 // Take advantage of the fact that the values are the same. 1873 // ECX <- ECX * 2L (1H * 2L) 1874 if (src2_in_reg) { 1875 NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg()); 1876 } else { 1877 int displacement = SRegOffset(rl_src2.s_reg_low); 1878 LIR* m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP_32.GetReg(), 1879 displacement + LOWORD_OFFSET); 1880 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1881 true /* is_load */, true /* is_64bit */); 1882 } 1883 1884 // ECX <- 2*ECX (2H * 1L) + (1H * 2L) 1885 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg()); 1886 } else { 1887 // EAX <- 2H 1888 if (src2_in_reg) { 1889 NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg()); 1890 } else { 1891 LoadBaseDisp(rs_rSP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32, 1892 kNotVolatile); 1893 } 1894 1895 // EAX <- EAX * 1L (2H * 1L) 1896 if (src1_in_reg) { 1897 NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg()); 1898 } else { 1899 int displacement = SRegOffset(rl_src1.s_reg_low); 1900 LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP_32.GetReg(), 1901 displacement + LOWORD_OFFSET); 1902 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1903 true /* is_load */, true /* is_64bit */); 1904 } 1905 1906 // ECX <- ECX * 2L (1H * 2L) 1907 if (src2_in_reg) { 1908 NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg()); 1909 } else { 1910 int displacement = SRegOffset(rl_src2.s_reg_low); 1911 LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP_32.GetReg(), 1912 displacement + LOWORD_OFFSET); 1913 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1914 true /* is_load */, true /* is_64bit */); 1915 } 1916 1917 // ECX <- ECX + EAX (2H * 1L) + (1H * 2L) 1918 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg()); 1919 } 1920 1921 // EAX <- 2L 1922 if (src2_in_reg) { 1923 NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg()); 1924 } else { 1925 LoadBaseDisp(rs_rSP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32, 1926 kNotVolatile); 1927 } 1928 1929 // EDX:EAX <- 2L * 1L (double precision) 1930 if (src1_in_reg) { 1931 NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg()); 1932 } else { 1933 int displacement = SRegOffset(rl_src1.s_reg_low); 1934 LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET); 1935 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1936 true /* is_load */, true /* is_64bit */); 1937 } 1938 1939 // EDX <- EDX + ECX (add high words) 1940 NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg()); 1941 1942 // Result is EDX:EAX 1943 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, 1944 RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG}; 1945 StoreValueWide(rl_dest, rl_result); 1946 } 1947 1948 void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, 1949 Instruction::Code op) { 1950 DCHECK_EQ(rl_dest.location, kLocPhysReg); 1951 X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false); 1952 if (rl_src.location == kLocPhysReg) { 1953 
// Both operands are in registers.
1954 // But we must ensure that rl_src is in a register pair.
1955 if (cu_->target64) {
1956 NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
1957 } else {
1958 rl_src = LoadValueWide(rl_src, kCoreReg);
1959 if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
1960 // rl_dest's low reg aliases rl_src's high reg, so the low-word op would clobber it before use.
1961 RegStorage temp_reg = AllocTemp();
1962 OpRegCopy(temp_reg, rl_dest.reg);
1963 rl_src.reg.SetHighReg(temp_reg.GetReg());
1964 }
1965 NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
1966
1967 x86op = GetOpcode(op, rl_dest, rl_src, true);
1968 NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
1969 }
1970 return;
1971 }
1972
1973 // RHS is in memory.
1974 DCHECK((rl_src.location == kLocDalvikFrame) ||
1975 (rl_src.location == kLocCompilerTemp));
1976 int r_base = rs_rX86_SP_32.GetReg();
1977 int displacement = SRegOffset(rl_src.s_reg_low);
1978
1979 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1980 LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
1981 r_base, displacement + LOWORD_OFFSET);
1982 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1983 true /* is_load */, true /* is64bit */);
1984 if (!cu_->target64) {
1985 x86op = GetOpcode(op, rl_dest, rl_src, true);
1986 lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
1987 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1988 true /* is_load */, true /* is64bit */);
1989 }
1990 }
1991
1992 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
1993 rl_dest = UpdateLocWideTyped(rl_dest);
1994 if (rl_dest.location == kLocPhysReg) {
1995 // Ensure we are in a register pair.
1996 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1997
1998 rl_src = UpdateLocWideTyped(rl_src);
1999 GenLongRegOrMemOp(rl_result, rl_src, op);
2000 StoreFinalValueWide(rl_dest, rl_result);
2001 return;
2002 } else if (!cu_->target64 && Intersects(rl_src, rl_dest)) {
2003 // Handle the case when src and dest intersect.
2004 rl_src = LoadValueWide(rl_src, kCoreReg);
2005 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2006 rl_src = UpdateLocWideTyped(rl_src);
2007 GenLongRegOrMemOp(rl_result, rl_src, op);
2008 StoreFinalValueWide(rl_dest, rl_result);
2009 return;
2010 }
2011
2012 // It wasn't in registers, so it better be in memory.
2013 DCHECK((rl_dest.location == kLocDalvikFrame) ||
2014 (rl_dest.location == kLocCompilerTemp));
2015 rl_src = LoadValueWide(rl_src, kCoreReg);
2016
2017 // Operate directly into memory.
2018 X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
2019 int r_base = rs_rX86_SP_32.GetReg();
2020 int displacement = SRegOffset(rl_dest.s_reg_low);
2021
2022 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2023 LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
2024 cu_->target64 ?
rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
2025 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2026 true /* is_load */, true /* is64bit */);
2027 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2028 false /* is_load */, true /* is64bit */);
2029 if (!cu_->target64) {
2030 x86op = GetOpcode(op, rl_dest, rl_src, true);
2031 lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
2032 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2033 true /* is_load */, true /* is64bit */);
2034 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2035 false /* is_load */, true /* is64bit */);
2036 }
2037
2038 int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
2039 int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
2040
2041 // If the left operand is in memory and the right operand is in a register
2042 // and both belong to the same dalvik register then we should clobber the
2043 // right one because it doesn't hold valid data anymore.
2044 if (v_src_reg == v_dst_reg) {
2045 Clobber(rl_src.reg);
2046 }
2047 }
2048
2049 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
2050 RegLocation rl_src2, Instruction::Code op,
2051 bool is_commutative) {
2052 // Is this really a 2 operand operation?
2053 switch (op) {
2054 case Instruction::ADD_LONG_2ADDR:
2055 case Instruction::SUB_LONG_2ADDR:
2056 case Instruction::AND_LONG_2ADDR:
2057 case Instruction::OR_LONG_2ADDR:
2058 case Instruction::XOR_LONG_2ADDR:
2059 if (GenerateTwoOperandInstructions()) {
2060 GenLongArith(rl_dest, rl_src2, op);
2061 return;
2062 }
2063 break;
2064
2065 default:
2066 break;
2067 }
2068
2069 if (rl_dest.location == kLocPhysReg) {
2070 RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
2071
2072 // We are about to clobber the LHS, so it needs to be a temp.
2073 rl_result = ForceTempWide(rl_result);
2074
2075 // Perform the operation using the RHS.
2076 rl_src2 = UpdateLocWideTyped(rl_src2);
2077 GenLongRegOrMemOp(rl_result, rl_src2, op);
2078
2079 // And now record that the result is in the temp.
2080 StoreFinalValueWide(rl_dest, rl_result);
2081 return;
2082 }
2083
2084 // It wasn't in registers, so it better be in memory.
2085 DCHECK((rl_dest.location == kLocDalvikFrame) || (rl_dest.location == kLocCompilerTemp));
2086 rl_src1 = UpdateLocWideTyped(rl_src1);
2087 rl_src2 = UpdateLocWideTyped(rl_src2);
2088
2089 // Get one of the source operands into a temporary register.
2090 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2091 if (cu_->target64) {
2092 if (IsTemp(rl_src1.reg)) {
2093 GenLongRegOrMemOp(rl_src1, rl_src2, op);
2094 } else if (is_commutative) {
2095 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
2096 // We need at least one of them to be a temporary.
2097 if (!IsTemp(rl_src2.reg)) {
2098 rl_src1 = ForceTempWide(rl_src1);
2099 GenLongRegOrMemOp(rl_src1, rl_src2, op);
2100 } else {
2101 GenLongRegOrMemOp(rl_src2, rl_src1, op);
2102 StoreFinalValueWide(rl_dest, rl_src2);
2103 return;
2104 }
2105 } else {
2106 // Need LHS to be the temp.
2107 rl_src1 = ForceTempWide(rl_src1);
2108 GenLongRegOrMemOp(rl_src1, rl_src2, op);
2109 }
2110 } else {
2111 if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
2112 GenLongRegOrMemOp(rl_src1, rl_src2, op);
2113 } else if (is_commutative) {
2114 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
2115 // We need at least one of them to be a temporary.
2116 if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
2117 rl_src1 = ForceTempWide(rl_src1);
2118 GenLongRegOrMemOp(rl_src1, rl_src2, op);
2119 } else {
2120 GenLongRegOrMemOp(rl_src2, rl_src1, op);
2121 StoreFinalValueWide(rl_dest, rl_src2);
2122 return;
2123 }
2124 } else {
2125 // Need LHS to be the temp.
2126 rl_src1 = ForceTempWide(rl_src1);
2127 GenLongRegOrMemOp(rl_src1, rl_src2, op);
2128 }
2129 }
2130
2131 StoreFinalValueWide(rl_dest, rl_src1);
2132 }
2133
2134 void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
2135 if (cu_->target64) {
2136 rl_src = LoadValueWide(rl_src, kCoreReg);
2137 RegLocation rl_result;
2138 rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2139 OpRegCopy(rl_result.reg, rl_src.reg);
2140 OpReg(kOpNot, rl_result.reg);
2141 StoreValueWide(rl_dest, rl_result);
2142 } else {
2143 LOG(FATAL) << "Unexpected use of GenNotLong()";
2144 }
2145 }
2146
2147 void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src,
2148 int64_t imm, bool is_div) {
2149 if (imm == 0) {
2150 GenDivZeroException();
2151 } else if (imm == 1) {
2152 if (is_div) {
2153 // x / 1 == x.
2154 StoreValueWide(rl_dest, rl_src);
2155 } else {
2156 // x % 1 == 0.
2157 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2158 LoadConstantWide(rl_result.reg, 0);
2159 StoreValueWide(rl_dest, rl_result);
2160 }
2161 } else if (imm == -1) { // Handle the 0x8000000000000000 / -1 special case.
2162 if (is_div) {
2163 rl_src = LoadValueWide(rl_src, kCoreReg);
2164 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2165 RegStorage rs_temp = AllocTempWide();
2166
2167 OpRegCopy(rl_result.reg, rl_src.reg);
2168 LoadConstantWide(rs_temp, 0x8000000000000000);
2169
2170 // If x == MIN_LONG, return MIN_LONG.
2171 OpRegReg(kOpCmp, rl_src.reg, rs_temp);
2172 LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
2173
2174 // For x != MIN_LONG, x / -1 == -x.
2175 OpReg(kOpNeg, rl_result.reg);
2176
2177 minint_branch->target = NewLIR0(kPseudoTargetLabel);
2178 FreeTemp(rs_temp);
2179 StoreValueWide(rl_dest, rl_result);
2180 } else {
2181 // x % -1 == 0.
2182 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2183 LoadConstantWide(rl_result.reg, 0);
2184 StoreValueWide(rl_dest, rl_result);
2185 }
2186 } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
2187 // Division using shifting.
2188 rl_src = LoadValueWide(rl_src, kCoreReg);
2189 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2190 if (IsSameReg(rl_result.reg, rl_src.reg)) {
2191 RegStorage rs_temp = AllocTypedTempWide(false, kCoreReg);
2192 rl_result.reg.SetReg(rs_temp.GetReg());
2193 }
2194 LoadConstantWide(rl_result.reg, std::abs(imm) - 1);
2195 OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
2196 NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
2197 OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
2198 int shift_amount = CTZ(imm);
2199 OpRegImm(kOpAsr, rl_result.reg, shift_amount);
2200 if (imm < 0) {
2201 OpReg(kOpNeg, rl_result.reg);
2202 }
2203 StoreValueWide(rl_dest, rl_result);
2204 } else {
2205 CHECK(imm <= -2 || imm >= 2);
2206
2207 FlushReg(rs_r0q);
2208 Clobber(rs_r0q);
2209 LockTemp(rs_r0q);
2210 FlushReg(rs_r2q);
2211 Clobber(rs_r2q);
2212 LockTemp(rs_r2q);
2213
2214 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
2215 is_div ?
rs_r2q : rs_r0q, INVALID_SREG, INVALID_SREG};
2216
2217 // Use H. S. Warren's Hacker's Delight, Chapter 10, and
2218 // T. Granlund and P. L. Montgomery's "Division by Invariant Integers using Multiplication".
2219 int64_t magic;
2220 int shift;
2221 CalculateMagicAndShift(imm, magic, shift, true /* is_long */);
2222
2223 /*
2224 * For imm >= 2,
2225 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n > 0
2226 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n < 0.
2227 * For imm <= -2,
2228 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n > 0
2229 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n < 0.
2230 * We implement this algorithm in the following way:
2231 * 1. multiply the magic number M by the numerator n, getting the high 64 bits of the result in RDX
2232 * 2. if imm > 0 and magic < 0, add the numerator to RDX
2233 * if imm < 0 and magic > 0, subtract the numerator from RDX
2234 * 3. if S != 0, arithmetic-shift RDX right by S bits
2235 * 4. add 1 to RDX if RDX < 0
2236 * 5. Thus, RDX is the quotient
2237 */
2238
2239 // RAX = magic.
2240 LoadConstantWide(rs_r0q, magic);
2241
2242 // Multiply by numerator.
2243 RegStorage numerator_reg;
2244 if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
2245 // We will need the value later.
2246 rl_src = LoadValueWide(rl_src, kCoreReg);
2247 numerator_reg = rl_src.reg;
2248
2249 // RDX:RAX = magic * numerator.
2250 NewLIR1(kX86Imul64DaR, numerator_reg.GetReg());
2251 } else {
2252 // Only need this once. Multiply directly from the value.
2253 rl_src = UpdateLocWideTyped(rl_src);
2254 if (rl_src.location != kLocPhysReg) {
2255 // Okay, we can do this from memory.
2256 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2257 int displacement = SRegOffset(rl_src.s_reg_low);
2258 // RDX:RAX = magic * numerator.
2259 LIR *m = NewLIR2(kX86Imul64DaM, rs_rX86_SP_32.GetReg(), displacement);
2260 AnnotateDalvikRegAccess(m, displacement >> 2,
2261 true /* is_load */, true /* is_64bit */);
2262 } else {
2263 // RDX:RAX = magic * numerator.
2264 NewLIR1(kX86Imul64DaR, rl_src.reg.GetReg());
2265 }
2266 }
2267
2268 if (imm > 0 && magic < 0) {
2269 // Add numerator to RDX.
2270 DCHECK(numerator_reg.Valid());
2271 OpRegReg(kOpAdd, rs_r2q, numerator_reg);
2272 } else if (imm < 0 && magic > 0) {
2273 DCHECK(numerator_reg.Valid());
2274 OpRegReg(kOpSub, rs_r2q, numerator_reg);
2275 }
2276
2277 // Do we need the shift?
2278 if (shift != 0) {
2279 // Shift RDX by 'shift' bits.
2280 OpRegImm(kOpAsr, rs_r2q, shift);
2281 }
2282
2283 // Move RDX to RAX.
2284 OpRegCopyWide(rs_r0q, rs_r2q);
2285
2286 // Move sign bit to bit 0, zeroing the rest.
2287 OpRegImm(kOpLsr, rs_r2q, 63);
2288
2289 // RDX = RDX + RAX.
2290 OpRegReg(kOpAdd, rs_r2q, rs_r0q);
2291
2292 // Quotient is in RDX.
2293 if (!is_div) {
2294 // We need to compute the remainder.
2295 // Remainder is numerator - (quotient * imm).
2296 DCHECK(numerator_reg.Valid());
2297 OpRegCopyWide(rs_r0q, numerator_reg);
2298
2299 // Imul doesn't support 64-bit imms.
2300 if (imm > std::numeric_limits<int32_t>::max() ||
2301 imm < std::numeric_limits<int32_t>::min()) {
2302 RegStorage rs_temp = AllocTempWide();
2303 LoadConstantWide(rs_temp, imm);
2304
2305 // RDX = quotient * imm.
2306 NewLIR2(kX86Imul64RR, rs_r2q.GetReg(), rs_temp.GetReg());
2307
2308 FreeTemp(rs_temp);
2309 } else {
2310 // RDX = quotient * imm.
2311 int short_imm = static_cast<int>(imm);
2312 NewLIR3(kX86Imul64RRI, rs_r2q.GetReg(), rs_r2q.GetReg(), short_imm);
2313 }
2314
2315 // RAX -= RDX.
2316 OpRegReg(kOpSub, rs_r0q, rs_r2q);
2317
2318 // Result in RAX.
} else {
2320 // Result in RDX.
2321 }
2322 StoreValueWide(rl_dest, rl_result);
2323 FreeTemp(rs_r0q);
2324 FreeTemp(rs_r2q);
2325 }
2326 }
2327
2328 void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
2329 RegLocation rl_src2, bool is_div, int flags) {
2330 if (!cu_->target64) {
2331 LOG(FATAL) << "Unexpected use of GenDivRemLong()";
2332 return;
2333 }
2334
2335 if (rl_src2.is_const) {
2336 DCHECK(rl_src2.wide);
2337 int64_t imm = mir_graph_->ConstantValueWide(rl_src2);
2338 GenDivRemLongLit(rl_dest, rl_src1, imm, is_div);
2339 return;
2340 }
2341
2342 // We have to use fixed registers, so flush all the temps.
2343 // Prepare for explicit register usage.
2344 ExplicitTempRegisterLock(this, 4, &rs_r0q, &rs_r1q, &rs_r2q, &rs_r6q);
2345
2346 // Load LHS into RAX.
2347 LoadValueDirectWideFixed(rl_src1, rs_r0q);
2348
2349 // Load RHS into RCX.
2350 LoadValueDirectWideFixed(rl_src2, rs_r1q);
2351
2352 // Copy LHS sign bit into RDX.
2353 NewLIR0(kx86Cqo64Da);
2354
2355 // Handle division by zero case.
2356 if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
2357 GenDivZeroCheckWide(rs_r1q);
2358 }
2359
2360 // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
2361 NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
2362 LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
2363
2364 // RHS is -1.
2365 LoadConstantWide(rs_r6q, 0x8000000000000000);
2366 NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
2367 LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
2368
2369 // In the 0x8000000000000000/-1 case.
2370 if (!is_div) {
2371 // For DIV, RAX is already right. For REM, we need RDX to be 0.
2372 NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
2373 }
2374 LIR* done = NewLIR1(kX86Jmp8, 0);
2375
2376 // Expected case.
2377 minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
2378 minint_branch->target = minus_one_branch->target;
2379 NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
2380 done->target = NewLIR0(kPseudoTargetLabel);
2381
2382 // Result is in RAX for div and RDX for rem.
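  // For the expected (non-corner) case, the emitted sequence is roughly
  // (illustrative sketch, not literal assembler output):
  //   mov rax, <lhs> ; mov rcx, <rhs>
  //   cqo                ; sign-extend RAX into RDX:RAX
  //   idiv rcx           ; leaves quotient in RAX, remainder in RDX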
2383 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
2384 if (!is_div) {
2385 rl_result.reg.SetReg(r2q);
2386 }
2387
2388 StoreValueWide(rl_dest, rl_result);
2389 }
2390
2391 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
2392 rl_src = LoadValueWide(rl_src, kCoreReg);
2393 RegLocation rl_result;
2394 if (cu_->target64) {
2395 rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2396 OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
2397 } else {
2398 rl_result = ForceTempWide(rl_src);
2399 OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow()); // rLow = -rLow
2400 OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0); // rHigh = rHigh + CF
2401 OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); // rHigh = -rHigh
2402 }
2403 StoreValueWide(rl_dest, rl_result);
2404 }
2405
2406 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) {
2407 DCHECK_EQ(kX86, cu_->instruction_set);
2408 X86OpCode opcode = kX86Bkpt;
2409 switch (op) {
2410 case kOpCmp: opcode = kX86Cmp32RT; break;
2411 case kOpMov: opcode = kX86Mov32RT; break;
2412 default:
2413 LOG(FATAL) << "Bad opcode: " << op;
2414 break;
2415 }
2416 NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
2417 }
2418
2419 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
2420 DCHECK_EQ(kX86_64, cu_->instruction_set);
2421 X86OpCode opcode = kX86Bkpt;
2422 if (cu_->target64 && r_dest.Is64BitSolo()) {
2423 switch (op) {
2424 case kOpCmp: opcode = kX86Cmp64RT; break;
2425 case kOpMov: opcode = kX86Mov64RT; break;
2426 default:
2427 LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
2428 break;
2429 }
2430 } else {
2431 switch (op) {
2432 case kOpCmp: opcode = kX86Cmp32RT; break;
2433 case kOpMov: opcode = kX86Mov32RT; break;
2434 default:
2435 LOG(FATAL) << "Bad opcode: " << op;
2436 break;
2437 }
2438 }
2439 NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
2440 }
2441
2442 /*
2443 * Generate array load
2444 */
2445 void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
2446 RegLocation rl_index, RegLocation rl_dest, int scale) {
2447 RegisterClass reg_class = RegClassForFieldLoadStore(size, false);
2448 int len_offset = mirror::Array::LengthOffset().Int32Value();
2449 RegLocation rl_result;
2450 rl_array = LoadValue(rl_array, kRefReg);
2451
2452 int data_offset;
2453 if (size == k64 || size == kDouble) {
2454 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
2455 } else {
2456 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
2457 }
2458
2459 bool constant_index = rl_index.is_const;
2460 int32_t constant_index_value = 0;
2461 if (!constant_index) {
2462 rl_index = LoadValue(rl_index, kCoreReg);
2463 } else {
2464 constant_index_value = mir_graph_->ConstantValue(rl_index);
2465 // If the index is constant, just fold it into the data offset.
2466 data_offset += constant_index_value << scale;
2467 // Treat as a non-array access below.
2468 rl_index.reg = RegStorage::InvalidReg();
2469 }
2470
2471 /* null object?
*/
2472 GenNullCheck(rl_array.reg, opt_flags);
2473
2474 if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
2475 if (constant_index) {
2476 GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
2477 } else {
2478 GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
2479 }
2480 }
2481 rl_result = EvalLoc(rl_dest, reg_class, true);
2482 LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
2483 if ((size == k64) || (size == kDouble)) {
2484 StoreValueWide(rl_dest, rl_result);
2485 } else {
2486 StoreValue(rl_dest, rl_result);
2487 }
2488 }
2489
2490 /*
2491 * Generate array store
2492 *
2493 */
2494 void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
2495 RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
2496 RegisterClass reg_class = RegClassForFieldLoadStore(size, false);
2497 int len_offset = mirror::Array::LengthOffset().Int32Value();
2498 int data_offset;
2499
2500 if (size == k64 || size == kDouble) {
2501 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
2502 } else {
2503 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
2504 }
2505
2506 rl_array = LoadValue(rl_array, kRefReg);
2507 bool constant_index = rl_index.is_const;
2508 int32_t constant_index_value = 0;
2509 if (!constant_index) {
2510 rl_index = LoadValue(rl_index, kCoreReg);
2511 } else {
2512 // If the index is constant, just fold it into the data offset.
2513 constant_index_value = mir_graph_->ConstantValue(rl_index);
2514 data_offset += constant_index_value << scale;
2515 // Treat as a non-array access below.
2516 rl_index.reg = RegStorage::InvalidReg();
2517 }
2518
2519 /* null object? */
2520 GenNullCheck(rl_array.reg, opt_flags);
2521
2522 if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
2523 if (constant_index) {
2524 GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
2525 } else {
2526 GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
2527 }
2528 }
2529 if ((size == k64) || (size == kDouble)) {
2530 rl_src = LoadValueWide(rl_src, reg_class);
2531 } else {
2532 rl_src = LoadValue(rl_src, reg_class);
2533 }
2534 // If the src reg can't be byte accessed, move it to a temp first.
2535 if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
2536 RegStorage temp = AllocTemp();
2537 OpRegCopy(temp, rl_src.reg);
2538 StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size, opt_flags);
2539 } else {
2540 StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size, opt_flags);
2541 }
2542 if (card_mark) {
2543 // Free rl_index if it's a temp. Ensures there are 2 free regs for the card mark.
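    // (A sketch of intent: FreeTemp below releases the index register so the
    //  card mark has two free registers to work with; MarkGCCard is the GC
    //  write barrier that dirties the card covering rl_array so the collector
    //  notices the newly stored reference.)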
2544 if (!constant_index) { 2545 FreeTemp(rl_index.reg); 2546 } 2547 MarkGCCard(opt_flags, rl_src.reg, rl_array.reg); 2548 } 2549 } 2550 2551 RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, 2552 RegLocation rl_src, int shift_amount, int flags) { 2553 UNUSED(flags); 2554 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); 2555 if (cu_->target64) { 2556 OpKind op = static_cast<OpKind>(0); /* Make gcc happy */ 2557 switch (opcode) { 2558 case Instruction::SHL_LONG: 2559 case Instruction::SHL_LONG_2ADDR: 2560 op = kOpLsl; 2561 break; 2562 case Instruction::SHR_LONG: 2563 case Instruction::SHR_LONG_2ADDR: 2564 op = kOpAsr; 2565 break; 2566 case Instruction::USHR_LONG: 2567 case Instruction::USHR_LONG_2ADDR: 2568 op = kOpLsr; 2569 break; 2570 default: 2571 LOG(FATAL) << "Unexpected case"; 2572 } 2573 OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount); 2574 } else { 2575 switch (opcode) { 2576 case Instruction::SHL_LONG: 2577 case Instruction::SHL_LONG_2ADDR: 2578 DCHECK_NE(shift_amount, 1); // Prevent a double store from happening. 2579 if (shift_amount == 32) { 2580 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); 2581 LoadConstant(rl_result.reg.GetLow(), 0); 2582 } else if (shift_amount > 31) { 2583 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); 2584 NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32); 2585 LoadConstant(rl_result.reg.GetLow(), 0); 2586 } else { 2587 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); 2588 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); 2589 NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(), 2590 shift_amount); 2591 NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount); 2592 } 2593 break; 2594 case Instruction::SHR_LONG: 2595 case Instruction::SHR_LONG_2ADDR: 2596 if (shift_amount == 32) { 2597 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); 2598 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); 2599 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); 2600 } else if (shift_amount > 31) { 2601 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); 2602 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); 2603 NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32); 2604 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); 2605 } else { 2606 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); 2607 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); 2608 NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), 2609 shift_amount); 2610 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount); 2611 } 2612 break; 2613 case Instruction::USHR_LONG: 2614 case Instruction::USHR_LONG_2ADDR: 2615 if (shift_amount == 32) { 2616 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); 2617 LoadConstant(rl_result.reg.GetHigh(), 0); 2618 } else if (shift_amount > 31) { 2619 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); 2620 NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32); 2621 LoadConstant(rl_result.reg.GetHigh(), 0); 2622 } else { 2623 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); 2624 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); 2625 NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), 2626 shift_amount); 2627 NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount); 2628 } 2629 break; 2630 default: 2631 LOG(FATAL) << "Unexpected case"; 2632 } 2633 } 2634 return rl_result; 2635 } 
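// For reference, the sub-32 shift cases above lean on x86's double-precision
// shifts; e.g. a long SHL by n (0 < n < 32) comes out roughly as
// (illustrative sketch, not literal assembler output):
//   shld result_hi, result_lo, n   ; shifts n high bits of lo into hi
//   sal  result_lo, n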
2636 2637 void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, 2638 RegLocation rl_src, RegLocation rl_shift, int flags) { 2639 // Per spec, we only care about low 6 bits of shift amount. 2640 int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f; 2641 if (shift_amount == 0) { 2642 rl_src = LoadValueWide(rl_src, kCoreReg); 2643 StoreValueWide(rl_dest, rl_src); 2644 return; 2645 } else if (shift_amount == 1 && 2646 (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) { 2647 // Need to handle this here to avoid calling StoreValueWide twice. 2648 GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src, flags); 2649 return; 2650 } 2651 if (PartiallyIntersects(rl_src, rl_dest)) { 2652 GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift); 2653 return; 2654 } 2655 rl_src = LoadValueWide(rl_src, kCoreReg); 2656 RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount, flags); 2657 StoreValueWide(rl_dest, rl_result); 2658 } 2659 2660 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, 2661 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, 2662 int flags) { 2663 bool isConstSuccess = false; 2664 switch (opcode) { 2665 case Instruction::ADD_LONG: 2666 case Instruction::AND_LONG: 2667 case Instruction::OR_LONG: 2668 case Instruction::XOR_LONG: 2669 if (rl_src2.is_const) { 2670 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); 2671 } else { 2672 DCHECK(rl_src1.is_const); 2673 isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); 2674 } 2675 break; 2676 case Instruction::SUB_LONG: 2677 case Instruction::SUB_LONG_2ADDR: 2678 if (rl_src2.is_const) { 2679 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); 2680 } else { 2681 GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags); 2682 isConstSuccess = true; 2683 } 2684 break; 2685 case Instruction::ADD_LONG_2ADDR: 2686 case Instruction::OR_LONG_2ADDR: 2687 case Instruction::XOR_LONG_2ADDR: 2688 case Instruction::AND_LONG_2ADDR: 2689 if (rl_src2.is_const) { 2690 if (GenerateTwoOperandInstructions()) { 2691 isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode); 2692 } else { 2693 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); 2694 } 2695 } else { 2696 DCHECK(rl_src1.is_const); 2697 isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); 2698 } 2699 break; 2700 default: 2701 isConstSuccess = false; 2702 break; 2703 } 2704 2705 if (!isConstSuccess) { 2706 // Default - bail to non-const handler. 2707 GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags); 2708 } 2709 } 2710 2711 bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) { 2712 switch (op) { 2713 case Instruction::AND_LONG_2ADDR: 2714 case Instruction::AND_LONG: 2715 return value == -1; 2716 case Instruction::OR_LONG: 2717 case Instruction::OR_LONG_2ADDR: 2718 case Instruction::XOR_LONG: 2719 case Instruction::XOR_LONG_2ADDR: 2720 return value == 0; 2721 default: 2722 return false; 2723 } 2724 } 2725 2726 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs, 2727 bool is_high_op) { 2728 bool rhs_in_mem = rhs.location != kLocPhysReg; 2729 bool dest_in_mem = dest.location != kLocPhysReg; 2730 bool is64Bit = cu_->target64; 2731 DCHECK(!rhs_in_mem || !dest_in_mem); 2732 switch (op) { 2733 case Instruction::ADD_LONG: 2734 case Instruction::ADD_LONG_2ADDR: 2735 if (dest_in_mem) { 2736 return is64Bit ? kX86Add64MR : is_high_op ? 
kX86Adc32MR : kX86Add32MR; 2737 } else if (rhs_in_mem) { 2738 return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM; 2739 } 2740 return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR; 2741 case Instruction::SUB_LONG: 2742 case Instruction::SUB_LONG_2ADDR: 2743 if (dest_in_mem) { 2744 return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR; 2745 } else if (rhs_in_mem) { 2746 return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM; 2747 } 2748 return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR; 2749 case Instruction::AND_LONG_2ADDR: 2750 case Instruction::AND_LONG: 2751 if (dest_in_mem) { 2752 return is64Bit ? kX86And64MR : kX86And32MR; 2753 } 2754 if (is64Bit) { 2755 return rhs_in_mem ? kX86And64RM : kX86And64RR; 2756 } 2757 return rhs_in_mem ? kX86And32RM : kX86And32RR; 2758 case Instruction::OR_LONG: 2759 case Instruction::OR_LONG_2ADDR: 2760 if (dest_in_mem) { 2761 return is64Bit ? kX86Or64MR : kX86Or32MR; 2762 } 2763 if (is64Bit) { 2764 return rhs_in_mem ? kX86Or64RM : kX86Or64RR; 2765 } 2766 return rhs_in_mem ? kX86Or32RM : kX86Or32RR; 2767 case Instruction::XOR_LONG: 2768 case Instruction::XOR_LONG_2ADDR: 2769 if (dest_in_mem) { 2770 return is64Bit ? kX86Xor64MR : kX86Xor32MR; 2771 } 2772 if (is64Bit) { 2773 return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR; 2774 } 2775 return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR; 2776 default: 2777 LOG(FATAL) << "Unexpected opcode: " << op; 2778 return kX86Add32RR; 2779 } 2780 } 2781 2782 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, 2783 int32_t value) { 2784 bool in_mem = loc.location != kLocPhysReg; 2785 bool is64Bit = cu_->target64; 2786 bool byte_imm = IS_SIMM8(value); 2787 DCHECK(in_mem || !loc.reg.IsFloat()); 2788 switch (op) { 2789 case Instruction::ADD_LONG: 2790 case Instruction::ADD_LONG_2ADDR: 2791 if (byte_imm) { 2792 if (in_mem) { 2793 return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8; 2794 } 2795 return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8; 2796 } 2797 if (in_mem) { 2798 return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI; 2799 } 2800 return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI; 2801 case Instruction::SUB_LONG: 2802 case Instruction::SUB_LONG_2ADDR: 2803 if (byte_imm) { 2804 if (in_mem) { 2805 return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8; 2806 } 2807 return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8; 2808 } 2809 if (in_mem) { 2810 return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI; 2811 } 2812 return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI; 2813 case Instruction::AND_LONG_2ADDR: 2814 case Instruction::AND_LONG: 2815 if (byte_imm) { 2816 if (is64Bit) { 2817 return in_mem ? kX86And64MI8 : kX86And64RI8; 2818 } 2819 return in_mem ? kX86And32MI8 : kX86And32RI8; 2820 } 2821 if (is64Bit) { 2822 return in_mem ? kX86And64MI : kX86And64RI; 2823 } 2824 return in_mem ? kX86And32MI : kX86And32RI; 2825 case Instruction::OR_LONG: 2826 case Instruction::OR_LONG_2ADDR: 2827 if (byte_imm) { 2828 if (is64Bit) { 2829 return in_mem ? kX86Or64MI8 : kX86Or64RI8; 2830 } 2831 return in_mem ? kX86Or32MI8 : kX86Or32RI8; 2832 } 2833 if (is64Bit) { 2834 return in_mem ? kX86Or64MI : kX86Or64RI; 2835 } 2836 return in_mem ? 
kX86Or32MI : kX86Or32RI;
2837 case Instruction::XOR_LONG:
2838 case Instruction::XOR_LONG_2ADDR:
2839 if (byte_imm) {
2840 if (is64Bit) {
2841 return in_mem ? kX86Xor64MI8 : kX86Xor64RI8;
2842 }
2843 return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
2844 }
2845 if (is64Bit) {
2846 return in_mem ? kX86Xor64MI : kX86Xor64RI;
2847 }
2848 return in_mem ? kX86Xor32MI : kX86Xor32RI;
2849 default:
2850 LOG(FATAL) << "Unexpected opcode: " << op;
2851 UNREACHABLE();
2852 }
2853 }
2854
2855 bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
2856 DCHECK(rl_src.is_const);
2857 int64_t val = mir_graph_->ConstantValueWide(rl_src);
2858
2859 if (cu_->target64) {
2860 // We can use the immediate form only if the value fits in 32 bits.
2861 if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
2862 return false;
2863 }
2864
2865 rl_dest = UpdateLocWideTyped(rl_dest);
2866
2867 if ((rl_dest.location == kLocDalvikFrame) ||
2868 (rl_dest.location == kLocCompilerTemp)) {
2869 int r_base = rs_rX86_SP_32.GetReg();
2870 int displacement = SRegOffset(rl_dest.s_reg_low);
2871
2872 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2873 X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
2874 LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
2875 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2876 true /* is_load */, true /* is64bit */);
2877 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2878 false /* is_load */, true /* is64bit */);
2879 return true;
2880 }
2881
2882 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2883 DCHECK_EQ(rl_result.location, kLocPhysReg);
2884 DCHECK(!rl_result.reg.IsFloat());
2885
2886 X86OpCode x86op = GetOpcode(op, rl_result, false, val);
2887 NewLIR2(x86op, rl_result.reg.GetReg(), val);
2888
2889 StoreValueWide(rl_dest, rl_result);
2890 return true;
2891 }
2892
2893 int32_t val_lo = Low32Bits(val);
2894 int32_t val_hi = High32Bits(val);
2895 rl_dest = UpdateLocWideTyped(rl_dest);
2896
2897 // Can we just do this into memory?
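    // (When the destination still lives in its Dalvik frame slot, the
    // constant can be applied with a single read-modify-write ALU-to-memory
    // instruction per word, avoiding an explicit load/store round trip.)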
2898 if ((rl_dest.location == kLocDalvikFrame) ||
2899 (rl_dest.location == kLocCompilerTemp)) {
2900 int r_base = rs_rX86_SP_32.GetReg();
2901 int displacement = SRegOffset(rl_dest.s_reg_low);
2902
2903 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2904 if (!IsNoOp(op, val_lo)) {
2905 X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
2906 LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
2907 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2908 true /* is_load */, true /* is64bit */);
2909 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2910 false /* is_load */, true /* is64bit */);
2911 }
2912 if (!IsNoOp(op, val_hi)) {
2913 X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
2914 LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi);
2915 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2916 true /* is_load */, true /* is64bit */);
2917 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2918 false /* is_load */, true /* is64bit */);
2919 }
2920 return true;
2921 }
2922
2923 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2924 DCHECK_EQ(rl_result.location, kLocPhysReg);
2925 DCHECK(!rl_result.reg.IsFloat());
2926
2927 if (!IsNoOp(op, val_lo)) {
2928 X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
2929 NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
2930 }
2931 if (!IsNoOp(op, val_hi)) {
2932 X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
2933 NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
2934 }
2935 StoreValueWide(rl_dest, rl_result);
2936 return true;
2937 }
2938
2939 bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
2940 RegLocation rl_src2, Instruction::Code op) {
2941 DCHECK(rl_src2.is_const);
2942 int64_t val = mir_graph_->ConstantValueWide(rl_src2);
2943
2944 if (cu_->target64) {
2945 // We can use the immediate form only if the value fits in 32 bits.
2946 if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
2947 return false;
2948 }
2949 if (rl_dest.location == kLocPhysReg &&
2950 rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
2951 X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
2952 OpRegCopy(rl_dest.reg, rl_src1.reg);
2953 NewLIR2(x86op, rl_dest.reg.GetReg(), val);
2954 StoreFinalValueWide(rl_dest, rl_dest);
2955 return true;
2956 }
2957
2958 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2959 // We need the values to be in a temporary.
2960 RegLocation rl_result = ForceTempWide(rl_src1);
2961
2962 X86OpCode x86op = GetOpcode(op, rl_result, false, val);
2963 NewLIR2(x86op, rl_result.reg.GetReg(), val);
2964
2965 StoreFinalValueWide(rl_dest, rl_result);
2966 return true;
2967 }
2968
2969 int32_t val_lo = Low32Bits(val);
2970 int32_t val_hi = High32Bits(val);
2971 rl_dest = UpdateLocWideTyped(rl_dest);
2972 rl_src1 = UpdateLocWideTyped(rl_src1);
2973
2974 // Can we do this directly into the destination registers?
2975 if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
2976 rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() &&
2977 rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) {
2978 if (!IsNoOp(op, val_lo)) {
2979 X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
2980 NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo);
2981 }
2982 if (!IsNoOp(op, val_hi)) {
2983 X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
2984 NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi);
2985 }
2986
2987 StoreFinalValueWide(rl_dest, rl_dest);
2988 return true;
2989 }
2990
2991 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2992 DCHECK_EQ(rl_src1.location, kLocPhysReg);
2993
2994 // We need the values to be in a temporary.
2995 RegLocation rl_result = ForceTempWide(rl_src1);
2996 if (!IsNoOp(op, val_lo)) {
2997 X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
2998 NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
2999 }
3000 if (!IsNoOp(op, val_hi)) {
3001 X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
3002 NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
3003 }
3004
3005 StoreFinalValueWide(rl_dest, rl_result);
3006 return true;
3007 }
3008
3009 // For final classes there are no sub-classes to check and so we can answer the instance-of
3010 // question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
3011 void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
3012 RegLocation rl_dest, RegLocation rl_src) {
3013 RegLocation object = LoadValue(rl_src, kRefReg);
3014 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
3015 RegStorage result_reg = rl_result.reg;
3016
3017 // For 32-bit, SETcc only works with EAX..EDX.
3018 RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg;
3019 if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) {
3020 result_reg = AllocateByteRegister();
3021 }
3022
3023 // Assume that there is no match.
3024 LoadConstant(result_reg, 0);
3025 LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, nullptr);
3026
3027 // We will use this register to compare to memory below.
3028 // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode).
3029 // For this reason, force allocation of a 32 bit register to use, so that the
3030 // compare to memory will be done using a 32 bit comparison.
3031 // The LoadRefDisp(s) below will work normally, even in 64 bit mode.
3032 RegStorage check_class = AllocTemp();
3033
3034 // If Method* is already in a register, we can save a copy.
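  // offset_of_type below is the byte offset of entry type_idx in the dex
  // cache's resolved-types array: the array's data offset plus
  // type_idx * sizeof(HeapReference<Class>). (A restatement of the
  // computation that follows, for reference.)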
3035 RegLocation rl_method = mir_graph_->GetMethodLoc(); 3036 int32_t offset_of_type = mirror::Array::DataOffset( 3037 sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() + 3038 (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx); 3039 3040 if (rl_method.location == kLocPhysReg) { 3041 if (use_declaring_class) { 3042 LoadRefDisp(rl_method.reg, ArtMethod::DeclaringClassOffset().Int32Value(), 3043 check_class, kNotVolatile); 3044 } else { 3045 LoadRefDisp(rl_method.reg, ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), 3046 check_class, kNotVolatile); 3047 LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile); 3048 } 3049 } else { 3050 LoadCurrMethodDirect(check_class); 3051 if (use_declaring_class) { 3052 LoadRefDisp(check_class, ArtMethod::DeclaringClassOffset().Int32Value(), 3053 check_class, kNotVolatile); 3054 } else { 3055 LoadRefDisp(check_class, ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), 3056 check_class, kNotVolatile); 3057 LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile); 3058 } 3059 } 3060 3061 // Compare the computed class to the class in the object. 3062 DCHECK_EQ(object.location, kLocPhysReg); 3063 OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value()); 3064 3065 // Set the low byte of the result to 0 or 1 from the compare condition code. 3066 NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq); 3067 3068 LIR* target = NewLIR0(kPseudoTargetLabel); 3069 null_branchover->target = target; 3070 FreeTemp(check_class); 3071 if (IsTemp(result_reg)) { 3072 OpRegCopy(rl_result.reg, result_reg); 3073 FreeTemp(result_reg); 3074 } 3075 StoreValue(rl_dest, rl_result); 3076 } 3077 3078 void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, 3079 RegLocation rl_lhs, RegLocation rl_rhs, int flags) { 3080 OpKind op = kOpBkpt; 3081 bool is_div_rem = false; 3082 bool unary = false; 3083 bool shift_op = false; 3084 bool is_two_addr = false; 3085 RegLocation rl_result; 3086 switch (opcode) { 3087 case Instruction::NEG_INT: 3088 op = kOpNeg; 3089 unary = true; 3090 break; 3091 case Instruction::NOT_INT: 3092 op = kOpMvn; 3093 unary = true; 3094 break; 3095 case Instruction::ADD_INT_2ADDR: 3096 is_two_addr = true; 3097 FALLTHROUGH_INTENDED; 3098 case Instruction::ADD_INT: 3099 op = kOpAdd; 3100 break; 3101 case Instruction::SUB_INT_2ADDR: 3102 is_two_addr = true; 3103 FALLTHROUGH_INTENDED; 3104 case Instruction::SUB_INT: 3105 op = kOpSub; 3106 break; 3107 case Instruction::MUL_INT_2ADDR: 3108 is_two_addr = true; 3109 FALLTHROUGH_INTENDED; 3110 case Instruction::MUL_INT: 3111 op = kOpMul; 3112 break; 3113 case Instruction::DIV_INT_2ADDR: 3114 is_two_addr = true; 3115 FALLTHROUGH_INTENDED; 3116 case Instruction::DIV_INT: 3117 op = kOpDiv; 3118 is_div_rem = true; 3119 break; 3120 /* NOTE: returns in kArg1 */ 3121 case Instruction::REM_INT_2ADDR: 3122 is_two_addr = true; 3123 FALLTHROUGH_INTENDED; 3124 case Instruction::REM_INT: 3125 op = kOpRem; 3126 is_div_rem = true; 3127 break; 3128 case Instruction::AND_INT_2ADDR: 3129 is_two_addr = true; 3130 FALLTHROUGH_INTENDED; 3131 case Instruction::AND_INT: 3132 op = kOpAnd; 3133 break; 3134 case Instruction::OR_INT_2ADDR: 3135 is_two_addr = true; 3136 FALLTHROUGH_INTENDED; 3137 case Instruction::OR_INT: 3138 op = kOpOr; 3139 break; 3140 case Instruction::XOR_INT_2ADDR: 3141 is_two_addr = true; 3142 FALLTHROUGH_INTENDED; 3143 case Instruction::XOR_INT: 3144 op = kOpXor; 3145 break; 3146 case Instruction::SHL_INT_2ADDR: 3147 is_two_addr = true; 

void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
                               RegLocation rl_lhs, RegLocation rl_rhs, int flags) {
  OpKind op = kOpBkpt;
  bool is_div_rem = false;
  bool unary = false;
  bool shift_op = false;
  bool is_two_addr = false;
  RegLocation rl_result;
  switch (opcode) {
    case Instruction::NEG_INT:
      op = kOpNeg;
      unary = true;
      break;
    case Instruction::NOT_INT:
      op = kOpMvn;
      unary = true;
      break;
    case Instruction::ADD_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::ADD_INT:
      op = kOpAdd;
      break;
    case Instruction::SUB_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SUB_INT:
      op = kOpSub;
      break;
    case Instruction::MUL_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::MUL_INT:
      op = kOpMul;
      break;
    case Instruction::DIV_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::DIV_INT:
      op = kOpDiv;
      is_div_rem = true;
      break;
    /* NOTE: returns in kArg1 */
    case Instruction::REM_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::REM_INT:
      op = kOpRem;
      is_div_rem = true;
      break;
    case Instruction::AND_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::AND_INT:
      op = kOpAnd;
      break;
    case Instruction::OR_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::OR_INT:
      op = kOpOr;
      break;
    case Instruction::XOR_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::XOR_INT:
      op = kOpXor;
      break;
    case Instruction::SHL_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SHL_INT:
      shift_op = true;
      op = kOpLsl;
      break;
    case Instruction::SHR_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SHR_INT:
      shift_op = true;
      op = kOpAsr;
      break;
    case Instruction::USHR_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::USHR_INT:
      shift_op = true;
      op = kOpLsr;
      break;
    default:
      LOG(FATAL) << "Invalid word arith op: " << opcode;
  }

  // Can we convert to a two address instruction?
  if (!is_two_addr &&
      (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
       mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
    is_two_addr = true;
  }

  if (!GenerateTwoOperandInstructions()) {
    is_two_addr = false;
  }

  // Get the div/rem stuff out of the way.
  if (is_div_rem) {
    rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, flags);
    StoreValue(rl_dest, rl_result);
    return;
  }

  // If we generate any memory access below, it will reference a dalvik reg.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  if (unary) {
    rl_lhs = LoadValue(rl_lhs, kCoreReg);
    rl_result = UpdateLocTyped(rl_dest);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegReg(op, rl_result.reg, rl_lhs.reg);
  } else {
    if (shift_op) {
      // X86 doesn't require masking and must use ECX.
      RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
      LoadValueDirectFixed(rl_rhs, t_reg);
      if (is_two_addr) {
        // Can we do this directly into memory?
        rl_result = UpdateLocTyped(rl_dest);
        if (rl_result.location != kLocPhysReg) {
          // Okay, we can do this into memory.
          OpMemReg(op, rl_result, t_reg.GetReg());
          FreeTemp(t_reg);
          return;
        } else if (!rl_result.reg.IsFloat()) {
          // Can do this directly into the result register.
          OpRegReg(op, rl_result.reg, t_reg);
          FreeTemp(t_reg);
          StoreFinalValue(rl_dest, rl_result);
          return;
        }
      }
      // Three address form, or we can't do directly.
      rl_lhs = LoadValue(rl_lhs, kCoreReg);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg);
      FreeTemp(t_reg);
    } else {
      // Multiply is 3 operand only (sort of).
      if (is_two_addr && op != kOpMul) {
        // Can we do this directly into memory?
        rl_result = UpdateLocTyped(rl_dest);
        if (rl_result.location == kLocPhysReg) {
          // Ensure res is in a core reg.
          rl_result = EvalLoc(rl_dest, kCoreReg, true);
          // Can we do this from memory directly?
          rl_rhs = UpdateLocTyped(rl_rhs);
          if (rl_rhs.location != kLocPhysReg) {
            OpRegMem(op, rl_result.reg, rl_rhs);
            StoreFinalValue(rl_dest, rl_result);
            return;
          } else if (!rl_rhs.reg.IsFloat()) {
            OpRegReg(op, rl_result.reg, rl_rhs.reg);
            StoreFinalValue(rl_dest, rl_result);
            return;
          }
        }
        rl_rhs = LoadValue(rl_rhs, kCoreReg);
        // rl_rhs and rl_dest may refer to the same VR; in that case rl_dest is
        // in a register after LoadValue while rl_result has not been updated
        // yet, so refresh it here.
        rl_result = UpdateLocTyped(rl_dest);
        if (rl_result.location != kLocPhysReg) {
          // Okay, we can do this into memory.
          OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
          return;
        } else if (!rl_result.reg.IsFloat()) {
          // Can do this directly into the result register.
          OpRegReg(op, rl_result.reg, rl_rhs.reg);
          StoreFinalValue(rl_dest, rl_result);
          return;
        } else {
          rl_lhs = LoadValue(rl_lhs, kCoreReg);
          rl_result = EvalLoc(rl_dest, kCoreReg, true);
          OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
        }
      } else {
        // Try to use reg/memory instructions.
        rl_lhs = UpdateLocTyped(rl_lhs);
        rl_rhs = UpdateLocTyped(rl_rhs);
        // We can't optimize with FP registers.
        if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
          // Something is difficult, so fall back to the standard case.
          rl_lhs = LoadValue(rl_lhs, kCoreReg);
          rl_rhs = LoadValue(rl_rhs, kCoreReg);
          rl_result = EvalLoc(rl_dest, kCoreReg, true);
          OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
        } else {
          // We can optimize by moving to result and using memory operands.
          if (rl_rhs.location != kLocPhysReg) {
            // Force LHS into result.
            // Be careful with the ordering here: if rl_dest and rl_lhs point
            // to the same VR, load first; if they are different, find a
            // register for dest first.
            if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
                mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
              rl_lhs = LoadValue(rl_lhs, kCoreReg);
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              // No-op if these are the same.
              OpRegCopy(rl_result.reg, rl_lhs.reg);
            } else {
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              LoadValueDirect(rl_lhs, rl_result.reg);
            }
            OpRegMem(op, rl_result.reg, rl_rhs);
          } else if (rl_lhs.location != kLocPhysReg) {
            // RHS is in a register; LHS is in memory.
            if (op != kOpSub) {
              // Force RHS into result and operate on memory.
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              OpRegCopy(rl_result.reg, rl_rhs.reg);
              OpRegMem(op, rl_result.reg, rl_lhs);
            } else {
              // Subtraction isn't commutative.
              rl_lhs = LoadValue(rl_lhs, kCoreReg);
              rl_rhs = LoadValue(rl_rhs, kCoreReg);
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
            }
          } else {
            // Both are in registers.
            rl_lhs = LoadValue(rl_lhs, kCoreReg);
            rl_rhs = LoadValue(rl_rhs, kCoreReg);
            rl_result = EvalLoc(rl_dest, kCoreReg, true);
            OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
          }
        }
      }
    }
  }
  StoreValue(rl_dest, rl_result);
}
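
/*
 * Illustrative sketch (hypothetical displacement, example registers): when an
 * ADD_INT_2ADDR finds its destination VR still in its home frame slot, the
 * two-address path above emits a single read-modify-write against the Dalvik
 * frame, e.g.
 *
 *   add  [rSP + v0_disp], eax         ; v0 += rhs, no separate load/store
 *
 * instead of a load/add/store sequence through a temporary register.
 */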

bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
  // If we have non-core registers, then we can't do good things.
  if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) {
    return false;
  }
  if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) {
    return false;
  }

  // Everything will be fine :-).
  return true;
}
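
/*
 * Illustrative sketch (example registers and displacement): int-to-long on
 * 64-bit targets is a single sign extension, covering the two cases handled
 * below:
 *
 *   movsxd rax, ecx                    ; source already in a register
 *   movsxd rax, dword [rSP + v_disp]   ; source still in its Dalvik home slot
 */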

void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
  if (!cu_->target64) {
    Mir2Lir::GenIntToLong(rl_dest, rl_src);
    return;
  }
  rl_src = UpdateLocTyped(rl_src);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (rl_src.location == kLocPhysReg) {
    NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
  } else {
    int displacement = SRegOffset(rl_src.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR* m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP_32.GetReg(),
                     displacement + LOWORD_OFFSET);
    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                            true /* is_load */, true /* is_64bit */);
  }
  StoreValueWide(rl_dest, rl_result);
}

void X86Mir2Lir::GenLongToInt(RegLocation rl_dest, RegLocation rl_src) {
  rl_src = UpdateLocWide(rl_src);
  rl_src = NarrowRegLoc(rl_src);
  StoreValue(rl_dest, rl_src);

  if (cu_->target64) {
    // If src and dest end up in the same physical register, StoreValue
    // generates no operation, but we still need an explicit 32-bit
    // mov R, R to clear the upper 32 bits.
    rl_dest = UpdateLoc(rl_dest);
    if (rl_src.location == kLocPhysReg && rl_dest.location == kLocPhysReg &&
        IsSameReg(rl_src.reg, rl_dest.reg)) {
      LIR* copy_lir = OpRegCopyNoInsert(rl_dest.reg, rl_dest.reg);
      // Remove the nop flag that OpRegCopyNoInsert sets when src == dest.
      copy_lir->flags.is_nop = false;
      AppendLIR(copy_lir);
    }
  }
}
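
/*
 * Illustrative sketch of the 32-bit SHL_LONG sequence produced below (shift
 * count in CL, operand in EDX:EAX; the fixup is needed because 32-bit x86
 * shifts only use the low five bits of CL):
 *
 *   shld edx, eax, cl    ; high = high:low << cl (fills from low's top bits)
 *   sal  eax, cl         ; low <<= cl
 *   test cl, 32          ; was the shift amount >= 32?
 *   jz   done
 *   mov  edx, eax        ; yes: the low word becomes the high word
 *   mov  eax, 0          ;      and the low word is zeroed
 * done:
 */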

void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                RegLocation rl_src1, RegLocation rl_shift) {
  if (!cu_->target64) {
    // Long shift operations in 32-bit: use shld or shrd to create a 32-bit
    // register filled from the other half, then shift the other half. If the
    // shift amount is less than 32 we're done; otherwise move one register to
    // the other and fill the vacated register with zero or sign bits.
    LIR* branch;
    FlushAllRegs();
    LockCallTemps();
    LoadValueDirectFixed(rl_shift, rs_rCX);
    RegStorage r_tmp = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
    LoadValueDirectWideFixed(rl_src1, r_tmp);
    switch (opcode) {
      case Instruction::SHL_LONG:
      case Instruction::SHL_LONG_2ADDR:
        NewLIR3(kX86Shld32RRC, r_tmp.GetHighReg(), r_tmp.GetLowReg(), rs_rCX.GetReg());
        NewLIR2(kX86Sal32RC, r_tmp.GetLowReg(), rs_rCX.GetReg());
        NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
        branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
        OpRegCopy(r_tmp.GetHigh(), r_tmp.GetLow());
        LoadConstant(r_tmp.GetLow(), 0);
        branch->target = NewLIR0(kPseudoTargetLabel);
        break;
      case Instruction::SHR_LONG:
      case Instruction::SHR_LONG_2ADDR:
        NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(), rs_rCX.GetReg());
        NewLIR2(kX86Sar32RC, r_tmp.GetHighReg(), rs_rCX.GetReg());
        NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
        branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
        OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh());
        NewLIR2(kX86Sar32RI, r_tmp.GetHighReg(), 31);
        branch->target = NewLIR0(kPseudoTargetLabel);
        break;
      case Instruction::USHR_LONG:
      case Instruction::USHR_LONG_2ADDR:
        NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(),
                rs_rCX.GetReg());
        NewLIR2(kX86Shr32RC, r_tmp.GetHighReg(), rs_rCX.GetReg());
        NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
        branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
        OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh());
        LoadConstant(r_tmp.GetHigh(), 0);
        branch->target = NewLIR0(kPseudoTargetLabel);
        break;
      default:
        LOG(FATAL) << "Unexpected case: " << opcode;
        return;
    }
    RegLocation rl_result = LocCReturnWide();
    StoreValueWide(rl_dest, rl_result);
    return;
  }

  bool is_two_addr = false;
  OpKind op = kOpBkpt;
  RegLocation rl_result;

  switch (opcode) {
    case Instruction::SHL_LONG_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SHL_LONG:
      op = kOpLsl;
      break;
    case Instruction::SHR_LONG_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SHR_LONG:
      op = kOpAsr;
      break;
    case Instruction::USHR_LONG_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::USHR_LONG:
      op = kOpLsr;
      break;
    default:
      op = kOpBkpt;
  }

  // X86 doesn't require masking and must use ECX.
  RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
  LoadValueDirectFixed(rl_shift, t_reg);
  if (is_two_addr) {
    // Can we do this directly into memory?
    rl_result = UpdateLocWideTyped(rl_dest);
    if (rl_result.location != kLocPhysReg) {
      // Okay, we can do this into memory.
      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
      OpMemReg(op, rl_result, t_reg.GetReg());
    } else if (!rl_result.reg.IsFloat()) {
      // Can do this directly into the result register.
      OpRegReg(op, rl_result.reg, t_reg);
      StoreFinalValueWide(rl_dest, rl_result);
    }
  } else {
    // Three address form, or we can't do directly.
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
    StoreFinalValueWide(rl_dest, rl_result);
  }

  FreeTemp(t_reg);
}

}  // namespace art