/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "arm64_lir.h"
#include "codegen_arm64.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"

namespace art {

/* This file contains codegen for the A64 ISA. */

int32_t Arm64Mir2Lir::EncodeImmSingle(uint32_t bits) {
  /*
   * Valid values will have the form:
   *
   *   aBbb.bbbc.defg.h000.0000.0000.0000.0000
   *
   * where B = not(b). In other words, if b == 1, then B == 0 and vice versa.
   */

  // bits[18..0] are cleared.
  if ((bits & 0x0007ffff) != 0)
    return -1;

  // bits[29..25] are all set or all cleared.
  uint32_t b_pattern = (bits >> 16) & 0x3e00;
  if (b_pattern != 0 && b_pattern != 0x3e00)
    return -1;

  // bit[30] and bit[29] are opposite.
  if (((bits ^ (bits << 1)) & 0x40000000) == 0)
    return -1;

  // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
  // bit7: a000.0000
  uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
  // bit6: 0b00.0000
  uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
  // bit5_to_0: 00cd.efgh
  uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
  return (bit7 | bit6 | bit5_to_0);
}

int32_t Arm64Mir2Lir::EncodeImmDouble(uint64_t bits) {
  /*
   * Valid values will have the form:
   *
   *   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
   *   0000.0000.0000.0000.0000.0000.0000.0000
   *
   * where B = not(b).
   */

  // bits[47..0] are cleared.
  if ((bits & UINT64_C(0xffffffffffff)) != 0)
    return -1;

  // bits[61..54] are all set or all cleared.
  uint32_t b_pattern = (bits >> 48) & 0x3fc0;
  if (b_pattern != 0 && b_pattern != 0x3fc0)
    return -1;

  // bit[62] and bit[61] are opposite.
  if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0)
    return -1;

  // bit7: a000.0000
  uint32_t bit7 = ((bits >> 63) & 0x1) << 7;
  // bit6: 0b00.0000
  uint32_t bit6 = ((bits >> 61) & 0x1) << 6;
  // bit5_to_0: 00cd.efgh
  uint32_t bit5_to_0 = (bits >> 48) & 0x3f;
  return (bit7 | bit6 | bit5_to_0);
}
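// Examples (illustrative): EncodeImmSingle(0x40000000), the bit pattern of 2.0f, yields 0x00 and
// EncodeImmSingle(0x3f000000), the pattern of 0.5f, yields 0x60; EncodeImmDouble of the pattern
// of 2.0 (0x4000000000000000) likewise yields 0x00. Patterns with any low mantissa bits set,
// e.g. 0x3f800001, cannot be encoded and both functions return -1.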

size_t Arm64Mir2Lir::GetLoadStoreSize(LIR* lir) {
  bool opcode_is_wide = IS_WIDE(lir->opcode);
  ArmOpcode opcode = UNWIDE(lir->opcode);
  DCHECK(!IsPseudoLirOp(opcode));
  const ArmEncodingMap *encoder = &EncodingMap[opcode];
  uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
  return (bits >> 30);
}

size_t Arm64Mir2Lir::GetInstructionOffset(LIR* lir) {
  size_t offset = lir->operands[2];
  uint64_t check_flags = GetTargetInstFlags(lir->opcode);
  DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
  if (check_flags & SCALED_OFFSET_X0) {
    DCHECK(check_flags & IS_TERTIARY_OP);
    offset = offset * (1 << GetLoadStoreSize(lir));
  }
  return offset;
}

LIR* Arm64Mir2Lir::LoadFPConstantValue(RegStorage r_dest, int32_t value) {
  DCHECK(r_dest.IsSingle());
  if (value == 0) {
    return NewLIR2(kA64Fmov2sw, r_dest.GetReg(), rwzr);
  } else {
    int32_t encoded_imm = EncodeImmSingle((uint32_t)value);
    if (encoded_imm >= 0) {
      return NewLIR2(kA64Fmov2fI, r_dest.GetReg(), encoded_imm);
    }
  }

  LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
  if (data_target == NULL) {
    // Wide, as we need 8B alignment.
    data_target = AddWideData(&literal_list_, value, 0);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
                            r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(load_pc_rel);
  return load_pc_rel;
}

LIR* Arm64Mir2Lir::LoadFPConstantValueWide(RegStorage r_dest, int64_t value) {
  DCHECK(r_dest.IsDouble());
  if (value == 0) {
    return NewLIR2(kA64Fmov2Sx, r_dest.GetReg(), rxzr);
  } else {
    int32_t encoded_imm = EncodeImmDouble(value);
    if (encoded_imm >= 0) {
      return NewLIR2(FWIDE(kA64Fmov2fI), r_dest.GetReg(), encoded_imm);
    }
  }

  // No short form - load from the literal pool.
  int32_t val_lo = Low32Bits(value);
  int32_t val_hi = High32Bits(value);
  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
  if (data_target == NULL) {
    data_target = AddWideData(&literal_list_, val_lo, val_hi);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp),
                            r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(load_pc_rel);
  return load_pc_rel;
}

static int CountLeadingZeros(bool is_wide, uint64_t value) {
  return (is_wide) ? __builtin_clzll(value) : __builtin_clz((uint32_t)value);
}

static int CountTrailingZeros(bool is_wide, uint64_t value) {
  return (is_wide) ? __builtin_ctzll(value) : __builtin_ctz((uint32_t)value);
}

static int CountSetBits(bool is_wide, uint64_t value) {
  return ((is_wide) ?
          __builtin_popcountll(value) : __builtin_popcount((uint32_t)value));
}

/**
 * @brief Try encoding an immediate in the form required by logical instructions.
 *
 * @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value.
 * @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and as
 *   32-bit if @p is_wide is false.
 * @return A non-negative integer containing the encoded immediate or -1 if the encoding failed.
 * @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate().
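 * For example (illustrative): the 32-bit value 0x0000ff00 is a run of eight ones rotated right
 * by 24, so EncodeLogicalImmediate(false, 0x0000ff00) returns n = 0, imm_r = 24, imm_s = 7,
 * packed as 0x607.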
 */
int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
  unsigned n, imm_s, imm_r;

  // Logical immediates are encoded using parameters n, imm_s and imm_r using
  // the following table:
  //
  //    N   imms    immr    size        S             R
  //    1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
  //    0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
  //    0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
  //    0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
  //    0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
  //    0  11110s  xxxxxr     2    UInt(s)       UInt(r)
  //    (s bits must not be all set)
  //
  // A pattern is constructed of size bits, where the least significant S+1
  // bits are set. The pattern is rotated right by R, and repeated across a
  // 32 or 64-bit value, depending on destination register width.
  //
  // To test if an arbitrary immediate can be encoded using this scheme, an
  // iterative algorithm is used.
  //

  // 1. If the value has all set or all clear bits, it can't be encoded.
  if (value == 0 || value == ~UINT64_C(0) ||
      (!is_wide && (uint32_t)value == ~UINT32_C(0))) {
    return -1;
  }

  unsigned lead_zero = CountLeadingZeros(is_wide, value);
  unsigned lead_one = CountLeadingZeros(is_wide, ~value);
  unsigned trail_zero = CountTrailingZeros(is_wide, value);
  unsigned trail_one = CountTrailingZeros(is_wide, ~value);
  unsigned set_bits = CountSetBits(is_wide, value);

  // The fixed bits in the immediate s field.
  // If width == 64 (X reg), start at 0xFFFFFF80.
  // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
  // widths won't be executed.
  unsigned width = (is_wide) ? 64 : 32;
  int imm_s_fixed = (is_wide) ? -128 : -64;
  int imm_s_mask = 0x3f;

  for (;;) {
    // 2. If the value is two bits wide, it can be encoded.
    if (width == 2) {
      n = 0;
      imm_s = 0x3C;
      imm_r = (value & 3) - 1;
      break;
    }

    n = (width == 64) ? 1 : 0;
    imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
    if ((lead_zero + set_bits) == width) {
      imm_r = 0;
    } else {
      imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
    }

    // 3. If the sum of leading zeros, trailing zeros and set bits is
    //    equal to the bit width of the value, it can be encoded.
    if (lead_zero + trail_zero + set_bits == width) {
      break;
    }

    // 4. If the sum of leading ones, trailing ones and unset bits in the
    //    value is equal to the bit width of the value, it can be encoded.
    if (lead_one + trail_one + (width - set_bits) == width) {
      break;
    }

    // 5. If the most-significant half of the bitwise value is equal to
    //    the least-significant half, return to step 2 using the
    //    least-significant half of the value.
    uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1;
    if ((value & mask) == ((value >> (width >> 1)) & mask)) {
      width >>= 1;
      set_bits >>= 1;
      imm_s_fixed >>= 1;
      continue;
    }

    // 6. Otherwise, the value can't be encoded.
    return -1;
  }

  return (n << 12 | imm_r << 6 | imm_s);
}

// Maximum number of instructions to use for encoding the immediate.
static const int max_num_ops_per_const_load = 2;

/**
 * @brief Return the number of fast halfwords in the given uint64_t integer.
 * @details The input integer is split into 4 halfwords (bits 0-15, 16-31, 32-47, 48-63). The
 * number of fast halfwords (halfwords that are either 0 or 0xffff) is returned. See below for
 * a more accurate description.
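 * For example (illustrative): 0x00000000ffff1234 contains two 0x0000 halfwords and one 0xffff
 * halfword, so the function returns 2 (bit 3 clear, as zero halfwords are in the majority).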
 * @param value The input 64-bit integer.
 * @return Return @c retval such that (retval & 0x7) is the maximum between n and m, where n is
 * the number of halfwords with all bits unset (0) and m is the number of halfwords with all bits
 * set (0xffff). Additionally (retval & 0x8) is set when m > n.
 */
static int GetNumFastHalfWords(uint64_t value) {
  unsigned int num_0000_halfwords = 0;
  unsigned int num_ffff_halfwords = 0;
  for (int shift = 0; shift < 64; shift += 16) {
    uint16_t halfword = static_cast<uint16_t>(value >> shift);
    if (halfword == 0)
      num_0000_halfwords++;
    else if (halfword == UINT16_C(0xffff))
      num_ffff_halfwords++;
  }
  if (num_0000_halfwords >= num_ffff_halfwords) {
    DCHECK_LE(num_0000_halfwords, 4U);
    return num_0000_halfwords;
  } else {
    DCHECK_LE(num_ffff_halfwords, 4U);
    return num_ffff_halfwords | 0x8;
  }
}

// The InexpensiveConstantXXX variants below are used in the promotion algorithm to determine how a
// constant is considered for promotion. If the constant is "inexpensive" then the promotion
// algorithm will give it a low priority for promotion, even when it is referenced many times in
// the code.

bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) {
  // A 32-bit int can always be loaded with 2 instructions (and without using the literal pool).
  // We therefore return true and give it a low priority for promotion.
  return true;
}

bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
  return EncodeImmSingle(value) >= 0;
}

bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) {
  int num_slow_halfwords = 4 - (GetNumFastHalfWords(value) & 0x7);
  if (num_slow_halfwords <= max_num_ops_per_const_load) {
    return true;
  }
  return (EncodeLogicalImmediate(/*is_wide=*/true, value) >= 0);
}

bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) {
  return EncodeImmDouble(value) >= 0;
}

// The InexpensiveConstantXXX variants below are used to determine which A64 instructions to use
// when one of the operands is an immediate (e.g. register version or immediate version of add).

bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
  switch (opcode) {
    case Instruction::IF_EQ:
    case Instruction::IF_NE:
    case Instruction::IF_LT:
    case Instruction::IF_GE:
    case Instruction::IF_GT:
    case Instruction::IF_LE:
    case Instruction::ADD_INT:
    case Instruction::ADD_INT_2ADDR:
    case Instruction::SUB_INT:
    case Instruction::SUB_INT_2ADDR:
      // The code below is consistent with the implementation of OpRegRegImm().
      {
        uint32_t abs_value = (value == INT_MIN) ? value : std::abs(value);
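        // A64 add/sub immediates are 12 bits wide, optionally shifted left by 12;
        // anything else needs a temp register and is therefore not "inexpensive".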
        if (abs_value < 0x1000) {
          return true;
        } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
          return true;
        }
        return false;
      }
    case Instruction::SHL_INT:
    case Instruction::SHL_INT_2ADDR:
    case Instruction::SHR_INT:
    case Instruction::SHR_INT_2ADDR:
    case Instruction::USHR_INT:
    case Instruction::USHR_INT_2ADDR:
      return true;
    case Instruction::AND_INT:
    case Instruction::AND_INT_2ADDR:
    case Instruction::AND_INT_LIT16:
    case Instruction::AND_INT_LIT8:
    case Instruction::OR_INT:
    case Instruction::OR_INT_2ADDR:
    case Instruction::OR_INT_LIT16:
    case Instruction::OR_INT_LIT8:
    case Instruction::XOR_INT:
    case Instruction::XOR_INT_2ADDR:
    case Instruction::XOR_INT_LIT16:
    case Instruction::XOR_INT_LIT8:
      if (value == 0 || value == INT32_C(-1)) {
        return true;
      }
      return (EncodeLogicalImmediate(/*is_wide=*/false, value) >= 0);
    default:
      return false;
  }
}

/*
 * Load an immediate using one single instruction when possible; otherwise
 * use a pair of movz and movk instructions.
 *
 * No additional register clobbering operation performed. Use this version when
 * 1) r_dest is freshly returned from AllocTemp or
 * 2) The codegen is under fixed register usage
 */
LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
  LIR* res;

  if (r_dest.IsFloat()) {
    return LoadFPConstantValue(r_dest, value);
  }

  if (r_dest.Is64Bit()) {
    return LoadConstantWide(r_dest, value);
  }

  // Loading SP/ZR with an immediate is not supported.
  DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
  DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));

  // Compute how many movk, movz instructions are needed to load the value.
  uint16_t high_bits = High16Bits(value);
  uint16_t low_bits = Low16Bits(value);

  bool low_fast = ((uint16_t)(low_bits + 1) <= 1);
  bool high_fast = ((uint16_t)(high_bits + 1) <= 1);

  if (LIKELY(low_fast || high_fast)) {
    // 1 instruction is enough to load the immediate.
    if (LIKELY(low_bits == high_bits)) {
      // Value is either 0 or -1: we can just use wzr.
      ArmOpcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr;
      res = NewLIR2(opcode, r_dest.GetReg(), rwzr);
    } else {
      uint16_t uniform_bits, useful_bits;
      int shift;

      if (LIKELY(high_fast)) {
        shift = 0;
        uniform_bits = high_bits;
        useful_bits = low_bits;
      } else {
        shift = 1;
        uniform_bits = low_bits;
        useful_bits = high_bits;
      }

      if (UNLIKELY(uniform_bits != 0)) {
        res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift);
      } else {
        res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift);
      }
    }
  } else {
    // movk, movz require 2 instructions. Try detecting logical immediates.
    int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value);
    if (log_imm >= 0) {
      res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm);
    } else {
      // Use 2 instructions.
      res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0);
      NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1);
    }
  }

  return res;
}

// TODO: clean up the names. LoadConstantWide() should really be LoadConstantNoClobberWide().
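// A sketch of the fast path below (illustrative): 0x1234ffff5678ffff has only two halfwords that
// differ from 0xffff, so it is materialized without the literal pool as
//   movn xN, #0xa987, lsl #16   // xN = 0xffffffff5678ffff
//   movk xN, #0x1234, lsl #48   // xN = 0x1234ffff5678ffff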
LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
  if (r_dest.IsFloat()) {
    return LoadFPConstantValueWide(r_dest, value);
  }

  DCHECK(r_dest.Is64Bit());

  // Loading SP/ZR with an immediate is not supported.
  DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
  DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));

  if (LIKELY(value == INT64_C(0) || value == INT64_C(-1))) {
    // Value is either 0 or -1: we can just use xzr.
    ArmOpcode opcode = LIKELY(value == 0) ? WIDE(kA64Mov2rr) : WIDE(kA64Mvn2rr);
    return NewLIR2(opcode, r_dest.GetReg(), rxzr);
  }

  // At least one of value's halfwords is neither 0x0 nor 0xffff: find out how many.
  uint64_t uvalue = static_cast<uint64_t>(value);
  int num_fast_halfwords = GetNumFastHalfWords(uvalue);
  int num_slow_halfwords = 4 - (num_fast_halfwords & 0x7);
  bool more_ffff_halfwords = (num_fast_halfwords & 0x8) != 0;

  if (num_slow_halfwords > 1) {
    // A single movz/movn is not enough. Try the logical immediate route.
    int log_imm = EncodeLogicalImmediate(/*is_wide=*/true, value);
    if (log_imm >= 0) {
      return NewLIR3(WIDE(kA64Orr3Rrl), r_dest.GetReg(), rxzr, log_imm);
    }
  }

  if (num_slow_halfwords <= max_num_ops_per_const_load) {
    // We can encode the number using a movz/movn followed by one or more movk.
    ArmOpcode op;
    uint16_t background;
    LIR* res = nullptr;

    // Decide whether to use a movz or a movn.
    if (more_ffff_halfwords) {
      op = WIDE(kA64Movn3rdM);
      background = 0xffff;
    } else {
      op = WIDE(kA64Movz3rdM);
      background = 0;
    }

    // Emit the first instruction (movz, movn).
    int shift;
    for (shift = 0; shift < 4; shift++) {
      uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
      if (halfword != background) {
        res = NewLIR3(op, r_dest.GetReg(), halfword ^ background, shift);
        break;
      }
    }

    // Emit the movk instructions.
    for (shift++; shift < 4; shift++) {
      uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
      if (halfword != background) {
        NewLIR3(WIDE(kA64Movk3rdM), r_dest.GetReg(), halfword, shift);
      }
    }
    return res;
  }

  // Use the literal pool.
  int32_t val_lo = Low32Bits(value);
  int32_t val_hi = High32Bits(value);
  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
  if (data_target == NULL) {
    data_target = AddWideData(&literal_list_, val_lo, val_hi);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR *res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
                    r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(res);
  return res;
}

LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
  LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched during assembly */);
  res->target = target;
  return res;
}

LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
  LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc),
                        0 /* offset to be patched */);
  branch->target = target;
  return branch;
}

LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
  ArmOpcode opcode = kA64Brk1d;
  switch (op) {
    case kOpBlx:
      opcode = kA64Blr1x;
      break;
    // TODO(Arm64): port kThumbBx.
    // case kOpBx:
    //   opcode = kThumbBx;
    //   break;
    default:
      LOG(FATAL) << "Bad opcode " << op;
  }
  return NewLIR1(opcode, r_dest_src.GetReg());
}

LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) {
  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  CHECK_EQ(r_dest_src1.Is64Bit(), r_src2.Is64Bit());
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpCmn:
      opcode = kA64Cmn3rro;
      break;
    case kOpCmp:
      opcode = kA64Cmp3rro;
      break;
    case kOpMov:
      opcode = kA64Mov2rr;
      break;
    case kOpMvn:
      opcode = kA64Mvn2rr;
      break;
    case kOpNeg:
      opcode = kA64Neg3rro;
      break;
    case kOpTst:
      opcode = kA64Tst3rro;
      break;
    case kOpRev:
      DCHECK_EQ(shift, 0);
      // Binary, but rm is encoded twice.
      return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
      break;
    case kOpRevsh:
      // Binary, but rm is encoded twice.
      NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
      // "sxth r1, r2" is "sbfm r1, r2, #0, #15"
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), 0, 15);
      break;
    case kOp2Byte:
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
      // For now we use sbfm directly.
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 7);
    case kOp2Short:
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      // For now we use sbfm rather than its alias, sbfx.
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
    case kOp2Char:
      // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
      // For now we use ubfm directly.
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
    default:
      return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
  }

  DCHECK(!IsPseudoLirOp(opcode));
  if (EncodingMap[opcode].flags & IS_BINARY_OP) {
    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg());
  } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
    if (kind == kFmtShift) {
      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
    }
  }

  LOG(FATAL) << "Unexpected encoding operand count";
  return NULL;
}

LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
                                  A64RegExtEncodings ext, uint8_t amount) {
  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpCmn:
      opcode = kA64Cmn3Rre;
      break;
    case kOpCmp:
      opcode = kA64Cmp3Rre;
      break;
    case kOpAdd:
      // Note: intentional fallthrough
    case kOpSub:
      return OpRegRegRegExtend(op, r_dest_src1, r_dest_src1, r_src2, ext, amount);
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
      break;
  }

  DCHECK(!IsPseudoLirOp(opcode));
  if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
    if (kind == kFmtExtend) {
      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(),
                     EncodeExtend(ext, amount));
    }
  }

  LOG(FATAL) << "Unexpected encoding operand count";
  return NULL;
}

LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
  /* RegReg operations with SP in first parameter need extended register instruction form.
   * Only CMN, CMP, ADD & SUB instructions are implemented.
   */
  if (r_dest_src1 == rs_sp) {
    return OpRegRegExtend(op, r_dest_src1, r_src2, kA64Uxtx, 0);
  } else {
    return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
  }
}

LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
  UNIMPLEMENTED(FATAL);
  return nullptr;
}

LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
  UNIMPLEMENTED(FATAL);
  return nullptr;
}

LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
  LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
  return NULL;
}

LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
                                    RegStorage r_src2, int shift) {
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpAdd:
      opcode = kA64Add4rrro;
      break;
    case kOpSub:
      opcode = kA64Sub4rrro;
      break;
    // case kOpRsub:
    //   opcode = kA64RsubWWW;
    //   break;
    case kOpAdc:
      opcode = kA64Adc3rrr;
      break;
    case kOpAnd:
      opcode = kA64And4rrro;
      break;
    case kOpXor:
      opcode = kA64Eor4rrro;
      break;
    case kOpMul:
      opcode = kA64Mul3rrr;
      break;
    case kOpDiv:
      opcode = kA64Sdiv3rrr;
      break;
    case kOpOr:
      opcode = kA64Orr4rrro;
      break;
    case kOpSbc:
      opcode = kA64Sbc3rrr;
      break;
    case kOpLsl:
      opcode = kA64Lsl3rrr;
      break;
    case kOpLsr:
      opcode = kA64Lsr3rrr;
      break;
    case kOpAsr:
      opcode = kA64Asr3rrr;
      break;
    case kOpRor:
      opcode = kA64Ror3rrr;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
      break;
  }

  // The instructions above belong to two kinds:
  // - 4-operands instructions, where the last operand is a shift/extend immediate,
  // - 3-operands instructions with no shift/extend.
  ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
  CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
  CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit());
  if (EncodingMap[opcode].flags & IS_QUAD_OP) {
    DCHECK(!IsExtendEncoding(shift));
    return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
  } else {
    DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    return NewLIR3(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
  }
}

LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1,
                                     RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) {
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpAdd:
      opcode = kA64Add4RRre;
      break;
    case kOpSub:
      opcode = kA64Sub4RRre;
      break;
    default:
      LOG(FATAL) << "Unimplemented opcode: " << op;
      break;
  }
  ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;

  if (r_dest.Is64Bit()) {
    CHECK(r_src1.Is64Bit());

    // dest determines whether the op is wide or not. Up-convert src2 when necessary.
    // Note: this is not according to aarch64 specifications, but our encoding.
    if (!r_src2.Is64Bit()) {
      r_src2 = As64BitReg(r_src2);
    }
  } else {
    CHECK(!r_src1.Is64Bit());
    CHECK(!r_src2.Is64Bit());
  }

  // Sanity checks.
  //   1) Amount is in the range 0..4
  CHECK_LE(amount, 4);

  return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(),
                 EncodeExtend(ext, amount));
}

LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
  return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
}

LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
  return OpRegRegImm64(op, r_dest, r_src1, static_cast<int64_t>(value));
}

LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value) {
  LIR* res;
  bool neg = (value < 0);
  uint64_t abs_value = (neg & !(value == LLONG_MIN)) ? -value : value;
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode alt_opcode = kA64Brk1d;
  bool is_logical = false;
  bool is_wide = r_dest.Is64Bit();
  ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
  int info = 0;

  switch (op) {
    case kOpLsl: {
      // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
      // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 64), #(63-imm)".
      // For now, we just use ubfm directly.
      int max_value = (is_wide) ? 63 : 31;
      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
                     (-value) & max_value, max_value - value);
    }
    case kOpLsr:
      return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpAsr:
      return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpRor:
      // "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm".
      // For now, we just use extr directly.
      return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(),
                     value);
    case kOpAdd:
      neg = !neg;
      // Note: intentional fallthrough
    case kOpSub:
      // Add and sub below read/write sp rather than xzr.
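      // After the kOpAdd fall-through above, `neg' is true exactly when the operation amounts to
      // adding abs_value, and false when it amounts to subtracting it.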
      if (abs_value < 0x1000) {
        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0);
      } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
      } else {
        alt_opcode = (op == kOpAdd) ? kA64Add4RRre : kA64Sub4RRre;
        info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
      }
      break;
    case kOpAdc:
      alt_opcode = kA64Adc3rrr;
      break;
    case kOpSbc:
      alt_opcode = kA64Sbc3rrr;
      break;
    case kOpOr:
      is_logical = true;
      opcode = kA64Orr3Rrl;
      alt_opcode = kA64Orr4rrro;
      break;
    case kOpAnd:
      is_logical = true;
      opcode = kA64And3Rrl;
      alt_opcode = kA64And4rrro;
      break;
    case kOpXor:
      is_logical = true;
      opcode = kA64Eor3Rrl;
      alt_opcode = kA64Eor4rrro;
      break;
    case kOpMul:
      // TUNING: power of 2, shift & add
      alt_opcode = kA64Mul3rrr;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
  }

  if (is_logical) {
    int log_imm = EncodeLogicalImmediate(is_wide, value);
    if (log_imm >= 0) {
      return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
    } else {
      // When the immediate is either 0 or ~0, the logical operation can be trivially reduced
      // to a - possibly negated - assignment.
      if (value == 0) {
        switch (op) {
          case kOpOr:
          case kOpXor:
            // Or/Xor by zero reduces to an assignment.
            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
          default:
            // And by zero reduces to a `mov rdest, xzr'.
            DCHECK(op == kOpAnd);
            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
        }
      } else if (value == INT64_C(-1)
                 || (!is_wide && static_cast<uint32_t>(value) == ~UINT32_C(0))) {
        switch (op) {
          case kOpAnd:
            // And by -1 reduces to an assignment.
            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
          case kOpXor:
            // Xor by -1 reduces to an `mvn rdest, rsrc'.
            return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), r_src1.GetReg());
          default:
            // Or by -1 reduces to a `mvn rdest, xzr'.
            DCHECK(op == kOpOr);
            return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
        }
      }
    }
  }

  RegStorage r_scratch;
  if (is_wide) {
    r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, value);
  } else {
    r_scratch = AllocTemp();
    LoadConstant(r_scratch, value);
  }
  if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
    res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
  else
    res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
  FreeTemp(r_scratch);
  return res;
}

LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
  return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value));
}

LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) {
  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode neg_opcode = kA64Brk1d;
  bool shift;
  bool neg = (value < 0);
  uint64_t abs_value = (neg & !(value == LLONG_MIN)) ? -value : value;

  if (LIKELY(abs_value < 0x1000)) {
    // abs_value is a 12-bit immediate.
    shift = false;
  } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
    // abs_value is a shifted 12-bit immediate.
    shift = true;
    abs_value >>= 12;
  } else if (LIKELY(abs_value < 0x1000000 && (op == kOpAdd || op == kOpSub))) {
    // Note: It is better to use two ADD/SUB instead of loading a number to a temp register.
    // This works for both normal registers and SP.
    // For a frame size == 0x2468, it will be encoded as:
    //   sub sp, #0x2000
    //   sub sp, #0x468
    if (neg) {
      op = (op == kOpAdd) ? kOpSub : kOpAdd;
    }
    OpRegImm64(op, r_dest_src1, abs_value & (~INT64_C(0xfff)));
    return OpRegImm64(op, r_dest_src1, abs_value & 0xfff);
  } else {
    RegStorage r_tmp;
    LIR* res;
    if (IS_WIDE(wide)) {
      r_tmp = AllocTempWide();
      res = LoadConstantWide(r_tmp, value);
    } else {
      r_tmp = AllocTemp();
      res = LoadConstant(r_tmp, value);
    }
    OpRegReg(op, r_dest_src1, r_tmp);
    FreeTemp(r_tmp);
    return res;
  }

  switch (op) {
    case kOpAdd:
      neg_opcode = kA64Sub4RRdT;
      opcode = kA64Add4RRdT;
      break;
    case kOpSub:
      neg_opcode = kA64Add4RRdT;
      opcode = kA64Sub4RRdT;
      break;
    case kOpCmp:
      neg_opcode = kA64Cmn3RdT;
      opcode = kA64Cmp3RdT;
      break;
    default:
      LOG(FATAL) << "Bad op-kind in OpRegImm: " << op;
      break;
  }

  if (UNLIKELY(neg))
    opcode = neg_opcode;

  if (EncodingMap[opcode].flags & IS_QUAD_OP)
    return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value,
                   (shift) ? 1 : 0);
  else
    return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0);
}

int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
  DCHECK_EQ(shift_type & 0x3, shift_type);
  DCHECK_EQ(amount & 0x3f, amount);
  return ((shift_type & 0x3) << 7) | (amount & 0x3f);
}

int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
  DCHECK_EQ(extend_type & 0x7, extend_type);
  DCHECK_EQ(amount & 0x7, amount);
  return (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7);
}

bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) {
  return ((1 << 6) & encoded_value) != 0;
}

LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                   int scale, OpSize size) {
  LIR* load;
  int expected_scale = 0;
  ArmOpcode opcode = kA64Brk1d;
  r_base = Check64BitReg(r_base);

  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
  //   register offset load (rather than doing the sign extension in a separate instruction).
  if (r_index.Is32Bit()) {
    // Assemble: ``sxtw xN, wN''.
    r_index = As64BitReg(r_index);
    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
  }

  if (r_dest.IsFloat()) {
    if (r_dest.IsDouble()) {
      DCHECK(size == k64 || size == kDouble);
      expected_scale = 3;
      opcode = FWIDE(kA64Ldr4fXxG);
    } else {
      DCHECK(r_dest.IsSingle());
      DCHECK(size == k32 || size == kSingle);
      expected_scale = 2;
      opcode = kA64Ldr4fXxG;
    }

    DCHECK(scale == 0 || scale == expected_scale);
    return NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  switch (size) {
    case kDouble:
    case kWord:
    case k64:
      r_dest = Check64BitReg(r_dest);
      opcode = WIDE(kA64Ldr4rXxG);
      expected_scale = 3;
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldr4rXxG;
      expected_scale = 2;
      break;
    case kUnsignedHalf:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrh4wXxd;
      expected_scale = 1;
      break;
    case kSignedHalf:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrsh4rXxd;
      expected_scale = 1;
      break;
    case kUnsignedByte:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrb3wXx;
      break;
    case kSignedByte:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrsb3rXx;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  if (UNLIKELY(expected_scale == 0)) {
    // This is a tertiary op (e.g. ldrb, ldrsb), it does not support scale.
    DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
    DCHECK_EQ(scale, 0);
    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
  } else {
    DCHECK(scale == 0 || scale == expected_scale);
    load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  return load;
}

LIR* Arm64Mir2Lir::LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                  int scale) {
  return LoadBaseIndexed(r_base, r_index, As32BitReg(r_dest), scale, kReference);
}

LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                    int scale, OpSize size) {
  LIR* store;
  int expected_scale = 0;
  ArmOpcode opcode = kA64Brk1d;
  r_base = Check64BitReg(r_base);

  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
  //   register offset store (rather than doing the sign extension in a separate instruction).
  if (r_index.Is32Bit()) {
    // Assemble: ``sxtw xN, wN''.
    r_index = As64BitReg(r_index);
    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
  }

  if (r_src.IsFloat()) {
    if (r_src.IsDouble()) {
      DCHECK(size == k64 || size == kDouble);
      expected_scale = 3;
      opcode = FWIDE(kA64Str4fXxG);
    } else {
      DCHECK(r_src.IsSingle());
      DCHECK(size == k32 || size == kSingle);
      expected_scale = 2;
      opcode = kA64Str4fXxG;
    }

    DCHECK(scale == 0 || scale == expected_scale);
    return NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_src = Check64BitReg(r_src);
      opcode = WIDE(kA64Str4rXxG);
      expected_scale = 3;
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_src = Check32BitReg(r_src);
      opcode = kA64Str4rXxG;
      expected_scale = 2;
      break;
    case kUnsignedHalf:
    case kSignedHalf:
      r_src = Check32BitReg(r_src);
      opcode = kA64Strh4wXxd;
      expected_scale = 1;
      break;
    case kUnsignedByte:
    case kSignedByte:
      r_src = Check32BitReg(r_src);
      opcode = kA64Strb3wXx;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  if (UNLIKELY(expected_scale == 0)) {
    // This is a tertiary op (e.g. strb), it does not support scale.
    DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
    DCHECK_EQ(scale, 0);
    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
  } else {
    store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
                    (scale != 0) ? 1 : 0);
  }

  return store;
}

LIR* Arm64Mir2Lir::StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                   int scale) {
  return StoreBaseIndexed(r_base, r_index, As32BitReg(r_src), scale, kReference);
}

/*
 * Load value from base + displacement. Optionally perform null check
 * on base (which must have an associated s_reg and MIR). If not
 * performing null check, incoming MIR can be null.
 */
LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
                                    OpSize size) {
  LIR* load = NULL;
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode alt_opcode = kA64Brk1d;
  int scale = 0;

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_dest = Check64BitReg(r_dest);
      scale = 3;
      if (r_dest.IsFloat()) {
        DCHECK(r_dest.IsDouble());
        opcode = FWIDE(kA64Ldr3fXD);
        alt_opcode = FWIDE(kA64Ldur3fXd);
      } else {
        opcode = WIDE(kA64Ldr3rXD);
        alt_opcode = WIDE(kA64Ldur3rXd);
      }
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_dest = Check32BitReg(r_dest);
      scale = 2;
      if (r_dest.IsFloat()) {
        DCHECK(r_dest.IsSingle());
        opcode = kA64Ldr3fXD;
      } else {
        opcode = kA64Ldr3rXD;
      }
      break;
    case kUnsignedHalf:
      scale = 1;
      opcode = kA64Ldrh3wXF;
      break;
    case kSignedHalf:
      scale = 1;
      opcode = kA64Ldrsh3rXF;
      break;
    case kUnsignedByte:
      opcode = kA64Ldrb3wXd;
      break;
    case kSignedByte:
      opcode = kA64Ldrsb3rXd;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
  int scaled_disp = displacement >> scale;
  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
    // Can use scaled load.
    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), scaled_disp);
  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
    // Can use unscaled load.
    load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement);
  } else {
    // Use long sequence.
    // TODO: cleaner support for index/displacement registers? Not a reference, but must match width.
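    // Materialize the displacement in a temp register and use the register-offset form instead.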
    RegStorage r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, displacement);
    load = LoadBaseIndexed(r_base, r_scratch, r_dest, 0, size);
    FreeTemp(r_scratch);
  }

  // TODO: in future may need to differentiate Dalvik accesses w/ spills
  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
    DCHECK(r_base == rs_sp);
    AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
  }
  return load;
}

LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                                OpSize size, VolatileKind is_volatile) {
  // LoadBaseDisp() will emit correct insn for atomic load on arm64
  // assuming r_dest is correctly prepared using RegClassForFieldLoadStore().

  LIR* load = LoadBaseDispBody(r_base, displacement, r_dest, size);

  if (UNLIKELY(is_volatile == kVolatile)) {
    // TODO: This should generate an acquire load instead of the barrier.
    GenMemBarrier(kLoadAny);
  }

  return load;
}

LIR* Arm64Mir2Lir::LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                               VolatileKind is_volatile) {
  return LoadBaseDisp(r_base, displacement, As32BitReg(r_dest), kReference, is_volatile);
}

LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
                                     OpSize size) {
  LIR* store = NULL;
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode alt_opcode = kA64Brk1d;
  int scale = 0;

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_src = Check64BitReg(r_src);
      scale = 3;
      if (r_src.IsFloat()) {
        DCHECK(r_src.IsDouble());
        opcode = FWIDE(kA64Str3fXD);
        alt_opcode = FWIDE(kA64Stur3fXd);
      } else {
        opcode = FWIDE(kA64Str3rXD);
        alt_opcode = FWIDE(kA64Stur3rXd);
      }
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_src = Check32BitReg(r_src);
      scale = 2;
      if (r_src.IsFloat()) {
        DCHECK(r_src.IsSingle());
        opcode = kA64Str3fXD;
      } else {
        opcode = kA64Str3rXD;
      }
      break;
    case kUnsignedHalf:
    case kSignedHalf:
      scale = 1;
      opcode = kA64Strh3wXF;
      break;
    case kUnsignedByte:
    case kSignedByte:
      opcode = kA64Strb3wXd;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
  int scaled_disp = displacement >> scale;
  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
    // Can use scaled store.
    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), scaled_disp);
  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
    // Can use unscaled store.
    store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement);
  } else {
    // Use long sequence.
    RegStorage r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, displacement);
    store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
    FreeTemp(r_scratch);
  }

  // TODO: In future, may need to differentiate Dalvik & spill accesses.
  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
    DCHECK(r_base == rs_sp);
    AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
  }
  return store;
}

LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                 OpSize size, VolatileKind is_volatile) {
  // TODO: This should generate a release store and no barriers.
  if (UNLIKELY(is_volatile == kVolatile)) {
    // Ensure that prior accesses become visible to other threads first.
    GenMemBarrier(kAnyStore);
  }

  // StoreBaseDisp() will emit correct insn for atomic store on arm64
  // assuming r_src is correctly prepared using RegClassForFieldLoadStore().

  LIR* store = StoreBaseDispBody(r_base, displacement, r_src, size);

  if (UNLIKELY(is_volatile == kVolatile)) {
    // Preserve order with respect to any subsequent volatile loads.
    // We need StoreLoad, but that generally requires the most expensive barrier.
    GenMemBarrier(kAnyAny);
  }

  return store;
}

LIR* Arm64Mir2Lir::StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                VolatileKind is_volatile) {
  return StoreBaseDisp(r_base, displacement, As32BitReg(r_src), kReference, is_volatile);
}

LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
  LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
  return NULL;
}

LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
  LOG(FATAL) << "Unexpected use of OpMem for Arm64";
  return NULL;
}

LIR* Arm64Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) {
  return OpReg(op, r_tgt);
}

}  // namespace art