//==-- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions --*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

namespace llvm {
void initializeAArch64ExpandPseudoPass(PassRegistry &);
}

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {
class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  const AArch64InstrInfo *TII;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return AARCH64_EXPAND_PSEUDO_NAME;
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
};
char AArch64ExpandPseudo::ID = 0;
}

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// \brief Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
       ++i) {
    const MachineOperand &MO = OldMI.getOperand(i);
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.addOperand(MO);
    else
      DefMI.addOperand(MO);
  }
}

/// \brief Helper function which extracts the specified 16-bit chunk from a
/// 64-bit value.
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");

  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
}

/// \brief Helper function which replicates a 16-bit chunk within a 64-bit
/// value. Indices correspond to element numbers in a v4i16.
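/// E.g. replicateChunk(0xAAAABBBBCCCCDDDD, /*FromIdx=*/0, /*ToIdx=*/2)
/// yields 0xAAAADDDDCCCCDDDD.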
static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) {
  assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!");
  const unsigned ShiftAmt = ToIdx * 16;

  // Replicate the source chunk to the destination position.
  const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt;
  // Clear the destination chunk.
  Imm &= ~(0xFFFFLL << ShiftAmt);
  // Insert the replicated chunk.
  return Imm | Chunk;
}

/// \brief Helper function which tries to materialize a 64-bit value with an
/// ORR + MOVK instruction sequence.
static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
                       MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator &MBBI,
                       const AArch64InstrInfo *TII, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");
  const unsigned ShiftAmt = ChunkIdx * 16;

  uint64_t Encoding;
  if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
    // Create the ORR-immediate instruction.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
            .addOperand(MI.getOperand(0))
            .addReg(AArch64::XZR)
            .addImm(Encoding);

    // Create the MOVK instruction.
    const unsigned Imm16 = getChunk(UImm, ChunkIdx);
    const unsigned DstReg = MI.getOperand(0).getReg();
    const bool DstIsDead = MI.getOperand(0).isDead();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    transferImpOps(MI, MIB, MIB1);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

/// \brief Check whether the given 16-bit chunk replicated to full 64-bit width
/// can be materialized with an ORR instruction.
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
  Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;

  return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
}

/// \brief Check for identical 16-bit chunks within the constant and if so
/// materialize them with a single ORR instruction. The remaining one or two
/// 16-bit chunks will be materialized with MOVK instructions.
///
/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (the
/// order of the chunks doesn't matter), assuming |A|A|A|A| can be materialized
/// with an ORR instruction.
static bool tryToReplicateChunks(uint64_t UImm, MachineInstr &MI,
                                 MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI,
                                 const AArch64InstrInfo *TII) {
  typedef DenseMap<uint64_t, unsigned> CountMap;
  CountMap Counts;

  // Scan the constant and count how often every chunk occurs.
  for (unsigned Idx = 0; Idx < 4; ++Idx)
    ++Counts[getChunk(UImm, Idx)];

  // Traverse the chunks to find one which occurs more than once.
  for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
       Chunk != End; ++Chunk) {
    const uint64_t ChunkVal = Chunk->first;
    const unsigned Count = Chunk->second;

    uint64_t Encoding = 0;

    // We are looking for chunks which have two or three instances and can be
    // materialized with an ORR instruction.
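    // E.g. in 0x5555ABCD12345555 the chunk 0x5555 occurs twice and
    // 0x5555555555555555 is encodable as a logical immediate, so we can emit:
    //   orr  x0, xzr, #0x5555555555555555
    //   movk x0, #0x1234, lsl #16
    //   movk x0, #0xabcd, lsl #32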
    if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
      continue;

    const bool CountThree = Count == 3;
    // Create the ORR-immediate instruction.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
            .addOperand(MI.getOperand(0))
            .addReg(AArch64::XZR)
            .addImm(Encoding);

    const unsigned DstReg = MI.getOperand(0).getReg();
    const bool DstIsDead = MI.getOperand(0).isDead();

    unsigned ShiftAmt = 0;
    uint64_t Imm16 = 0;
    // Find the first chunk not materialized with the ORR instruction.
    for (; ShiftAmt < 64; ShiftAmt += 16) {
      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;

      if (Imm16 != ChunkVal)
        break;
    }

    // Create the first MOVK instruction.
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg,
                    RegState::Define |
                        getDeadRegState(DstIsDead && CountThree))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    // In case we have three instances the whole constant is now materialized
    // and we can exit.
    if (CountThree) {
      transferImpOps(MI, MIB, MIB1);
      MI.eraseFromParent();
      return true;
    }

    // Find the remaining chunk which needs to be materialized.
    for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;

      if (Imm16 != ChunkVal)
        break;
    }

    // Create the second MOVK instruction.
    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    transferImpOps(MI, MIB, MIB2);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

/// \brief Check whether this chunk matches the pattern '1...0...'. This
/// pattern starts a contiguous sequence of ones if we look at the bits from
/// the LSB towards the MSB.
static bool isStartChunk(uint64_t Chunk) {
  if (Chunk == 0 || Chunk == UINT64_MAX)
    return false;

  return isMask_64(~Chunk);
}

/// \brief Check whether this chunk matches the pattern '0...1...'. This
/// pattern ends a contiguous sequence of ones if we look at the bits from the
/// LSB towards the MSB.
static bool isEndChunk(uint64_t Chunk) {
  if (Chunk == 0 || Chunk == UINT64_MAX)
    return false;

  return isMask_64(Chunk);
}

/// \brief Clear or set all bits in the chunk at the given index.
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
  const uint64_t Mask = 0xFFFF;

  if (Clear)
    // Clear chunk in the immediate.
    Imm &= ~(Mask << (Idx * 16));
  else
    // Set all bits in the immediate for the particular chunk.
    Imm |= Mask << (Idx * 16);

  return Imm;
}

/// \brief Check whether the constant contains a sequence of contiguous ones,
/// which might be interrupted by one or two chunks. If so, materialize the
/// sequence of contiguous ones with an ORR instruction. Materialize the
/// chunks which either interrupt the sequence or lie outside of it with a
/// MOVK instruction.
///
/// Assume S is a chunk which starts the sequence (1...0...) and E is a chunk
/// which ends the sequence (0...1...).
/// Then we are looking for constants which contain at least one S and one E
/// chunk.
/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
///
/// We are also looking for constants like |S|A|B|E| where the contiguous
/// sequence of ones wraps around the MSB into the LSB.
static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator &MBBI,
                              const AArch64InstrInfo *TII) {
  const int NotSet = -1;
  const uint64_t Mask = 0xFFFF;

  int StartIdx = NotSet;
  int EndIdx = NotSet;
  // Try to find the chunks which start/end a contiguous sequence of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    int64_t Chunk = getChunk(UImm, Idx);
    // Sign extend the 16-bit chunk to 64-bit.
    Chunk = (Chunk << 48) >> 48;

    if (isStartChunk(Chunk))
      StartIdx = Idx;
    else if (isEndChunk(Chunk))
      EndIdx = Idx;
  }

  // Early exit in case we can't find a start/end chunk.
  if (StartIdx == NotSet || EndIdx == NotSet)
    return false;

  // Outside of the contiguous sequence of ones everything needs to be zero.
  uint64_t Outside = 0;
  // Chunks between the start and end chunk need to have all their bits set.
  uint64_t Inside = Mask;

  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
  // just swap indices and pretend we are materializing a contiguous sequence
  // of zeros surrounded by a contiguous sequence of ones.
  if (StartIdx > EndIdx) {
    std::swap(StartIdx, EndIdx);
    std::swap(Outside, Inside);
  }

  uint64_t OrrImm = UImm;
  int FirstMovkIdx = NotSet;
  int SecondMovkIdx = NotSet;

  // Find out which chunks we need to patch up to obtain a contiguous sequence
  // of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    const uint64_t Chunk = getChunk(UImm, Idx);

    // Check whether we are looking at a chunk which is not part of the
    // contiguous sequence of ones.
    if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
      OrrImm = updateImm(OrrImm, Idx, Outside == 0);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;

      // Check whether we are looking at a chunk which is part of the
      // contiguous sequence of ones.
    } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
      OrrImm = updateImm(OrrImm, Idx, Inside != Mask);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;
    }
  }
  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");

  // Create the ORR-immediate instruction.
  uint64_t Encoding = 0;
  AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
  MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
          .addOperand(MI.getOperand(0))
          .addReg(AArch64::XZR)
          .addImm(Encoding);

  const unsigned DstReg = MI.getOperand(0).getReg();
  const bool DstIsDead = MI.getOperand(0).isDead();

  const bool SingleMovk = SecondMovkIdx == NotSet;
  // Create the first MOVK instruction.
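  // Mark the destination dead here only if this MOVK is the final write,
  // i.e. no second MOVK follows.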
  MachineInstrBuilder MIB1 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
          .addReg(DstReg,
                  RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
          .addReg(DstReg)
          .addImm(getChunk(UImm, FirstMovkIdx))
          .addImm(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));

  // Early exit in case we only need to emit a single MOVK instruction.
  if (SingleMovk) {
    transferImpOps(MI, MIB, MIB1);
    MI.eraseFromParent();
    return true;
  }

  // Create the second MOVK instruction.
  MachineInstrBuilder MIB2 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
          .addReg(DstReg)
          .addImm(getChunk(UImm, SecondMovkIdx))
          .addImm(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));

  transferImpOps(MI, MIB, MIB2);
  MI.eraseFromParent();
  return true;
}

/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  unsigned DstReg = MI.getOperand(0).getReg();
  uint64_t Imm = MI.getOperand(1).getImm();
  const unsigned Mask = 0xFFFF;

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  // Try a MOV (bitmask immediate), i.e. an ORR-immediate with the zero
  // register. First truncate the immediate to the low BitSize bits.
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
            .addOperand(MI.getOperand(0))
            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
            .addImm(Encoding);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }

  // Scan the immediate and count the number of 16-bit chunks which are either
  // all ones or all zeros.
  unsigned OneChunks = 0;
  unsigned ZeroChunks = 0;
  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
    const unsigned Chunk = (Imm >> Shift) & Mask;
    if (Chunk == Mask)
      OneChunks++;
    else if (Chunk == 0)
      ZeroChunks++;
  }

  // Since we can't materialize the constant with a single ORR instruction,
  // let's see whether we can materialize 3/4 of the constant with an ORR
  // instruction and use an additional MOVK instruction to materialize the
  // remaining 1/4.
  //
  // We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|.
  //
  // E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR,
  // we would create the following instruction sequence:
  //
  //   ORR x0, xzr, |A|X|A|X|
  //   MOVK x0, |B|, LSL #16
  //
  // Only look at 64-bit constants which can't be materialized with a single
  // instruction, i.e. which have fewer than three all-zero and fewer than
  // three all-one chunks.
  //
  // Ignore 32-bit constants here, they can always be materialized with a
  // MOVZ/MOVN + MOVK pair.
  // Since the 32-bit constant can't be materialized with a single ORR, the
  // best sequence we can achieve is an ORR + MOVK pair. Thus we fall back to
  // the default code below, which in the best case creates a single MOVZ/MOVN
  // instruction (in case one chunk is all zero or all one).
  if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
    // If we interpret the 64-bit constant as a v4i16, are elements 0 and 2
    // identical?
    if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 3 into element 1.
      uint64_t OrrImm = replicateChunk(UImm, 3, 1);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
        return true;

      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 1 into element 3.
      OrrImm = replicateChunk(UImm, 1, 3);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
        return true;

      // If we interpret the 64-bit constant as a v4i16, are elements 1 and 3
      // identical?
    } else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 2 into element 0.
      uint64_t OrrImm = replicateChunk(UImm, 2, 0);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
        return true;

      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 0 into element 2.
      OrrImm = replicateChunk(UImm, 0, 2);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
        return true;
    }
  }

  // Check for identical 16-bit chunks within the constant and if so
  // materialize them with a single ORR instruction. The remaining one or two
  // 16-bit chunks will be materialized with MOVK instructions.
  if (BitSize == 64 && tryToReplicateChunks(UImm, MI, MBB, MBBI, TII))
    return true;

  // Check whether the constant contains a sequence of contiguous ones, which
  // might be interrupted by one or two chunks. If so, materialize the sequence
  // of contiguous ones with an ORR instruction. Materialize the chunks which
  // are either interrupting the sequence or outside of the sequence with a
  // MOVK instruction.
  if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
    return true;

  // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
  // more MOVK instructions to insert additional 16-bit portions into the
  // lower bits.
  bool isNeg = false;

  // Use MOVN to materialize the high bits if we have more all-one chunks
  // than all-zero chunks.
  if (OneChunks > ZeroChunks) {
    isNeg = true;
    Imm = ~Imm;
  }

  unsigned FirstOpc;
  if (BitSize == 32) {
    Imm &= (1LL << 32) - 1;
    FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
  } else {
    FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
  }
  unsigned Shift = 0;     // LSL amount for high bits with MOVZ/MOVN
  unsigned LastShift = 0; // LSL amount for last MOVK
  if (Imm != 0) {
    unsigned LZ = countLeadingZeros(Imm);
    unsigned TZ = countTrailingZeros(Imm);
    Shift = ((63 - LZ) / 16) * 16;
    LastShift = (TZ / 16) * 16;
  }
  unsigned Imm16 = (Imm >> Shift) & Mask;
  bool DstIsDead = MI.getOperand(0).isDead();
  MachineInstrBuilder MIB1 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
          .addReg(DstReg, RegState::Define |
                              getDeadRegState(DstIsDead && Shift == LastShift))
          .addImm(Imm16)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));

  // If a MOVN was used for the high bits of a negative value, flip the rest
  // of the bits back for use with MOVK.
  if (isNeg)
    Imm = ~Imm;

  if (Shift == LastShift) {
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  MachineInstrBuilder MIB2;
  unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
  while (Shift != LastShift) {
    Shift -= 16;
    Imm16 = (Imm >> Shift) & Mask;
    if (Imm16 == (isNeg ? Mask : 0))
      continue; // This 16-bit portion is already set correctly.
    MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
               .addReg(DstReg,
                       RegState::Define |
                           getDeadRegState(DstIsDead && Shift == LastShift))
               .addReg(DstReg)
               .addImm(Imm16)
               .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
  }

  transferImpOps(MI, MIB1, MIB2);
  MI.eraseFromParent();
  return true;
}

static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
  for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
    MBB->addLiveIn(*I);
}

bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &Dest = MI.getOperand(0);
  unsigned StatusReg = MI.getOperand(1).getReg();
  MachineOperand &Addr = MI.getOperand(2);
  MachineOperand &Desired = MI.getOperand(3);
  MachineOperand &New = MI.getOperand(4);

  LivePhysRegs LiveRegs(&TII->getRegisterInfo());
  LiveRegs.addLiveOuts(MBB);
  for (auto I = std::prev(MBB.end()); I != MBBI; --I)
    LiveRegs.stepBackward(*I);

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  LoadCmpBB->addLiveIn(Addr.getReg());
  LoadCmpBB->addLiveIn(Dest.getReg());
  LoadCmpBB->addLiveIn(Desired.getReg());
  addPostLoopLiveIns(LoadCmpBB, LiveRegs);

  BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
      .addReg(Addr.getReg());
  BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addOperand(Desired)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  StoreBB->addLiveIn(Addr.getReg());
  StoreBB->addLiveIn(New.getReg());
  addPostLoopLiveIns(StoreBB, LiveRegs);

  BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
      .addOperand(New)
      .addOperand(Addr);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, RegState::Kill)
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);
  addPostLoopLiveIns(DoneBB, LiveRegs);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  unsigned StatusReg = MI.getOperand(2).getReg();
  MachineOperand &Addr = MI.getOperand(3);
  MachineOperand &DesiredLo = MI.getOperand(4);
  MachineOperand &DesiredHi = MI.getOperand(5);
  MachineOperand &NewLo = MI.getOperand(6);
  MachineOperand &NewHi = MI.getOperand(7);

  LivePhysRegs LiveRegs(&TII->getRegisterInfo());
  LiveRegs.addLiveOuts(MBB);
  for (auto I = std::prev(MBB.end()); I != MBBI; --I)
    LiveRegs.stepBackward(*I);

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     sbcs xzr, xDestHi, xDesiredHi
  //     b.ne .Ldone
  LoadCmpBB->addLiveIn(Addr.getReg());
  LoadCmpBB->addLiveIn(DestLo.getReg());
  LoadCmpBB->addLiveIn(DestHi.getReg());
  LoadCmpBB->addLiveIn(DesiredLo.getReg());
  LoadCmpBB->addLiveIn(DesiredHi.getReg());
  addPostLoopLiveIns(LoadCmpBB, LiveRegs);

  BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(Addr.getReg());
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addOperand(DesiredLo)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SBCSXr), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addOperand(DesiredHi);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  StoreBB->addLiveIn(Addr.getReg());
  StoreBB->addLiveIn(NewLo.getReg());
  StoreBB->addLiveIn(NewHi.getReg());
  addPostLoopLiveIns(StoreBB, LiveRegs);
  BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
      .addOperand(NewLo)
      .addOperand(NewHi)
      .addOperand(Addr);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, RegState::Kill)
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);
  addPostLoopLiveIns(DoneBB, LiveRegs);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  return true;
}

/// \brief If MBBI references a pseudo instruction that should be expanded
/// here, do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    // Expand the register-register form into the register-shifted-register
    // form with a shift amount of zero.
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:  Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr:  Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr:  Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr:  Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:  Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr:  Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr:  Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr:  Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:  Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr:  Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr:  Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr:  Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr:  Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr:  Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr:  Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr:  Opcode = AArch64::ORRXrs; break;
    }
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
                MI.getOperand(0).getReg())
            .addOperand(MI.getOperand(1))
            .addOperand(MI.getOperand(2))
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    // Expand into ADRP + LDR.
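    // The ADRP materializes the 4 KiB page of the target address (MO_PAGE);
    // the LDR then adds the low 12 bits as its unsigned offset (MO_PAGEOFF,
    // with MO_NC suppressing the relocation overflow check).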
    unsigned DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
            .addOperand(MI.getOperand(0))
            .addReg(DstReg);

    if (MO1.isGlobal()) {
      MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
      MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                            Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else if (MO1.isSymbol()) {
      MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
      MIB2.addExternalSymbol(MO1.getSymbolName(),
                             Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else {
      assert(MO1.isCPI() &&
             "Only expect globals, external symbols, or constant pools");
      MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                Flags | AArch64II::MO_PAGE);
      MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                Flags | AArch64II::MO_PAGEOFF |
                                    AArch64II::MO_NC);
    }

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrBA:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    unsigned DstReg = MI.getOperand(0).getReg();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .addOperand(MI.getOperand(1));

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .addOperand(MI.getOperand(0))
            .addReg(DstReg)
            .addOperand(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
  }
  return false;
}

/// \brief Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
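/// Expansions may split off the remainder of the current block (e.g. the
/// CMP_SWAP lowerings), so expandMI reports the next instruction to visit
/// through its NextMBBI out-parameter.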
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// \brief Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}