//==-- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions --*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

namespace {
class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  AArch64ExpandPseudo() : MachineFunctionPass(ID) {}

  const AArch64InstrInfo *TII;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return "AArch64 pseudo instruction expansion pass";
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);
};
char AArch64ExpandPseudo::ID = 0;
}

/// \brief Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
       ++i) {
    const MachineOperand &MO = OldMI.getOperand(i);
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.addOperand(MO);
    else
      DefMI.addOperand(MO);
  }
}

/// \brief Helper function which extracts the specified 16-bit chunk from a
/// 64-bit value.
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");

  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
}

/// \brief Helper function which replicates a 16-bit chunk within a 64-bit
/// value. Indices correspond to element numbers in a v4i16.
static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) {
  assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!");
  const unsigned ShiftAmt = ToIdx * 16;

  // Replicate the source chunk to the destination position, e.g.
  // replicateChunk(0xAAAABBBBCCCCDDDD, 0, 2) yields 0xAAAADDDDCCCCDDDD.
  const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt;
  // Clear the destination chunk.
  Imm &= ~(0xFFFFLL << ShiftAmt);
  // Insert the replicated chunk.
  return Imm | Chunk;
}

/// \brief Helper function which tries to materialize a 64-bit value with an
/// ORR + MOVK instruction sequence.
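///
/// For example, a constant like 0x5555123455555555 (pattern |A|B|A|A| with
/// A = 0x5555 and B = 0x1234) can be expanded to:
///
///   ORR  x0, xzr, #0x5555555555555555
///   MOVK x0, #0x1234, LSL #32
///
/// since the chunk replicated to all four positions is encodable as a
/// logical immediate.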
static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
                       MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator &MBBI,
                       const AArch64InstrInfo *TII, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");
  const unsigned ShiftAmt = ChunkIdx * 16;

  uint64_t Encoding;
  if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
    // Create the ORR-immediate instruction.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
            .addOperand(MI.getOperand(0))
            .addReg(AArch64::XZR)
            .addImm(Encoding);

    // Create the MOVK instruction.
    const unsigned Imm16 = getChunk(UImm, ChunkIdx);
    const unsigned DstReg = MI.getOperand(0).getReg();
    const bool DstIsDead = MI.getOperand(0).isDead();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    transferImpOps(MI, MIB, MIB1);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

/// \brief Check whether the given 16-bit chunk replicated to full 64-bit width
/// can be materialized with an ORR instruction.
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
  Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;

  return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
}

/// \brief Check for identical 16-bit chunks within the constant and, if so,
/// materialize them with a single ORR instruction. The remaining one or two
/// 16-bit chunks will be materialized with MOVK instructions.
///
/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
/// an ORR instruction.
///
static bool tryToReplicateChunks(uint64_t UImm, MachineInstr &MI,
                                 MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI,
                                 const AArch64InstrInfo *TII) {
  typedef DenseMap<uint64_t, unsigned> CountMap;
  CountMap Counts;

  // Scan the constant and count how often every chunk occurs.
  for (unsigned Idx = 0; Idx < 4; ++Idx)
    ++Counts[getChunk(UImm, Idx)];

  // Traverse the chunks to find one which occurs more than once.
  for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
       Chunk != End; ++Chunk) {
    const uint64_t ChunkVal = Chunk->first;
    const unsigned Count = Chunk->second;

    uint64_t Encoding = 0;

    // We are looking for chunks which have two or three instances and can be
    // materialized with an ORR instruction.
    if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
      continue;

    const bool CountThree = Count == 3;
    // Create the ORR-immediate instruction.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
            .addOperand(MI.getOperand(0))
            .addReg(AArch64::XZR)
            .addImm(Encoding);

    const unsigned DstReg = MI.getOperand(0).getReg();
    const bool DstIsDead = MI.getOperand(0).isDead();

    unsigned ShiftAmt = 0;
    uint64_t Imm16 = 0;
    // Find the first chunk not materialized with the ORR instruction.
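    // E.g. for |A|B|A|A| the ORR above wrote A into all four chunk positions;
    // the scan stops at the chunk holding B, which still needs a MOVK.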
    for (; ShiftAmt < 64; ShiftAmt += 16) {
      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;

      if (Imm16 != ChunkVal)
        break;
    }

    // Create the first MOVK instruction.
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg,
                    RegState::Define | getDeadRegState(DstIsDead && CountThree))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    // In case we have three instances, the whole constant is now materialized
    // and we can exit.
    if (CountThree) {
      transferImpOps(MI, MIB, MIB1);
      MI.eraseFromParent();
      return true;
    }

    // Find the remaining chunk which needs to be materialized.
    for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;

      if (Imm16 != ChunkVal)
        break;
    }

    // Create the second MOVK instruction.
    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    transferImpOps(MI, MIB, MIB2);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

/// \brief Check whether this chunk matches the pattern '1...0...'. This
/// pattern starts a contiguous sequence of ones if we look at the bits from
/// the LSB towards the MSB.
static bool isStartChunk(uint64_t Chunk) {
  if (Chunk == 0 || Chunk == UINT64_MAX)
    return false;

  return isMask_64(~Chunk);
}

/// \brief Check whether this chunk matches the pattern '0...1...'. This
/// pattern ends a contiguous sequence of ones if we look at the bits from the
/// LSB towards the MSB.
static bool isEndChunk(uint64_t Chunk) {
  if (Chunk == 0 || Chunk == UINT64_MAX)
    return false;

  return isMask_64(Chunk);
}

/// \brief Clear or set all bits in the chunk at the given index.
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
  const uint64_t Mask = 0xFFFF;

  if (Clear)
    // Clear chunk in the immediate.
    Imm &= ~(Mask << (Idx * 16));
  else
    // Set all bits in the immediate for the particular chunk.
    Imm |= Mask << (Idx * 16);

  return Imm;
}

/// \brief Check whether the constant contains a sequence of contiguous ones,
/// which might be interrupted by one or two chunks. If so, materialize the
/// sequence of contiguous ones with an ORR instruction. Materialize the
/// chunks which are either interrupting the sequence or outside of the
/// sequence with a MOVK instruction.
///
/// Assuming S is a chunk which starts the sequence (1...0...) and E is a
/// chunk which ends the sequence (0...1...), we are looking for constants
/// which contain at least one S and one E chunk.
/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
///
/// We are also looking for constants like |S|A|B|E| where the contiguous
/// sequence of ones wraps around the MSB into the LSB.
///
static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator &MBBI,
                              const AArch64InstrInfo *TII) {
  const int NotSet = -1;
  const uint64_t Mask = 0xFFFF;

  int StartIdx = NotSet;
  int EndIdx = NotSet;
  // Try to find the chunks which start/end a contiguous sequence of ones.
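  // E.g. a sign-extended chunk of 0xFF00 is a start chunk and 0x00FF is an
  // end chunk; 0x0000 and 0xFFFF are deliberately treated as neither.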
  for (int Idx = 0; Idx < 4; ++Idx) {
    int64_t Chunk = getChunk(UImm, Idx);
    // Sign extend the 16-bit chunk to 64-bit.
    Chunk = (Chunk << 48) >> 48;

    if (isStartChunk(Chunk))
      StartIdx = Idx;
    else if (isEndChunk(Chunk))
      EndIdx = Idx;
  }

  // Early exit in case we can't find a start/end chunk.
  if (StartIdx == NotSet || EndIdx == NotSet)
    return false;

  // Outside of the contiguous sequence of ones everything needs to be zero.
  uint64_t Outside = 0;
  // Chunks between the start and end chunk need to have all their bits set.
  uint64_t Inside = Mask;

  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
  // just swap indices and pretend we are materializing a contiguous sequence
  // of zeros surrounded by a contiguous sequence of ones.
  if (StartIdx > EndIdx) {
    std::swap(StartIdx, EndIdx);
    std::swap(Outside, Inside);
  }

  uint64_t OrrImm = UImm;
  int FirstMovkIdx = NotSet;
  int SecondMovkIdx = NotSet;

  // Find out which chunks we need to patch up to obtain a contiguous sequence
  // of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    const uint64_t Chunk = getChunk(UImm, Idx);

    // Check whether we are looking at a chunk which is not part of the
    // contiguous sequence of ones.
    if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
      OrrImm = updateImm(OrrImm, Idx, Outside == 0);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;

      // Check whether we are looking at a chunk which is part of the
      // contiguous sequence of ones.
    } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
      OrrImm = updateImm(OrrImm, Idx, Inside != Mask);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;
    }
  }
  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");

  // Create the ORR-immediate instruction.
  uint64_t Encoding = 0;
  AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
  MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
          .addOperand(MI.getOperand(0))
          .addReg(AArch64::XZR)
          .addImm(Encoding);

  const unsigned DstReg = MI.getOperand(0).getReg();
  const bool DstIsDead = MI.getOperand(0).isDead();

  const bool SingleMovk = SecondMovkIdx == NotSet;
  // Create the first MOVK instruction.
  MachineInstrBuilder MIB1 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
          .addReg(DstReg,
                  RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
          .addReg(DstReg)
          .addImm(getChunk(UImm, FirstMovkIdx))
          .addImm(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));

  // Early exit in case we only need to emit a single MOVK instruction.
  if (SingleMovk) {
    transferImpOps(MI, MIB, MIB1);
    MI.eraseFromParent();
    return true;
  }

  // Create the second MOVK instruction.
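  // As the final instruction of the expansion, it inherits the dead flag of
  // the original destination operand unconditionally.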
  MachineInstrBuilder MIB2 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
          .addReg(DstReg)
          .addImm(getChunk(UImm, SecondMovkIdx))
          .addImm(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));

  transferImpOps(MI, MIB, MIB2);
  MI.eraseFromParent();
  return true;
}

/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  uint64_t Imm = MI.getOperand(1).getImm();
  const unsigned Mask = 0xFFFF;

  // Try a MOVI instruction (aka ORR-immediate with the zero register).
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
            .addOperand(MI.getOperand(0))
            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
            .addImm(Encoding);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }

  // Scan the immediate and count the number of 16-bit chunks which are either
  // all ones or all zeros.
  unsigned OneChunks = 0;
  unsigned ZeroChunks = 0;
  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
    const unsigned Chunk = (Imm >> Shift) & Mask;
    if (Chunk == Mask)
      OneChunks++;
    else if (Chunk == 0)
      ZeroChunks++;
  }

  // Since we can't materialize the constant with a single ORR instruction,
  // let's see whether we can materialize 3/4 of the constant with an ORR
  // instruction and use an additional MOVK instruction to materialize the
  // remaining 1/4.
  //
  // We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|.
  //
  // E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR,
  // we would create the following instruction sequence:
  //
  // ORR x0, xzr, |A|X|A|X|
  // MOVK x0, |B|, LSL #16
  //
  // Only look at 64-bit constants which can't be materialized with a single
  // instruction, i.e. which have fewer than three all-zero and fewer than
  // three all-one chunks.
  //
  // Ignore 32-bit constants here; they can always be materialized with a
  // MOVZ/MOVN + MOVK pair. Since a 32-bit constant can't be materialized
  // with a single ORR, the best sequence we can achieve is an ORR + MOVK
  // pair. Thus we fall back to the default code below, which in the best case
  // creates a single MOVZ/MOVN instruction (in case one chunk is all zero or
  // all one).
  //
  if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
    // If we interpret the 64-bit constant as a v4i16, are elements 0 and 2
    // identical?
    if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 3 into element 1.
      uint64_t OrrImm = replicateChunk(UImm, 3, 1);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
        return true;

      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 1 into element 3.
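      // This yields the alternating pattern |B|A|B|A|; the original element 3
      // is then restored with a MOVK at LSL #48.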
      OrrImm = replicateChunk(UImm, 1, 3);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
        return true;

      // If we interpret the 64-bit constant as a v4i16, are elements 1 and 3
      // identical?
    } else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 2 into element 0.
      uint64_t OrrImm = replicateChunk(UImm, 2, 0);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
        return true;

      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 0 into element 2.
      OrrImm = replicateChunk(UImm, 0, 2);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
        return true;
    }
  }

  // Check for identical 16-bit chunks within the constant and, if so,
  // materialize them with a single ORR instruction. The remaining one or two
  // 16-bit chunks will be materialized with MOVK instructions.
  if (BitSize == 64 && tryToReplicateChunks(UImm, MI, MBB, MBBI, TII))
    return true;

  // Check whether the constant contains a sequence of contiguous ones, which
  // might be interrupted by one or two chunks. If so, materialize the sequence
  // of contiguous ones with an ORR instruction. Materialize the chunks which
  // are either interrupting the sequence or outside of the sequence with a
  // MOVK instruction.
  if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
    return true;

  // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
  // more MOVK instructions to insert additional 16-bit portions into the
  // lower bits.
  bool isNeg = false;

  // Use MOVN to materialize the high bits if we have more all-one chunks
  // than all-zero chunks.
  if (OneChunks > ZeroChunks) {
    isNeg = true;
    Imm = ~Imm;
  }

  unsigned FirstOpc;
  if (BitSize == 32) {
    Imm &= (1LL << 32) - 1;
    FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
  } else {
    FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
  }
  unsigned Shift = 0;     // LSL amount for high bits with MOVZ/MOVN
  unsigned LastShift = 0; // LSL amount for last MOVK
  if (Imm != 0) {
    unsigned LZ = countLeadingZeros(Imm);
    unsigned TZ = countTrailingZeros(Imm);
    Shift = ((63 - LZ) / 16) * 16;
    LastShift = (TZ / 16) * 16;
  }
  unsigned Imm16 = (Imm >> Shift) & Mask;
  unsigned DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  MachineInstrBuilder MIB1 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
          .addReg(DstReg, RegState::Define |
                              getDeadRegState(DstIsDead && Shift == LastShift))
          .addImm(Imm16)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));

  // If a MOVN was used for the high bits of a negative value, flip the rest
  // of the bits back for use with MOVK.
  if (isNeg)
    Imm = ~Imm;

  if (Shift == LastShift) {
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  MachineInstrBuilder MIB2;
  unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
  while (Shift != LastShift) {
    Shift -= 16;
    Imm16 = (Imm >> Shift) & Mask;
    if (Imm16 == (isNeg ? Mask : 0))
      continue; // This 16-bit portion is already set correctly.
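    // Patch in this 16-bit chunk; only the final MOVK (at LastShift) may
    // carry the dead flag of the original destination operand.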
    MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
               .addReg(DstReg,
                       RegState::Define |
                           getDeadRegState(DstIsDead && Shift == LastShift))
               .addReg(DstReg)
               .addImm(Imm16)
               .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
  }

  transferImpOps(MI, MIB1, MIB2);
  MI.eraseFromParent();
  return true;
}

/// \brief If MBBI references a pseudo instruction that should be expanded
/// here, do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:  Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr:  Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr:  Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr:  Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:  Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr:  Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr:  Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr:  Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:  Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr:  Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr:  Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr:  Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr:  Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr:  Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr:  Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr:  Opcode = AArch64::ORRXrs; break;
    }
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
                MI.getOperand(0).getReg())
            .addOperand(MI.getOperand(1))
            .addOperand(MI.getOperand(2))
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    // Expand into ADRP + LDR.
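    // On ELF this prints as, e.g. for a global "var":
    //   adrp x0, :got:var
    //   ldr  x0, [x0, :got_lo12:var]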
    unsigned DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
            .addOperand(MI.getOperand(0))
            .addReg(DstReg);

    if (MO1.isGlobal()) {
      MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
      MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                            Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else if (MO1.isSymbol()) {
      MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
      MIB2.addExternalSymbol(MO1.getSymbolName(),
                             Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else {
      assert(MO1.isCPI() &&
             "Only expect globals, external symbols, or constant pools");
      MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                Flags | AArch64II::MO_PAGE);
      MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                Flags | AArch64II::MO_PAGEOFF |
                                    AArch64II::MO_NC);
    }

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrBA:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    unsigned DstReg = MI.getOperand(0).getReg();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .addOperand(MI.getOperand(1));

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .addOperand(MI.getOperand(0))
            .addReg(DstReg)
            .addOperand(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  }
  return false;
}

/// \brief Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// \brief Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}