//==-- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions --*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64InstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

namespace {
/// \brief Post-register-allocation pass that rewrites AArch64 pseudo
/// instructions (immediate moves, address materialization, shifted-register
/// aliases, ...) into real target instructions, one basic block at a time.
class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  AArch64ExpandPseudo() : MachineFunctionPass(ID) {}

  // Cached target instruction info; initialized in runOnMachineFunction.
  const AArch64InstrInfo *TII;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return "AArch64 pseudo instruction expansion pass";
  }

private:
  /// Expand all pseudo instructions in \p MBB; returns true if anything
  /// was modified.
  bool expandMBB(MachineBasicBlock &MBB);
  /// Expand the single instruction at \p MBBI if it is a known pseudo;
  /// returns true if an expansion happened.
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  /// Expand a MOVi32imm/MOVi64imm pseudo (\p BitSize is 32 or 64).
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);
};
char AArch64ExpandPseudo::ID = 0;
} // end anonymous namespace

/// \brief Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
49 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, 50 MachineInstrBuilder &DefMI) { 51 const MCInstrDesc &Desc = OldMI.getDesc(); 52 for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e; 53 ++i) { 54 const MachineOperand &MO = OldMI.getOperand(i); 55 assert(MO.isReg() && MO.getReg()); 56 if (MO.isUse()) 57 UseMI.addOperand(MO); 58 else 59 DefMI.addOperand(MO); 60 } 61 } 62 63 /// \brief Helper function which extracts the specified 16-bit chunk from a 64 /// 64-bit value. 65 static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) { 66 assert(ChunkIdx < 4 && "Out of range chunk index specified!"); 67 68 return (Imm >> (ChunkIdx * 16)) & 0xFFFF; 69 } 70 71 /// \brief Helper function which replicates a 16-bit chunk within a 64-bit 72 /// value. Indices correspond to element numbers in a v4i16. 73 static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) { 74 assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!"); 75 const unsigned ShiftAmt = ToIdx * 16; 76 77 // Replicate the source chunk to the destination position. 78 const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt; 79 // Clear the destination chunk. 80 Imm &= ~(0xFFFFLL << ShiftAmt); 81 // Insert the replicated chunk. 82 return Imm | Chunk; 83 } 84 85 /// \brief Helper function which tries to materialize a 64-bit value with an 86 /// ORR + MOVK instruction sequence. 87 static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI, 88 MachineBasicBlock &MBB, 89 MachineBasicBlock::iterator &MBBI, 90 const AArch64InstrInfo *TII, unsigned ChunkIdx) { 91 assert(ChunkIdx < 4 && "Out of range chunk index specified!"); 92 const unsigned ShiftAmt = ChunkIdx * 16; 93 94 uint64_t Encoding; 95 if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) { 96 // Create the ORR-immediate instruction. 
97 MachineInstrBuilder MIB = 98 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri)) 99 .addOperand(MI.getOperand(0)) 100 .addReg(AArch64::XZR) 101 .addImm(Encoding); 102 103 // Create the MOVK instruction. 104 const unsigned Imm16 = getChunk(UImm, ChunkIdx); 105 const unsigned DstReg = MI.getOperand(0).getReg(); 106 const bool DstIsDead = MI.getOperand(0).isDead(); 107 MachineInstrBuilder MIB1 = 108 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) 109 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 110 .addReg(DstReg) 111 .addImm(Imm16) 112 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); 113 114 transferImpOps(MI, MIB, MIB1); 115 MI.eraseFromParent(); 116 return true; 117 } 118 119 return false; 120 } 121 122 /// \brief Check whether the given 16-bit chunk replicated to full 64-bit width 123 /// can be materialized with an ORR instruction. 124 static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) { 125 Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk; 126 127 return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding); 128 } 129 130 /// \brief Check for identical 16-bit chunks within the constant and if so 131 /// materialize them with a single ORR instruction. The remaining one or two 132 /// 16-bit chunks will be materialized with MOVK instructions. 133 /// 134 /// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order 135 /// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with 136 /// an ORR instruction. 137 /// 138 static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI, 139 MachineBasicBlock &MBB, 140 MachineBasicBlock::iterator &MBBI, 141 const AArch64InstrInfo *TII) { 142 typedef DenseMap<uint64_t, unsigned> CountMap; 143 CountMap Counts; 144 145 // Scan the constant and count how often every chunk occurs. 
146 for (unsigned Idx = 0; Idx < 4; ++Idx) 147 ++Counts[getChunk(UImm, Idx)]; 148 149 // Traverse the chunks to find one which occurs more than once. 150 for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end(); 151 Chunk != End; ++Chunk) { 152 const uint64_t ChunkVal = Chunk->first; 153 const unsigned Count = Chunk->second; 154 155 uint64_t Encoding = 0; 156 157 // We are looking for chunks which have two or three instances and can be 158 // materialized with an ORR instruction. 159 if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding)) 160 continue; 161 162 const bool CountThree = Count == 3; 163 // Create the ORR-immediate instruction. 164 MachineInstrBuilder MIB = 165 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri)) 166 .addOperand(MI.getOperand(0)) 167 .addReg(AArch64::XZR) 168 .addImm(Encoding); 169 170 const unsigned DstReg = MI.getOperand(0).getReg(); 171 const bool DstIsDead = MI.getOperand(0).isDead(); 172 173 unsigned ShiftAmt = 0; 174 uint64_t Imm16 = 0; 175 // Find the first chunk not materialized with the ORR instruction. 176 for (; ShiftAmt < 64; ShiftAmt += 16) { 177 Imm16 = (UImm >> ShiftAmt) & 0xFFFF; 178 179 if (Imm16 != ChunkVal) 180 break; 181 } 182 183 // Create the first MOVK instruction. 184 MachineInstrBuilder MIB1 = 185 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) 186 .addReg(DstReg, 187 RegState::Define | getDeadRegState(DstIsDead && CountThree)) 188 .addReg(DstReg) 189 .addImm(Imm16) 190 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); 191 192 // In case we have three instances the whole constant is now materialized 193 // and we can exit. 194 if (CountThree) { 195 transferImpOps(MI, MIB, MIB1); 196 MI.eraseFromParent(); 197 return true; 198 } 199 200 // Find the remaining chunk which needs to be materialized. 
201 for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) { 202 Imm16 = (UImm >> ShiftAmt) & 0xFFFF; 203 204 if (Imm16 != ChunkVal) 205 break; 206 } 207 208 // Create the second MOVK instruction. 209 MachineInstrBuilder MIB2 = 210 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) 211 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 212 .addReg(DstReg) 213 .addImm(Imm16) 214 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); 215 216 transferImpOps(MI, MIB, MIB2); 217 MI.eraseFromParent(); 218 return true; 219 } 220 221 return false; 222 } 223 224 /// \brief Check whether this chunk matches the pattern '1...0...'. This pattern 225 /// starts a contiguous sequence of ones if we look at the bits from the LSB 226 /// towards the MSB. 227 static bool isStartChunk(uint64_t Chunk) { 228 if (Chunk == 0 || Chunk == UINT64_MAX) 229 return false; 230 231 return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64; 232 } 233 234 /// \brief Check whether this chunk matches the pattern '0...1...' This pattern 235 /// ends a contiguous sequence of ones if we look at the bits from the LSB 236 /// towards the MSB. 237 static bool isEndChunk(uint64_t Chunk) { 238 if (Chunk == 0 || Chunk == UINT64_MAX) 239 return false; 240 241 return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64; 242 } 243 244 /// \brief Clear or set all bits in the chunk at the given index. 245 static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) { 246 const uint64_t Mask = 0xFFFF; 247 248 if (Clear) 249 // Clear chunk in the immediate. 250 Imm &= ~(Mask << (Idx * 16)); 251 else 252 // Set all bits in the immediate for the particular chunk. 253 Imm |= Mask << (Idx * 16); 254 255 return Imm; 256 } 257 258 /// \brief Check whether the constant contains a sequence of contiguous ones, 259 /// which might be interrupted by one or two chunks. If so, materialize the 260 /// sequence of contiguous ones with an ORR instruction. 
/// Materialize the chunks which are either interrupting the sequence or outside
/// of the sequence with a MOVK instruction.
///
/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
/// which ends the sequence (0...1...). Then we are looking for constants which
/// contain at least one S and E chunk.
/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
///
/// We are also looking for constants like |S|A|B|E| where the contiguous
/// sequence of ones wraps around the MSB into the LSB.
///
static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator &MBBI,
                              const AArch64InstrInfo *TII) {
  const int NotSet = -1;
  const uint64_t Mask = 0xFFFF;

  int StartIdx = NotSet;
  int EndIdx = NotSet;
  // Try to find the chunks which start/end a contiguous sequence of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    int64_t Chunk = getChunk(UImm, Idx);
    // Sign extend the 16-bit chunk to 64-bit. (NOTE(review): relies on
    // arithmetic right shift of a negative signed value, which is
    // implementation-defined but is the behavior of all supported hosts.)
    Chunk = (Chunk << 48) >> 48;

    if (isStartChunk(Chunk))
      StartIdx = Idx;
    else if (isEndChunk(Chunk))
      EndIdx = Idx;
  }

  // Early exit in case we can't find a start/end chunk.
  if (StartIdx == NotSet || EndIdx == NotSet)
    return false;

  // Outside of the contiguous sequence of ones everything needs to be zero.
  uint64_t Outside = 0;
  // Chunks between the start and end chunk need to have all their bits set.
  uint64_t Inside = Mask;

  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
  // just swap indices and pretend we are materializing a contiguous sequence
  // of zeros surrounded by a contiguous sequence of ones.
  if (StartIdx > EndIdx) {
    std::swap(StartIdx, EndIdx);
    std::swap(Outside, Inside);
  }

  uint64_t OrrImm = UImm;
  int FirstMovkIdx = NotSet;
  int SecondMovkIdx = NotSet;

  // Find out which chunks we need to patch up to obtain a contiguous sequence
  // of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    const uint64_t Chunk = getChunk(UImm, Idx);

    // Check whether we are looking at a chunk which is not part of the
    // contiguous sequence of ones.
    if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
      OrrImm = updateImm(OrrImm, Idx, Outside == 0);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;

      // Check whether we are looking a chunk which is part of the contiguous
      // sequence of ones.
    } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
      OrrImm = updateImm(OrrImm, Idx, Inside != Mask);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;
    }
  }
  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");

  // Create the ORR-immediate instruction which materializes the patched-up
  // constant (the caller guarantees it is not already a logical immediate,
  // so at least one MOVK will follow).
  uint64_t Encoding = 0;
  AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
  MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
          .addOperand(MI.getOperand(0))
          .addReg(AArch64::XZR)
          .addImm(Encoding);

  const unsigned DstReg = MI.getOperand(0).getReg();
  const bool DstIsDead = MI.getOperand(0).isDead();

  const bool SingleMovk = SecondMovkIdx == NotSet;
  // Create the first MOVK instruction; the dead flag moves to it only when it
  // is also the last instruction of the expansion.
  MachineInstrBuilder MIB1 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
          .addReg(DstReg,
                  RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
          .addReg(DstReg)
          .addImm(getChunk(UImm, FirstMovkIdx))
          .addImm(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));

  // Early exit in case we only need to emit a single MOVK instruction.
  if (SingleMovk) {
    transferImpOps(MI, MIB, MIB1);
    MI.eraseFromParent();
    return true;
  }

  // Create the second MOVK instruction.
  MachineInstrBuilder MIB2 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
          .addReg(DstReg)
          .addImm(getChunk(UImm, SecondMovkIdx))
          .addImm(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));

  transferImpOps(MI, MIB, MIB2);
  MI.eraseFromParent();
  return true;
}

/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  uint64_t Imm = MI.getOperand(1).getImm();
  const unsigned Mask = 0xFFFF;

  // Try a MOVI instruction (aka ORR-immediate with the zero register).
  // UImm is Imm zero-extended to BitSize bits.
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
            .addOperand(MI.getOperand(0))
            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
            .addImm(Encoding);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }

  // Scan the immediate and count the number of 16-bit chunks which are either
  // all ones or all zeros.
  unsigned OneChunks = 0;
  unsigned ZeroChunks = 0;
  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
    const unsigned Chunk = (Imm >> Shift) & Mask;
    if (Chunk == Mask)
      OneChunks++;
    else if (Chunk == 0)
      ZeroChunks++;
  }

  // Since we can't materialize the constant with a single ORR instruction,
  // let's see whether we can materialize 3/4 of the constant with an ORR
  // instruction and use an additional MOVK instruction to materialize the
  // remaining 1/4.
  //
  // We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|.
  //
  // E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR,
  // we would create the following instruction sequence:
  //
  // ORR x0, xzr, |A|X|A|X|
  // MOVK x0, |B|, LSL #16
  //
  // Only look at 64-bit constants which can't be materialized with a single
  // instruction e.g. which have less than either three all zero or all one
  // chunks.
  //
  // Ignore 32-bit constants here, they always can be materialized with a
  // MOVZ/MOVN + MOVK pair. Since the 32-bit constant can't be materialized
  // with a single ORR, the best sequence we can achieve is a ORR + MOVK pair.
  // Thus we fall back to the default code below which in the best case creates
  // a single MOVZ/MOVN instruction (in case one chunk is all zero or all one).
  //
  if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
    // If we interpret the 64-bit constant as a v4i16, are elements 0 and 2
    // identical?
    if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 3 into element 1.
      uint64_t OrrImm = replicateChunk(UImm, 3, 1);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
        return true;

      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 1 into element 3.
      OrrImm = replicateChunk(UImm, 1, 3);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
        return true;

      // If we interpret the 64-bit constant as a v4i16, are elements 1 and 3
      // identical?
    } else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 2 into element 0.
      uint64_t OrrImm = replicateChunk(UImm, 2, 0);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
        return true;

      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 0 into element 2.
      OrrImm = replicateChunk(UImm, 0, 2);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
        return true;
    }
  }

  // Check for identical 16-bit chunks within the constant and if so materialize
  // them with a single ORR instruction. The remaining one or two 16-bit chunks
  // will be materialized with MOVK instructions.
  if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII))
    return true;

  // Check whether the constant contains a sequence of contiguous ones, which
  // might be interrupted by one or two chunks. If so, materialize the sequence
  // of contiguous ones with an ORR instruction. Materialize the chunks which
  // are either interrupting the sequence or outside of the sequence with a
  // MOVK instruction.
  if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
    return true;

  // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
  // more MOVK instructions to insert additional 16-bit portions into the
  // lower bits.
  bool isNeg = false;

  // Use MOVN to materialize the high bits if we have more all one chunks
  // than all zero chunks.
  if (OneChunks > ZeroChunks) {
    isNeg = true;
    Imm = ~Imm;
  }

  unsigned FirstOpc;
  if (BitSize == 32) {
    Imm &= (1LL << 32) - 1;
    FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
  } else {
    FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
  }
  unsigned Shift = 0;     // LSL amount for high bits with MOVZ/MOVN
  unsigned LastShift = 0; // LSL amount for last MOVK
  if (Imm != 0) {
    unsigned LZ = countLeadingZeros(Imm);
    unsigned TZ = countTrailingZeros(Imm);
    Shift = ((63 - LZ) / 16) * 16;
    LastShift = (TZ / 16) * 16;
  }
  unsigned Imm16 = (Imm >> Shift) & Mask;
  unsigned DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  MachineInstrBuilder MIB1 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
          .addReg(DstReg, RegState::Define |
                              getDeadRegState(DstIsDead && Shift == LastShift))
          .addImm(Imm16)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));

  // If a MOVN was used for the high bits of a negative value, flip the rest
  // of the bits back for use with MOVK.
  if (isNeg)
    Imm = ~Imm;

  // Single MOVZ/MOVN suffices when the highest and lowest interesting chunks
  // coincide.
  if (Shift == LastShift) {
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  MachineInstrBuilder MIB2;
  unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
  while (Shift != LastShift) {
    Shift -= 16;
    Imm16 = (Imm >> Shift) & Mask;
    if (Imm16 == (isNeg ? Mask : 0))
      continue; // This 16-bit portion is already set correctly.
    MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
               .addReg(DstReg,
                       RegState::Define |
                           getDeadRegState(DstIsDead && Shift == LastShift))
               .addReg(DstReg)
               .addImm(Imm16)
               .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
  }

  transferImpOps(MI, MIB1, MIB2);
  MI.eraseFromParent();
  return true;
}

/// \brief If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;

  // Plain register-register ALU pseudos are rewritten to the shifted-register
  // form with a zero LSL shift amount.
  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    // NOTE: this inner 'Opcode' intentionally shadows the outer one; it holds
    // the replacement (shifted-register) opcode.
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:  Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr:  Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr:  Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr:  Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:  Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr:  Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr:  Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr:  Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:  Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr:  Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr:  Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr:  Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr:  Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr:  Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr:  Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr:  Opcode = AArch64::ORRXrs; break;
    }
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
                MI.getOperand(0).getReg())
            .addOperand(MI.getOperand(1))
            .addOperand(MI.getOperand(2))
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::FCVTSHpseudo: {
    // Expand to FCVTSHr reading the h-subregister; the full source register
    // is kept as an implicit use so liveness is not lost.
    MachineOperand Src = MI.getOperand(1);
    Src.setImplicit();
    unsigned SrcH =
        TII->getRegisterInfo().getSubReg(Src.getReg(), AArch64::hsub);
    auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::FCVTSHr))
                   .addOperand(MI.getOperand(0))
                   .addReg(SrcH, RegState::Undef)
                   .addOperand(Src);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::LOADgot: {
    // Expand into ADRP + LDR.
    unsigned DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
            .addOperand(MI.getOperand(0))
            .addReg(DstReg);

    // The GOT entry can be named by a global, an external symbol, or a
    // constant pool index; attach page/pageoff relocations accordingly.
    if (MO1.isGlobal()) {
      MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
      MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                            Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else if (MO1.isSymbol()) {
      MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
      MIB2.addExternalSymbol(MO1.getSymbolName(),
                             Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else {
      assert(MO1.isCPI() &&
             "Only expect globals, externalsymbols, or constant pools");
      MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                Flags | AArch64II::MO_PAGE);
      MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                Flags | AArch64II::MO_PAGEOFF |
                                    AArch64II::MO_NC);
    }

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrBA:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    unsigned DstReg = MI.getOperand(0).getReg();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .addOperand(MI.getOperand(1));

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .addOperand(MI.getOperand(0))
            .addReg(DstReg)
            .addOperand(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR:
    // RET_ReallyLR is an alias for RET with an explicit LR operand.
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
        .addReg(AArch64::LR);
    MI.eraseFromParent();
    return true;
  }
  return false;
}

/// \brief Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Capture the successor first: expandMI may erase the current
    // instruction, invalidating MBBI.
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// \brief Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}