//===- AArch64InstrInfo.cpp - AArch64 Instruction Information ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"

#include <algorithm>

#define GET_INSTRINFO_CTOR
#include "AArch64GenInstrInfo.inc"

using namespace llvm;

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
  : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
    Subtarget(STI) {}

void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  unsigned Opc = 0;
  unsigned ZeroReg = 0;
  if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) {
    // E.g. ADD xDst, xsp, #0 (, lsl #0)
    BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg)
      .addReg(SrcReg)
      .addImm(0);
    return;
  } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
    // E.g. ADD wDST, wsp, #0 (, lsl #0)
    BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg)
      .addReg(SrcReg)
      .addImm(0);
    return;
  } else if (DestReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(SrcReg));
    // E.g. MSR NZCV, xDST
    BuildMI(MBB, I, DL, get(AArch64::MSRix))
      .addImm(A64SysReg::NZCV)
      .addReg(SrcReg);
  } else if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg));
    // E.g. MRS xDST, NZCV
    BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
      .addImm(A64SysReg::NZCV);
  } else if (AArch64::GPR64RegClass.contains(DestReg)) {
    assert(AArch64::GPR64RegClass.contains(SrcReg));
    Opc = AArch64::ORRxxx_lsl;
    ZeroReg = AArch64::XZR;
  } else if (AArch64::GPR32RegClass.contains(DestReg)) {
    assert(AArch64::GPR32RegClass.contains(SrcReg));
    Opc = AArch64::ORRwww_lsl;
    ZeroReg = AArch64::WZR;
  } else if (AArch64::FPR32RegClass.contains(DestReg)) {
    assert(AArch64::FPR32RegClass.contains(SrcReg));
    BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
      .addReg(SrcReg);
    return;
  } else if (AArch64::FPR64RegClass.contains(DestReg)) {
    assert(AArch64::FPR64RegClass.contains(SrcReg));
    BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
      .addReg(SrcReg);
    return;
  } else if (AArch64::FPR128RegClass.contains(DestReg)) {
    assert(AArch64::FPR128RegClass.contains(SrcReg));

    // FIXME: there's no good way to do this, at least without NEON:
    //   + There's no single move instruction for q-registers
    //   + We can't create a spill slot and use normal STR/LDR because stack
    //     allocation has already happened
    //   + We can't go via X-registers with FMOV because register allocation
    //     has already happened.
    // This may not be efficient, but at least it works. We bounce the value
    // through a fresh 16-byte slot just below the stack pointer: a store with
    // pre-decrement, then a load with post-increment. The pre-indexed offset
    // field is a signed 9-bit immediate, so #-16 is encoded as (0x1ff & -16).
    BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
      .addReg(SrcReg)
      .addReg(AArch64::XSP)
      .addImm(0x1ff & -16);

    BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
      .addReg(AArch64::XSP, RegState::Define)
      .addReg(AArch64::XSP)
      .addImm(16);
    return;
  } else {
    llvm_unreachable("Unknown register class in copyPhysReg");
  }

  // E.g. ORR xDst, xzr, xSrc, lsl #0
  BuildMI(MBB, I, DL, get(Opc), DestReg)
    .addReg(ZeroReg)
    .addReg(SrcReg)
    .addImm(0);
}
/// Does the Opcode represent a conditional branch that we can remove and
/// re-add at the end of a basic block?
static bool isCondBranch(unsigned Opc) {
  return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx ||
         Opc == AArch64::CBNZw || Opc == AArch64::CBNZx ||
         Opc == AArch64::TBZwii || Opc == AArch64::TBZxii ||
         Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii;
}

/// Takes apart a given conditional branch MachineInstr (see isCondBranch),
/// setting TBB to the destination basic block and populating the Cond vector
/// with data necessary to recreate the conditional branch at a later date. The
/// first element will be the opcode, and subsequent ones define the conditions
/// being branched on in an instruction-specific manner.
static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB,
                               SmallVectorImpl<MachineOperand> &Cond) {
  switch (I->getOpcode()) {
  case AArch64::Bcc:
  case AArch64::CBZw:
  case AArch64::CBZx:
  case AArch64::CBNZw:
  case AArch64::CBNZx:
    // These instructions just have one predicate operand in position 0 (either
    // a condition code or a register being compared).
    Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
    Cond.push_back(I->getOperand(0));
    TBB = I->getOperand(1).getMBB();
    return;
  case AArch64::TBZwii:
  case AArch64::TBZxii:
  case AArch64::TBNZwii:
  case AArch64::TBNZxii:
    // These have two predicate operands: a register and a bit position.
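    // E.g. "tbz w3, #5, target" is recorded as Cond == { TBZwii, w3, 5 }
    // with TBB == target.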
    Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
    Cond.push_back(I->getOperand(0));
    Cond.push_back(I->getOperand(1));
    TBB = I->getOperand(2).getMBB();
    return;
  default:
    llvm_unreachable("Unknown conditional branch to classify");
  }
}

bool
AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (LastOpc == AArch64::Bimm) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranch(LastOpc)) {
      classifyCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && LastOpc == AArch64::Bimm) {
    while (SecondLastOpc == AArch64::Bimm) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now: the only remaining terminator is an unconditional
        // branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (LastOpc == AArch64::Bimm) {
    if (SecondLastOpc == AArch64::Bcc) {
      TBB = SecondLastInst->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
      Cond.push_back(SecondLastInst->getOperand(0));
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (isCondBranch(SecondLastOpc)) {
      classifyCondBranch(SecondLastInst, TBB, Cond);
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    }
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
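
/// Invert the condition recorded in Cond by AnalyzeBranch: a Bcc has its
/// condition code inverted, while the compare-and-branch and test-and-branch
/// opcodes are swapped for their logical complements. Returns false to
/// indicate the condition was successfully reversed.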
bool AArch64InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  switch (Cond[0].getImm()) {
  case AArch64::Bcc: {
    A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm());
    CC = A64InvertCondCode(CC);
    Cond[1].setImm(CC);
    return false;
  }
  case AArch64::CBZw:
    Cond[0].setImm(AArch64::CBNZw);
    return false;
  case AArch64::CBZx:
    Cond[0].setImm(AArch64::CBNZx);
    return false;
  case AArch64::CBNZw:
    Cond[0].setImm(AArch64::CBZw);
    return false;
  case AArch64::CBNZx:
    Cond[0].setImm(AArch64::CBZx);
    return false;
  case AArch64::TBZwii:
    Cond[0].setImm(AArch64::TBNZwii);
    return false;
  case AArch64::TBZxii:
    Cond[0].setImm(AArch64::TBNZxii);
    return false;
  case AArch64::TBNZwii:
    Cond[0].setImm(AArch64::TBZwii);
    return false;
  case AArch64::TBNZxii:
    Cond[0].setImm(AArch64::TBZxii);
    return false;
  default:
    llvm_unreachable("Unknown branch type");
  }
}

unsigned
AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
                               const SmallVectorImpl<MachineOperand> &Cond,
                               DebugLoc DL) const {
  if (FBB == 0 && Cond.empty()) {
    BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
    return 1;
  } else if (FBB == 0) {
    MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
    for (unsigned i = 1, e = Cond.size(); i != e; ++i)
      MIB.addOperand(Cond[i]);
    MIB.addMBB(TBB);
    return 1;
  }

  MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
  for (unsigned i = 1, e = Cond.size(); i != e; ++i)
    MIB.addOperand(Cond[i]);
  MIB.addMBB(TBB);

  BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB);
  return 2;
}

unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin()) return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranch(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}
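
/// Expand target-specific post-RA pseudo-instructions. The only pseudo
/// expanded here is TLSDESC_BLRx, which becomes a real BLRx bundled with a
/// TLSDESCCALL marker carrying the TLS symbol it refers to.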
bool
AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();

  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case AArch64::TLSDESC_BLRx: {
    MachineInstr *NewMI =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
        .addOperand(MI.getOperand(1));
    MI.setDesc(get(AArch64::BLRx));

    llvm::finalizeBundle(MBB, NewMI, *++MBBI);
    return true;
  }
  default:
    return false;
  }

  return false;
}

void
AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIdx,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  DebugLoc DL = MBB.findDebugLoc(MBBI);
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FrameIdx);

  MachineMemOperand *MMO
    = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
                              MachineMemOperand::MOStore,
                              MFI.getObjectSize(FrameIdx),
                              Align);

  unsigned StoreOp = 0;
  if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
    switch (RC->getSize()) {
    case 4: StoreOp = AArch64::LS32_STR; break;
    case 8: StoreOp = AArch64::LS64_STR; break;
    default:
      llvm_unreachable("Unknown size for regclass");
    }
  } else {
    assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
            RC->hasType(MVT::f128))
           && "Expected integer or floating type for store");
    switch (RC->getSize()) {
    case 4: StoreOp = AArch64::LSFP32_STR; break;
    case 8: StoreOp = AArch64::LSFP64_STR; break;
    case 16: StoreOp = AArch64::LSFP128_STR; break;
    default:
      llvm_unreachable("Unknown size for regclass");
    }
  }

  MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
  NewMI.addReg(SrcReg, getKillRegState(isKill))
       .addFrameIndex(FrameIdx)
       .addImm(0)
       .addMemOperand(MMO);
}

void
AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned DestReg, int FrameIdx,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  DebugLoc DL = MBB.findDebugLoc(MBBI);
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FrameIdx);

  MachineMemOperand *MMO
    = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
                              MachineMemOperand::MOLoad,
                              MFI.getObjectSize(FrameIdx),
                              Align);

  unsigned LoadOp = 0;
  if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
    switch (RC->getSize()) {
    case 4: LoadOp = AArch64::LS32_LDR; break;
    case 8: LoadOp = AArch64::LS64_LDR; break;
    default:
      llvm_unreachable("Unknown size for regclass");
    }
  } else {
    assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64)
            || RC->hasType(MVT::f128))
           && "Expected integer or floating type for load");
    switch (RC->getSize()) {
    case 4: LoadOp = AArch64::LSFP32_LDR; break;
    case 8: LoadOp = AArch64::LSFP64_LDR; break;
    case 16: LoadOp = AArch64::LSFP128_LDR; break;
    default:
      llvm_unreachable("Unknown size for regclass");
    }
  }

  MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
  NewMI.addFrameIndex(FrameIdx)
       .addImm(0)
       .addMemOperand(MMO);
}
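
/// Return a conservative estimate of the largest stack-slot offset the
/// instructions in MF can encode directly, by taking the minimum of each
/// frame-index user's maximum reachable offset. Offsets beyond this limit
/// need the register scavenger.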
unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const {
  unsigned Limit = (1 << 16) - 1;
  for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) {
    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
         I != E; ++I) {
      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
        if (!I->getOperand(i).isFI()) continue;

        // When using ADDxxi_lsl0_s to get the address of a stack object,
        // 0xfff is the largest offset guaranteed to fit in the immediate
        // offset.
        if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) {
          Limit = std::min(Limit, 0xfffu);
          break;
        }

        int AccessScale, MinOffset, MaxOffset;
        getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset);
        Limit = std::min(Limit, static_cast<unsigned>(MaxOffset));

        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}

void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
                                             int &AccessScale, int &MinOffset,
                                             int &MaxOffset) const {
  switch (MI.getOpcode()) {
  default: llvm_unreachable("Unknown load/store kind");
  case TargetOpcode::DBG_VALUE:
    AccessScale = 1;
    MinOffset = INT_MIN;
    MaxOffset = INT_MAX;
    return;
  case AArch64::LS8_LDR: case AArch64::LS8_STR:
  case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
  case AArch64::LDRSBw:
  case AArch64::LDRSBx:
    AccessScale = 1;
    MinOffset = 0;
    MaxOffset = 0xfff;
    return;
  case AArch64::LS16_LDR: case AArch64::LS16_STR:
  case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
  case AArch64::LDRSHw:
  case AArch64::LDRSHx:
    AccessScale = 2;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LS32_LDR: case AArch64::LS32_STR:
  case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
  case AArch64::LDRSWx:
  case AArch64::LDPSWx:
    AccessScale = 4;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LS64_LDR: case AArch64::LS64_STR:
  case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
  case AArch64::PRFM:
    AccessScale = 8;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
    AccessScale = 16;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
  case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
    // Paired accesses take a signed 7-bit scaled immediate: [-64, 63] slots.
    AccessScale = 4;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
  case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
    AccessScale = 8;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
    AccessScale = 16;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  }
}
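
/// Return the size of the specified MachineInstr in bytes. AArch64 is a
/// fixed-width ISA, so almost everything is covered by MCID.getSize(); the
/// exceptions are inline assembly, labels, bundles, and zero-size pseudos.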
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MCInstrDesc &MCID = MI.getDesc();
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction &MF = *MBB.getParent();
  const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();

  if (MCID.getSize())
    return MCID.getSize();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);

  if (MI.isLabel())
    return 0;

  switch (MI.getOpcode()) {
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::PROLOG_LABEL:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::DBG_VALUE:
    return 0;
  case AArch64::TLSDESCCALL:
    return 0;
  default:
    llvm_unreachable("Unknown instruction class");
  }
}

unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI;
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                unsigned FrameReg, int &Offset,
                                const AArch64InstrInfo &TII) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();

  MFI.getObjectOffset(FrameRegIdx);
  llvm_unreachable("Unimplemented rewriteFrameIndex");
}

void llvm::emitRegUpdate(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI,
                         DebugLoc dl, const TargetInstrInfo &TII,
                         unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
                         int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
  if (NumBytes == 0 && DstReg == SrcReg)
    return;
  else if (abs64(NumBytes) & ~0xffffff) {
    // Generically, we have to materialize the offset into a temporary register
    // and add or subtract it. There are a couple of ways this could be done;
    // for now we'll use a movz/movk sequence. E.g. for NumBytes == 0x12345678:
    //   MOVZ scratch, #0x5678; MOVK scratch, #0x1234, lsl #16.
    uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes));
    BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
      .addImm(0xffff & Bits).addImm(0)
      .setMIFlags(MIFlags);

    Bits >>= 16;
    if (Bits & 0xffff) {
      BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
        .addReg(ScratchReg)
        .addImm(0xffff & Bits).addImm(1)
        .setMIFlags(MIFlags);
    }

    Bits >>= 16;
    if (Bits & 0xffff) {
      BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
        .addReg(ScratchReg)
        .addImm(0xffff & Bits).addImm(2)
        .setMIFlags(MIFlags);
    }

    Bits >>= 16;
    if (Bits & 0xffff) {
      BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
        .addReg(ScratchReg)
        .addImm(0xffff & Bits).addImm(3)
        .setMIFlags(MIFlags);
    }

    // ADD DST, SRC, xTMP (, lsl #0)
    unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
    BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg)
      .addReg(SrcReg, RegState::Kill)
      .addReg(ScratchReg, RegState::Kill)
      .addImm(0)
      .setMIFlag(MIFlags);
    return;
  }

  // Now we know that the adjustment can be done in at most two add/sub
  // (immediate) instructions, which is always more efficient than a
  // literal-pool load, or even a hypothetical movz/movk/add sequence.

  // Decide whether we're doing addition or subtraction.
  unsigned LowOp, HighOp;
  if (NumBytes >= 0) {
    LowOp = AArch64::ADDxxi_lsl0_s;
    HighOp = AArch64::ADDxxi_lsl12_s;
  } else {
    LowOp = AArch64::SUBxxi_lsl0_s;
    HighOp = AArch64::SUBxxi_lsl12_s;
    NumBytes = abs64(NumBytes);
  }

  // If we're here, at the very least a move needs to be produced, which just
  // happens to be materializable by an ADD.
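  // E.g. NumBytes == 0x123456 becomes "add Dst, Src, #0x456" followed by
  // "add Dst, Dst, #0x123, lsl #12"; a pure move (NumBytes == 0) emits just
  // the low ADD with an immediate of #0.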
  if ((NumBytes & 0xfff) || NumBytes == 0) {
    BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg)
      .addReg(SrcReg, RegState::Kill)
      .addImm(NumBytes & 0xfff)
      .setMIFlag(MIFlags);

    // Next update should use the register we've just defined.
    SrcReg = DstReg;
  }

  if (NumBytes & 0xfff000) {
    BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
      .addReg(SrcReg, RegState::Kill)
      .addImm(NumBytes >> 12)
      .setMIFlag(MIFlags);
  }
}

void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                        DebugLoc dl, const TargetInstrInfo &TII,
                        unsigned ScratchReg, int64_t NumBytes,
                        MachineInstr::MIFlag MIFlags) {
  emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16,
                NumBytes, MIFlags);
}

namespace {
/// Clean up local-dynamic TLS accesses: the first call that computes the
/// module's TLS base address has its result saved in a virtual register, so
/// that dominated accesses can reuse it instead of repeating the call.
struct LDTLSCleanup : public MachineFunctionPass {
  static char ID;
  LDTLSCleanup() : MachineFunctionPass(ID) {}

  virtual bool runOnMachineFunction(MachineFunction &MF) {
    AArch64MachineFunctionInfo *MFI
      = MF.getInfo<AArch64MachineFunctionInfo>();
    if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
      // There's no point folding accesses unless there are at least two.
      return false;
    }

    MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
    return VisitNode(DT->getRootNode(), 0);
  }

  // Visit the dominator subtree rooted at Node in pre-order.
  // If TLSBaseAddrReg is non-null, then use that to replace any
  // TLS_base_addr instructions. Otherwise, create the register
  // when the first such instruction is seen, and then use it
  // as we encounter more instructions.
  bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
    MachineBasicBlock *BB = Node->getBlock();
    bool Changed = false;

    // Traverse the current block.
    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
         ++I) {
      switch (I->getOpcode()) {
      case AArch64::TLSDESC_BLRx:
        // Make sure it's a local dynamic access.
        if (!I->getOperand(1).isSymbol() ||
            strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
          break;

        if (TLSBaseAddrReg)
          I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
        else
          I = SetRegister(I, &TLSBaseAddrReg);
        Changed = true;
        break;
      default:
        break;
      }
    }

    // Visit the children of this block in the dominator tree.
    for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
         I != E; ++I) {
      Changed |= VisitNode(*I, TLSBaseAddrReg);
    }

    return Changed;
  }

  // Replace the TLS_base_addr instruction I with a copy from
  // TLSBaseAddrReg, returning the new instruction.
  MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
                                       unsigned TLSBaseAddrReg) {
    MachineFunction *MF = I->getParent()->getParent();
    const AArch64TargetMachine *TM =
        static_cast<const AArch64TargetMachine *>(&MF->getTarget());
    const AArch64InstrInfo *TII = TM->getInstrInfo();

    // Insert a copy from TLSBaseAddrReg to x0, which is where the rest of the
    // code sequence assumes the address will be.
    MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
                                 TII->get(TargetOpcode::COPY),
                                 AArch64::X0)
      .addReg(TLSBaseAddrReg);

    // Erase the TLS_base_addr instruction.
    I->eraseFromParent();

    return Copy;
  }
  // Create a virtual register in *TLSBaseAddrReg, and populate it by
  // inserting a copy instruction after I. Returns the new instruction.
  MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
    MachineFunction *MF = I->getParent()->getParent();
    const AArch64TargetMachine *TM =
        static_cast<const AArch64TargetMachine *>(&MF->getTarget());
    const AArch64InstrInfo *TII = TM->getInstrInfo();

    // Create a virtual register for the TLS base address.
    MachineRegisterInfo &RegInfo = MF->getRegInfo();
    *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);

    // Insert a copy from X0 to TLSBaseAddrReg for later.
    MachineInstr *Next = I->getNextNode();
    MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
                                 TII->get(TargetOpcode::COPY),
                                 *TLSBaseAddrReg)
      .addReg(AArch64::X0);

    return Copy;
  }

  virtual const char *getPassName() const {
    return "Local Dynamic TLS Access Clean-up";
  }

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesCFG();
    AU.addRequired<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
}

char LDTLSCleanup::ID = 0;
FunctionPass *
llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }