//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "t2-reduce-size"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h" // For the SmallSet used in canAddPseudoFlagDep.
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/Function.h" // To access Function attributes
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs,  "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts,   "Number of 32-bit load / store reduced to 16-bit ones");

static cl::opt<int> ReduceLimit("t2-reduce-limit",
                                cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
                                     cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
                                    cl::init(-1), cl::Hidden);

namespace {
  /// ReduceTable - A static table with information on mapping from wide
  /// opcodes to narrow ones.
  struct ReduceEntry {
    uint16_t WideOpc;       // Wide opcode
    uint16_t NarrowOpc1;    // Narrow opcode to transform to
    uint16_t NarrowOpc2;    // Narrow opcode when it's two-address
    uint8_t  Imm1Limit;     // Limit of immediate field (bits)
    uint8_t  Imm2Limit;     // Limit of immediate field when it's two-address
    unsigned LowRegs1 : 1;  // Only possible if low-registers are used
    unsigned LowRegs2 : 1;  // Only possible if low-registers are used (2addr)
    unsigned PredCC1  : 2;  // 0 - If predicated, cc is on and vice versa.
                            // 1 - No cc field.
                            // 2 - Always set CPSR.
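    // PredCC2: same encoding as PredCC1, but used for the two-address
    // (NarrowOpc2) form of the instruction.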
    unsigned PredCC2  : 2;
    unsigned PartFlag : 1;  // 16-bit instruction does partial flag update
    unsigned Special  : 1;  // Needs to be dealt with specially
    unsigned AvoidMovs: 1;  // Avoid movs with shifter operand (for Swift)
  };

  static const ReduceEntry ReduceTable[] = {
    // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM
    { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 },
    { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 },
    { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 },
    { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 },
    { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
    { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 },
    { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
    { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
    { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 },
    //FIXME: Disable CMN, as CCodes are backwards from compare expectations
    //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
    { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
    { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 },
    { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 },
    { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 },
    // FIXME: adr.n immediate offset must be multiple of 4.
    //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
    { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
    { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 },
    { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
    { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
    // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
    // likely to cause issue in the loop. As a size / performance workaround,
    // they are not marked as such.
    { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0,0 },
    { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1,0 },
    // FIXME: Do we need the 16-bit 'S' variant?
    { ARM::t2MOVr, ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 },
    { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 },
    { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
    { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 },
    { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
    { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
    { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
    { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 },
    { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
    { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
    { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 },
    { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 },
    { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
    { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 },
    { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
    { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
    { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
    { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
    { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
    { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },

    // FIXME: Clean this up after splitting each Thumb load / store opcode
    // into multiple ones.
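    // For t2LDRi12 / t2STRi12, Narrow2 is the SP-relative form (tLDRspi /
    // tSTRspi); ReduceLoadStore selects it when the base register is SP.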
    { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
    { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
    { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
    { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
    { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
    { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
    { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
    { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
    { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
    { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
    { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
    { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
    { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
    { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },

    { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
    { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
    { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP, 0, 0, 1, 1, 1,1, 0,1,0 },
    // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
    { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
    { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
  };

  class Thumb2SizeReduce : public MachineFunctionPass {
  public:
    static char ID;
    Thumb2SizeReduce();

    const Thumb2InstrInfo *TII;
    const ARMSubtarget *STI;

    virtual bool runOnMachineFunction(MachineFunction &MF);

    virtual const char *getPassName() const {
      return "Thumb2 instruction size reduction pass";
    }

  private:
    /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
    DenseMap<unsigned, unsigned> ReduceOpcodeMap;

    bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
                             bool IsSelfLoop);

    bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                         bool is2Addr, ARMCC::CondCodes Pred,
                         bool LiveCPSR, bool &HasCC, bool &CCDead);

    bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                         const ReduceEntry &Entry);

    bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                       const ReduceEntry &Entry, bool LiveCPSR,
                       MachineInstr *CPSRDef, bool IsSelfLoop);

    /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
    /// instruction.
    bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                       const ReduceEntry &Entry,
                       bool LiveCPSR, MachineInstr *CPSRDef,
                       bool IsSelfLoop);

    /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
    /// non-two-address instruction.
    bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                        const ReduceEntry &Entry,
                        bool LiveCPSR, MachineInstr *CPSRDef,
                        bool IsSelfLoop);

    /// ReduceMI - Attempt to reduce MI, return true on success.
    bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
                  bool LiveCPSR, MachineInstr *CPSRDef,
                  bool IsSelfLoop);

    /// ReduceMBB - Reduce width of instructions in the specified basic block.
    bool ReduceMBB(MachineBasicBlock &MBB);

    bool OptimizeSize;
    bool MinimizeSize;
  };
  char Thumb2SizeReduce::ID = 0;
}

Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
  OptimizeSize = MinimizeSize = false;
  for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
    unsigned FromOpc = ReduceTable[i].WideOpc;
    if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
      assert(false && "Duplicated entries?");
  }
}

static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
  for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
    if (*Regs == ARM::CPSR)
      return true;
  return false;
}

/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
/// the 's' 16-bit instructions partially update CPSR. Abort the
/// transformation to avoid adding a false dependency on the last CPSR-setting
/// instruction, which hurts the out-of-order execution engine's ability
/// to do register renaming magic.
/// This function checks if there is a read-after-write dependency between the
/// last instruction that defines the CPSR and the current instruction. If
/// there is, then there is no harm done since the instruction cannot be
/// retired before the CPSR-setting instruction anyway.
/// Note, we are not doing full dependency analysis here for the sake of
/// compile time. We're not looking for cases like:
///   r0 = muls ...
///   r1 = add.w r0, ...
///   ...
///      = mul.w r1
/// In this case it would have been ok to narrow the mul.w to muls since there
/// is an indirect RAW dependency between the muls and the mul.w.
bool
Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
                                      bool FirstInSelfLoop) {
  // Disable the check for -Oz (aka OptimizeForSizeHarder).
  if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
    return false;

  if (!Def)
    // If this BB loops back to itself, conservatively avoid narrowing the
    // first instruction that does partial flag update.
    return FirstInSelfLoop;

  SmallSet<unsigned, 2> Defs;
  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = Def->getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    Defs.insert(Reg);
  }

  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = Use->getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (Defs.count(Reg))
      return false;
  }

  // No read-after-write dependency. The narrowing will add false dependency.
  return true;
}

bool
Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                                  bool is2Addr, ARMCC::CondCodes Pred,
                                  bool LiveCPSR, bool &HasCC, bool &CCDead) {
  if ((is2Addr  && Entry.PredCC2 == 0) ||
      (!is2Addr && Entry.PredCC1 == 0)) {
    if (Pred == ARMCC::AL) {
      // Not predicated, must set CPSR.
      if (!HasCC) {
        // Original instruction was not setting CPSR, but CPSR is not
        // currently live anyway. It's ok to set it. The CPSR def is
        // dead though.
        if (!LiveCPSR) {
          HasCC = true;
          CCDead = true;
          return true;
        }
        return false;
      }
    } else {
      // Predicated, must not set CPSR.
      if (HasCC)
        return false;
    }
  } else if ((is2Addr  && Entry.PredCC2 == 2) ||
             (!is2Addr && Entry.PredCC1 == 2)) {
    // The old opcode has an optional def of CPSR.
    if (HasCC)
      return true;
    // If the old opcode does not implicitly define CPSR, then it's not ok
    // since the new opcode's CPSR def is not meant to be thrown away,
    // e.g. CMP.
    if (!HasImplicitCPSRDef(MI->getDesc()))
      return false;
    HasCC = true;
  } else {
    // 16-bit instruction does not set CPSR.
    if (HasCC)
      return false;
  }

  return true;
}

static bool VerifyLowRegs(MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
  bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA ||
                 Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD ||
                 Opc == ARM::t2LDMDB_UPD);
  bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
  bool isSPOk = isPCOk || isLROk;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    if (isPCOk && Reg == ARM::PC)
      continue;
    if (isLROk && Reg == ARM::LR)
      continue;
    if (Reg == ARM::SP) {
      if (isSPOk)
        continue;
      if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
        // Special case for these ldr / str with sp as base register.
        continue;
    }
    if (!isARMLowRegister(Reg))
      return false;
  }
  return true;
}

bool
Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry) {
  if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
    return false;

  unsigned Scale = 1;
  bool HasImmOffset = false;
  bool HasShift = false;
  bool HasOffReg = true;
  bool isLdStMul = false;
  unsigned Opc = Entry.NarrowOpc1;
  unsigned OpNum = 3; // First 'rest' of operands.
  uint8_t ImmLimit = Entry.Imm1Limit;

  switch (Entry.WideOpc) {
  default:
    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
  case ARM::t2LDRi12:
  case ARM::t2STRi12:
    if (MI->getOperand(1).getReg() == ARM::SP) {
      Opc = Entry.NarrowOpc2;
      ImmLimit = Entry.Imm2Limit;
      HasOffReg = false;
    }

    Scale = 4;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRBi12:
  case ARM::t2STRBi12:
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRHi12:
  case ARM::t2STRHi12:
    Scale = 2;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRs:
  case ARM::t2LDRBs:
  case ARM::t2LDRHs:
  case ARM::t2LDRSBs:
  case ARM::t2LDRSHs:
  case ARM::t2STRs:
  case ARM::t2STRBs:
  case ARM::t2STRHs:
    HasShift = true;
    OpNum = 4;
    break;
  case ARM::t2LDMIA:
  case ARM::t2LDMDB: {
    unsigned BaseReg = MI->getOperand(0).getReg();
    if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA)
      return false;

    // For the non-writeback version (this one), the base register must be
    // one of the registers being loaded.
    bool isOK = false;
    for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
      if (MI->getOperand(i).getReg() == BaseReg) {
        isOK = true;
        break;
      }
    }

    if (!isOK)
      return false;

    OpNum = 0;
    isLdStMul = true;
    break;
  }
  case ARM::t2LDMIA_RET: {
    unsigned BaseReg = MI->getOperand(1).getReg();
    if (BaseReg != ARM::SP)
      return false;
    Opc = Entry.NarrowOpc2; // tPOP_RET
    OpNum = 2;
    isLdStMul = true;
    break;
  }
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    OpNum = 0;

    unsigned BaseReg = MI->getOperand(1).getReg();
    if (BaseReg == ARM::SP &&
        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
         Entry.WideOpc == ARM::t2STMDB_UPD)) {
      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
      OpNum = 2;
    } else if (!isARMLowRegister(BaseReg) ||
               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
                Entry.WideOpc != ARM::t2STMIA_UPD)) {
      return false;
    }

    isLdStMul = true;
    break;
  }
  }

  unsigned OffsetReg = 0;
  bool OffsetKill = false;
  if (HasShift) {
    OffsetReg  = MI->getOperand(2).getReg();
    OffsetKill = MI->getOperand(2).isKill();

    if (MI->getOperand(3).getImm())
      // Thumb1 addressing mode doesn't support shift.
      return false;
  }

  unsigned OffsetImm = 0;
  if (HasImmOffset) {
    OffsetImm = MI->getOperand(2).getImm();
    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;

    if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
      // Make sure the immediate field fits.
      return false;
  }

  // Add the 16-bit load / store instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
  if (!isLdStMul) {
    MIB.addOperand(MI->getOperand(0));
    MIB.addOperand(MI->getOperand(1));

    if (HasImmOffset)
      MIB.addImm(OffsetImm / Scale);

    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");

    if (HasOffReg)
      MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
  }

  // Transfer the rest of operands.
  for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumLdSts;
  return true;
}

bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, MachineInstr *CPSRDef,
                                bool IsSelfLoop) {
  unsigned Opc = MI->getOpcode();
  if (Opc == ARM::t2ADDri) {
    // If the source register is SP, try to reduce to tADDrSPi, otherwise
    // it's a normal reduce.
    if (MI->getOperand(1).getReg() != ARM::SP) {
      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
        return true;
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
    }
    // Try to reduce to tADDrSPi.
    unsigned Imm = MI->getOperand(2).getImm();
    // The immediate must be in range, the destination register must be a low
    // reg, the predicate must be "always", and the instruction must not set
    // the condition flags.
    if (Imm & 3 || Imm > 1020)
      return false;
    if (!isARMLowRegister(MI->getOperand(0).getReg()))
      return false;
    if (MI->getOperand(3).getImm() != ARMCC::AL)
      return false;
    const MCInstrDesc &MCID = MI->getDesc();
    if (MCID.hasOptionalDef() &&
        MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
      return false;

    MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
                                      TII->get(ARM::tADDrSPi))
      .addOperand(MI->getOperand(0))
      .addOperand(MI->getOperand(1))
      .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
    AddDefaultPred(MIB);

    // Transfer MI flags.
    MIB.setMIFlags(MI->getFlags());

    DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);

    MBB.erase_instr(MI);
    ++NumNarrows;
    return true;
  }

  if (Entry.LowRegs1 && !VerifyLowRegs(MI))
    return false;

  if (MI->mayLoad() || MI->mayStore())
    return ReduceLoadStore(MBB, MI, Entry);

  switch (Opc) {
  default: break;
  case ARM::t2ADDSri:
  case ARM::t2ADDSrr: {
    unsigned PredReg = 0;
    if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
      switch (Opc) {
      default: break;
      case ARM::t2ADDSri: {
        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
          return true;
        // fallthrough
      }
      case ARM::t2ADDSrr:
        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
      }
    }
    break;
  }
  case ARM::t2RSBri:
  case ARM::t2RSBSri:
  case ARM::t2SXTB:
  case ARM::t2SXTH:
  case ARM::t2UXTB:
  case ARM::t2UXTH:
    if (MI->getOperand(2).getImm() == 0)
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
    break;
  case ARM::t2MOVi16:
    // Can convert only 'pure' immediate operands, not immediates obtained as
    // globals' addresses.
    if (MI->getOperand(1).isImm())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
    break;
  case ARM::t2CMPrr: {
    // Try to reduce to the lo-reg only version first. Why there are two
    // versions of the instruction is a mystery.
    // It would be nice to just have two entries in the master table that
    // are prioritized, but the table assumes a unique entry for each
    // source insn opcode. So for now, we hack a local entry record to use.
    static const ReduceEntry NarrowEntry =
      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
      return true;
    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
  }
  }
  return false;
}

bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, MachineInstr *CPSRDef,
                                bool IsSelfLoop) {

  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
    return false;

  if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
      STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing / minimizing for size.
    return false;

  unsigned Reg0 = MI->getOperand(0).getReg();
  unsigned Reg1 = MI->getOperand(1).getReg();
  // t2MUL is "special". The tied source operand is second, not first.
  if (MI->getOpcode() == ARM::t2MUL) {
    unsigned Reg2 = MI->getOperand(2).getReg();
    // Early exit if the regs aren't all low regs.
    if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
        || !isARMLowRegister(Reg2))
      return false;
    if (Reg0 != Reg2) {
      // If the other operand also isn't the same as the destination, we
      // can't reduce.
      if (Reg1 != Reg0)
        return false;
      // Try to commute the operands to make it a 2-address instruction.
      MachineInstr *CommutedMI = TII->commuteInstruction(MI);
      if (!CommutedMI)
        return false;
    }
  } else if (Reg0 != Reg1) {
    // Try to commute the operands to make it a 2-address instruction.
    unsigned CommOpIdx1, CommOpIdx2;
    if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
        CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
      return false;
    MachineInstr *CommutedMI = TII->commuteInstruction(MI);
    if (!CommutedMI)
      return false;
  }
  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
    return false;
  if (Entry.Imm2Limit) {
    unsigned Imm = MI->getOperand(2).getImm();
    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
    if (Imm > Limit)
      return false;
  } else {
    unsigned Reg2 = MI->getOperand(2).getReg();
    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
      return false;
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on the partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.addOperand(MI->getOperand(0));
  if (NewMCID.hasOptionalDef()) {
    if (HasCC)
      AddDefaultT1CC(MIB, CCDead);
    else
      AddNoT1CC(MIB);
  }

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
      continue;
    if (SkipPred && MCID.OpInfo[i].isPredicate())
      continue;
    MIB.addOperand(MI->getOperand(i));
  }

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++Num2Addrs;
  return true;
}

bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
                                 bool LiveCPSR, MachineInstr *CPSRDef,
                                 bool IsSelfLoop) {
  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
    return false;

  if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
      STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing / minimizing for size.
    return false;

  unsigned Limit = ~0U;
  if (Entry.Imm1Limit)
    Limit = (1 << Entry.Imm1Limit) - 1;

  const MCInstrDesc &MCID = MI->getDesc();
  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
    if (MCID.OpInfo[i].isPredicate())
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg()) {
      unsigned Reg = MO.getReg();
      if (!Reg || Reg == ARM::CPSR)
        continue;
      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
        return false;
    } else if (MO.isImm() &&
               !MCID.OpInfo[i].isPredicate()) {
      if (((unsigned)MO.getImm()) > Limit)
        return false;
    }
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on the partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.addOperand(MI->getOperand(0));
  if (NewMCID.hasOptionalDef()) {
    if (HasCC)
      AddDefaultT1CC(MIB, CCDead);
    else
      AddNoT1CC(MIB);
  }

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
      continue;
    if ((MCID.getOpcode() == ARM::t2RSBSri ||
         MCID.getOpcode() == ARM::t2RSBri ||
         MCID.getOpcode() == ARM::t2SXTB ||
         MCID.getOpcode() == ARM::t2SXTH ||
         MCID.getOpcode() == ARM::t2UXTB ||
         MCID.getOpcode() == ARM::t2UXTH) && i == 2)
      // Skip the zero immediate operand, it's now implicit.
      continue;
    bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
    if (SkipPred && isPred)
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
      // Skip implicit def of CPSR. Either it's modeled as an optional
      // def now or it's already an implicit def on the new instruction.
      continue;
    MIB.addOperand(MO);
  }
  if (!MCID.isPredicable() && NewMCID.isPredicable())
    AddDefaultPred(MIB);

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumNarrows;
  return true;
}

static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
  bool HasDef = false;
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;

    DefCPSR = true;
    if (!MO.isDead())
      HasDef = true;
  }

  return HasDef || LiveCPSR;
}

static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
    if (MO.isKill()) {
      LiveCPSR = false;
      break;
    }
  }

  return LiveCPSR;
}

bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
                                bool LiveCPSR, MachineInstr *CPSRDef,
                                bool IsSelfLoop) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
  if (OPI == ReduceOpcodeMap.end())
    return false;
  const ReduceEntry &Entry = ReduceTable[OPI->second];

  // Don't attempt normal reductions on "special" cases for now.
  if (Entry.Special)
    return ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);

  // Try to transform to a 16-bit two-address instruction.
  if (Entry.NarrowOpc2 &&
      ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
    return true;

  // Try to transform to a 16-bit non-two-address instruction.
  if (Entry.NarrowOpc1 &&
      ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
    return true;

  return false;
}

bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *CPSRDef = 0;
  MachineInstr *BundleMI = 0;

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = llvm::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    // Does NextMII belong to the same bundle as MI?
    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();

    if (ReduceMI(MBB, MI, LiveCPSR, CPSRDef, IsSelfLoop)) {
      Modified = true;
      MachineBasicBlock::instr_iterator I = prior(NextMII);
      MI = &*I;
      // Removing and reinserting the first instruction in a bundle will break
      // up the bundle. Fix the bundling if it was broken.
      if (NextInSameBundle && !NextMII->isBundledWithPred())
        NextMII->bundleWithPred();
    }

    if (!NextInSameBundle && MI->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR))
        LiveCPSR = false;
      MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
      if (MO && !MO->isDead())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = 0;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      IsSelfLoop = false;
    }
  }

  return Modified;
}

bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
  const TargetMachine &TM = MF.getTarget();
  TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
  STI = &TM.getSubtarget<ARMSubtarget>();

  // Optimizing / minimizing size?
  AttributeSet FnAttrs = MF.getFunction()->getAttributes();
  OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
                                      Attribute::OptimizeForSize);
  MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
                                      Attribute::MinSize);

  bool Modified = false;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
    Modified |= ReduceMBB(*I);
  return Modified;
}

/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
/// reduction pass.
FunctionPass *llvm::createThumb2SizeReductionPass() {
  return new Thumb2SizeReduce();
}