//===--- HexagonStoreWidening.cpp------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Replace sequences of "narrow" stores to adjacent memory locations with
// fewer "wide" stores that have the same effect.
// For example, replace:
//   S4_storeirb_io  %vreg100, 0, 0   ; store-immediate-byte
//   S4_storeirb_io  %vreg100, 1, 0   ; store-immediate-byte
// with
//   S4_storeirh_io  %vreg100, 0, 0   ; store-immediate-halfword
// The above is the general idea.  The actual cases handled by the code
// may be a bit more complex.
// The purpose of this pass is to reduce the number of outstanding stores,
// or as one could say, "reduce store queue pressure".  Also, wide stores
// mean fewer stores, and since there are only two memory instructions allowed
// per packet, it also means fewer packets, and ultimately fewer cycles.
22 //===---------------------------------------------------------------------===// 23 24 #define DEBUG_TYPE "hexagon-widen-stores" 25 26 #include "HexagonTargetMachine.h" 27 28 #include "llvm/PassSupport.h" 29 #include "llvm/Analysis/AliasAnalysis.h" 30 #include "llvm/CodeGen/Passes.h" 31 #include "llvm/CodeGen/MachineFunction.h" 32 #include "llvm/CodeGen/MachineFunctionPass.h" 33 #include "llvm/CodeGen/MachineInstrBuilder.h" 34 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/MC/MCInstrDesc.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "llvm/Target/TargetMachine.h" 39 #include "llvm/Target/TargetRegisterInfo.h" 40 #include "llvm/Target/TargetInstrInfo.h" 41 42 #include <algorithm> 43 44 45 using namespace llvm; 46 47 namespace llvm { 48 FunctionPass *createHexagonStoreWidening(); 49 void initializeHexagonStoreWideningPass(PassRegistry&); 50 } 51 52 namespace { 53 struct HexagonStoreWidening : public MachineFunctionPass { 54 const HexagonInstrInfo *TII; 55 const HexagonRegisterInfo *TRI; 56 const MachineRegisterInfo *MRI; 57 AliasAnalysis *AA; 58 MachineFunction *MF; 59 60 public: 61 static char ID; 62 HexagonStoreWidening() : MachineFunctionPass(ID) { 63 initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry()); 64 } 65 66 bool runOnMachineFunction(MachineFunction &MF) override; 67 68 const char *getPassName() const override { 69 return "Hexagon Store Widening"; 70 } 71 72 void getAnalysisUsage(AnalysisUsage &AU) const override { 73 AU.addRequired<AAResultsWrapperPass>(); 74 AU.addPreserved<AAResultsWrapperPass>(); 75 MachineFunctionPass::getAnalysisUsage(AU); 76 } 77 78 static bool handledStoreType(const MachineInstr *MI); 79 80 private: 81 static const int MaxWideSize = 4; 82 83 typedef std::vector<MachineInstr*> InstrGroup; 84 typedef std::vector<InstrGroup> InstrGroupList; 85 86 bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO); 87 bool instrAliased(InstrGroup &Stores, 
const MachineInstr *MI); 88 void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin, 89 InstrGroup::iterator End, InstrGroup &Group); 90 void createStoreGroups(MachineBasicBlock &MBB, 91 InstrGroupList &StoreGroups); 92 bool processBasicBlock(MachineBasicBlock &MBB); 93 bool processStoreGroup(InstrGroup &Group); 94 bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End, 95 InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize); 96 bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); 97 bool replaceStores(InstrGroup &OG, InstrGroup &NG); 98 bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2); 99 }; 100 101 } // namespace 102 103 104 namespace { 105 106 // Some local helper functions... 107 unsigned getBaseAddressRegister(const MachineInstr *MI) { 108 const MachineOperand &MO = MI->getOperand(0); 109 assert(MO.isReg() && "Expecting register operand"); 110 return MO.getReg(); 111 } 112 113 int64_t getStoreOffset(const MachineInstr *MI) { 114 unsigned OpC = MI->getOpcode(); 115 assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode"); 116 117 switch (OpC) { 118 case Hexagon::S4_storeirb_io: 119 case Hexagon::S4_storeirh_io: 120 case Hexagon::S4_storeiri_io: { 121 const MachineOperand &MO = MI->getOperand(1); 122 assert(MO.isImm() && "Expecting immediate offset"); 123 return MO.getImm(); 124 } 125 } 126 dbgs() << *MI; 127 llvm_unreachable("Store offset calculation missing for a handled opcode"); 128 return 0; 129 } 130 131 const MachineMemOperand &getStoreTarget(const MachineInstr *MI) { 132 assert(!MI->memoperands_empty() && "Expecting memory operands"); 133 return **MI->memoperands_begin(); 134 } 135 136 } // namespace 137 138 139 char HexagonStoreWidening::ID = 0; 140 141 INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores", 142 "Hexason Store Widening", false, false) 143 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 144 
INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores", 145 "Hexagon Store Widening", false, false) 146 147 148 // Filtering function: any stores whose opcodes are not "approved" of by 149 // this function will not be subjected to widening. 150 inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) { 151 // For now, only handle stores of immediate values. 152 // Also, reject stores to stack slots. 153 unsigned Opc = MI->getOpcode(); 154 switch (Opc) { 155 case Hexagon::S4_storeirb_io: 156 case Hexagon::S4_storeirh_io: 157 case Hexagon::S4_storeiri_io: 158 // Base address must be a register. (Implement FI later.) 159 return MI->getOperand(0).isReg(); 160 default: 161 return false; 162 } 163 } 164 165 166 // Check if the machine memory operand MMO is aliased with any of the 167 // stores in the store group Stores. 168 bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, 169 const MachineMemOperand &MMO) { 170 if (!MMO.getValue()) 171 return true; 172 173 MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo()); 174 175 for (auto SI : Stores) { 176 const MachineMemOperand &SMO = getStoreTarget(SI); 177 if (!SMO.getValue()) 178 return true; 179 180 MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo()); 181 if (AA->alias(L, SL)) 182 return true; 183 } 184 185 return false; 186 } 187 188 189 // Check if the machine instruction MI accesses any storage aliased with 190 // any store in the group Stores. 191 bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, 192 const MachineInstr *MI) { 193 for (auto &I : MI->memoperands()) 194 if (instrAliased(Stores, *I)) 195 return true; 196 return false; 197 } 198 199 200 // Inspect a machine basic block, and generate store groups out of stores 201 // encountered in the block. 202 // 203 // A store group is a group of stores that use the same base register, 204 // and which can be reordered within that group without altering the 205 // semantics of the program. 
A single store group could be widened as 206 // a whole, if there existed a single store instruction with the same 207 // semantics as the entire group. In many cases, a single store group 208 // may need more than one wide store. 209 void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB, 210 InstrGroupList &StoreGroups) { 211 InstrGroup AllInsns; 212 213 // Copy all instruction pointers from the basic block to a temporary 214 // list. This will allow operating on the list, and modifying its 215 // elements without affecting the basic block. 216 for (auto &I : MBB) 217 AllInsns.push_back(&I); 218 219 // Traverse all instructions in the AllInsns list, and if we encounter 220 // a store, then try to create a store group starting at that instruction 221 // i.e. a sequence of independent stores that can be widened. 222 for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) { 223 MachineInstr *MI = *I; 224 // Skip null pointers (processed instructions). 225 if (!MI || !handledStoreType(MI)) 226 continue; 227 228 // Found a store. Try to create a store group. 229 InstrGroup G; 230 createStoreGroup(MI, I+1, E, G); 231 if (G.size() > 1) 232 StoreGroups.push_back(G); 233 } 234 } 235 236 237 // Create a single store group. The stores need to be independent between 238 // themselves, and also there cannot be other instructions between them 239 // that could read or modify storage being stored into. 
240 void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore, 241 InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) { 242 assert(handledStoreType(BaseStore) && "Unexpected instruction"); 243 unsigned BaseReg = getBaseAddressRegister(BaseStore); 244 InstrGroup Other; 245 246 Group.push_back(BaseStore); 247 248 for (auto I = Begin; I != End; ++I) { 249 MachineInstr *MI = *I; 250 if (!MI) 251 continue; 252 253 if (handledStoreType(MI)) { 254 // If this store instruction is aliased with anything already in the 255 // group, terminate the group now. 256 if (instrAliased(Group, getStoreTarget(MI))) 257 return; 258 // If this store is aliased to any of the memory instructions we have 259 // seen so far (that are not a part of this group), terminate the group. 260 if (instrAliased(Other, getStoreTarget(MI))) 261 return; 262 263 unsigned BR = getBaseAddressRegister(MI); 264 if (BR == BaseReg) { 265 Group.push_back(MI); 266 *I = 0; 267 continue; 268 } 269 } 270 271 // Assume calls are aliased to everything. 272 if (MI->isCall() || MI->hasUnmodeledSideEffects()) 273 return; 274 275 if (MI->mayLoad() || MI->mayStore()) { 276 if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI)) 277 return; 278 Other.push_back(MI); 279 } 280 } // for 281 } 282 283 284 // Check if store instructions S1 and S2 are adjacent. More precisely, 285 // S2 has to access memory immediately following that accessed by S1. 286 bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1, 287 const MachineInstr *S2) { 288 if (!handledStoreType(S1) || !handledStoreType(S2)) 289 return false; 290 291 const MachineMemOperand &S1MO = getStoreTarget(S1); 292 293 // Currently only handling immediate stores. 294 int Off1 = S1->getOperand(1).getImm(); 295 int Off2 = S2->getOperand(1).getImm(); 296 297 return (Off1 >= 0) ? 
Off1+S1MO.getSize() == unsigned(Off2) 298 : int(Off1+S1MO.getSize()) == Off2; 299 } 300 301 302 /// Given a sequence of adjacent stores, and a maximum size of a single wide 303 /// store, pick a group of stores that can be replaced by a single store 304 /// of size not exceeding MaxSize. The selected sequence will be recorded 305 /// in OG ("old group" of instructions). 306 /// OG should be empty on entry, and should be left empty if the function 307 /// fails. 308 bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin, 309 InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize, 310 unsigned MaxSize) { 311 assert(Begin != End && "No instructions to analyze"); 312 assert(OG.empty() && "Old group not empty on entry"); 313 314 if (std::distance(Begin, End) <= 1) 315 return false; 316 317 MachineInstr *FirstMI = *Begin; 318 assert(!FirstMI->memoperands_empty() && "Expecting some memory operands"); 319 const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI); 320 unsigned Alignment = FirstMMO.getAlignment(); 321 unsigned SizeAccum = FirstMMO.getSize(); 322 unsigned FirstOffset = getStoreOffset(FirstMI); 323 324 // The initial value of SizeAccum should always be a power of 2. 325 assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2"); 326 327 // If the size of the first store equals to or exceeds the limit, do nothing. 328 if (SizeAccum >= MaxSize) 329 return false; 330 331 // If the size of the first store is greater than or equal to the address 332 // stored to, then the store cannot be made any wider. 333 if (SizeAccum >= Alignment) 334 return false; 335 336 // The offset of a store will put restrictions on how wide the store can be. 337 // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0. 338 // If the first store already exhausts the offset limits, quit. Test this 339 // by checking if the next wider size would exceed the limit. 
340 if ((2*SizeAccum-1) & FirstOffset) 341 return false; 342 343 OG.push_back(FirstMI); 344 MachineInstr *S1 = FirstMI, *S2 = *(Begin+1); 345 InstrGroup::iterator I = Begin+1; 346 347 // Pow2Num will be the largest number of elements in OG such that the sum 348 // of sizes of stores 0...Pow2Num-1 will be a power of 2. 349 unsigned Pow2Num = 1; 350 unsigned Pow2Size = SizeAccum; 351 352 // Be greedy: keep accumulating stores as long as they are to adjacent 353 // memory locations, and as long as the total number of bytes stored 354 // does not exceed the limit (MaxSize). 355 // Keep track of when the total size covered is a power of 2, since 356 // this is a size a single store can cover. 357 while (I != End) { 358 S2 = *I; 359 // Stores are sorted, so if S1 and S2 are not adjacent, there won't be 360 // any other store to fill the "hole". 361 if (!storesAreAdjacent(S1, S2)) 362 break; 363 364 unsigned S2Size = getStoreTarget(S2).getSize(); 365 if (SizeAccum + S2Size > std::min(MaxSize, Alignment)) 366 break; 367 368 OG.push_back(S2); 369 SizeAccum += S2Size; 370 if (isPowerOf2_32(SizeAccum)) { 371 Pow2Num = OG.size(); 372 Pow2Size = SizeAccum; 373 } 374 if ((2*Pow2Size-1) & FirstOffset) 375 break; 376 377 S1 = S2; 378 ++I; 379 } 380 381 // The stores don't add up to anything that can be widened. Clean up. 382 if (Pow2Num <= 1) { 383 OG.clear(); 384 return false; 385 } 386 387 // Only leave the stored being widened. 388 OG.resize(Pow2Num); 389 TotalSize = Pow2Size; 390 return true; 391 } 392 393 394 /// Given an "old group" OG of stores, create a "new group" NG of instructions 395 /// to replace them. Ideally, NG would only have a single instruction in it, 396 /// but that may only be possible for store-immediate. 397 bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, 398 unsigned TotalSize) { 399 // XXX Current limitations: 400 // - only expect stores of immediate values in OG, 401 // - only handle a TotalSize of up to 4. 
402 403 if (TotalSize > 4) 404 return false; 405 406 unsigned Acc = 0; // Value accumulator. 407 unsigned Shift = 0; 408 409 for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) { 410 MachineInstr *MI = *I; 411 const MachineMemOperand &MMO = getStoreTarget(MI); 412 MachineOperand &SO = MI->getOperand(2); // Source. 413 assert(SO.isImm() && "Expecting an immediate operand"); 414 415 unsigned NBits = MMO.getSize()*8; 416 unsigned Mask = (0xFFFFFFFFU >> (32-NBits)); 417 unsigned Val = (SO.getImm() & Mask) << Shift; 418 Acc |= Val; 419 Shift += NBits; 420 } 421 422 423 MachineInstr *FirstSt = OG.front(); 424 DebugLoc DL = OG.back()->getDebugLoc(); 425 const MachineMemOperand &OldM = getStoreTarget(FirstSt); 426 MachineMemOperand *NewM = 427 MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), 428 TotalSize, OldM.getAlignment(), 429 OldM.getAAInfo()); 430 431 if (Acc < 0x10000) { 432 // Create mem[hw] = #Acc 433 unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io : 434 (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0; 435 assert(WOpc && "Unexpected size"); 436 437 int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc); 438 const MCInstrDesc &StD = TII->get(WOpc); 439 MachineOperand &MR = FirstSt->getOperand(0); 440 int64_t Off = FirstSt->getOperand(1).getImm(); 441 MachineInstr *StI = BuildMI(*MF, DL, StD) 442 .addReg(MR.getReg(), getKillRegState(MR.isKill())) 443 .addImm(Off) 444 .addImm(Val); 445 StI->addMemOperand(*MF, NewM); 446 NG.push_back(StI); 447 } else { 448 // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg 449 const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); 450 const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); 451 unsigned VReg = MF->getRegInfo().createVirtualRegister(RC); 452 MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg) 453 .addImm(int(Acc)); 454 NG.push_back(TfrI); 455 456 unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io : 457 (TotalSize == 4) ? 
Hexagon::S2_storeri_io : 0; 458 assert(WOpc && "Unexpected size"); 459 460 const MCInstrDesc &StD = TII->get(WOpc); 461 MachineOperand &MR = FirstSt->getOperand(0); 462 int64_t Off = FirstSt->getOperand(1).getImm(); 463 MachineInstr *StI = BuildMI(*MF, DL, StD) 464 .addReg(MR.getReg(), getKillRegState(MR.isKill())) 465 .addImm(Off) 466 .addReg(VReg, RegState::Kill); 467 StI->addMemOperand(*MF, NewM); 468 NG.push_back(StI); 469 } 470 471 return true; 472 } 473 474 475 // Replace instructions from the old group OG with instructions from the 476 // new group NG. Conceptually, remove all instructions in OG, and then 477 // insert all instructions in NG, starting at where the first instruction 478 // from OG was (in the order in which they appeared in the basic block). 479 // (The ordering in OG does not have to match the order in the basic block.) 480 bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) { 481 DEBUG({ 482 dbgs() << "Replacing:\n"; 483 for (auto I : OG) 484 dbgs() << " " << *I; 485 dbgs() << "with\n"; 486 for (auto I : NG) 487 dbgs() << " " << *I; 488 }); 489 490 MachineBasicBlock *MBB = OG.back()->getParent(); 491 MachineBasicBlock::iterator InsertAt = MBB->end(); 492 493 // Need to establish the insertion point. The best one is right before 494 // the first store in the OG, but in the order in which the stores occur 495 // in the program list. Since the ordering in OG does not correspond 496 // to the order in the program list, we need to do some work to find 497 // the insertion point. 498 499 // Create a set of all instructions in OG (for quick lookup). 500 SmallPtrSet<MachineInstr*, 4> InstrSet; 501 for (auto I : OG) 502 InstrSet.insert(I); 503 504 // Traverse the block, until we hit an instruction from OG. 
505 for (auto &I : *MBB) { 506 if (InstrSet.count(&I)) { 507 InsertAt = I; 508 break; 509 } 510 } 511 512 assert((InsertAt != MBB->end()) && "Cannot locate any store from the group"); 513 514 bool AtBBStart = false; 515 516 // InsertAt points at the first instruction that will be removed. We need 517 // to move it out of the way, so it remains valid after removing all the 518 // old stores, and so we are able to recover it back to the proper insertion 519 // position. 520 if (InsertAt != MBB->begin()) 521 --InsertAt; 522 else 523 AtBBStart = true; 524 525 for (auto I : OG) 526 I->eraseFromParent(); 527 528 if (!AtBBStart) 529 ++InsertAt; 530 else 531 InsertAt = MBB->begin(); 532 533 for (auto I : NG) 534 MBB->insert(InsertAt, I); 535 536 return true; 537 } 538 539 540 // Break up the group into smaller groups, each of which can be replaced by 541 // a single wide store. Widen each such smaller group and replace the old 542 // instructions with the widened ones. 543 bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) { 544 bool Changed = false; 545 InstrGroup::iterator I = Group.begin(), E = Group.end(); 546 InstrGroup OG, NG; // Old and new groups. 547 unsigned CollectedSize; 548 549 while (I != E) { 550 OG.clear(); 551 NG.clear(); 552 553 bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) && 554 createWideStores(OG, NG, CollectedSize) && 555 replaceStores(OG, NG); 556 if (!Succ) 557 continue; 558 559 assert(OG.size() > 1 && "Created invalid group"); 560 assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements"); 561 I += OG.size()-1; 562 563 Changed = true; 564 } 565 566 return Changed; 567 } 568 569 570 // Process a single basic block: create the store groups, and replace them 571 // with the widened stores, if possible. Processing of each basic block 572 // is independent from processing of any other basic block. 
This transfor- 573 // mation could be stopped after having processed any basic block without 574 // any ill effects (other than not having performed widening in the unpro- 575 // cessed blocks). Also, the basic blocks can be processed in any order. 576 bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { 577 InstrGroupList SGs; 578 bool Changed = false; 579 580 createStoreGroups(MBB, SGs); 581 582 auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool { 583 return getStoreOffset(A) < getStoreOffset(B); 584 }; 585 for (auto &G : SGs) { 586 assert(G.size() > 1 && "Store group with fewer than 2 elements"); 587 std::sort(G.begin(), G.end(), Less); 588 589 Changed |= processStoreGroup(G); 590 } 591 592 return Changed; 593 } 594 595 596 bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) { 597 if (skipFunction(*MFn.getFunction())) 598 return false; 599 600 MF = &MFn; 601 auto &ST = MFn.getSubtarget<HexagonSubtarget>(); 602 TII = ST.getInstrInfo(); 603 TRI = ST.getRegisterInfo(); 604 MRI = &MFn.getRegInfo(); 605 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 606 607 bool Changed = false; 608 609 for (auto &B : MFn) 610 Changed |= processBasicBlock(B); 611 612 return Changed; 613 } 614 615 616 FunctionPass *llvm::createHexagonStoreWidening() { 617 return new HexagonStoreWidening(); 618 } 619 620